google_play_scraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +9 -0
- data/lib/google_play_scraper.rb +9 -0
- data/lib/google_play_scraper/app.rb +3 -0
- data/lib/google_play_scraper/parser.rb +67 -0
- data/lib/google_play_scraper/search.rb +43 -0
- data/lib/google_play_scraper/search_options.rb +9 -0
- data/lib/google_play_scraper/url.rb +3 -0
- data/lib/google_play_scraper/version.rb +3 -0
- data/spec/google_play_scraper/search_spec.rb +29 -0
- data/spec/spec_helper.rb +7 -0
- metadata +106 -0
data/Rakefile
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Converts the raw HTML of a Google Play search-results page into
# GooglePlayScraper::App objects, one per element matching `.card`.
#
# Every extract_* method returns nil (instead of raising NoMethodError) when
# the markup it looks for is missing from a card, so that one unexpected card
# does not abort parsing of the whole page.
class GooglePlayScraper::Parser

  # Selector, relative to a card, for the cover <img>. The large and small
  # logo URLs are read from that element's data-cover-* attributes.
  LOGO_CSS_SELECTOR = '.card-content .cover .cover-image-container .cover-outer-align .cover-inner-align img'

  attr_reader :raw_html

  # @param raw_html [String] HTML body of a search-results page
  def initialize(raw_html)
    @raw_html = raw_html
  end

  # Parses raw_html and builds one app per `.card` element.
  #
  # @return [Array<GooglePlayScraper::App>] apps in document order; empty
  #   when the page contains no cards
  def results
    Nokogiri::HTML(raw_html).css('.card').map do |app_container|
      create_app(app_container)
    end
  end

  # Builds a single App from one card node by running every extractor on it.
  #
  # @param app_container [Nokogiri::XML::Node] a `.card` element
  # @return [GooglePlayScraper::App]
  def create_app(app_container)
    app = GooglePlayScraper::App.new
    app.logo_url       = extract_logo_url(app_container)
    app.logo_url_small = extract_logo_url_small(app_container)
    app.url            = extract_app_url(app_container)
    app.id             = extract_app_id(app_container)
    app.name           = extract_app_name(app_container)
    app.developer      = extract_developer(app_container)

    app
  end

  # @return [String, nil] text content of the card's subtitle element
  def extract_developer(app_container)
    span_tag = app_container.at_css('.card-content .details .subtitle-container .subtitle')
    span_tag && span_tag.content
  end

  # @return [String, nil] the `title` attribute of the card's title link
  def extract_app_name(app_container)
    first_attribute(app_container, '.card-content .details a.title', 'title')
  end

  # @return [String, nil] the `id` query parameter of the card's click-target
  #   link; nil when the link is missing or carries no query string
  def extract_app_id(app_container)
    href = first_attribute(app_container, '.card-content a.card-click-target', 'href')
    return nil unless href

    # query_values is nil when the href has no query component.
    query = Addressable::URI.parse(href).query_values
    query && query['id']
  end

  # @return [String, nil] GOOGLE_PLAY_BASE_URL followed by the href of the
  #   card's click-target link
  def extract_app_url(app_container)
    href = first_attribute(app_container, '.card-content a.card-click-target', 'href')
    href && (GooglePlayScraper::GOOGLE_PLAY_BASE_URL + href)
  end

  # @return [String, nil] the cover image's `data-cover-large` attribute
  def extract_logo_url(app_container)
    first_attribute(app_container, LOGO_CSS_SELECTOR, 'data-cover-large')
  end

  # @return [String, nil] the cover image's `data-cover-small` attribute
  def extract_logo_url_small(app_container)
    first_attribute(app_container, LOGO_CSS_SELECTOR, 'data-cover-small')
  end

  private

  # Reads `attribute` from the first node under `container` that matches
  # `selector`.
  #
  # @return [String, nil] nil when no node matches or the attribute is absent
  def first_attribute(container, selector, attribute)
    node = container.at_css(selector)
    node && node[attribute]
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'addressable/uri'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
# Runs a single search against the Google Play store search page and returns
# the parsed results.
class GooglePlayScraper::Search

  # Supplies the option-key constants used below (CATEGORY, LANGUAGE, ...).
  include GooglePlayScraper::SearchOptions

  attr_reader :query, :options

  GOOGLE_PLAY_SEARCH_URL = GooglePlayScraper::GOOGLE_PLAY_BASE_URL + "/store/search"

  # Query parameters sent with every search unless overridden by the caller.
  DEFAULT_OPTIONS = {
    CATEGORY          => 'apps',
    LANGUAGE          => 'en',
    APPLY_SORTING     => 1,  # 0 means no sorting
    SAFE_SEARCH       => 0,  # apply safesearch to results
    NUMBER_OF_RESULTS => 10  # number of results to display
  }

  # @param query [String] the search term; sent as the `q` query parameter
  # @param options [Hash] overrides merged on top of DEFAULT_OPTIONS
  def initialize(query, options = {})
    # Store the term so the declared `query` reader returns it rather than nil.
    @query = query
    @options = DEFAULT_OPTIONS.merge(options)
    @options['q'] = query
  end

  # Performs the HTTPS GET and hands the response body to the parser.
  #
  # @return [Array<GooglePlayScraper::App>] whatever Parser#results yields for
  #   the response body
  def run
    uri = build_uri
    http = Net::HTTP.new(uri.host, 443)
    http.use_ssl = true
    # Verify the server certificate. VERIFY_NONE would accept any certificate
    # and leave the request open to interception.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    request = Net::HTTP::Get.new(uri.request_uri)
    response = http.request(request)

    GooglePlayScraper::Parser.new(response.body).results
  end

  # @return [Addressable::URI] the search URL with `options` as its query
  #   string
  def build_uri
    uri = Addressable::URI.parse(GOOGLE_PLAY_SEARCH_URL)
    uri.query_values = options
    uri
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# End-to-end examples for GooglePlayScraper::Search. Each example calls
# Search#run and compares the first returned app against fixed expectations.
describe GooglePlayScraper::Search do
  context 'searching' do
    it 'should be able to search for angry birds' do
      results = GooglePlayScraper::Search.new('Angry Birds').run
      results.size.should == 10

      # All remaining expectations are about the first app in the list.
      top_hit = results.first
      top_hit.name.should == 'Angry Birds'
      top_hit.developer.should == 'Rovio Mobile Ltd.'
      top_hit.id.should == 'com.rovio.angrybirds'
      top_hit.logo_url.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w340'
      top_hit.logo_url_small.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w170'
      top_hit.url.should == 'https://play.google.com/store/apps/details?id=com.rovio.angrybirds'
    end

    it 'should be able to search for the sparq.me scanner' do
      results = GooglePlayScraper::Search.new('SPARQ.ME').run
      results.size.should == 1

      # A single result is expected, so the first app is the only one.
      top_hit = results.first
      top_hit.name.should == 'SPARQ.ME'
      top_hit.developer.should == 'MSKYNET, Inc.'
      top_hit.id.should == 'com.sparqcode.sparqeye.android'
      top_hit.logo_url.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w340'
      top_hit.logo_url_small.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w170'
      top_hit.url.should == 'https://play.google.com/store/apps/details?id=com.sparqcode.sparqeye.android'
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: google_play_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mike Emery
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.5.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.5.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: addressable
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 2.3.0
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.3.0
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 2.14.1
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.14.1
|
62
|
+
description: Uses Nokogiri to scrape app search results on the Google Play app search
|
63
|
+
results page. Returns results as Ruby objects
|
64
|
+
email:
|
65
|
+
- mike@sparq.it
|
66
|
+
executables: []
|
67
|
+
extensions: []
|
68
|
+
extra_rdoc_files: []
|
69
|
+
files:
|
70
|
+
- lib/google_play_scraper/app.rb
|
71
|
+
- lib/google_play_scraper/parser.rb
|
72
|
+
- lib/google_play_scraper/search.rb
|
73
|
+
- lib/google_play_scraper/search_options.rb
|
74
|
+
- lib/google_play_scraper/url.rb
|
75
|
+
- lib/google_play_scraper/version.rb
|
76
|
+
- lib/google_play_scraper.rb
|
77
|
+
- Rakefile
|
78
|
+
- spec/google_play_scraper/search_spec.rb
|
79
|
+
- spec/spec_helper.rb
|
80
|
+
homepage: ''
|
81
|
+
licenses: []
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project: google_play_scraper
|
100
|
+
rubygems_version: 1.8.25
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: Get Google Play App search results from ruby by scraping their website.
|
104
|
+
test_files:
|
105
|
+
- spec/google_play_scraper/search_spec.rb
|
106
|
+
- spec/spec_helper.rb
|