google_play_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'bundler'
2
+
3
+ require 'rspec/core'
4
+ require 'rspec/core/rake_task'
5
# Register the gem helper tasks provided by Bundler.
Bundler::GemHelper.install_tasks

# The :spec task is built by RSpec's Rake integration; the description
# applies to the task defined immediately after it.
desc "Run all specs in spec directory (excluding plugin specs)"
RSpec::Core::RakeTask.new(:spec)

# Running plain `rake` executes the spec task.
task :default => [:spec]
@@ -0,0 +1,9 @@
1
+ require 'google_play_scraper/url'
2
+ require 'google_play_scraper/search_options'
3
+ require 'google_play_scraper/app'
4
+ require 'google_play_scraper/search'
5
+ require 'google_play_scraper/parser'
6
+ require 'nokogiri'
7
+
8
# Top-level namespace of the gem. It is intentionally empty here: the
# classes and constants living under it are defined in the files required
# by this entry point.
module GooglePlayScraper; end
@@ -0,0 +1,3 @@
1
module GooglePlayScraper
  # Plain value holder for one application found in the search results.
  #
  # The class is declared inside a nested +module+ so that this file can be
  # loaded without the GooglePlayScraper namespace having been defined
  # beforehand.
  #
  # Attributes (all readable and writable, all +nil+ until assigned):
  # * +id+             - value of the "id" query parameter of the app link
  # * +url+            - absolute URL built from the app link
  # * +name+           - value of the title attribute of the app's title link
  # * +developer+      - text content of the subtitle element
  # * +logo_url+       - value of the "data-cover-large" image attribute
  # * +logo_url_small+ - value of the "data-cover-small" image attribute
  class App
    attr_accessor :id, :url, :name, :developer, :logo_url, :logo_url_small
  end
end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+
3
module GooglePlayScraper
  # Converts the raw HTML of a search results page into App objects.
  #
  # Each element matching ".card" is treated as one application. The
  # extract_* methods never raise when an expected element or attribute is
  # absent from a card; they return +nil+ for that field instead, so a single
  # incomplete card does not abort parsing of the whole page.
  class Parser

    LOGO_CSS_SELECTOR = '.card-content .cover .cover-image-container .cover-outer-align .cover-inner-align img'.freeze
    DEVELOPER_CSS_SELECTOR = '.card-content .details .subtitle-container .subtitle'.freeze
    TITLE_CSS_SELECTOR = '.card-content .details a.title'.freeze
    LINK_CSS_SELECTOR = '.card-content a.card-click-target'.freeze

    attr_reader :raw_html

    # @param raw_html [String] HTML source of a search results page
    def initialize(raw_html)
      @raw_html = raw_html
    end

    # Parses raw_html and builds one App per ".card" element.
    #
    # @return [Array<GooglePlayScraper::App>] apps in document order
    def results
      Nokogiri::HTML(raw_html).css('.card').map do |app_container|
        create_app(app_container)
      end
    end

    # Builds an App from a single card element.
    #
    # @param app_container [#css] node wrapping one search result
    # @return [GooglePlayScraper::App]
    def create_app(app_container)
      app = GooglePlayScraper::App.new
      app.logo_url = extract_logo_url(app_container)
      app.logo_url_small = extract_logo_url_small(app_container)
      app.url = extract_app_url(app_container)
      app.id = extract_app_id(app_container)
      app.name = extract_app_name(app_container)
      app.developer = extract_developer(app_container)

      app
    end

    # @param app_container [#css]
    # @return [String, nil] text content of the subtitle element, or nil
    #   when the card has no such element
    def extract_developer(app_container)
      span_tag = app_container.css(DEVELOPER_CSS_SELECTOR).first
      span_tag && span_tag.content
    end

    # @param app_container [#css]
    # @return [String, nil] title attribute of the title link
    def extract_app_name(app_container)
      first_attribute(app_container, TITLE_CSS_SELECTOR, 'title')
    end

    # @param app_container [#css]
    # @return [String, nil] "id" query parameter of the card link, or nil
    #   when the link, its href, or the parameter is missing
    def extract_app_id(app_container)
      href = first_attribute(app_container, LINK_CSS_SELECTOR, 'href')
      return nil unless href

      # query_values is nil for a URI without a query string
      query = Addressable::URI.parse(href).query_values
      query && query['id']
    end

    # @param app_container [#css]
    # @return [String, nil] base URL followed by the href of the card link
    def extract_app_url(app_container)
      href = first_attribute(app_container, LINK_CSS_SELECTOR, 'href')
      href && (GooglePlayScraper::GOOGLE_PLAY_BASE_URL + href)
    end

    # @param app_container [#css]
    # @return [String, nil] "data-cover-large" attribute of the cover image
    def extract_logo_url(app_container)
      first_attribute(app_container, LOGO_CSS_SELECTOR, 'data-cover-large')
    end

    # @param app_container [#css]
    # @return [String, nil] "data-cover-small" attribute of the cover image
    def extract_logo_url_small(app_container)
      first_attribute(app_container, LOGO_CSS_SELECTOR, 'data-cover-small')
    end

    private

    # Reads +attribute+ from the first node matching +selector+.
    # Returns nil when no node matches.
    def first_attribute(app_container, selector, attribute)
      node = app_container.css(selector).first
      node && node[attribute]
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'addressable/uri'
2
+ require 'net/http'
3
+
4
# Performs a search request against the store's search page and returns the
# parsed results.
#
#   GooglePlayScraper::Search.new('Angry Birds').run
class GooglePlayScraper::Search

  include GooglePlayScraper::SearchOptions

  attr_reader :query, :options

  GOOGLE_PLAY_SEARCH_URL = (GooglePlayScraper::GOOGLE_PLAY_BASE_URL + "/store/search").freeze

  # Query-string parameters sent when the caller does not override them.
  DEFAULT_OPTIONS = {
    CATEGORY => 'apps',
    LANGUAGE => 'en',
    APPLY_SORTING => 1,       # 0 means no sorting
    SAFE_SEARCH => 0,         # apply safesearch to results
    NUMBER_OF_RESULTS => 10   # number of results to display
  }.freeze

  # @param query [String] search term, sent as the "q" parameter
  # @param options [Hash, nil] query-string parameters keyed by the
  #   SearchOptions constants; they take precedence over DEFAULT_OPTIONS
  def initialize(query, options = {})
    # Stored so that the declared #query reader actually returns the term.
    @query = query
    @options = DEFAULT_OPTIONS.merge(options || {})
    @options['q'] = query
  end

  # Sends the GET request over HTTPS and parses the response body.
  #
  # @return [Array<GooglePlayScraper::App>] whatever Parser#results yields
  #   for the response body
  # @raise [OpenSSL::SSL::SSLError] when the server certificate cannot be
  #   verified
  def run
    uri = build_uri
    http = Net::HTTP.new(uri.host, 443)
    http.use_ssl = true
    # Verify the server certificate; skipping verification would let anyone
    # on the network path tamper with the scraped page.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    response = http.request(Net::HTTP::Get.new(uri.request_uri))

    GooglePlayScraper::Parser.new(response.body).results
  end

  # @return [Addressable::URI] the search URL carrying #options as its
  #   query string
  def build_uri
    uri = Addressable::URI.parse(GOOGLE_PLAY_SEARCH_URL)
    uri.query_values = options
    uri
  end
end
@@ -0,0 +1,9 @@
1
module GooglePlayScraper
  # Names of the query-string parameters understood by the search request.
  # Each constant is the symbol used as the parameter key.
  module SearchOptions
    # "c" parameter
    CATEGORY = :c

    # "hl" parameter
    LANGUAGE = :hl

    # "sort" parameter
    APPLY_SORTING = :sort

    # "safe" parameter
    SAFE_SEARCH = :safe

    # "num" parameter
    NUMBER_OF_RESULTS = :num
  end
end
@@ -0,0 +1,3 @@
1
module GooglePlayScraper
  # Scheme and host that every store URL is built on. Frozen so that code
  # holding a reference cannot modify the shared string in place.
  GOOGLE_PLAY_BASE_URL = 'https://play.google.com'.freeze
end
@@ -0,0 +1,3 @@
1
module GooglePlayScraper
  # Version string of the gem. Frozen so it cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
# NOTE(review): both examples call Search#run, which performs a real HTTPS
# request, so they depend on network access and on the live store content.
describe GooglePlayScraper::Search do
  context 'searching' do
    it 'should be able to search for angry birds' do
      apps = GooglePlayScraper::Search.new('Angry Birds').run
      top_hit = apps.first

      apps.size.should == 10
      top_hit.name.should == 'Angry Birds'
      top_hit.developer.should == 'Rovio Mobile Ltd.'
      top_hit.id.should == 'com.rovio.angrybirds'
      top_hit.logo_url.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w340'
      top_hit.logo_url_small.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w170'
      top_hit.url.should == 'https://play.google.com/store/apps/details?id=com.rovio.angrybirds'
    end

    it 'should be able to search for the sparq.me scanner' do
      apps = GooglePlayScraper::Search.new('SPARQ.ME').run
      top_hit = apps.first

      apps.size.should == 1
      top_hit.name.should == 'SPARQ.ME'
      top_hit.developer.should == 'MSKYNET, Inc.'
      top_hit.id.should == 'com.sparqcode.sparqeye.android'
      top_hit.logo_url.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w340'
      top_hit.logo_url_small.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w170'
      top_hit.url.should == 'https://play.google.com/store/apps/details?id=com.sparqcode.sparqeye.android'
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'google_play_scraper'
4
+
5
# No RSpec settings are customised at the moment; the empty block is kept
# as the place where optional configuration would go.
RSpec.configure do |_config|
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_play_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mike Emery
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: addressable
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 2.3.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.3.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 2.14.1
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ description: Uses Nokogiri to scrape app search results on the Google Play app search
63
+ results page. Returns results as Ruby objects
64
+ email:
65
+ - mike@sparq.it
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - lib/google_play_scraper/app.rb
71
+ - lib/google_play_scraper/parser.rb
72
+ - lib/google_play_scraper/search.rb
73
+ - lib/google_play_scraper/search_options.rb
74
+ - lib/google_play_scraper/url.rb
75
+ - lib/google_play_scraper/version.rb
76
+ - lib/google_play_scraper.rb
77
+ - Rakefile
78
+ - spec/google_play_scraper/search_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: ''
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project: google_play_scraper
100
+ rubygems_version: 1.8.25
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Get Google Play App search results from ruby by scraping their website.
104
+ test_files:
105
+ - spec/google_play_scraper/search_spec.rb
106
+ - spec/spec_helper.rb