google_play_scraper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'bundler'
2
+
3
+ require 'rspec/core'
4
+ require 'rspec/core/rake_task'
5
# Register the Rake tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# `rake spec` runs the suite through RSpec's own Rake task.
desc "Run all specs in spec directory (excluding plugin specs)"
RSpec::Core::RakeTask.new(:spec)

# Bare `rake` is an alias for `rake spec`.
task :default => :spec
@@ -0,0 +1,9 @@
1
# Entry point of the gem: declares the GooglePlayScraper namespace and loads
# every component that lives under it.

# Declare the namespace before loading any component. Several components are
# written as `class GooglePlayScraper::Something`, which raises NameError when
# the module does not exist yet.
module GooglePlayScraper
end

# HTML parsing library used by GooglePlayScraper::Parser.
require 'nokogiri'

# url.rb defines GOOGLE_PLAY_BASE_URL and search_options.rb defines the
# SearchOptions mixin; search.rb reads both while it is being loaded, so they
# must come first.
require 'google_play_scraper/url'
require 'google_play_scraper/version'
require 'google_play_scraper/search_options'
require 'google_play_scraper/app'
require 'google_play_scraper/search'
require 'google_play_scraper/parser'
@@ -0,0 +1,3 @@
1
# Plain data holder describing one app. It defines no behaviour of its own:
# every field is a readable and writable attribute that starts out as nil.
class GooglePlayScraper::App
  # Identifier of the app and the address of its page.
  attr_accessor :id, :url

  # Display name of the app and of its developer.
  attr_accessor :name, :developer

  # Addresses of the logo image in its regular and small variants.
  attr_accessor :logo_url, :logo_url_small
end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+
3
# #extract_app_id relies on Addressable::URI, so load it here rather than
# depending on some other file having required it earlier.
require 'addressable/uri'

# Converts the raw HTML of a search results page into GooglePlayScraper::App
# objects, one for every element matching the `.card` selector.
#
# The extract_* methods each take a single card node. They assume the element
# they look for is present: when the selector matches nothing, the lookup
# yields nil and the following call raises NoMethodError.
class GooglePlayScraper::Parser

  # Selector (relative to a card) of the <img> that carries the logo URLs.
  LOGO_CSS_SELECTOR = '.card-content .cover .cover-image-container .cover-outer-align .cover-inner-align img'.freeze

  # Selector (relative to a card) of the anchor whose href points at the app.
  CARD_LINK_CSS_SELECTOR = '.card-content a.card-click-target'.freeze

  # The HTML string handed to the constructor.
  attr_reader :raw_html

  # raw_html - HTML source of the page to parse.
  def initialize(raw_html)
    @raw_html = raw_html
  end

  # Parses raw_html and returns an Array of GooglePlayScraper::App, in
  # document order. The Array is empty when no `.card` element is found.
  def results
    Nokogiri::HTML(raw_html).css('.card').map do |app_container|
      create_app(app_container)
    end
  end

  # Builds one GooglePlayScraper::App from a single card node.
  def create_app(app_container)
    app = GooglePlayScraper::App.new
    app.logo_url = extract_logo_url(app_container)
    app.logo_url_small = extract_logo_url_small(app_container)
    app.url = extract_app_url(app_container)
    app.id = extract_app_id(app_container)
    app.name = extract_app_name(app_container)
    app.developer = extract_developer(app_container)

    app
  end

  # Returns the text content of the card's subtitle element.
  def extract_developer(app_container)
    app_container.at_css('.card-content .details .subtitle-container .subtitle').content
  end

  # Returns the `title` attribute of the card's title anchor.
  def extract_app_name(app_container)
    app_container.at_css('.card-content .details a.title')['title']
  end

  # Returns the value of the `id` query parameter of the card link's href.
  def extract_app_id(app_container)
    Addressable::URI.parse(card_link_href(app_container)).query_values['id']
  end

  # Returns the card link's href appended to GOOGLE_PLAY_BASE_URL.
  def extract_app_url(app_container)
    GooglePlayScraper::GOOGLE_PLAY_BASE_URL + card_link_href(app_container)
  end

  # Returns the `data-cover-large` attribute of the logo image.
  def extract_logo_url(app_container)
    logo_image(app_container)['data-cover-large']
  end

  # Returns the `data-cover-small` attribute of the logo image.
  def extract_logo_url_small(app_container)
    logo_image(app_container)['data-cover-small']
  end

  private

  # href of the first anchor matching CARD_LINK_CSS_SELECTOR inside the card.
  def card_link_href(app_container)
    app_container.at_css(CARD_LINK_CSS_SELECTOR)['href']
  end

  # First node matching LOGO_CSS_SELECTOR inside the card (nil if none).
  def logo_image(app_container)
    app_container.at_css(LOGO_CSS_SELECTOR)
  end
end
@@ -0,0 +1,43 @@
1
+ require 'addressable/uri'
2
+ require 'net/http'
3
+
4
# Runs one search against the Google Play search page and returns the parsed
# results.
#
#   GooglePlayScraper::Search.new('Angry Birds').run  # => Array of App
class GooglePlayScraper::Search

  include GooglePlayScraper::SearchOptions

  # query   - the search text given to the constructor.
  # options - the full query-string Hash sent with the request (defaults,
  #           caller overrides and the 'q' entry holding the search text).
  attr_reader :query, :options

  GOOGLE_PLAY_SEARCH_URL = (GooglePlayScraper::GOOGLE_PLAY_BASE_URL + "/store/search").freeze

  # Query parameters used when the caller does not override them. Frozen so a
  # caller cannot alter the defaults shared by every Search instance.
  DEFAULT_OPTIONS = {
    CATEGORY => 'apps',
    LANGUAGE => 'en',
    APPLY_SORTING => 1,       # 0 means no sorting
    SAFE_SEARCH => 0,         # apply safesearch to results
    NUMBER_OF_RESULTS => 10   # number of results to display
  }.freeze

  # query   - text to search for.
  # options - Hash of query parameters merged over DEFAULT_OPTIONS; keys are
  #           the constants from GooglePlayScraper::SearchOptions.
  def initialize(query, options = {})
    # Without this assignment the `query` reader always returned nil.
    @query = query
    @options = DEFAULT_OPTIONS.merge(options)
    @options['q'] = query
  end

  # Performs the HTTPS GET request and returns whatever
  # GooglePlayScraper::Parser#results produces for the response body.
  # The response status is not inspected.
  def run
    uri = build_uri
    http = Net::HTTP.new(uri.host, 443)
    http.use_ssl = true
    # Verify the server certificate. Turning verification off would let anyone
    # on the network path impersonate the server and feed us arbitrary HTML.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER

    response = http.request(Net::HTTP::Get.new(uri.request_uri))

    GooglePlayScraper::Parser.new(response.body).results
  end

  # Returns the Addressable::URI of the search page with `options` as its
  # query string.
  def build_uri
    uri = Addressable::URI.parse(GOOGLE_PLAY_SEARCH_URL)
    uri.query_values = options
    uri
  end
end
@@ -0,0 +1,9 @@
1
module GooglePlayScraper
  # Symbolic names for option keys. Each constant's value is the short symbol
  # that stands for the option, so callers can write
  # `SearchOptions::LANGUAGE` instead of remembering `:hl`.
  module SearchOptions
    CATEGORY          = :c
    LANGUAGE          = :hl
    APPLY_SORTING     = :sort
    SAFE_SEARCH       = :safe
    NUMBER_OF_RESULTS = :num
  end
end
@@ -0,0 +1,3 @@
1
module GooglePlayScraper
  # Scheme and host of the Google Play site, without a trailing slash; paths
  # are appended to it with `+`. Frozen so that no caller can modify the
  # shared String in place.
  GOOGLE_PLAY_BASE_URL = 'https://play.google.com'.freeze
end
@@ -0,0 +1,3 @@
1
module GooglePlayScraper
  # Version string of the gem. Frozen so it cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,29 @@
1
+ require 'spec_helper'
2
+
3
# NOTE: both examples call Search#run, which performs a real HTTPS request, so
# the expected values below depend on what the live site returns.
describe GooglePlayScraper::Search do
  context 'searching' do
    it 'should be able to search for angry birds' do
      results = GooglePlayScraper::Search.new('Angry Birds').run
      results.size.should == 10

      top_hit = results.first
      top_hit.name.should == 'Angry Birds'
      top_hit.developer.should == 'Rovio Mobile Ltd.'
      top_hit.id.should == 'com.rovio.angrybirds'
      top_hit.logo_url.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w340'
      top_hit.logo_url_small.should == 'https://lh6.ggpht.com/M9q_Zs_CRt2rbA41nTMhrPqiBxhUEUN8Z1f_mn9m89_TiHbIbUF8hjnc_zwevvLsRIJy=w170'
      top_hit.url.should == 'https://play.google.com/store/apps/details?id=com.rovio.angrybirds'
    end

    it 'should be able to search for the sparq.me scanner' do
      results = GooglePlayScraper::Search.new('SPARQ.ME').run
      results.size.should == 1

      only_hit = results.first
      only_hit.name.should == 'SPARQ.ME'
      only_hit.developer.should == 'MSKYNET, Inc.'
      only_hit.id.should == 'com.sparqcode.sparqeye.android'
      only_hit.logo_url.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w340'
      only_hit.logo_url_small.should == 'https://lh3.ggpht.com/m8vHSrnD8kqMpYkgyQDxWpren2Pi5Vn-Eemj-xyHwcGUsbsi0rcYy0dR5Qzi5B566ig=w170'
      only_hit.url.should == 'https://play.google.com/store/apps/details?id=com.sparqcode.sparqeye.android'
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'google_play_scraper'
4
+
5
# Suite-wide RSpec configuration hook; no settings are applied yet.
RSpec.configure do |_config|
  # some (optional) config here
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_play_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mike Emery
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.5.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.5.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: addressable
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 2.3.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.3.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 2.14.1
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.1
62
+ description: Uses Nokogiri to scrape app search results on the Google Play app search
63
+ results page. Returns results as Ruby objects
64
+ email:
65
+ - mike@sparq.it
66
+ executables: []
67
+ extensions: []
68
+ extra_rdoc_files: []
69
+ files:
70
+ - lib/google_play_scraper/app.rb
71
+ - lib/google_play_scraper/parser.rb
72
+ - lib/google_play_scraper/search.rb
73
+ - lib/google_play_scraper/search_options.rb
74
+ - lib/google_play_scraper/url.rb
75
+ - lib/google_play_scraper/version.rb
76
+ - lib/google_play_scraper.rb
77
+ - Rakefile
78
+ - spec/google_play_scraper/search_spec.rb
79
+ - spec/spec_helper.rb
80
+ homepage: ''
81
+ licenses: []
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ! '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project: google_play_scraper
100
+ rubygems_version: 1.8.25
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Get Google Play App search results from ruby by scraping their website.
104
+ test_files:
105
+ - spec/google_play_scraper/search_spec.rb
106
+ - spec/spec_helper.rb