market_bot 0.17.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Guardfile +70 -0
  4. data/README.markdown +45 -95
  5. data/Rakefile +0 -89
  6. data/bin/app_categories +17 -0
  7. data/bin/benchmark_parser_app +24 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +7 -0
  10. data/bin/update_test_data +30 -0
  11. data/lib/market_bot.rb +20 -15
  12. data/lib/market_bot/exceptions.rb +2 -1
  13. data/lib/market_bot/play/app.rb +188 -0
  14. data/lib/market_bot/play/app/constants.rb +33 -0
  15. data/lib/market_bot/play/chart.rb +118 -0
  16. data/lib/market_bot/play/chart/constants.rb +74 -0
  17. data/lib/market_bot/play/constants.rb +7 -0
  18. data/lib/market_bot/play/developer.rb +32 -0
  19. data/lib/market_bot/util.rb +17 -0
  20. data/lib/market_bot/version.rb +1 -1
  21. data/market_bot.gemspec +6 -3
  22. data/spec/market_bot/play/app_spec.rb +201 -0
  23. data/spec/market_bot/play/chart_spec.rb +126 -0
  24. data/spec/market_bot/play/data/app-com.bluefroggaming.popdat.txt +99 -0
  25. data/spec/market_bot/play/data/app-com.mg.android.txt +103 -0
  26. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-0.txt +97 -0
  27. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-1.txt +97 -0
  28. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-2.txt +97 -0
  29. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-3.txt +97 -0
  30. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-4.txt +97 -0
  31. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-5.txt +97 -0
  32. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-6.txt +97 -0
  33. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-7.txt +97 -0
  34. data/spec/market_bot/play/data/developer-zynga.txt +97 -0
  35. data/spec/market_bot/play/developer_spec.rb +52 -0
  36. data/spec/market_bot_spec.rb +14 -0
  37. data/spec/spec_helper.rb +5 -11
  38. metadata +57 -52
  39. data/NOTES.txt +0 -61
  40. data/lib/market_bot/android/app.rb +0 -273
  41. data/lib/market_bot/android/developer.rb +0 -32
  42. data/lib/market_bot/android/leaderboard.rb +0 -232
  43. data/lib/market_bot/android/leaderboard/constants.rb +0 -59
  44. data/lib/market_bot/android/search_query.rb +0 -35
  45. data/lib/market_bot/movie/leaderboard.rb +0 -167
  46. data/lib/market_bot/movie/leaderboard/constants.rb +0 -30
  47. data/lib/market_bot/movie/search_query.rb +0 -32
  48. data/spec/market_bot/android/app_spec.rb +0 -253
  49. data/spec/market_bot/android/data/app_1.txt +0 -99
  50. data/spec/market_bot/android/data/app_2.txt +0 -100
  51. data/spec/market_bot/android/data/app_3.txt +0 -103
  52. data/spec/market_bot/android/data/app_4.txt +0 -117
  53. data/spec/market_bot/android/data/developer-zynga.txt +0 -97
  54. data/spec/market_bot/android/data/leaderboard-apps_editors_choice.txt +0 -97
  55. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page1.txt +0 -97
  56. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page2.txt +0 -97
  57. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page3.txt +0 -97
  58. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page4.txt +0 -97
  59. data/spec/market_bot/android/developer_spec.rb +0 -57
  60. data/spec/market_bot/android/leaderboard_spec.rb +0 -140
  61. data/spec/market_bot/android/search_query_spec.rb +0 -6
  62. data/spec/market_bot/movie/data/leaderboard-movies_comedy_topselling_paid.txt +0 -327
  63. data/spec/market_bot/movie/leaderboard_spec.rb +0 -67
@@ -1,32 +0,0 @@
1
- module MarketBot
2
- module Android
3
-
4
- # Developer pages are extremely similar to leaderboard pages.
5
- # Amazingly, this inheritence hack works!
6
- #
7
- # BUG: This code only retrieves the first page of results.
8
- # This means you will only get the first 24 apps for a developer.
9
- # Some developers have hundreds of apps so this needs fixed!!!
10
- class Developer < MarketBot::Android::Leaderboard
11
- def initialize(developer, options={})
12
- super(developer, nil, options)
13
- end
14
-
15
- def market_urls(options={})
16
- results = []
17
-
18
- country = options[:country] || 'us'
19
-
20
- url = "https://play.google.com/store/apps/developer?"
21
- url << "id=#{URI.escape(identifier)}&"
22
- url << "gl=#{country}&"
23
- url << "hl=en"
24
-
25
- results << url
26
-
27
- return results
28
- end
29
- end
30
-
31
- end
32
- end
@@ -1,232 +0,0 @@
1
- module MarketBot
2
- module Android
3
-
4
- class Leaderboard
5
- attr_reader :identifier, :category
6
- attr_reader :hydra
7
-
8
- MAX_STARS = 5
9
- PERCENT_DENOM = 100
10
-
11
- def self.parse(html)
12
- if html.include?('<title>Editor&#39;s Choice')
13
- parse_editors_choice_page(html)
14
- else
15
- parse_normal_page(html)
16
- end
17
- end
18
-
19
- def self.parse_normal_page(html)
20
- results = []
21
- doc = Nokogiri::HTML(html)
22
-
23
- doc.css('.card').each do |snippet_node|
24
- result = {}
25
-
26
- details_node = snippet_node.css('.details')
27
-
28
- unless snippet_node.css('img').empty?
29
- result[:icon_url] = snippet_node.css('img').first.attributes['src'].value
30
- end
31
-
32
- unless snippet_node.css('.current-rating').empty?
33
- stars_style = snippet_node.css('.current-rating').first.attributes['style'].value
34
- stars_width_percent = stars_style[/width:\s+([0-9.]+)%/, 1].to_f
35
- result[:stars] = (MAX_STARS * stars_width_percent/PERCENT_DENOM).round(1).to_s
36
- else
37
- result[:stars] = nil
38
- end
39
-
40
- result[:title] = details_node.css('.title').first.attributes['title'].to_s
41
-
42
- if (price_elem = details_node.css('.buy span').first)
43
- result[:price_usd] = price_elem.text
44
- end
45
-
46
- result[:developer] = details_node.css('.subtitle').first.attributes['title'].to_s
47
- result[:market_id] = details_node.css('.title').first.attributes['href'].to_s.gsub('/store/apps/details?id=', '').gsub(/&feature=.*$/, '')
48
- result[:market_url] = "https://play.google.com/store/apps/details?id=#{result[:market_id]}&hl=en"
49
-
50
- result[:price_usd] = '$0.00' if result[:price_usd] == 'Install'
51
-
52
- results << result
53
- end
54
-
55
- results
56
- end
57
-
58
- def self.parse_editors_choice_page(html)
59
- results = []
60
-
61
- doc = Nokogiri::HTML(html)
62
-
63
- doc.css('.fsg-snippet').each do |snippet_node|
64
- result = {}
65
-
66
- result[:title] = snippet_node.css('.title').text
67
- result[:price_usd] = nil
68
- result[:developer] = snippet_node.css('.attribution').text
69
- result[:market_id] = snippet_node.attributes['data-docid'].text
70
- result[:market_url] = "https://play.google.com/store/apps/details?id=#{result[:market_id]}&hl=en"
71
-
72
- results << result
73
- end
74
-
75
- results
76
- end
77
-
78
- # This is the initializer method for the Leaderboard class.
79
- #
80
- # Leaderboard gets initialized by default with a specified identifier, an optional app category, along with optional
81
- # request options.
82
- #
83
- # * *Args* :
84
- # - +identifier+ -> The identifier is used to get the results for distinct leaderboards.
85
- # Valid identifiers include:
86
- # :topselling_paid
87
- # :topselling_free
88
- # :topselling_new_free
89
- # :topselling_new_paid
90
- # :editors_choice
91
- # :topselling_paid_game
92
- # :movers_shakers
93
- # :featured
94
- # :tablet_featured
95
- # :topgrossing
96
- # - +category+ -> The category switches between the actual categories, or genres, of apps within a given leaderboard.
97
- # Valid categories include:
98
- # :game
99
- # :arcade
100
- # :brain
101
- # :cards
102
- # :casual
103
- # :game_wallpaper
104
- # :racing
105
- # :sports_games
106
- # :game_widgets
107
- # :application
108
- # :books_and_reference
109
- # :business
110
- # :comics
111
- # :communication
112
- # :education
113
- # :entertainment
114
- # :finance
115
- # :health_and_fitness
116
- # :libraries_and_demo
117
- # :lifestyle
118
- # :app_wallpaper
119
- # :media_and_video
120
- # :medical
121
- # :music_and_audio
122
- # :news_and_magazines
123
- # :personalization
124
- # :photography
125
- # :productivity
126
- # :shopping
127
- # :social
128
- # :sports
129
- # :tools
130
- # :transportation
131
- # :travel_and_local
132
- # :weather
133
- # :app_widgets
134
- # - +options+ -> The optional options Hash contains keys :hydra and :request_opts. :hydra can be used to specify
135
- # a custom Hydra instance, while :request_opts is a Hash containing further options for the Play
136
- # Store HTTP request.
137
- #
138
- def initialize(identifier, category=nil, options={})
139
- @identifier = identifier
140
- @category = category
141
- @hydra = options[:hydra] || MarketBot.hydra
142
- @parsed_results = []
143
- @pending_pages = []
144
- @request_opts = options[:request_opts] || {}
145
- @request_opts[:timeout] ||= MarketBot.timeout
146
- @request_opts[:connecttimeout] ||= MarketBot.connecttimeout
147
- end
148
-
149
- def market_urls(options={})
150
- results = []
151
-
152
- min_page = options[:min_page] || 1
153
- max_page = options[:max_page] || 25
154
- country = options[:country] || 'us'
155
-
156
- (min_page..max_page).each do |page|
157
- start_val = (page - 1) * 24
158
-
159
- url = 'https://play.google.com/store/apps'
160
- url << "/category/#{category.to_s.upcase}" if category
161
- url << "/collection/#{identifier.to_s}?"
162
- url << "start=#{start_val}&"
163
- url << "gl=#{country}&"
164
- url << "num=24&hl=en"
165
-
166
- results << url
167
- end
168
-
169
- results
170
- end
171
-
172
- def enqueue_update(options={},&block)
173
- @callback = block
174
- if @identifier.to_s.downcase == 'editors_choice' && category == nil
175
- url = 'https://play.google.com/store/apps/collection/editors_choice?&hl=en'
176
- process_page(url, 1)
177
- else
178
- min_rank = options[:min_rank] || 1
179
- max_rank = options[:max_rank] || 500
180
- country = options[:country] || 'us'
181
-
182
- min_page = rank_to_page(min_rank)
183
- max_page = rank_to_page(max_rank)
184
-
185
- @parsed_results = []
186
-
187
- urls = market_urls(:min_page => min_page, :max_page => max_page, :country => country)
188
- urls.each_index{ |i| process_page(urls[i], i+1) }
189
- end
190
-
191
- self
192
- end
193
-
194
- def update(options={})
195
- enqueue_update(options)
196
- @hydra.run
197
-
198
- self
199
- end
200
-
201
- def rank_to_page(rank)
202
- ((rank - 1) / 24) + 1
203
- end
204
-
205
- def results
206
- raise 'Results do not exist yet.' unless @parsed_results
207
- @parsed_results.reject{ |page| page.nil? || page.empty? }.flatten
208
- end
209
-
210
- private
211
- def process_page(url, page_num)
212
- @pending_pages << page_num
213
- request = Typhoeus::Request.new(url, @request_opts)
214
- request.on_complete do |response|
215
- # HACK: Typhoeus <= 0.4.2 returns a response, 0.5.0pre returns the request.
216
- response = response.response if response.is_a?(Typhoeus::Request)
217
-
218
- result = Leaderboard.parse(response.body)
219
- update_callback(result, page_num)
220
- end
221
- @hydra.queue(request)
222
- end
223
-
224
- def update_callback(result, page)
225
- @parsed_results[page] = result
226
- @pending_pages.delete(page)
227
- @callback.call(self) if @callback and @pending_pages.empty?
228
- end
229
- end
230
-
231
- end
232
- end
@@ -1,59 +0,0 @@
1
- module MarketBot
2
- module Android
3
-
4
- class Leaderboard
5
- IDENTIFIERS = [
6
- :editors_choice,
7
- :featured,
8
- :movers_shakers,
9
- :tablet_featured,
10
- :topgrossing,
11
- :topselling_free,
12
- :topselling_new_free,
13
- :topselling_new_paid,
14
- :topselling_paid,
15
- :topselling_paid_game
16
- ]
17
-
18
- CATEGORIES = [
19
- :application,
20
- :app_wallpaper,
21
- :app_widgets,
22
- :arcade,
23
- :books_and_reference,
24
- :brain,
25
- :business,
26
- :cards,
27
- :casual,
28
- :comics,
29
- :communication,
30
- :education,
31
- :entertainment,
32
- :finance,
33
- :game,
34
- :game_wallpaper,
35
- :game_widgets,
36
- :health_and_fitness,
37
- :libraries_and_demo,
38
- :lifestyle,
39
- :media_and_video,
40
- :medical,
41
- :music_and_audio,
42
- :news_and_magazines,
43
- :personalization,
44
- :photography,
45
- :productivity,
46
- :racing,
47
- :shopping,
48
- :social,
49
- :sports,
50
- :sports_games,
51
- :tools,
52
- :transportation,
53
- :travel_and_local,
54
- :weather
55
- ]
56
- end
57
-
58
- end
59
- end
@@ -1,35 +0,0 @@
1
- module MarketBot
2
- module Android
3
-
4
- # Search query pages are extremely similar to leaderboard pages.
5
- # Amazingly, this inheritence hack works!
6
- class SearchQuery < MarketBot::Android::Leaderboard
7
- def initialize(query, options={})
8
- super(query, nil, options)
9
- end
10
-
11
- def market_urls(options={})
12
- results = []
13
-
14
- min_page = options[:min_page] || 1
15
- max_page = options[:max_page] || 25
16
- country = options[:country] || 'us'
17
-
18
- (min_page..max_page).each do |page|
19
- start_val = (page - 1) * 24
20
-
21
- url = "https://play.google.com/store/search?"
22
- url << "q=#{URI.escape(identifier)}&"
23
- url << "c=apps&start=#{start_val}&"
24
- url << "gl=#{country}&"
25
- url << "num=24&hl=en"
26
-
27
- results << url
28
- end
29
-
30
- results
31
- end
32
- end
33
-
34
- end
35
- end
@@ -1,167 +0,0 @@
1
- module MarketBot
2
- module Movie
3
-
4
- class Leaderboard
5
- attr_reader :identifier, :category
6
- attr_reader :hydra
7
-
8
- MAX_STARS = 5
9
- PERCENT_DENOM = 100
10
-
11
- def self.parse(html)
12
- results = []
13
- doc = Nokogiri::HTML(html)
14
-
15
- doc.css('.card-list').each do |snippet_node|
16
- result = {}
17
-
18
- details_node = snippet_node.css('.details')
19
-
20
- unless snippet_node.css('.current-rating').empty?
21
- stars_style = snippet_node.css('.current-rating').first.attributes['style'].value
22
- stars_width_percent = stars_style[/width:\s+([0-9.]+)%/, 1].to_f
23
- result[:stars] = (MAX_STARS * stars_width_percent/PERCENT_DENOM).round(1).to_s
24
- else
25
- result[:stars] = nil
26
- end
27
-
28
- result[:title] = details_node.css('.title').first.attributes['title'].to_s
29
-
30
- if (price_elem = details_node.css('.buy span').first)
31
- result[:price_usd] = price_elem.text
32
- end
33
-
34
- result[:genre] = details_node.css('.subtitle').first.attributes['title'].to_s
35
- movie_detail_url = details_node.css('.title').first.attributes['href'].to_s.gsub('/store/movies/details', '')
36
- result[:market_id] = movie_detail_url.split('?id=').last
37
- result[:market_url] = "https://play.google.com/store/movies/details#{movie_detail_url}&hl=en"
38
-
39
- result[:price_usd] = '$0.00' if result[:price_usd] == 'Install'
40
-
41
- results << result
42
- end
43
-
44
- results
45
- end
46
-
47
- # This is the initializer method for the Leaderboard class.
48
- #
49
- # Leaderboard gets initialized by default with a specified identifier, an optional movies category, along with optional
50
- # request options.
51
- #
52
- # * *Args* :
53
- # - +identifier+ -> The identifier is used to get the results for distinct leaderboards.
54
- # Valid identifiers include:
55
- # :topselling_paid
56
- # :topselling_paid_show
57
- # - +category+ -> The category switches between the actual categories, or genres, of movies within a given leaderboard.
58
- # Valid categories include:
59
- #
60
- # :action_and_adventure,
61
- # :animation,
62
- # :classics,
63
- # :comedy,
64
- # :crime,
65
- # :documentary,
66
- # :drama,
67
- # :family,
68
- # :horror,
69
- # :independent,
70
- # :indian_cinema,
71
- # :music,
72
- # :sci_fi_and_fantasy,
73
- # :short_films,
74
- # :sports,
75
- # :world_cinema
76
- ## - +options+ -> The optional options Hash contains keys :hydra and :request_opts. :hydra can be used to specify
77
- # a custom Hydra instance, while :request_opts is a Hash containing further options for the Play
78
- # Store HTTP request.
79
- #
80
- def initialize(identifier, category=nil, options={})
81
- @identifier = identifier
82
- @category = category
83
- @hydra = options[:hydra] || MarketBot.hydra
84
- @request_opts = options[:request_opts] || {}
85
- @parsed_results = []
86
- @pending_pages = []
87
- end
88
-
89
- def market_urls(options={})
90
- results = []
91
-
92
- min_page = options[:min_page] || 1
93
- max_page = options[:max_page] || 25
94
- country = options[:country] || 'us'
95
-
96
- (min_page..max_page).each do |page|
97
- start_val = (page - 1) * 24
98
-
99
- url = 'https://play.google.com/store/movies'
100
- url << "/category/#{category.to_s.upcase}" if category
101
- url << "/collection/#{identifier.to_s}?"
102
- url << "start=#{start_val}&"
103
- url << "gl=#{country}&"
104
- url << "num=24&hl=en"
105
-
106
- results << url
107
- end
108
-
109
- results
110
- end
111
-
112
- def enqueue_update(options={},&block)
113
- @callback = block
114
- min_rank = options[:min_rank] || 1
115
- max_rank = options[:max_rank] || 500
116
- country = options[:country] || 'us'
117
-
118
- min_page = rank_to_page(min_rank)
119
- max_page = rank_to_page(max_rank)
120
-
121
- @parsed_results = []
122
-
123
- urls = market_urls(:min_page => min_page, :max_page => max_page, :country => country)
124
- urls.each_index{ |i| process_page(urls[i], i+1) }
125
-
126
- self
127
- end
128
-
129
- def update(options={})
130
- enqueue_update(options)
131
- @hydra.run
132
-
133
- self
134
- end
135
-
136
- def rank_to_page(rank)
137
- ((rank - 1) / 24) + 1
138
- end
139
-
140
- def results
141
- raise 'Results do not exist yet.' unless @parsed_results
142
- @parsed_results.reject{ |page| page.nil? || page.empty? }.flatten
143
- end
144
-
145
- private
146
- def process_page(url, page_num)
147
- @pending_pages << page_num
148
- request = Typhoeus::Request.new(url, @request_opts)
149
- request.on_complete do |response|
150
- # HACK: Typhoeus <= 0.4.2 returns a response, 0.5.0pre returns the request.
151
- response = response.response if response.is_a?(Typhoeus::Request)
152
-
153
- result = Leaderboard.parse(response.body)
154
- update_callback(result, page_num)
155
- end
156
- @hydra.queue(request)
157
- end
158
-
159
- def update_callback(result, page)
160
- @parsed_results[page] = result
161
- @pending_pages.delete(page)
162
- @callback.call(self) if @callback and @pending_pages.empty?
163
- end
164
- end
165
-
166
- end
167
- end