market_bot 0.17.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Guardfile +70 -0
  4. data/README.markdown +45 -95
  5. data/Rakefile +0 -89
  6. data/bin/app_categories +17 -0
  7. data/bin/benchmark_parser_app +24 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +7 -0
  10. data/bin/update_test_data +30 -0
  11. data/lib/market_bot.rb +20 -15
  12. data/lib/market_bot/exceptions.rb +2 -1
  13. data/lib/market_bot/play/app.rb +188 -0
  14. data/lib/market_bot/play/app/constants.rb +33 -0
  15. data/lib/market_bot/play/chart.rb +118 -0
  16. data/lib/market_bot/play/chart/constants.rb +74 -0
  17. data/lib/market_bot/play/constants.rb +7 -0
  18. data/lib/market_bot/play/developer.rb +32 -0
  19. data/lib/market_bot/util.rb +17 -0
  20. data/lib/market_bot/version.rb +1 -1
  21. data/market_bot.gemspec +6 -3
  22. data/spec/market_bot/play/app_spec.rb +201 -0
  23. data/spec/market_bot/play/chart_spec.rb +126 -0
  24. data/spec/market_bot/play/data/app-com.bluefroggaming.popdat.txt +99 -0
  25. data/spec/market_bot/play/data/app-com.mg.android.txt +103 -0
  26. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-0.txt +97 -0
  27. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-1.txt +97 -0
  28. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-2.txt +97 -0
  29. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-3.txt +97 -0
  30. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-4.txt +97 -0
  31. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-5.txt +97 -0
  32. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-6.txt +97 -0
  33. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-7.txt +97 -0
  34. data/spec/market_bot/play/data/developer-zynga.txt +97 -0
  35. data/spec/market_bot/play/developer_spec.rb +52 -0
  36. data/spec/market_bot_spec.rb +14 -0
  37. data/spec/spec_helper.rb +5 -11
  38. metadata +57 -52
  39. data/NOTES.txt +0 -61
  40. data/lib/market_bot/android/app.rb +0 -273
  41. data/lib/market_bot/android/developer.rb +0 -32
  42. data/lib/market_bot/android/leaderboard.rb +0 -232
  43. data/lib/market_bot/android/leaderboard/constants.rb +0 -59
  44. data/lib/market_bot/android/search_query.rb +0 -35
  45. data/lib/market_bot/movie/leaderboard.rb +0 -167
  46. data/lib/market_bot/movie/leaderboard/constants.rb +0 -30
  47. data/lib/market_bot/movie/search_query.rb +0 -32
  48. data/spec/market_bot/android/app_spec.rb +0 -253
  49. data/spec/market_bot/android/data/app_1.txt +0 -99
  50. data/spec/market_bot/android/data/app_2.txt +0 -100
  51. data/spec/market_bot/android/data/app_3.txt +0 -103
  52. data/spec/market_bot/android/data/app_4.txt +0 -117
  53. data/spec/market_bot/android/data/developer-zynga.txt +0 -97
  54. data/spec/market_bot/android/data/leaderboard-apps_editors_choice.txt +0 -97
  55. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page1.txt +0 -97
  56. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page2.txt +0 -97
  57. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page3.txt +0 -97
  58. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page4.txt +0 -97
  59. data/spec/market_bot/android/developer_spec.rb +0 -57
  60. data/spec/market_bot/android/leaderboard_spec.rb +0 -140
  61. data/spec/market_bot/android/search_query_spec.rb +0 -6
  62. data/spec/market_bot/movie/data/leaderboard-movies_comedy_topselling_paid.txt +0 -327
  63. data/spec/market_bot/movie/leaderboard_spec.rb +0 -67
@@ -1,32 +0,0 @@
module MarketBot
  module Android

    # Lists the apps shown on a developer's Google Play page.
    #
    # Developer pages are extremely similar to leaderboard pages, so this class
    # inherits from Leaderboard and only overrides how the URLs are built.
    #
    # BUG: This code only retrieves the first page of results.
    # This means you will only get the first 24 apps for a developer.
    # Some developers have hundreds of apps, so this needs to be fixed.
    class Developer < MarketBot::Android::Leaderboard
      # @param developer [String] developer id, sent as the +id+ query parameter
      # @param options [Hash] forwarded unchanged to Leaderboard#initialize
      def initialize(developer, options={})
        super(developer, nil, options)
      end

      # Builds the URLs to request for this developer.
      #
      # @param options [Hash] +:country+ sets the +gl+ query parameter (default 'us')
      # @return [Array<String>] a one-element array; paging is not implemented (see BUG above)
      def market_urls(options={})
        country = options[:country] || 'us'

        # URI.escape was removed in Ruby 3.0 and never escaped '&' or '=', so a
        # developer name containing them corrupted the query string.
        developer_id = URI.encode_www_form_component(identifier)

        ["https://play.google.com/store/apps/developer?id=#{developer_id}&gl=#{country}&hl=en"]
      end
    end

  end
end
@@ -1,232 +0,0 @@
module MarketBot
  module Android

    # Fetches Google Play app leaderboard ("collection") pages through a
    # Typhoeus hydra and parses them into an array of result hashes.
    class Leaderboard
      attr_reader :identifier, :category
      attr_reader :hydra

      MAX_STARS = 5
      PERCENT_DENOM = 100
      # Number of entries requested per leaderboard page (the +num+ query parameter).
      PAGE_SIZE = 24

      # Parses the HTML of a leaderboard page.
      #
      # Pages whose title starts with "Editor's Choice" use a different markup
      # and are handed to parse_editors_choice_page; everything else goes to
      # parse_normal_page.
      #
      # @param html [String] raw page HTML
      # @return [Array<Hash>] one hash per listed app
      def self.parse(html)
        if html.include?('<title>Editor&#39;s Choice')
          parse_editors_choice_page(html)
        else
          parse_normal_page(html)
        end
      end

      # Parses a regular leaderboard page (one +.card+ element per app).
      #
      # @param html [String] raw page HTML
      # @return [Array<Hash>] hashes with :stars, :title, :developer, :market_id,
      #   :market_url, plus :icon_url and :price_usd when the card contains them
      def self.parse_normal_page(html)
        doc = Nokogiri::HTML(html)

        doc.css('.card').map do |snippet_node|
          result = {}
          details_node = snippet_node.css('.details')

          icon_node = snippet_node.css('img').first
          result[:icon_url] = icon_node.attributes['src'].value if icon_node

          # The rating is rendered as a bar whose CSS width is a percentage of
          # the maximum; convert it back to a 0..MAX_STARS figure. Cards
          # without a rating element get nil.
          rating_node = snippet_node.css('.current-rating').first
          result[:stars] =
            if rating_node
              width_percent = rating_node.attributes['style'].value[/width:\s+([0-9.]+)%/, 1].to_f
              (MAX_STARS * width_percent / PERCENT_DENOM).round(1).to_s
            end

          title_node = details_node.css('.title').first
          result[:title] = title_node.attributes['title'].to_s

          price_node = details_node.css('.buy span').first
          result[:price_usd] = price_node.text if price_node

          result[:developer] = details_node.css('.subtitle').first.attributes['title'].to_s
          result[:market_id] = title_node.attributes['href'].to_s
                                         .gsub('/store/apps/details?id=', '')
                                         .gsub(/&feature=.*$/, '')
          result[:market_url] = "https://play.google.com/store/apps/details?id=#{result[:market_id]}&hl=en"

          # Free apps show an "Install" button instead of a price.
          result[:price_usd] = '$0.00' if result[:price_usd] == 'Install'

          result
        end
      end

      # Parses the Editor's Choice page (one +.fsg-snippet+ element per app).
      #
      # @param html [String] raw page HTML
      # @return [Array<Hash>] hashes with :title, :price_usd (always nil),
      #   :developer, :market_id and :market_url
      def self.parse_editors_choice_page(html)
        doc = Nokogiri::HTML(html)

        doc.css('.fsg-snippet').map do |snippet_node|
          market_id = snippet_node.attributes['data-docid'].text

          {
            :title      => snippet_node.css('.title').text,
            :price_usd  => nil,
            :developer  => snippet_node.css('.attribution').text,
            :market_id  => market_id,
            :market_url => "https://play.google.com/store/apps/details?id=#{market_id}&hl=en"
          }
        end
      end

      # This is the initializer method for the Leaderboard class.
      #
      # Leaderboard gets initialized by default with a specified identifier, an optional app category, along with optional
      # request options.
      #
      # * *Args*    :
      #   - +identifier+ -> The identifier is used to get the results for distinct leaderboards.
      #                     Valid identifiers include:
      #                       :topselling_paid
      #                       :topselling_free
      #                       :topselling_new_free
      #                       :topselling_new_paid
      #                       :editors_choice
      #                       :topselling_paid_game
      #                       :movers_shakers
      #                       :featured
      #                       :tablet_featured
      #                       :topgrossing
      #   - +category+ -> The category switches between the actual categories, or genres, of apps within a given leaderboard.
      #                     Valid categories include:
      #                       :game
      #                       :arcade
      #                       :brain
      #                       :cards
      #                       :casual
      #                       :game_wallpaper
      #                       :racing
      #                       :sports_games
      #                       :game_widgets
      #                       :application
      #                       :books_and_reference
      #                       :business
      #                       :comics
      #                       :communication
      #                       :education
      #                       :entertainment
      #                       :finance
      #                       :health_and_fitness
      #                       :libraries_and_demo
      #                       :lifestyle
      #                       :app_wallpaper
      #                       :media_and_video
      #                       :medical
      #                       :music_and_audio
      #                       :news_and_magazines
      #                       :personalization
      #                       :photography
      #                       :productivity
      #                       :shopping
      #                       :social
      #                       :sports
      #                       :tools
      #                       :transportation
      #                       :travel_and_local
      #                       :weather
      #                       :app_widgets
      #   - +options+ -> The optional options Hash contains keys :hydra and :request_opts. :hydra can be used to specify
      #                  a custom Hydra instance, while :request_opts is a Hash containing further options for the Play
      #                  Store HTTP request.
      #
      def initialize(identifier, category=nil, options={})
        @identifier = identifier
        @category = category
        @hydra = options[:hydra] || MarketBot.hydra
        @parsed_results = []
        @pending_pages = []
        # Work on a copy so the default timeouts are not written back into the
        # caller's hash (which may be shared between instances or frozen).
        @request_opts = (options[:request_opts] || {}).dup
        @request_opts[:timeout] ||= MarketBot.timeout
        @request_opts[:connecttimeout] ||= MarketBot.connecttimeout
      end

      # Builds one URL per leaderboard page.
      #
      # @param options [Hash] +:min_page+ (default 1), +:max_page+ (default 25)
      #   and +:country+ (the +gl+ query parameter, default 'us')
      # @return [Array<String>] URLs for pages min_page..max_page, in order
      def market_urls(options={})
        min_page = options[:min_page] || 1
        max_page = options[:max_page] || 25
        country  = options[:country]  || 'us'

        category_path = category ? "/category/#{category.to_s.upcase}" : ''

        (min_page..max_page).map do |page|
          start_val = (page - 1) * PAGE_SIZE

          "https://play.google.com/store/apps#{category_path}/collection/#{identifier}?" \
            "start=#{start_val}&gl=#{country}&num=#{PAGE_SIZE}&hl=en"
        end
      end

      # Queues the HTTP requests for this leaderboard on the hydra without
      # running them. The optional block is called with this instance once
      # every queued page has been parsed.
      #
      # @param options [Hash] +:min_rank+ (default 1), +:max_rank+ (default 500)
      #   and +:country+ (default 'us'); ignored for the uncategorised
      #   editors_choice leaderboard, which is a single fixed page
      # @return [Leaderboard] self
      def enqueue_update(options={}, &block)
        @callback = block

        if @identifier.to_s.downcase == 'editors_choice' && category.nil?
          url = 'https://play.google.com/store/apps/collection/editors_choice?&hl=en'
          process_page(url, 1)
        else
          min_rank = options[:min_rank] || 1
          max_rank = options[:max_rank] || 500
          country  = options[:country]  || 'us'

          min_page = rank_to_page(min_rank)
          max_page = rank_to_page(max_rank)

          @parsed_results = []

          urls = market_urls(:min_page => min_page, :max_page => max_page, :country => country)
          # Page slots are 1-based; slot 0 of @parsed_results stays nil.
          urls.each_with_index { |url, index| process_page(url, index + 1) }
        end

        self
      end

      # Queues the requests and runs the hydra until they have completed.
      #
      # @param options [Hash] see #enqueue_update
      # @return [Leaderboard] self
      def update(options={})
        enqueue_update(options)
        @hydra.run

        self
      end

      # Maps a 1-based rank to the 1-based page that contains it.
      #
      # @param rank [Integer]
      # @return [Integer]
      def rank_to_page(rank)
        ((rank - 1) / PAGE_SIZE) + 1
      end

      # All parsed entries in page order, skipping pages that are missing or empty.
      #
      # @return [Array<Hash>]
      def results
        raise 'Results do not exist yet.' unless @parsed_results
        @parsed_results.reject { |page| page.nil? || page.empty? }.flatten
      end

      private

      # Queues a request for +url+ and stores its parsed body under +page_num+.
      def process_page(url, page_num)
        @pending_pages << page_num
        request = Typhoeus::Request.new(url, @request_opts)
        request.on_complete do |response|
          # HACK: Typhoeus <= 0.4.2 returns a response, 0.5.0pre returns the request.
          response = response.response if response.is_a?(Typhoeus::Request)

          result = Leaderboard.parse(response.body)
          update_callback(result, page_num)
        end
        @hydra.queue(request)
      end

      # Records a parsed page and fires the completion callback once no pages
      # are pending any more.
      def update_callback(result, page)
        @parsed_results[page] = result
        @pending_pages.delete(page)
        @callback.call(self) if @callback && @pending_pages.empty?
      end
    end

  end
end
@@ -1,59 +0,0 @@
module MarketBot
  module Android

    class Leaderboard
      # Leaderboard identifiers, in alphabetical order.
      # Frozen so the shared list cannot be modified at runtime.
      IDENTIFIERS = %i[
        editors_choice
        featured
        movers_shakers
        tablet_featured
        topgrossing
        topselling_free
        topselling_new_free
        topselling_new_paid
        topselling_paid
        topselling_paid_game
      ].freeze

      # App categories, in alphabetical order.
      # Frozen so the shared list cannot be modified at runtime.
      CATEGORIES = %i[
        application
        app_wallpaper
        app_widgets
        arcade
        books_and_reference
        brain
        business
        cards
        casual
        comics
        communication
        education
        entertainment
        finance
        game
        game_wallpaper
        game_widgets
        health_and_fitness
        libraries_and_demo
        lifestyle
        media_and_video
        medical
        music_and_audio
        news_and_magazines
        personalization
        photography
        productivity
        racing
        shopping
        social
        sports
        sports_games
        tools
        transportation
        travel_and_local
        weather
      ].freeze
    end

  end
end
@@ -1,35 +0,0 @@
module MarketBot
  module Android

    # Runs an app search on Google Play.
    #
    # Search result pages are extremely similar to leaderboard pages, so this
    # class inherits from Leaderboard and only overrides how the URLs are built.
    class SearchQuery < MarketBot::Android::Leaderboard
      # @param query [String] search terms, sent as the +q+ query parameter
      # @param options [Hash] forwarded unchanged to Leaderboard#initialize
      def initialize(query, options={})
        super(query, nil, options)
      end

      # Builds one search URL per result page.
      #
      # @param options [Hash] +:min_page+ (default 1), +:max_page+ (default 25)
      #   and +:country+ (the +gl+ query parameter, default 'us')
      # @return [Array<String>] URLs for pages min_page..max_page, in order
      def market_urls(options={})
        min_page = options[:min_page] || 1
        max_page = options[:max_page] || 25
        country  = options[:country]  || 'us'

        # URI.escape was removed in Ruby 3.0 and never escaped '&' or '=', so a
        # query containing them corrupted the query string.
        query = URI.encode_www_form_component(identifier)

        (min_page..max_page).map do |page|
          start_val = (page - 1) * 24

          "https://play.google.com/store/search?q=#{query}&c=apps&start=#{start_val}&gl=#{country}&num=24&hl=en"
        end
      end
    end

  end
end
@@ -1,167 +0,0 @@
module MarketBot
  module Movie

    # Fetches Google Play movie leaderboard ("collection") pages through a
    # Typhoeus hydra and parses them into an array of result hashes.
    class Leaderboard
      attr_reader :identifier, :category
      attr_reader :hydra

      MAX_STARS = 5
      PERCENT_DENOM = 100
      # Number of entries requested per leaderboard page (the +num+ query parameter).
      PAGE_SIZE = 24

      # Parses the HTML of a movie leaderboard page.
      #
      # @param html [String] raw page HTML
      # @return [Array<Hash>] one hash per +.card-list+ element with :stars,
      #   :title, :genre, :market_id, :market_url, plus :price_usd when present
      def self.parse(html)
        doc = Nokogiri::HTML(html)

        doc.css('.card-list').map do |snippet_node|
          result = {}
          details_node = snippet_node.css('.details')

          # The rating is rendered as a bar whose CSS width is a percentage of
          # the maximum; convert it back to a 0..MAX_STARS figure. Entries
          # without a rating element get nil.
          rating_node = snippet_node.css('.current-rating').first
          result[:stars] =
            if rating_node
              width_percent = rating_node.attributes['style'].value[/width:\s+([0-9.]+)%/, 1].to_f
              (MAX_STARS * width_percent / PERCENT_DENOM).round(1).to_s
            end

          title_node = details_node.css('.title').first
          result[:title] = title_node.attributes['title'].to_s

          price_node = details_node.css('.buy span').first
          result[:price_usd] = price_node.text if price_node

          result[:genre] = details_node.css('.subtitle').first.attributes['title'].to_s

          # What is left after stripping the path prefix is the query string
          # ("?id=..."), which is reused to rebuild the absolute URL.
          movie_detail_url = title_node.attributes['href'].to_s.gsub('/store/movies/details', '')
          result[:market_id] = movie_detail_url.split('?id=').last
          result[:market_url] = "https://play.google.com/store/movies/details#{movie_detail_url}&hl=en"

          # Free items show an "Install" button instead of a price.
          result[:price_usd] = '$0.00' if result[:price_usd] == 'Install'

          result
        end
      end

      # This is the initializer method for the Leaderboard class.
      #
      # Leaderboard gets initialized by default with a specified identifier, an optional movies category, along with optional
      # request options.
      #
      # * *Args*    :
      #   - +identifier+ -> The identifier is used to get the results for distinct leaderboards.
      #                     Valid identifiers include:
      #                       :topselling_paid
      #                       :topselling_paid_show
      #   - +category+ -> The category switches between the actual categories, or genres, of movies within a given leaderboard.
      #                     Valid categories include:
      #                       :action_and_adventure
      #                       :animation
      #                       :classics
      #                       :comedy
      #                       :crime
      #                       :documentary
      #                       :drama
      #                       :family
      #                       :horror
      #                       :independent
      #                       :indian_cinema
      #                       :music
      #                       :sci_fi_and_fantasy
      #                       :short_films
      #                       :sports
      #                       :world_cinema
      #   - +options+ -> The optional options Hash contains keys :hydra and :request_opts. :hydra can be used to specify
      #                  a custom Hydra instance, while :request_opts is a Hash containing further options for the Play
      #                  Store HTTP request.
      #
      def initialize(identifier, category=nil, options={})
        @identifier = identifier
        @category = category
        @hydra = options[:hydra] || MarketBot.hydra
        @request_opts = options[:request_opts] || {}
        @parsed_results = []
        @pending_pages = []
      end

      # Builds one URL per leaderboard page.
      #
      # @param options [Hash] +:min_page+ (default 1), +:max_page+ (default 25)
      #   and +:country+ (the +gl+ query parameter, default 'us')
      # @return [Array<String>] URLs for pages min_page..max_page, in order
      def market_urls(options={})
        min_page = options[:min_page] || 1
        max_page = options[:max_page] || 25
        country  = options[:country]  || 'us'

        category_path = category ? "/category/#{category.to_s.upcase}" : ''

        (min_page..max_page).map do |page|
          start_val = (page - 1) * PAGE_SIZE

          "https://play.google.com/store/movies#{category_path}/collection/#{identifier}?" \
            "start=#{start_val}&gl=#{country}&num=#{PAGE_SIZE}&hl=en"
        end
      end

      # Queues the HTTP requests for this leaderboard on the hydra without
      # running them. The optional block is called with this instance once
      # every queued page has been parsed.
      #
      # @param options [Hash] +:min_rank+ (default 1), +:max_rank+ (default 500)
      #   and +:country+ (default 'us')
      # @return [Leaderboard] self
      def enqueue_update(options={}, &block)
        @callback = block
        min_rank = options[:min_rank] || 1
        max_rank = options[:max_rank] || 500
        country  = options[:country]  || 'us'

        min_page = rank_to_page(min_rank)
        max_page = rank_to_page(max_rank)

        @parsed_results = []

        urls = market_urls(:min_page => min_page, :max_page => max_page, :country => country)
        # Page slots are 1-based; slot 0 of @parsed_results stays nil.
        urls.each_with_index { |url, index| process_page(url, index + 1) }

        self
      end

      # Queues the requests and runs the hydra until they have completed.
      #
      # @param options [Hash] see #enqueue_update
      # @return [Leaderboard] self
      def update(options={})
        enqueue_update(options)
        @hydra.run

        self
      end

      # Maps a 1-based rank to the 1-based page that contains it.
      #
      # @param rank [Integer]
      # @return [Integer]
      def rank_to_page(rank)
        ((rank - 1) / PAGE_SIZE) + 1
      end

      # All parsed entries in page order, skipping pages that are missing or empty.
      #
      # @return [Array<Hash>]
      def results
        raise 'Results do not exist yet.' unless @parsed_results
        @parsed_results.reject { |page| page.nil? || page.empty? }.flatten
      end

      private

      # Queues a request for +url+ and stores its parsed body under +page_num+.
      def process_page(url, page_num)
        @pending_pages << page_num
        request = Typhoeus::Request.new(url, @request_opts)
        request.on_complete do |response|
          # HACK: Typhoeus <= 0.4.2 returns a response, 0.5.0pre returns the request.
          response = response.response if response.is_a?(Typhoeus::Request)

          result = Leaderboard.parse(response.body)
          update_callback(result, page_num)
        end
        @hydra.queue(request)
      end

      # Records a parsed page and fires the completion callback once no pages
      # are pending any more.
      def update_callback(result, page)
        @parsed_results[page] = result
        @pending_pages.delete(page)
        @callback.call(self) if @callback && @pending_pages.empty?
      end
    end

  end
end