market_bot 0.17.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Guardfile +70 -0
  4. data/README.markdown +45 -95
  5. data/Rakefile +0 -89
  6. data/bin/app_categories +17 -0
  7. data/bin/benchmark_parser_app +24 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +7 -0
  10. data/bin/update_test_data +30 -0
  11. data/lib/market_bot.rb +20 -15
  12. data/lib/market_bot/exceptions.rb +2 -1
  13. data/lib/market_bot/play/app.rb +188 -0
  14. data/lib/market_bot/play/app/constants.rb +33 -0
  15. data/lib/market_bot/play/chart.rb +118 -0
  16. data/lib/market_bot/play/chart/constants.rb +74 -0
  17. data/lib/market_bot/play/constants.rb +7 -0
  18. data/lib/market_bot/play/developer.rb +32 -0
  19. data/lib/market_bot/util.rb +17 -0
  20. data/lib/market_bot/version.rb +1 -1
  21. data/market_bot.gemspec +6 -3
  22. data/spec/market_bot/play/app_spec.rb +201 -0
  23. data/spec/market_bot/play/chart_spec.rb +126 -0
  24. data/spec/market_bot/play/data/app-com.bluefroggaming.popdat.txt +99 -0
  25. data/spec/market_bot/play/data/app-com.mg.android.txt +103 -0
  26. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-0.txt +97 -0
  27. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-1.txt +97 -0
  28. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-2.txt +97 -0
  29. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-3.txt +97 -0
  30. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-4.txt +97 -0
  31. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-5.txt +97 -0
  32. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-6.txt +97 -0
  33. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-7.txt +97 -0
  34. data/spec/market_bot/play/data/developer-zynga.txt +97 -0
  35. data/spec/market_bot/play/developer_spec.rb +52 -0
  36. data/spec/market_bot_spec.rb +14 -0
  37. data/spec/spec_helper.rb +5 -11
  38. metadata +57 -52
  39. data/NOTES.txt +0 -61
  40. data/lib/market_bot/android/app.rb +0 -273
  41. data/lib/market_bot/android/developer.rb +0 -32
  42. data/lib/market_bot/android/leaderboard.rb +0 -232
  43. data/lib/market_bot/android/leaderboard/constants.rb +0 -59
  44. data/lib/market_bot/android/search_query.rb +0 -35
  45. data/lib/market_bot/movie/leaderboard.rb +0 -167
  46. data/lib/market_bot/movie/leaderboard/constants.rb +0 -30
  47. data/lib/market_bot/movie/search_query.rb +0 -32
  48. data/spec/market_bot/android/app_spec.rb +0 -253
  49. data/spec/market_bot/android/data/app_1.txt +0 -99
  50. data/spec/market_bot/android/data/app_2.txt +0 -100
  51. data/spec/market_bot/android/data/app_3.txt +0 -103
  52. data/spec/market_bot/android/data/app_4.txt +0 -117
  53. data/spec/market_bot/android/data/developer-zynga.txt +0 -97
  54. data/spec/market_bot/android/data/leaderboard-apps_editors_choice.txt +0 -97
  55. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page1.txt +0 -97
  56. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page2.txt +0 -97
  57. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page3.txt +0 -97
  58. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page4.txt +0 -97
  59. data/spec/market_bot/android/developer_spec.rb +0 -57
  60. data/spec/market_bot/android/leaderboard_spec.rb +0 -140
  61. data/spec/market_bot/android/search_query_spec.rb +0 -6
  62. data/spec/market_bot/movie/data/leaderboard-movies_comedy_topselling_paid.txt +0 -327
  63. data/spec/market_bot/movie/leaderboard_spec.rb +0 -67
data/lib/market_bot.rb CHANGED
@@ -5,19 +5,15 @@ require 'nokogiri'
5
5
 
6
6
  require 'market_bot/version'
7
7
  require 'market_bot/exceptions'
8
- require 'market_bot/android/app'
9
- require 'market_bot/android/leaderboard/constants'
10
- require 'market_bot/android/leaderboard'
11
- require 'market_bot/android/search_query'
12
- require 'market_bot/android/developer'
13
- require 'market_bot/movie/leaderboard'
14
- require 'market_bot/movie/search_query'
8
+ require 'market_bot/util'
9
+ require 'market_bot/play/constants'
10
+ require 'market_bot/play/app/constants'
11
+ require 'market_bot/play/app'
12
+ require 'market_bot/play/chart/constants'
13
+ require 'market_bot/play/chart'
14
+ require 'market_bot/play/developer'
15
15
 
16
16
  module MarketBot
17
- def self.hydra
18
- @hydra ||= Typhoeus::Hydra.new(:max_concurrency => 5)
19
- end
20
-
21
17
  def self.timeout
22
18
  @timeout ||= 10
23
19
  end
@@ -26,12 +22,21 @@ module MarketBot
26
22
  @timeout = val
27
23
  end
28
24
 
29
- def self.connecttimeout
30
- @connecttimeout ||= 10
25
+ def self.connect_timeout
26
+ @connect_timeout ||= 10
27
+ end
28
+
29
+ def self.connect_timeout=(val)
30
+ @connect_timeout = val
31
+ end
32
+
33
+ def self.user_agent
34
+ @user_agent ||= "MarketBot/#{MarketBot::VERSION} / " \
35
+ "(+https://github.com/chadrem/market_bot)"
31
36
  end
32
37
 
33
- def self.connecttimeout=(val)
34
- @connecttimeout = val
38
+ def self.user_agent=(val)
39
+ @user_agent = val
35
40
  end
36
41
  end
37
42
 
data/lib/market_bot/exceptions.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  module MarketBot
2
2
  class MarketBotError < StandardError; end
3
3
  class ResponseError < MarketBotError; end
4
- class AppNotFoundError < ResponseError; end
4
+ class NotFoundError < ResponseError; end
5
+ class UnavailableError < ResponseError; end
5
6
  end
data/lib/market_bot/play/app.rb ADDED
@@ -0,0 +1,188 @@
1
+ module MarketBot
2
+ module Play
3
+ class App
4
+ attr_reader *ATTRIBUTES
5
+ attr_reader :package
6
+ attr_reader :lang
7
+ attr_reader :result
8
+
9
+ def self.parse(html, opts={})
10
+ result = {}
11
+
12
+ doc = Nokogiri::HTML(html)
13
+ meta_info = doc.css('.meta-info')
14
+ meta_info.each do |info|
15
+ field_name = info.css('.title').text.strip
16
+
17
+ case field_name
18
+ when 'Updated'
19
+ result[:updated] = info.at_css('.content').text.strip
20
+ when 'Installs'
21
+ result[:installs] = info.at_css('.content').text.strip
22
+ when 'Size'
23
+ result[:size] = info.at_css('.content').text.strip
24
+ when 'Current Version'
25
+ result[:current_version] = info.at_css('.content').text.strip
26
+ when 'Requires Android'
27
+ result[:requires_android] = info.at_css('.content').text.strip
28
+ when 'Contact Developer', 'Developer'
29
+ info.css('.dev-link').each do |node|
30
+ node_href = node[:href]
31
+ if node_href =~ /^mailto:/
32
+ result[:email] = node_href.gsub(/^mailto:/,'')
33
+ else
34
+ if q_param = URI(node_href).query.split('&').select{ |p| p =~ /q=/ }.first
35
+ actual_url = q_param.gsub('q=', '')
36
+ end
37
+
38
+ result[:website_url] = actual_url
39
+ end
40
+ end
41
+
42
+ end
43
+ end
44
+
45
+ result[:content_rating] = doc.at_css("div.content[itemprop='contentRating']").text
46
+
47
+ result[:price] = doc.at_css('meta[itemprop="price"]')[:content]
48
+
49
+ category_div = doc.at_css('.category')
50
+ result[:category] = category_div.text.strip rescue nil
51
+ result[:category_url] = File.split(category_div["href"])[1]
52
+
53
+ result[:description] = doc.at_css('div[itemprop="description"]').inner_html.strip
54
+ result[:title] = doc.at_css('div.id-app-title').text
55
+
56
+ score = doc.at_css('.score-container')
57
+ unless score.nil?
58
+ node = score.at_css('.score')
59
+ result[:rating] = node.text.strip
60
+ node = score.at_css('meta[itemprop="ratingCount"]')
61
+ result[:votes] = node[:content].strip.to_i
62
+ end
63
+
64
+ node = doc.at_css('div[itemprop="author"]')
65
+ result[:developer] = node.at_css('.primary').text.strip
66
+
67
+ result[:more_from_developer] = []
68
+ result[:similar] = []
69
+
70
+ node = doc.css('.recommendation')
71
+ node.css('.rec-cluster').each do |recommended|
72
+ assoc_app_type = recommended.at_css('.heading').text.strip.eql?('Similar' ) ? :similar : :more_from_developer
73
+ recommended.css('.card').each do |card|
74
+ assoc_app = {}
75
+ assoc_app[:package] = card['data-docid'].strip
76
+
77
+ result[assoc_app_type] << assoc_app
78
+ end
79
+ end
80
+
81
+ node = doc.at_css('.cover-image')
82
+ unless node.nil?
83
+ url = MarketBot::Util.fix_content_url(node[:src])
84
+ result[:cover_image_url] = url
85
+ end
86
+
87
+ result[:screenshot_urls] = []
88
+ doc.css('.screenshot').each do |node|
89
+ result[:screenshot_urls] << MarketBot::Util.fix_content_url(node[:src])
90
+ end
91
+
92
+ result[:full_screenshot_urls] = []
93
+ doc.css('.full-screenshot').each do |node|
94
+ result[:full_screenshot_urls] << MarketBot::Util.fix_content_url(node[:src])
95
+ end
96
+
97
+ result[:reviews] = []
98
+ unless opts[:skip_reviews] # Review parsing is CPU intensive.
99
+ doc.css('.single-review').each do |node|
100
+ review = {}
101
+ review[:author] = node.at_css('.author-name').text.strip if node.at_css('.author-name')
102
+ raw_tag = node.at_css('.current-rating').to_s
103
+ if raw_tag.match(/100%;/i)
104
+ review[:score] = 5
105
+ elsif raw_tag.match(/80%;/i)
106
+ review[:score] = 4
107
+ elsif raw_tag.match(/60%;/i)
108
+ review[:score] = 3
109
+ elsif raw_tag.match(/40%;/i)
110
+ review[:score] = 2
111
+ elsif raw_tag.match(/20%;/i)
112
+ review[:score] = 1
113
+ end
114
+ if node.at_css('.review-title')
115
+ review[:title] = node.at_css('.review-title').text.strip
116
+ end
117
+ if node.at_css('.review-body')
118
+ review[:text] = node.at_css('.review-body').text
119
+ .sub!(review[:title],'')
120
+ .sub!(node.at_css('.review-link').text, '')
121
+ .strip
122
+ end
123
+ if review
124
+ result[:reviews] << review
125
+ end
126
+ end
127
+ end
128
+
129
+ result[:rating_distribution] = { 5 => nil, 4 => nil, 3 => nil, 2 => nil, 1 => nil }
130
+
131
+ histogram = doc.css('div.rating-histogram')
132
+ cur_index = 5
133
+ %w(five four three two one).each do |slot|
134
+ node = histogram.at_css(".#{slot.to_s}")
135
+ result[:rating_distribution][cur_index] = node.css('.bar-number').text.gsub(/,/,'').to_i
136
+ cur_index -= 1
137
+
138
+ end
139
+
140
+ result[:html] = html
141
+
142
+ result
143
+ end
144
+
145
+ def initialize(package, opts={})
146
+ @package = package
147
+ @lang = opts[:lang] || MarketBot::Play::DEFAULT_LANG
148
+ @request_opts = MarketBot::Util.build_request_opts(opts[:request_opts])
149
+ end
150
+
151
+ def store_url
152
+ "https://play.google.com/store/apps/details?id=#{@package}&hl=#{@lang}"
153
+ end
154
+
155
+ def update
156
+ req = Typhoeus::Request.new(store_url, @request_opts)
157
+ req.run
158
+ response_handler(req.response)
159
+
160
+ self
161
+ end
162
+
163
+ private
164
+
165
+ def response_handler(response)
166
+ if response.success?
167
+ @result = self.class.parse(response.body)
168
+
169
+ ATTRIBUTES.each do |a|
170
+ attr_name = "@#{a}"
171
+ attr_value = @result[a]
172
+ instance_variable_set(attr_name, attr_value)
173
+ end
174
+ else
175
+ codes = "code=#{response.code}, return_code=#{response.return_code}"
176
+ case response.code
177
+ when 404
178
+ raise MarketBot::NotFoundError.new("Unable to find app in store: #{codes}")
179
+ when 403
180
+ raise MarketBot::UnavailableError.new("Unavailable app (country restriction?): #{codes}")
181
+ else
182
+ raise MarketBot::ResponseError.new("Unhandled response: #{codes}")
183
+ end
184
+ end
185
+ end
186
+ end
187
+ end
188
+ end
data/lib/market_bot/play/app/constants.rb ADDED
@@ -0,0 +1,33 @@
1
+ module MarketBot
2
+ module Play
3
+ class App
4
+ ATTRIBUTES = [
5
+ :category,
6
+ :category_url,
7
+ :content_rating,
8
+ :cover_image_url,
9
+ :current_version,
10
+ :description,
11
+ :developer,
12
+ :email,
13
+ :full_screenshot_urls,
14
+ :html,
15
+ :installs,
16
+ :more_from_developer,
17
+ :price,
18
+ :rating,
19
+ :rating_distribution,
20
+ :requires_android,
21
+ :reviews,
22
+ :screenshot_urls,
23
+ :similar,
24
+ :size,
25
+ :title,
26
+ :updated,
27
+ :votes,
28
+ :website_url,
29
+ :whats_new
30
+ ]
31
+ end
32
+ end
33
+ end
data/lib/market_bot/play/chart.rb ADDED
@@ -0,0 +1,118 @@
1
+ module MarketBot
2
+ module Play
3
+ class Chart
4
+ attr_reader :collection
5
+ attr_reader :category
6
+ attr_reader :country
7
+ attr_reader :lang
8
+ attr_reader :result
9
+
10
+ def self.parse(html, opts={})
11
+ opts[:lang] ||= MarketBot::Play::DEFAULT_LANG
12
+
13
+ results = []
14
+ doc = Nokogiri::HTML(html)
15
+
16
+ doc.css('.card').each do |snippet_node|
17
+ result = {}
18
+
19
+ details_node = snippet_node.css('.details')
20
+
21
+ unless snippet_node.css('img').empty?
22
+ result[:icon_url] = MarketBot::Util.fix_content_url(snippet_node.css('img').first.attributes['src'].value)
23
+ end
24
+
25
+ unless snippet_node.css('.current-rating').empty?
26
+ stars_style = snippet_node.css('.current-rating').first.attributes['style'].value
27
+ stars_width_percent = stars_style[/width:\s+([0-9.]+)%/, 1].to_f
28
+ result[:stars] = (5 * stars_width_percent / 100).round(1).to_s
29
+ else
30
+ result[:stars] = nil
31
+ end
32
+
33
+ title_node = details_node.css('.title').first
34
+ result[:title] = title_node.attributes['title'].to_s
35
+ result[:rank] = title_node.text.gsub(/\..*/, '').to_i
36
+
37
+ if (price_elem = details_node.css('.buy span').first)
38
+ result[:price] = price_elem.text
39
+ end
40
+
41
+ result[:developer] = details_node.css('.subtitle').first.attributes['title'].to_s
42
+ result[:package] = details_node.css('.title').first.attributes['href'].to_s.gsub('/store/apps/details?id=', '').gsub(/&feature=.*$/, '')
43
+ result[:store_url] = "https://play.google.com/store/apps/details?id=#{result[:package]}&hl=#{opts[:lang]}"
44
+
45
+ result[:price] = '0' if result[:price] == 'Free'
46
+
47
+ results << result
48
+ end
49
+
50
+ results
51
+ end
52
+
53
+ def initialize(collection, category=nil, opts={})
54
+ @collection = collection
55
+ @category = category
56
+ @request_opts = MarketBot::Util.build_request_opts(opts[:request_opts])
57
+ @lang = opts[:lang] || MarketBot::Play::DEFAULT_LANG
58
+ @country = opts[:country] || MarketBot::Play::DEFAULT_COUNTRY
59
+ @max_pages = opts[:max_pages] || MarketBot::Play::Chart::MAX_PAGES
60
+ end
61
+
62
+ def store_urls
63
+ urls = []
64
+ start = 0
65
+ num = 100
66
+
67
+ @max_pages.times do |i|
68
+ url = 'https://play.google.com/store/apps'
69
+ url << "/category/#{@category}" if @category
70
+ url << "/collection/#{@collection}?"
71
+ url << "start=#{start}&"
72
+ url << "gl=#{@country}&"
73
+ url << "num=#{num}&"
74
+ url << "hl=#{@lang}"
75
+
76
+ urls << url
77
+ start += num
78
+ end
79
+
80
+ urls
81
+ end
82
+
83
+ def update(opts={})
84
+ @result = []
85
+
86
+ store_urls.each do |url|
87
+ req = Typhoeus::Request.new(url, @request_opts)
88
+ req.run
89
+
90
+ break unless response_handler(req.response)
91
+ end
92
+
93
+ @result.flatten!
94
+
95
+ self
96
+ end
97
+
98
+ private
99
+
100
+ def response_handler(response)
101
+ if response.success?
102
+ r = self.class.parse(response.body, lang: @lang)
103
+
104
+ if @result.empty? ||
105
+ (!@result.empty? &&@result[-1][-1][:rank] + 1 == r[0][:rank])
106
+ @result << r
107
+ return true
108
+ end
109
+
110
+ return false
111
+ else
112
+ codes = "code=#{response.code}, return_code=#{response.return_code}"
113
+ raise MarketBot::ResponseError.new("Unhandled response: #{codes}")
114
+ end
115
+ end
116
+ end
117
+ end
118
+ end
data/lib/market_bot/play/chart/constants.rb ADDED
@@ -0,0 +1,74 @@
1
+ module MarketBot
2
+ module Play
3
+ class Chart
4
+ MAX_PAGES = 6
5
+
6
+ COLLECTIONS = %w{
7
+ topselling_free
8
+ topselling_paid
9
+ topgrossing
10
+ topselling_free
11
+ movers_shakers
12
+ topgrossing
13
+ topselling_new_free
14
+ topselling_new_paid
15
+ }
16
+
17
+ CATEGORIES = %w{
18
+ ANDROID_WEAR
19
+ BOOKS_AND_REFERENCE
20
+ BUSINESS
21
+ COMICS
22
+ COMMUNICATION
23
+ EDUCATION
24
+ ENTERTAINMENT
25
+ FINANCE
26
+ HEALTH_AND_FITNESS
27
+ LIBRARIES_AND_DEMO
28
+ LIFESTYLE
29
+ APP_WALLPAPER
30
+ MEDIA_AND_VIDEO
31
+ MEDICAL
32
+ MUSIC_AND_AUDIO
33
+ NEWS_AND_MAGAZINES
34
+ PERSONALIZATION
35
+ PHOTOGRAPHY
36
+ PRODUCTIVITY
37
+ SHOPPING
38
+ SOCIAL
39
+ SPORTS
40
+ TOOLS
41
+ TRANSPORTATION
42
+ TRAVEL_AND_LOCAL
43
+ WEATHER
44
+ APP_WIDGETS
45
+ GAME_ACTION
46
+ GAME_ADVENTURE
47
+ GAME_ARCADE
48
+ GAME_BOARD
49
+ GAME_CARD
50
+ GAME_CASINO
51
+ GAME_CASUAL
52
+ GAME_EDUCATIONAL
53
+ GAME_MUSIC
54
+ GAME_PUZZLE
55
+ GAME_RACING
56
+ GAME_ROLE_PLAYING
57
+ GAME_SIMULATION
58
+ GAME_SPORTS
59
+ GAME_STRATEGY
60
+ GAME_TRIVIA
61
+ GAME_WORD
62
+ FAMILY?age=AGE_RANGE1
63
+ FAMILY?age=AGE_RANGE2
64
+ FAMILY?age=AGE_RANGE3
65
+ FAMILY_ACTION
66
+ FAMILY_BRAINGAMES
67
+ FAMILY_CREATE
68
+ FAMILY_EDUCATION
69
+ FAMILY_MUSICVIDEO
70
+ FAMILY_PRETEND
71
+ }
72
+ end
73
+ end
74
+ end