market_bot 0.17.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/Guardfile +70 -0
  4. data/README.markdown +45 -95
  5. data/Rakefile +0 -89
  6. data/bin/app_categories +17 -0
  7. data/bin/benchmark_parser_app +24 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +7 -0
  10. data/bin/update_test_data +30 -0
  11. data/lib/market_bot.rb +20 -15
  12. data/lib/market_bot/exceptions.rb +2 -1
  13. data/lib/market_bot/play/app.rb +188 -0
  14. data/lib/market_bot/play/app/constants.rb +33 -0
  15. data/lib/market_bot/play/chart.rb +118 -0
  16. data/lib/market_bot/play/chart/constants.rb +74 -0
  17. data/lib/market_bot/play/constants.rb +7 -0
  18. data/lib/market_bot/play/developer.rb +32 -0
  19. data/lib/market_bot/util.rb +17 -0
  20. data/lib/market_bot/version.rb +1 -1
  21. data/market_bot.gemspec +6 -3
  22. data/spec/market_bot/play/app_spec.rb +201 -0
  23. data/spec/market_bot/play/chart_spec.rb +126 -0
  24. data/spec/market_bot/play/data/app-com.bluefroggaming.popdat.txt +99 -0
  25. data/spec/market_bot/play/data/app-com.mg.android.txt +103 -0
  26. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-0.txt +97 -0
  27. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-1.txt +97 -0
  28. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-2.txt +97 -0
  29. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-3.txt +97 -0
  30. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-4.txt +97 -0
  31. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-5.txt +97 -0
  32. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-6.txt +97 -0
  33. data/spec/market_bot/play/data/chart-topselling_paid-GAME_ARCADE-7.txt +97 -0
  34. data/spec/market_bot/play/data/developer-zynga.txt +97 -0
  35. data/spec/market_bot/play/developer_spec.rb +52 -0
  36. data/spec/market_bot_spec.rb +14 -0
  37. data/spec/spec_helper.rb +5 -11
  38. metadata +57 -52
  39. data/NOTES.txt +0 -61
  40. data/lib/market_bot/android/app.rb +0 -273
  41. data/lib/market_bot/android/developer.rb +0 -32
  42. data/lib/market_bot/android/leaderboard.rb +0 -232
  43. data/lib/market_bot/android/leaderboard/constants.rb +0 -59
  44. data/lib/market_bot/android/search_query.rb +0 -35
  45. data/lib/market_bot/movie/leaderboard.rb +0 -167
  46. data/lib/market_bot/movie/leaderboard/constants.rb +0 -30
  47. data/lib/market_bot/movie/search_query.rb +0 -32
  48. data/spec/market_bot/android/app_spec.rb +0 -253
  49. data/spec/market_bot/android/data/app_1.txt +0 -99
  50. data/spec/market_bot/android/data/app_2.txt +0 -100
  51. data/spec/market_bot/android/data/app_3.txt +0 -103
  52. data/spec/market_bot/android/data/app_4.txt +0 -117
  53. data/spec/market_bot/android/data/developer-zynga.txt +0 -97
  54. data/spec/market_bot/android/data/leaderboard-apps_editors_choice.txt +0 -97
  55. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page1.txt +0 -97
  56. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page2.txt +0 -97
  57. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page3.txt +0 -97
  58. data/spec/market_bot/android/data/leaderboard-apps_topselling_paid-page4.txt +0 -97
  59. data/spec/market_bot/android/developer_spec.rb +0 -57
  60. data/spec/market_bot/android/leaderboard_spec.rb +0 -140
  61. data/spec/market_bot/android/search_query_spec.rb +0 -6
  62. data/spec/market_bot/movie/data/leaderboard-movies_comedy_topselling_paid.txt +0 -327
  63. data/spec/market_bot/movie/leaderboard_spec.rb +0 -67
data/lib/market_bot.rb CHANGED
@@ -5,19 +5,15 @@ require 'nokogiri'
5
5
 
6
6
  require 'market_bot/version'
7
7
  require 'market_bot/exceptions'
8
- require 'market_bot/android/app'
9
- require 'market_bot/android/leaderboard/constants'
10
- require 'market_bot/android/leaderboard'
11
- require 'market_bot/android/search_query'
12
- require 'market_bot/android/developer'
13
- require 'market_bot/movie/leaderboard'
14
- require 'market_bot/movie/search_query'
8
+ require 'market_bot/util'
9
+ require 'market_bot/play/constants'
10
+ require 'market_bot/play/app/constants'
11
+ require 'market_bot/play/app'
12
+ require 'market_bot/play/chart/constants'
13
+ require 'market_bot/play/chart'
14
+ require 'market_bot/play/developer'
15
15
 
16
16
  module MarketBot
17
- def self.hydra
18
- @hydra ||= Typhoeus::Hydra.new(:max_concurrency => 5)
19
- end
20
-
21
17
  def self.timeout
22
18
  @timeout ||= 10
23
19
  end
@@ -26,12 +22,21 @@ module MarketBot
26
22
  @timeout = val
27
23
  end
28
24
 
29
- def self.connecttimeout
30
- @connecttimeout ||= 10
25
+ def self.connect_timeout
26
+ @connect_timeout ||= 10
27
+ end
28
+
29
+ def self.connect_timeout=(val)
30
+ @connect_timeout = val
31
+ end
32
+
33
+ def self.user_agent
34
+ @user_agent ||= "MarketBot/#{MarketBot::VERSION} / " \
35
+ "(+https://github.com/chadrem/market_bot)"
31
36
  end
32
37
 
33
- def self.connecttimeout=(val)
34
- @connecttimeout = val
38
+ def self.user_agent=(val)
39
+ @user_agent = val
35
40
  end
36
41
  end
37
42
 
data/lib/market_bot/exceptions.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  module MarketBot
2
2
  class MarketBotError < StandardError; end
3
3
  class ResponseError < MarketBotError; end
4
- class AppNotFoundError < ResponseError; end
4
+ class NotFoundError < ResponseError; end
5
+ class UnavailableError < ResponseError; end
5
6
  end
data/lib/market_bot/play/app.rb ADDED
@@ -0,0 +1,188 @@
1
+ module MarketBot
2
+ module Play
3
+ class App
4
# One reader per entry in ATTRIBUTES, plus the package id, the language
# and the raw hash stored by #update.
attr_reader(*ATTRIBUTES)
attr_reader :package, :lang, :result
8
+
9
# Label of a '.meta-info' section => result key that receives the
# stripped text of that section's '.content' node.
META_INFO_FIELDS = {
  'Updated'          => :updated,
  'Installs'         => :installs,
  'Size'             => :size,
  'Current Version'  => :current_version,
  'Requires Android' => :requires_android
}.freeze

# Width markers looked for in a review's '.current-rating' tag, paired with
# the score they stand for. Checked in this order; first hit wins.
REVIEW_SCORES = [
  ['100%;', 5],
  ['80%;',  4],
  ['60%;',  3],
  ['40%;',  2],
  ['20%;',  1]
].freeze

# Parses the HTML of an app details page into a Hash.
#
# html - String containing the page markup.
# opts - Hash of options:
#        :skip_reviews - when truthy, :reviews is left as an empty Array
#                        (review parsing is CPU intensive).
#
# Returns a Hash keyed by Symbol (see the assignments below for the keys).
# The content rating, price, description, title and developer lookups are
# not nil-guarded, so a page lacking those nodes raises NoMethodError.
def self.parse(html, opts={})
  doc = Nokogiri::HTML(html)
  result = {}

  parse_meta_info(doc, result)

  result[:content_rating] = doc.at_css("div.content[itemprop='contentRating']").text
  result[:price] = doc.at_css('meta[itemprop="price"]')[:content]

  # Both category fields are nil when the page has no '.category' node
  # (previously the second lookup raised even though the first was guarded).
  category_node = doc.at_css('.category')
  category_href = category_node && category_node['href']
  result[:category] = category_node && category_node.text.strip
  result[:category_url] = category_href && File.split(category_href)[1]

  result[:description] = doc.at_css('div[itemprop="description"]').inner_html.strip
  result[:title] = doc.at_css('div.id-app-title').text

  score = doc.at_css('.score-container')
  unless score.nil?
    result[:rating] = score.at_css('.score').text.strip
    result[:votes] = score.at_css('meta[itemprop="ratingCount"]')[:content].strip.to_i
  end

  result[:developer] = doc.at_css('div[itemprop="author"]').at_css('.primary').text.strip

  result[:more_from_developer] = []
  result[:similar] = []
  doc.css('.recommendation').css('.rec-cluster').each do |cluster|
    # Any cluster whose heading is not exactly 'Similar' is filed under
    # :more_from_developer.
    heading = cluster.at_css('.heading').text.strip
    bucket = heading.eql?('Similar') ? :similar : :more_from_developer
    cluster.css('.card').each do |card|
      result[bucket] << { :package => card['data-docid'].strip }
    end
  end

  cover = doc.at_css('.cover-image')
  result[:cover_image_url] = MarketBot::Util.fix_content_url(cover[:src]) unless cover.nil?

  result[:screenshot_urls] = doc.css('.screenshot').map do |node|
    MarketBot::Util.fix_content_url(node[:src])
  end

  result[:full_screenshot_urls] = doc.css('.full-screenshot').map do |node|
    MarketBot::Util.fix_content_url(node[:src])
  end

  result[:reviews] = opts[:skip_reviews] ? [] : parse_reviews(doc)
  result[:rating_distribution] = parse_rating_distribution(doc)
  result[:html] = html

  result
end

# Copies the '.meta-info' sections (updated, installs, size, ... and the
# developer contact links) from doc into result.
def self.parse_meta_info(doc, result)
  doc.css('.meta-info').each do |info|
    label = info.css('.title').text.strip

    if (key = META_INFO_FIELDS[label])
      result[key] = info.at_css('.content').text.strip
    elsif label == 'Contact Developer' || label == 'Developer'
      parse_developer_links(info, result)
    end
  end
end

# Extracts :email (from a mailto: link) and :website_url (from the 'q'
# query parameter of any other link) out of one meta-info section.
def self.parse_developer_links(info, result)
  info.css('.dev-link').each do |link|
    href = link[:href].to_s

    if href =~ /\Amailto:/
      result[:email] = href.sub(/\Amailto:/, '')
    else
      # URI#query is nil when the link has no query string.
      query = URI(href).query.to_s
      target = query.split('&').find { |param| param.start_with?('q=') }
      # Only assign on a hit so a later link without 'q=' does not wipe
      # a URL that was already found.
      result[:website_url] = target.sub(/\Aq=/, '') if target
    end
  end
end

# Builds one Hash per '.single-review' node with whichever of :author,
# :score, :title and :text could be found.
def self.parse_reviews(doc)
  doc.css('.single-review').map do |node|
    review = {}

    author = node.at_css('.author-name')
    review[:author] = author.text.strip if author

    rating_tag = node.at_css('.current-rating').to_s
    hit = REVIEW_SCORES.find { |width, _score| rating_tag.include?(width) }
    review[:score] = hit.last if hit

    title = node.at_css('.review-title')
    review[:title] = title.text.strip if title

    body = node.at_css('.review-body')
    if body
      # Non-bang sub: sub! returns nil when nothing was replaced, which
      # broke the original call chain.
      text = body.text
      text = text.sub(review[:title], '') if review[:title]
      link = node.at_css('.review-link')
      text = text.sub(link.text, '') if link
      review[:text] = text.strip
    end

    review
  end
end

# Returns { 5 => count, ..., 1 => count } read from the rating histogram.
# A star level whose bar is missing from the page keeps the value nil.
def self.parse_rating_distribution(doc)
  histogram = doc.css('div.rating-histogram')
  distribution = { 5 => nil, 4 => nil, 3 => nil, 2 => nil, 1 => nil }

  %w(five four three two one).each_with_index do |slot, index|
    bar = histogram.at_css(".#{slot}")
    next if bar.nil?

    distribution[5 - index] = bar.css('.bar-number').text.delete(',').to_i
  end

  distribution
end

private_class_method :parse_meta_info, :parse_developer_links,
                     :parse_reviews, :parse_rating_distribution
144
+
145
# Stores the package id and per-instance settings.
#
# package - package identifier used in the store URL.
# opts    - Hash of options:
#           :lang         - language code; falls back to
#                           MarketBot::Play::DEFAULT_LANG when nil/false.
#           :request_opts - handed to MarketBot::Util.build_request_opts;
#                           the result is later passed to Typhoeus.
def initialize(package, opts={})
  chosen_lang = opts[:lang]
  raw_request_opts = opts[:request_opts]

  @package = package
  @lang = chosen_lang || MarketBot::Play::DEFAULT_LANG
  @request_opts = MarketBot::Util.build_request_opts(raw_request_opts)
end
150
+
151
# Returns the details-page URL for this package in the configured language.
def store_url
  format('https://play.google.com/store/apps/details?id=%s&hl=%s', @package, @lang)
end
154
+
155
# Requests #store_url, hands the response to #response_handler (which
# stores the parsed data or raises) and returns self for chaining.
def update
  request = Typhoeus::Request.new(store_url, @request_opts).tap(&:run)
  response_handler(request.response)

  self
end
162
+
163
+ private
164
+
165
# Stores the parsed page on success; raises otherwise.
#
# On success @result receives the parsed Hash and every name in ATTRIBUTES
# is copied into the matching instance variable.
#
# Raises MarketBot::NotFoundError for HTTP 404, MarketBot::UnavailableError
# for HTTP 403 and MarketBot::ResponseError for any other failure.
def response_handler(response)
  unless response.success?
    codes = "code=#{response.code}, return_code=#{response.return_code}"

    error_class, message =
      case response.code
      when 404
        [MarketBot::NotFoundError, "Unable to find app in store: #{codes}"]
      when 403
        [MarketBot::UnavailableError, "Unavailable app (country restriction?): #{codes}"]
      else
        [MarketBot::ResponseError, "Unhandled response: #{codes}"]
      end

    raise error_class, message
  end

  @result = self.class.parse(response.body)

  ATTRIBUTES.each do |name|
    instance_variable_set("@#{name}", @result[name])
  end
end
186
+ end
187
+ end
188
+ end
data/lib/market_bot/play/app/constants.rb ADDED
@@ -0,0 +1,33 @@
1
module MarketBot
  module Play
    class App
      # Names of the per-app attributes. App exposes a reader for each one
      # and fills them from the parsed result after a successful update.
      # Frozen so the shared list cannot be modified at runtime.
      ATTRIBUTES = [
        :category,
        :category_url,
        :content_rating,
        :cover_image_url,
        :current_version,
        :description,
        :developer,
        :email,
        :full_screenshot_urls,
        :html,
        :installs,
        :more_from_developer,
        :price,
        :rating,
        :rating_distribution,
        :requires_android,
        :reviews,
        :screenshot_urls,
        :similar,
        :size,
        :title,
        :updated,
        :votes,
        :website_url,
        :whats_new
      ].freeze
    end
  end
end
data/lib/market_bot/play/chart.rb ADDED
@@ -0,0 +1,118 @@
1
+ module MarketBot
2
+ module Play
3
+ class Chart
4
# Constructor inputs plus the entries gathered by #update.
attr_reader :collection, :category, :country, :lang, :result
9
+
10
# Parses one chart page into an Array of Hashes, one per '.card' node.
#
# html - String containing the page markup.
# opts - Hash of options:
#        :lang - language code used when building :store_url; falls back
#                to MarketBot::Play::DEFAULT_LANG. The Hash is not modified.
#
# Each entry holds :stars, :title, :rank, :developer, :package and
# :store_url, plus :icon_url and :price when the card has an image or a
# price node. A price of 'Free' is reported as '0'.
#
# A card without a '.details .title' or '.details .subtitle' node is not
# nil-guarded and raises NoMethodError.
def self.parse(html, opts={})
  # Read the option instead of writing a default back into the caller's Hash.
  lang = opts[:lang] || MarketBot::Play::DEFAULT_LANG
  doc = Nokogiri::HTML(html)

  doc.css('.card').map do |card|
    entry = {}
    details = card.css('.details')

    icon = card.css('img').first
    entry[:icon_url] = MarketBot::Util.fix_content_url(icon.attributes['src'].value) if icon

    rating = card.css('.current-rating').first
    if rating
      # The star rating is encoded as a CSS width percentage; whitespace
      # after 'width:' is optional.
      width_percent = rating.attributes['style'].value[/width:\s*([0-9.]+)%/, 1].to_f
      entry[:stars] = (5 * width_percent / 100).round(1).to_s
    else
      entry[:stars] = nil
    end

    title = details.css('.title').first
    entry[:title] = title.attributes['title'].to_s
    # Everything from the first '.' onwards is dropped before to_i.
    entry[:rank] = title.text.gsub(/\..*/, '').to_i

    price = details.css('.buy span').first
    entry[:price] = price.text if price

    entry[:developer] = details.css('.subtitle').first.attributes['title'].to_s
    entry[:package] = title.attributes['href'].to_s
                           .gsub('/store/apps/details?id=', '')
                           .gsub(/&feature=.*$/, '')
    entry[:store_url] = "https://play.google.com/store/apps/details?id=#{entry[:package]}&hl=#{lang}"

    entry[:price] = '0' if entry[:price] == 'Free'

    entry
  end
end
52
+
53
# Stores the chart coordinates and per-instance settings.
#
# collection - collection name used in the chart URL.
# category   - optional category name; nil leaves the category segment out.
# opts       - Hash of options:
#              :request_opts - handed to MarketBot::Util.build_request_opts.
#              :lang         - defaults to MarketBot::Play::DEFAULT_LANG.
#              :country      - defaults to MarketBot::Play::DEFAULT_COUNTRY.
#              :max_pages    - defaults to MarketBot::Play::Chart::MAX_PAGES.
def initialize(collection, category=nil, opts={})
  @collection, @category = collection, category

  @request_opts = MarketBot::Util.build_request_opts(opts[:request_opts])

  @lang      = opts[:lang]      || MarketBot::Play::DEFAULT_LANG
  @country   = opts[:country]   || MarketBot::Play::DEFAULT_COUNTRY
  @max_pages = opts[:max_pages] || MarketBot::Play::Chart::MAX_PAGES
end
61
+
62
# Returns one chart URL per page (@max_pages of them), each asking for 100
# entries and starting where the previous page ends.
#
# Every URL is built by interpolation into a fresh String, so no string
# literal is mutated (appending to a literal fails once string literals
# are frozen).
#
# NOTE(review): a category that carries its own query string (for example
# 'FAMILY?age=AGE_RANGE1') is placed before '/collection/...' unchanged;
# confirm the resulting URL is what the store expects.
def store_urls
  page_size = 100
  category_segment = @category ? "/category/#{@category}" : ''

  (0...@max_pages).map do |page|
    "https://play.google.com/store/apps#{category_segment}" \
      "/collection/#{@collection}?" \
      "start=#{page * page_size}&" \
      "gl=#{@country}&" \
      "num=#{page_size}&" \
      "hl=#{@lang}"
  end
end
82
+
83
# Requests each URL from #store_urls in order and collects the parsed
# entries in @result. Stops at the first page that #response_handler
# rejects. Returns self for chaining.
#
# opts - accepted but not read by this method.
def update(opts={})
  @result = []

  store_urls.each do |page_url|
    request = Typhoeus::Request.new(page_url, @request_opts).tap(&:run)
    accepted = response_handler(request.response)

    break unless accepted
  end

  # Pages are stored as nested arrays while fetching; expose a flat list.
  @result.flatten!

  self
end
97
+
98
+ private
99
+
100
# Parses one chart page and appends it to @result when it continues the
# ranking gathered so far.
#
# Returns true when the page was appended. Returns false when the page has
# no entries, or when its first rank does not directly follow the last
# rank already stored; the caller stops paginating on false.
#
# Raises MarketBot::ResponseError when the response is not successful.
def response_handler(response)
  unless response.success?
    codes = "code=#{response.code}, return_code=#{response.return_code}"
    raise MarketBot::ResponseError, "Unhandled response: #{codes}"
  end

  page = self.class.parse(response.body, lang: @lang)

  # An empty page has no first entry to compare ranks with; treat it as the
  # end of the chart instead of failing on nil.
  return false if page.empty?

  continues_ranking = @result.empty? ||
                      @result.last.last[:rank] + 1 == page.first[:rank]
  @result << page if continues_ranking

  continues_ranking
end
116
+ end
117
+ end
118
+ end
data/lib/market_bot/play/chart/constants.rb ADDED
@@ -0,0 +1,74 @@
1
module MarketBot
  module Play
    class Chart
      # Default number of chart pages requested when no :max_pages option
      # is given.
      MAX_PAGES = 6

      # Known collection names. Each name is listed once (the original list
      # repeated topselling_free and topgrossing).
      COLLECTIONS = %w{
        topselling_free
        topselling_paid
        topgrossing
        movers_shakers
        topselling_new_free
        topselling_new_paid
      }.freeze

      # Known category names, in the form they are inserted into the chart
      # URL path.
      CATEGORIES = %w{
        ANDROID_WEAR
        BOOKS_AND_REFERENCE
        BUSINESS
        COMICS
        COMMUNICATION
        EDUCATION
        ENTERTAINMENT
        FINANCE
        HEALTH_AND_FITNESS
        LIBRARIES_AND_DEMO
        LIFESTYLE
        APP_WALLPAPER
        MEDIA_AND_VIDEO
        MEDICAL
        MUSIC_AND_AUDIO
        NEWS_AND_MAGAZINES
        PERSONALIZATION
        PHOTOGRAPHY
        PRODUCTIVITY
        SHOPPING
        SOCIAL
        SPORTS
        TOOLS
        TRANSPORTATION
        TRAVEL_AND_LOCAL
        WEATHER
        APP_WIDGETS
        GAME_ACTION
        GAME_ADVENTURE
        GAME_ARCADE
        GAME_BOARD
        GAME_CARD
        GAME_CASINO
        GAME_CASUAL
        GAME_EDUCATIONAL
        GAME_MUSIC
        GAME_PUZZLE
        GAME_RACING
        GAME_ROLE_PLAYING
        GAME_SIMULATION
        GAME_SPORTS
        GAME_STRATEGY
        GAME_TRIVIA
        GAME_WORD
        FAMILY?age=AGE_RANGE1
        FAMILY?age=AGE_RANGE2
        FAMILY?age=AGE_RANGE3
        FAMILY_ACTION
        FAMILY_BRAINGAMES
        FAMILY_CREATE
        FAMILY_EDUCATION
        FAMILY_MUSICVIDEO
        FAMILY_PRETEND
      }.freeze
    end
  end
end