app-reviews 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require File.join(File.dirname(__FILE__), '../lib/app-reviews-crawler')
3
+ require File.join(File.dirname(__FILE__), '../lib/app-reviews/crawler')
4
4
 
5
- AppReviewsCrawler.new.execute ARGV
5
+ AppReviews::Crawler.new.execute ARGV
@@ -1,2 +1,16 @@
1
+ require 'app-reviews/app_store/reviews'
2
+ require 'app-reviews/play_store/reviews'
3
+ require 'app-reviews/t_store/reviews'
4
+
1
5
  module AppReviews
6
+ def self.create(store, app_id)
7
+ case store
8
+ when 'appstore'
9
+ AppStore::Reviews.new app_id
10
+ when 'play'
11
+ PlayStore::Reviews.new app_id
12
+ when 'tstore'
13
+ TStore::Reviews.new app_id
14
+ end
15
+ end
2
16
  end
@@ -0,0 +1,66 @@
1
+ require 'nokogiri'
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
+ module AppReviews
6
+ module AppStore
7
+ class ReviewPage
8
+ def initialize(text, page)
9
+ @doc = Nokogiri::XML(text)
10
+ @page = page
11
+ end
12
+
13
+ def items
14
+ path = "Document > View > ScrollView > VBoxView > View > MatrixView > VBoxView > VBoxView > VBoxView"
15
+ @doc.css(path).each do |link|
16
+ begin
17
+ review = parse link
18
+ next if review.nil?
19
+ break unless yield review
20
+ rescue Exception => e
21
+ print_parse_error e, link
22
+ end
23
+ end
24
+ end
25
+
26
+ def last_page
27
+ @doc.css("MatrixView > VBoxView > VBoxView > HBoxView:nth-child(2) > TextView > SetFontStyle").each do |link|
28
+ link.content.split(' ').last.to_i
29
+ end
30
+ end
31
+
32
+ private
33
+ def parse(link)
34
+ node = link.css('TextView > SetFontStyle')
35
+ date = node[2].content.gsub("\n", '').strip
36
+ index = date.rindex('- ')
37
+ return if index.nil?
38
+ index += 1
39
+ date = date[index..-1].strip
40
+ name_el = node.css('GotoURL > b')
41
+ title_el = node[0].css('b')
42
+ name = name_el.first.content.strip
43
+ title = title_el.first.content
44
+ text = node[3].content.strip
45
+ node = link.css('HBoxView > HBoxView > HBoxView')
46
+ rating = node.attr('alt').value
47
+
48
+ {
49
+ title: title,
50
+ name: name,
51
+ text: text,
52
+ rating: rating,
53
+ date: date,
54
+ }
55
+ end
56
+
57
+ def print_parse_error(e, link)
58
+ puts e
59
+ puts e.backtrace
60
+ puts "page: #{@page}"
61
+ puts link
62
+ end
63
+ end
64
+ end
65
+ end
66
+
@@ -0,0 +1,49 @@
1
+ require 'app-reviews/base_reviews'
2
+ require 'app-reviews/app_store/review_page'
3
+ require 'httpclient'
4
+ require 'open-uri'
5
+
6
+ module AppReviews
7
+ module AppStore
8
+ class Reviews
9
+ include BaseReviews
10
+
11
+ def each
12
+ unless @list.nil?
13
+ return @list.each do |item|
14
+ yield item
15
+ end
16
+ end
17
+
18
+ @list = []
19
+ country_codes = [143441, 143466, 143463]
20
+ country_codes.each do |country_code|
21
+ get_reviews(country_code) do |review|
22
+ @list << review
23
+ yield review
24
+ end
25
+ end
26
+ end
27
+
28
+ private
29
+ def get_reviews(country)
30
+ (@start_page..@end_page).each do |page|
31
+ url = "http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewContentsUserReviews?sortOrdering=4&onlyLatestVersion=false&sortAscending=true&pageNumber=#{(page - 1)}&type=Purple+Software&id=#{@app_id}"
32
+
33
+ f = open(url, "User-Agent" => "iTunes-iPhone/2.2 (2)", "X-Apple-Store-Front" => "#{country}-1")
34
+ # File.new('review.xml', 'w').puts f.read
35
+ # exit
36
+ review_page = AppStore::ReviewPage.new f.read, page
37
+ break unless review_page.items do |item|
38
+ return false if Date.parse(item[:date]) < @from_date
39
+ yield item
40
+ true
41
+ end
42
+
43
+ break if review_page.last_page < page + 1
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+
@@ -0,0 +1,37 @@
1
+ module AppReviews
2
+ module BaseReviews
3
+ include Enumerable
4
+
5
+ def set_last_date(date)
6
+ @last_date = date
7
+ end
8
+
9
+ def last_date
10
+ @last_date
11
+ end
12
+
13
+ def set_page(start_page, end_page)
14
+ @start_page = start_page
15
+ @end_page = end_page
16
+ end
17
+
18
+ def set_from_date(date)
19
+ @from_date = date
20
+ end
21
+
22
+ def count
23
+ @list.count
24
+ end
25
+
26
+ def last
27
+ @list.last
28
+ end
29
+
30
+ private
31
+ def initialize(app_id)
32
+ @app_id = app_id
33
+ @list = nil
34
+ end
35
+ end
36
+ end
37
+
@@ -0,0 +1,47 @@
1
+ # encoding: utf-8
2
+ require "app-reviews/version"
3
+ require 'active_support/core_ext'
4
+ require 'app-reviews'
5
+ require 'yaml'
6
+
7
+ module AppReviews
8
+ class Crawler
9
+ def execute(argv)
10
+ return print_usage unless validate argv
11
+ store, app_id, from_date_str = argv
12
+ start_page = 1
13
+ end_page = 10000
14
+ from_date = Date.parse(from_date_str)
15
+ puts "store: #{store}"
16
+ puts "app_id: #{app_id}"
17
+ puts "from_date: #{from_date}"
18
+ puts "page: #{start_page} ~ #{end_page}"
19
+
20
+ reviews = AppReviews.create(store, app_id)
21
+ return print_usage unless reviews
22
+ reviews.set_page start_page, end_page
23
+ reviews.set_from_date from_date
24
+ reviews.each do |item|
25
+ puts item.to_yaml
26
+ end
27
+
28
+ puts "Review Count: #{reviews.count}"
29
+ puts "Review Last Date: #{reviews.last[:date]}"
30
+ end
31
+
32
+ def create_reviews(store, app_id)
33
+ end
34
+
35
+ def validate(argv)
36
+ return false if argv.size < 3
37
+ true
38
+ end
39
+
40
+ def print_usage
41
+ puts "USAGE: app-reviews-crawler appstore|play|tstore store_app_id from_date"
42
+ puts "example# app-reviews-crawler appstore 383844387 2012-03-26"
43
+ puts "example# app-reviews-crawler play com.thinkreals.pocketstyle2 2012-03-26"
44
+ puts "example# app-reviews-crawler tstore 0000033534 2012-03-26"
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
+ module AppReviews
6
+ module PlayStore
7
+ class ReviewPage
8
+ def initialize(text, page)
9
+ result = JSON(text[5..-1])
10
+ html = result['htmlContent'].gsub('<hr>', '<hr />')
11
+ @xml = Document.new("<body>#{html}</body>")
12
+ @page = page
13
+ end
14
+
15
+ def items
16
+ @xml.root.elements.each do |item|
17
+ next if item.elements.size < 1
18
+ begin
19
+ name_el = item.elements['span'].elements['strong']
20
+ next unless name_el
21
+ date = item.elements[2, 'span'].text.sub('님이', '').strip
22
+
23
+ title = item.elements['div'].elements['h4'].text.strip
24
+ if item.elements['p']
25
+ text = item.elements['p'].text
26
+ else
27
+ text = nil
28
+ end
29
+ text = text.strip if text
30
+ name = name_el.text.strip
31
+ rating = item.elements['div'].elements['div'].attribute('title').value.strip
32
+ review = {
33
+ title: title,
34
+ text: text,
35
+ name: name,
36
+ rating: rating,
37
+ date: date,
38
+ }
39
+ break unless yield review
40
+ rescue Exception => e
41
+ print_error e, item
42
+ end
43
+ end
44
+ end
45
+
46
+ def print_error(e, item)
47
+ puts e
48
+ puts e.backtrace
49
+ puts "page: #{@page}"
50
+ puts item
51
+ end
52
+ end
53
+ end
54
+ end
55
+
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require 'app-reviews/base_reviews'
3
+ require 'app-reviews/play_store/review_page'
4
+ require 'httpclient'
5
+
6
+ module AppReviews
7
+ module PlayStore
8
+ class Reviews
9
+ include BaseReviews
10
+
11
+ def each
12
+ unless @list.nil?
13
+ return @list.each do |item|
14
+ yield item
15
+ end
16
+ end
17
+
18
+ @list = []
19
+ url = "https://play.google.com/store/getreviews"
20
+ params = {
21
+ id: @app_id,
22
+ reviewSortOrder: 0,
23
+ reviewType: 1,
24
+ pageNum: 0,
25
+ }
26
+
27
+ (@start_page..@end_page).each do |page|
28
+ params[:pageNum] = page - 1
29
+ client = HTTPClient.new
30
+ content = client.post_content(url, params.to_query)
31
+ review_page = PlayStore::ReviewPage.new content, page
32
+ break unless review_page.items do |item|
33
+ return false if Date.strptime(item[:date], '%Y년 %m월 %d일') < @from_date
34
+ @list << item
35
+ yield item
36
+ true
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,44 @@
1
+ require 'rexml/document'
2
+ include REXML
3
+
4
+ module AppReviews
5
+ module TStore
6
+ class ReviewPage
7
+ def initialize(content, page)
8
+ content = content.gsub(/<textarea[^<]+<\/textarea>/, '')
9
+ content = content.gsub('gif""', 'gif"')
10
+ xml = Document.new(content)
11
+ @table = xml.root.elements['body'].elements[2, 'form'].elements[2, 'div'].elements['table']
12
+ end
13
+
14
+ def items
15
+ @table.elements.each do |item|
16
+ begin
17
+ td = item.elements['td']
18
+ next unless td
19
+ date = td.elements['p'].elements['span'].text.strip
20
+ name = td.elements['p'].elements[2, 'strong'].text.strip
21
+ text = td.elements[2, 'p'].elements['span'].elements['div'].text.gsub('&nbsp;', ' ').strip
22
+ review = {
23
+ text: text,
24
+ name: name,
25
+ date: date,
26
+ }
27
+ break unless yield review
28
+ rescue Exception => e
29
+ print_error e, item
30
+ end
31
+ end
32
+ end
33
+
34
+ private
35
+ def print_error(e, item)
36
+ puts e
37
+ puts e.backtrace
38
+ puts "page: #{@page}"
39
+ puts item
40
+ end
41
+ end
42
+ end
43
+ end
44
+
@@ -0,0 +1,41 @@
1
+ require 'app-reviews/base_reviews'
2
+ require 'app-reviews/t_store/review_page'
3
+ require 'httpclient'
4
+
5
+ module AppReviews
6
+ module TStore
7
+ class Reviews
8
+ include BaseReviews
9
+
10
+ def each
11
+ unless @list.nil?
12
+ return @list.each do |item|
13
+ yield item
14
+ end
15
+ end
16
+
17
+ @list = []
18
+ url = "http://www.tstore.co.kr/userpoc/multi/popReply.omp"
19
+ params = {
20
+ prodId: @app_id,
21
+ currentPage: 0,
22
+ flag: 'L',
23
+ replyType: 0,
24
+ }
25
+
26
+ (@start_page..@end_page).each do |page|
27
+ params[:currentPage] = page
28
+ client = HTTPClient.new
29
+ content = client.post_content(url, params.to_query)
30
+ review_page = TStore::ReviewPage.new content, page
31
+ break unless review_page.items do |item|
32
+ return false if Date.strptime(item[:date], '%Y-%m-%d') < @from_date
33
+ @list << item
34
+ yield item
35
+ true
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -1,3 +1,3 @@
1
1
  module AppReviews
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: app-reviews
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -26,16 +26,16 @@ files:
26
26
  - Rakefile
27
27
  - app-reviews.gemspec
28
28
  - bin/app-reviews-crawler
29
- - lib/app-reviews-crawler.rb
30
29
  - lib/app-reviews.rb
30
+ - lib/app-reviews/app_store/review_page.rb
31
+ - lib/app-reviews/app_store/reviews.rb
32
+ - lib/app-reviews/base_reviews.rb
33
+ - lib/app-reviews/crawler.rb
34
+ - lib/app-reviews/play_store/review_page.rb
35
+ - lib/app-reviews/play_store/reviews.rb
36
+ - lib/app-reviews/t_store/review_page.rb
37
+ - lib/app-reviews/t_store/reviews.rb
31
38
  - lib/app-reviews/version.rb
32
- - lib/app_store_review_page.rb
33
- - lib/app_store_reviews.rb
34
- - lib/base_app_reviews.rb
35
- - lib/play_store_review_page.rb
36
- - lib/play_store_reviews.rb
37
- - lib/tstore_review_page.rb
38
- - lib/tstore_reviews.rb
39
39
  homepage: ''
40
40
  licenses: []
41
41
  post_install_message:
@@ -1,55 +0,0 @@
1
- # encoding: utf-8
2
- require "app-reviews/version"
3
- require 'app_store_reviews'
4
- require 'play_store_reviews'
5
- require 'tstore_reviews'
6
- require 'active_support/core_ext'
7
-
8
- class AppReviewsCrawler
9
- def execute(argv)
10
- return print_usage unless validate argv
11
- store, app_id, from_date_str = argv
12
- start_page = 1
13
- end_page = 10000
14
- from_date = Date.parse(from_date_str)
15
- puts "store: #{store}"
16
- puts "app_id: #{app_id}"
17
- puts "from_date: #{from_date}"
18
- puts "page: #{start_page} ~ #{end_page}"
19
-
20
- reviews = create_reviews(store, app_id)
21
- return print_usage unless reviews
22
- reviews.set_page start_page, end_page
23
- reviews.set_from_date from_date
24
- require 'yaml'
25
- reviews.each do |item|
26
- puts item.to_yaml
27
- end
28
-
29
- puts "Review Count: #{reviews.count}"
30
- puts "Review Last Date: #{reviews.last[:date]}"
31
- end
32
-
33
- def create_reviews(store, app_id)
34
- case store
35
- when 'appstore'
36
- AppStoreReviews.new app_id
37
- when 'play'
38
- PlayStoreReviews.new app_id
39
- when 'tstore'
40
- TstoreReviews.new app_id
41
- end
42
- end
43
-
44
- def validate(argv)
45
- return false if argv.size < 3
46
- true
47
- end
48
-
49
- def print_usage
50
- puts "USAGE: app-reviews-crawler appstore|play|tstore store_app_id from_date"
51
- puts "example# app-reviews-crawler appstore 383844387 2012-03-26"
52
- puts "example# app-reviews-crawler play com.thinkreals.pocketstyle2 2012-03-26"
53
- puts "example# app-reviews-crawler tstore 0000033534 2012-03-26"
54
- end
55
- end
@@ -1,62 +0,0 @@
1
- require 'nokogiri'
2
- require 'rexml/document'
3
- include REXML
4
-
5
- class AppStoreReviewPage
6
- def initialize(text, page)
7
- @doc = Nokogiri::XML(text)
8
- @page = page
9
- end
10
-
11
- def items
12
- path = "Document > View > ScrollView > VBoxView > View > MatrixView > VBoxView > VBoxView > VBoxView"
13
- @doc.css(path).each do |link|
14
- begin
15
- review = parse link
16
- next if review.nil?
17
- break unless yield review
18
- rescue Exception => e
19
- print_parse_error e, link
20
- end
21
- end
22
- end
23
-
24
- def last_page
25
- @doc.css("MatrixView > VBoxView > VBoxView > HBoxView:nth-child(2) > TextView > SetFontStyle").each do |link|
26
- link.content.split(' ').last.to_i
27
- end
28
- end
29
-
30
- private
31
- def parse(link)
32
- node = link.css('TextView > SetFontStyle')
33
- date = node[2].content.gsub("\n", '').strip
34
- index = date.rindex('- ')
35
- return if index.nil?
36
- index += 1
37
- date = date[index..-1].strip
38
- name_el = node.css('GotoURL > b')
39
- title_el = node[0].css('b')
40
- name = name_el.first.content.strip
41
- title = title_el.first.content
42
- text = node[3].content.strip
43
- node = link.css('HBoxView > HBoxView > HBoxView')
44
- rating = node.attr('alt').value
45
-
46
- {
47
- title: title,
48
- name: name,
49
- text: text,
50
- rating: rating,
51
- date: date,
52
- }
53
- end
54
-
55
- def print_parse_error(e, link)
56
- puts e
57
- puts e.backtrace
58
- puts "page: #{@page}"
59
- puts link
60
- end
61
- end
62
-
@@ -1,45 +0,0 @@
1
- require 'base_app_reviews'
2
- require 'app_store_review_page'
3
- require 'httpclient'
4
- require 'open-uri'
5
-
6
- class AppStoreReviews
7
- include BaseAppReviews
8
-
9
- def each
10
- unless @list.nil?
11
- return @list.each do |item|
12
- yield item
13
- end
14
- end
15
-
16
- @list = []
17
- country_codes = [143441, 143466, 143463]
18
- country_codes.each do |country_code|
19
- get_reviews(country_code) do |review|
20
- @list << review
21
- yield review
22
- end
23
- end
24
- end
25
-
26
- private
27
- def get_reviews(country)
28
- (@start_page..@end_page).each do |page|
29
- url = "http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewContentsUserReviews?sortOrdering=4&onlyLatestVersion=false&sortAscending=true&pageNumber=#{(page - 1)}&type=Purple+Software&id=#{@app_id}"
30
-
31
- f = open(url, "User-Agent" => "iTunes-iPhone/2.2 (2)", "X-Apple-Store-Front" => "#{country}-1")
32
- # File.new('review.xml', 'w').puts f.read
33
- # exit
34
- review_page = AppStoreReviewPage.new f.read, page
35
- break unless review_page.items do |item|
36
- return false if Date.parse(item[:date]) < @from_date
37
- yield item
38
- true
39
- end
40
-
41
- break if review_page.last_page < page + 1
42
- end
43
- end
44
- end
45
-
@@ -1,30 +0,0 @@
1
- module BaseAppReviews
2
- include Enumerable
3
-
4
- def initialize(app_id)
5
- @app_id = app_id
6
- @list = nil
7
- end
8
-
9
- def set_last_date(date)
10
- @last_date = date
11
- end
12
-
13
- def last_date
14
- @last_date
15
- end
16
-
17
- def set_page(start_page, end_page)
18
- @start_page = start_page
19
- @end_page = end_page
20
- end
21
-
22
- def set_from_date(date)
23
- @from_date = date
24
- end
25
-
26
- def last
27
- @list.last
28
- end
29
- end
30
-
@@ -1,51 +0,0 @@
1
- # encoding: utf-8
2
- require 'rexml/document'
3
- include REXML
4
-
5
- class PlayStoreReviewPage
6
- def initialize(text, page)
7
- result = JSON(text[5..-1])
8
- html = result['htmlContent'].gsub('<hr>', '<hr />')
9
- @xml = Document.new("<body>#{html}</body>")
10
- @page = page
11
- end
12
-
13
- def items
14
- @xml.root.elements.each do |item|
15
- next if item.elements.size < 1
16
- begin
17
- name_el = item.elements['span'].elements['strong']
18
- next unless name_el
19
- date = item.elements[2, 'span'].text.sub('님이', '').strip
20
-
21
- title = item.elements['div'].elements['h4'].text.strip
22
- if item.elements['p']
23
- text = item.elements['p'].text
24
- else
25
- text = nil
26
- end
27
- text = text.strip if text
28
- name = name_el.text.strip
29
- rating = item.elements['div'].elements['div'].attribute('title').value.strip
30
- review = {
31
- title: title,
32
- text: text,
33
- name: name,
34
- rating: rating,
35
- date: date,
36
- }
37
- break unless yield review
38
- rescue Exception => e
39
- print_error e, item
40
- end
41
- end
42
- end
43
-
44
- def print_error(e, item)
45
- puts e
46
- puts e.backtrace
47
- puts "page: #{@page}"
48
- puts item
49
- end
50
- end
51
-
@@ -1,38 +0,0 @@
1
- # encoding: utf-8
2
- require 'httpclient'
3
- require 'play_store_review_page'
4
-
5
- class PlayStoreReviews
6
- include BaseAppReviews
7
-
8
- def each
9
- unless @list.nil?
10
- return @list.each do |item|
11
- yield item
12
- end
13
- end
14
-
15
- @list = []
16
- url = "https://play.google.com/store/getreviews"
17
- params = {
18
- id: @app_id,
19
- reviewSortOrder: 0,
20
- reviewType: 1,
21
- pageNum: 0,
22
- }
23
-
24
- (@start_page..@end_page).each do |page|
25
- params[:pageNum] = page - 1
26
- client = HTTPClient.new
27
- content = client.post_content(url, params.to_query)
28
- review_page = PlayStoreReviewPage.new content, page
29
- break unless review_page.items do |item|
30
- return false if Date.strptime(item[:date], '%Y년 %m월 %d일') < @from_date
31
- @list << item
32
- yield item
33
- true
34
- end
35
- end
36
- end
37
- end
38
-
@@ -1,40 +0,0 @@
1
- require 'rexml/document'
2
- include REXML
3
-
4
- class TstoreReviewPage
5
- def initialize(content, page)
6
- content = content.gsub(/<textarea[^<]+<\/textarea>/, '')
7
- content = content.gsub('gif""', 'gif"')
8
- xml = Document.new(content)
9
- @table = xml.root.elements['body'].elements[2, 'form'].elements[2, 'div'].elements['table']
10
- end
11
-
12
- def items
13
- @table.elements.each do |item|
14
- begin
15
- td = item.elements['td']
16
- next unless td
17
- date = td.elements['p'].elements['span'].text.strip
18
- name = td.elements['p'].elements[2, 'strong'].text.strip
19
- text = td.elements[2, 'p'].elements['span'].elements['div'].text.gsub('&nbsp;', ' ').strip
20
- review = {
21
- text: text,
22
- name: name,
23
- date: date,
24
- }
25
- break unless yield review
26
- rescue Exception => e
27
- print_error e, item
28
- end
29
- end
30
- end
31
-
32
- private
33
- def print_error(e, item)
34
- puts e
35
- puts e.backtrace
36
- puts "page: #{@page}"
37
- puts item
38
- end
39
- end
40
-
@@ -1,37 +0,0 @@
1
- require 'httpclient'
2
- require 'tstore_review_page'
3
-
4
- class TstoreReviews
5
- include BaseAppReviews
6
-
7
- def each
8
- unless @list.nil?
9
- return @list.each do |item|
10
- yield item
11
- end
12
- end
13
-
14
- @list = []
15
- url = "http://www.tstore.co.kr/userpoc/multi/popReply.omp"
16
- params = {
17
- prodId: @app_id,
18
- currentPage: 0,
19
- flag: 'L',
20
- replyType: 0,
21
- }
22
-
23
- (@start_page..@end_page).each do |page|
24
- params[:currentPage] = page
25
- client = HTTPClient.new
26
- content = client.post_content(url, params.to_query)
27
- review_page = TstoreReviewPage.new content, page
28
- break unless review_page.items do |item|
29
- return false if Date.strptime(item[:date], '%Y-%m-%d') < @from_date
30
- @list << item
31
- yield item
32
- true
33
- end
34
- end
35
- end
36
- end
37
-