app-reviews 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in app-reviews.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Mu-ik Jeon
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # AppReviews
2
+
3
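+ A command-line crawler that collects user reviews of a mobile app from the Apple App Store (`appstore`), Google Play (`play`), or T store (`tstore`) and prints them as YAML.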
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'app-reviews'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install app-reviews
18
+
19
+ ## Usage
20
+
21
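+ Run `app-reviews-crawler appstore|play|tstore store_app_id from_date`, for example `app-reviews-crawler appstore 383844387 2012-03-26`.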
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,17 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for app-reviews. The version constant is loaded from
# lib/app-reviews/version.rb so that it is defined in exactly one place.
require File.expand_path('../lib/app-reviews/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "app-reviews"
  gem.version       = AppReviews::VERSION
  gem.authors       = ["Mu-ik Jeon"]
  gem.email         = ["muikor@gmail.com"]
  gem.description   = %q{Mobile App Review Crawler}
  gem.summary       = %q{mobile app review crawler}
  gem.homepage      = ""
  # The package ships an MIT LICENSE file; declare it so tooling can see it.
  gem.license       = "MIT"

  # File lists come from git, so the spec must be evaluated inside a checkout.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  # Libraries that the files under lib/ require at load time. Without these
  # declarations a fresh `gem install app-reviews` fails on the first require.
  gem.add_dependency "nokogiri"
  gem.add_dependency "httpclient"
  gem.add_dependency "activesupport"
end
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Command-line entry point: loads the crawler from the sibling lib directory
# and hands it the raw command-line arguments.

crawler_lib = File.join(File.dirname(__FILE__), '../lib/app-reviews-crawler')
require crawler_lib

crawler = AppReviewsCrawler.new
crawler.execute(ARGV)
@@ -0,0 +1,3 @@
1
# Namespace of the app-reviews gem.
module AppReviews
  # Release version; the gemspec reads this constant.
  VERSION = '0.0.1'
end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+ require "app-reviews/version"
3
+ require 'app_store_reviews'
4
+ require 'play_store_reviews'
5
+ require 'tstore_reviews'
6
+ require 'active_support/core_ext'
7
+
8
# Command-line driver: crawls the reviews of one app from one store, prints
# every review as YAML and finishes with a short summary.
class AppReviewsCrawler
  # Inclusive range of page numbers handed to the store crawler.
  START_PAGE = 1
  END_PAGE = 10000

  # Runs a crawl.
  #
  # argv - Array of Strings: store name, store-specific app id, from-date.
  #
  # Prints the usage text (and crawls nothing) when fewer than three arguments
  # are given, when the date cannot be parsed, or when the store is unknown.
  def execute(argv)
    return print_usage unless validate argv

    store, app_id, from_date_str = argv
    from_date = parse_date(from_date_str)
    return print_usage unless from_date

    puts "store: #{store}"
    puts "app_id: #{app_id}"
    puts "from_date: #{from_date}"
    puts "page: #{START_PAGE} ~ #{END_PAGE}"

    reviews = create_reviews(store, app_id)
    return print_usage unless reviews

    reviews.set_page START_PAGE, END_PAGE
    reviews.set_from_date from_date
    require 'yaml'
    reviews.each { |item| puts item.to_yaml }

    puts "Review Count: #{reviews.count}"
    # A crawl may legitimately collect nothing; only report a last date when
    # there is a last review to read it from.
    last_review = reviews.last
    puts "Review Last Date: #{last_review[:date]}" if last_review
  end

  # Builds the crawler object for the given store name.
  #
  # Returns nil when the store name is not one of appstore, play or tstore.
  def create_reviews(store, app_id)
    case store
    when 'appstore' then AppStoreReviews.new app_id
    when 'play'     then PlayStoreReviews.new app_id
    when 'tstore'   then TstoreReviews.new app_id
    end
  end

  # Returns true when at least the three required arguments are present.
  def validate(argv)
    argv.size >= 3
  end

  # Prints the command-line synopsis with one example per store.
  def print_usage
    puts "USAGE: app-reviews-crawler appstore|play|tstore store_app_id from_date"
    puts "example# app-reviews-crawler appstore 383844387 2012-03-26"
    puts "example# app-reviews-crawler play com.thinkreals.pocketstyle2 2012-03-26"
    puts "example# app-reviews-crawler tstore 0000033534 2012-03-26"
  end

  private

  # Parses the from-date argument; returns nil instead of raising when the
  # text is not a recognisable date so the caller can fall back to the usage.
  def parse_date(text)
    Date.parse(text)
  rescue ArgumentError
    nil
  end
end
@@ -0,0 +1,2 @@
1
# Top-level namespace of the gem; this file declares it without adding members.
module AppReviews; end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
# One page of App Store review markup, parsed with Nokogiri.
class AppStoreReviewPage
  # CSS path of the elements that each hold a single review.
  REVIEW_PATH = "Document > View > ScrollView > VBoxView > View > MatrixView > VBoxView > VBoxView > VBoxView"
  # CSS path of the element whose last word is read as the page count.
  PAGE_COUNT_PATH = "MatrixView > VBoxView > VBoxView > HBoxView:nth-child(2) > TextView > SetFontStyle"

  # text - String with the XML of the page.
  # page - page number, only used when reporting parse errors.
  def initialize(text, page)
    @doc = Nokogiri::XML(text)
    @page = page
  end

  # Yields every review on the page as a Hash with the keys :title, :name,
  # :text, :rating and :date. Iteration stops as soon as the block returns a
  # falsy value.
  #
  # Returns nil when the block stopped the iteration, otherwise the (truthy)
  # node collection; callers use this to decide whether to fetch more pages.
  def items
    @doc.css(REVIEW_PATH).each do |link|
      begin
        review = parse link
        next if review.nil?
        break unless yield review
      rescue StandardError => e
        # A malformed entry is reported and skipped. Only StandardError is
        # rescued so that Interrupt and SystemExit still stop the program.
        print_parse_error e, link
      end
    end
  end

  # Returns the page count as an Integer, read from the last word of the
  # first element matching PAGE_COUNT_PATH, or 0 when nothing matches.
  #
  # NOTE(review): the first match is used; confirm against real markup that
  # the selector does not match more than one element.
  def last_page
    label = @doc.css(PAGE_COUNT_PATH).first
    return 0 if label.nil?

    label.content.split(' ').last.to_i
  end

  private

  # Extracts one review Hash from a review element. Returns nil when the
  # third text node contains no "- " separator in front of the date.
  def parse(link)
    node = link.css('TextView > SetFontStyle')
    date = node[2].content.gsub("\n", '').strip
    index = date.rindex('- ')
    return if index.nil?

    # Keep what follows the last "- "; the leading blank is stripped.
    date = date[(index + 1)..-1].strip
    name = node.css('GotoURL > b').first.content.strip
    title = node[0].css('b').first.content
    text = node[3].content.strip
    rating = link.css('HBoxView > HBoxView > HBoxView').attr('alt').value

    {
      title: title,
      name: name,
      text: text,
      rating: rating,
      date: date,
    }
  end

  # Prints the error, its backtrace, the page number and the offending node.
  def print_parse_error(e, link)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts link
  end
end
62
+
@@ -0,0 +1,45 @@
1
+ require 'base_app_reviews'
2
+ require 'app_store_review_page'
3
+ require 'httpclient'
4
+ require 'open-uri'
5
+
6
# Collects App Store reviews of one app across several store fronts.
class AppStoreReviews
  include BaseAppReviews

  # Store-front identifiers; each is sent in the X-Apple-Store-Front header.
  COUNTRY_CODES = [143441, 143466, 143463].freeze

  # Yields every collected review Hash. The first call crawls the store and
  # remembers the reviews in @list; later calls replay the remembered list.
  def each
    unless @list.nil?
      return @list.each { |item| yield item }
    end

    @list = []
    COUNTRY_CODES.each do |country_code|
      get_reviews(country_code) do |review|
        @list << review
        yield review
      end
    end
  end

  private

  # Walks the pages of one store front and yields each review. Stops at the
  # first review dated before @from_date, when a page's iteration was cut
  # short, or after the last page reported by the store.
  def get_reviews(country)
    (@start_page..@end_page).each do |page|
      review_page = AppStoreReviewPage.new fetch_page(country, page), page
      completed = review_page.items do |item|
        # `return` leaves get_reviews altogether: the remaining reviews of
        # this store front are not visited.
        return false if Date.parse(item[:date]) < @from_date
        yield item
        true
      end
      break unless completed

      break if review_page.last_page < page + 1
    end
  end

  # Downloads one review page and returns its body as a String.
  def fetch_page(country, page)
    url = "http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewContentsUserReviews?sortOrdering=4&onlyLatestVersion=false&sortAscending=true&pageNumber=#{(page - 1)}&type=Purple+Software&id=#{@app_id}"
    headers = { "User-Agent" => "iTunes-iPhone/2.2 (2)", "X-Apple-Store-Front" => "#{country}-1" }

    # Kernel#open stopped accepting URLs in Ruby 3.0, so URI.open is used
    # where open-uri provides it. The block form closes the handle after the
    # body has been read.
    if URI.respond_to?(:open)
      URI.open(url, headers) { |response| response.read }
    else
      open(url, headers) { |response| response.read }
    end
  end
end
45
+
@@ -0,0 +1,30 @@
1
# Shared state and accessors for the store-specific review collections.
# Including classes must define #each; the Enumerable methods build on it.
module BaseAppReviews
  include Enumerable

  # Reads the date stored by set_last_date (nil until one has been set).
  attr_reader :last_date

  # app_id - store-specific identifier of the app whose reviews are wanted.
  def initialize(app_id)
    @app_id = app_id
    # Stays nil until an including class stores its collected reviews here.
    @list = nil
  end

  # Stores a date that can be read back through last_date.
  def set_last_date(date)
    @last_date = date
  end

  # Sets the inclusive range of page numbers to request.
  def set_page(start_page, end_page)
    @start_page = start_page
    @end_page = end_page
  end

  # Sets the earliest review date of interest.
  def set_from_date(date)
    @from_date = date
  end

  # Returns the last collected review, or nil when there is none.
  #
  # When nothing has been collected yet (@list is still nil) the reviews are
  # enumerated through #each first instead of failing on nil.
  def last
    (@list || to_a).last
  end
end
30
+
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
# One page of Google Play reviews, taken from the store's JSON response.
class PlayStoreReviewPage
  # text - raw response body; the first five characters are skipped before
  #        the remainder is parsed as JSON (presumably a guard prefix - TODO
  #        confirm). Assumes the json library is already loaded.
  # page - page number, only used when reporting parse errors.
  def initialize(text, page)
    result = JSON(text[5..-1])
    # <hr> is rewritten to the self-closing form so the fragment is
    # well-formed XML for REXML.
    html = result['htmlContent'].gsub('<hr>', '<hr />')
    @xml = REXML::Document.new("<body>#{html}</body>")
    @page = page
  end

  # Yields every review on the page as a Hash with the keys :title, :text,
  # :name, :rating and :date. Iteration stops as soon as the block returns a
  # falsy value, in which case nil is returned; otherwise the return value is
  # truthy. Callers use this to decide whether to fetch more pages.
  def items
    @xml.root.elements.each do |item|
      next if item.elements.size < 1
      begin
        name_el = item.elements['span'].elements['strong']
        next unless name_el

        date = item.elements[2, 'span'].text.sub('님이', '').strip
        title = item.elements['div'].elements['h4'].text.strip
        # The review body is optional.
        paragraph = item.elements['p']
        text = paragraph ? paragraph.text : nil
        text = text.strip if text
        name = name_el.text.strip
        rating = item.elements['div'].elements['div'].attribute('title').value.strip
        review = {
          title: title,
          text: text,
          name: name,
          rating: rating,
          date: date,
        }
        break unless yield review
      rescue StandardError => e
        # A malformed entry is reported and skipped. Only StandardError is
        # rescued so that Interrupt and SystemExit still stop the program.
        print_error e, item
      end
    end
  end

  # Prints the error, its backtrace, the page number and the offending node.
  def print_error(e, item)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts item
  end
end
51
+
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+ require 'httpclient'
3
+ require 'play_store_review_page'
4
+
5
# Collects Google Play reviews of one app by posting to the store's
# getreviews endpoint page by page.
class PlayStoreReviews
  include BaseAppReviews

  # Yields every collected review Hash. The first call crawls the store and
  # remembers the reviews in @list; later calls replay the remembered list.
  # Crawling ends at the first review dated before @from_date or at the first
  # page whose iteration was cut short.
  def each
    return @list.each { |cached| yield cached } unless @list.nil?

    @list = []
    endpoint = "https://play.google.com/store/getreviews"
    query = {
      id: @app_id,
      reviewSortOrder: 0,
      reviewType: 1,
      pageNum: 0,
    }

    (@start_page..@end_page).each do |page_number|
      # The endpoint counts pages from zero.
      query[:pageNum] = page_number - 1
      body = HTTPClient.new.post_content(endpoint, query.to_query)
      page_completed = PlayStoreReviewPage.new(body, page_number).items do |review|
        # `return` leaves #each altogether once a review is too old.
        return false if Date.strptime(review[:date], '%Y년 %m월 %d일') < @from_date
        @list << review
        yield review
        true
      end
      break unless page_completed
    end
  end
end
38
+
@@ -0,0 +1,40 @@
1
+ require 'rexml/document'
2
+ include REXML
3
+
4
# One page of T store reviews, parsed from the store's HTML with REXML.
class TstoreReviewPage
  # content - String with the HTML of the page.
  # page    - page number, only used when reporting parse errors.
  def initialize(content, page)
    # Textarea elements are dropped and a doubled quote after "gif" is
    # collapsed before the markup is handed to the XML parser.
    content = content.gsub(/<textarea[^<]+<\/textarea>/, '')
    content = content.gsub('gif""', 'gif"')
    xml = REXML::Document.new(content)
    @table = xml.root.elements['body'].elements[2, 'form'].elements[2, 'div'].elements['table']
    # Remembered so that print_error can say which page failed.
    @page = page
  end

  # Yields every review in the table as a Hash with the keys :text, :name and
  # :date. Rows without a td child are skipped. Iteration stops as soon as
  # the block returns a falsy value, in which case nil is returned; otherwise
  # the return value is truthy.
  def items
    @table.elements.each do |item|
      begin
        td = item.elements['td']
        next unless td

        heading = td.elements['p']
        date = heading.elements['span'].text.strip
        name = heading.elements[2, 'strong'].text.strip
        text = td.elements[2, 'p'].elements['span'].elements['div'].text.gsub('&nbsp;', ' ').strip
        review = {
          text: text,
          name: name,
          date: date,
        }
        break unless yield review
      rescue StandardError => e
        # A malformed row is reported and skipped. Only StandardError is
        # rescued so that Interrupt and SystemExit still stop the program.
        print_error e, item
      end
    end
  end

  private

  # Prints the error, its backtrace, the page number and the offending row.
  def print_error(e, item)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts item
  end
end
40
+
@@ -0,0 +1,37 @@
1
+ require 'httpclient'
2
+ require 'tstore_review_page'
3
+
4
# Collects T store reviews of one app by posting to the store's reply popup
# page by page.
class TstoreReviews
  include BaseAppReviews

  # Yields every collected review Hash. The first call crawls the store and
  # remembers the reviews in @list; later calls replay the remembered list.
  # Crawling ends at the first review dated before @from_date or at the first
  # page whose iteration was cut short.
  def each
    return @list.each { |cached| yield cached } unless @list.nil?

    @list = []
    endpoint = "http://www.tstore.co.kr/userpoc/multi/popReply.omp"
    query = {
      prodId: @app_id,
      currentPage: 0,
      flag: 'L',
      replyType: 0,
    }

    (@start_page..@end_page).each do |page_number|
      query[:currentPage] = page_number
      body = HTTPClient.new.post_content(endpoint, query.to_query)
      page_completed = TstoreReviewPage.new(body, page_number).items do |review|
        # `return` leaves #each altogether once a review is too old.
        return false if Date.strptime(review[:date], '%Y-%m-%d') < @from_date
        @list << review
        yield review
        true
      end
      break unless page_completed
    end
  end
end
37
+
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: app-reviews
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mu-ik Jeon
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-29 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Mobile App Review Crawler
15
+ email:
16
+ - muikor@gmail.com
17
+ executables:
18
+ - app-reviews-crawler
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.md
26
+ - Rakefile
27
+ - app-reviews.gemspec
28
+ - bin/app-reviews-crawler
29
+ - lib/app-reviews-crawler.rb
30
+ - lib/app-reviews.rb
31
+ - lib/app-reviews/version.rb
32
+ - lib/app_store_review_page.rb
33
+ - lib/app_store_reviews.rb
34
+ - lib/base_app_reviews.rb
35
+ - lib/play_store_review_page.rb
36
+ - lib/play_store_reviews.rb
37
+ - lib/tstore_review_page.rb
38
+ - lib/tstore_reviews.rb
39
+ homepage: ''
40
+ licenses: []
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.19
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: mobile app review crawler
63
+ test_files: []