app-reviews 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in app-reviews.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Mu-ik Jeon
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # AppReviews
2
+
3
+ Command-line crawler that collects user reviews of mobile apps from the Apple App Store, Google Play and T store.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'app-reviews'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install app-reviews
18
+
19
+ ## Usage
20
+
21
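+ Run `app-reviews-crawler appstore|play|tstore store_app_id from_date`, for example `app-reviews-crawler appstore 383844387 2012-03-26`.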
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/app-reviews/version', __FILE__)
3
+
4
# Gem specification for app-reviews.
#
# File lists are taken from `git ls-files`, so the gem has to be built from
# inside a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Mu-ik Jeon"]
  gem.email         = ["muikor@gmail.com"]
  gem.description   = %q{Mobile App Review Crawler}
  gem.summary       = %q{mobile app review crawler}
  gem.homepage      = ""

  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.name          = "app-reviews"
  gem.require_paths = ["lib"]
  gem.version       = AppReviews::VERSION

  # Libraries required at runtime by the files under lib/ (nokogiri,
  # httpclient and active_support/core_ext). Without these declarations
  # `gem install app-reviews` does not pull them in and the executable fails
  # on its first require.
  gem.add_dependency "nokogiri"
  gem.add_dependency "httpclient"
  gem.add_dependency "activesupport"
end
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point: hands the raw arguments to AppReviewsCrawler.
#
# lib/ is put on the load path first because the library files require each
# other by bare name (for example "app-reviews/version"), which only resolves
# when lib/ is searchable. This matters when the script is run from a checkout.
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require File.join(lib_dir, 'app-reviews-crawler')

AppReviewsCrawler.new.execute ARGV
@@ -0,0 +1,3 @@
1
# Namespace of the app-reviews gem.
module AppReviews
  # Version string read by the gemspec.
  VERSION = '0.0.1'
end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+ require "app-reviews/version"
3
+ require 'app_store_reviews'
4
+ require 'play_store_reviews'
5
+ require 'tstore_reviews'
6
+ require 'active_support/core_ext'
7
+
8
+ class AppReviewsCrawler
9
+ def execute(argv)
10
+ return print_usage unless validate argv
11
+ store, app_id, from_date_str = argv
12
+ start_page = 1
13
+ end_page = 10000
14
+ from_date = Date.parse(from_date_str)
15
+ puts "store: #{store}"
16
+ puts "app_id: #{app_id}"
17
+ puts "from_date: #{from_date}"
18
+ puts "page: #{start_page} ~ #{end_page}"
19
+
20
+ reviews = create_reviews(store, app_id)
21
+ return print_usage unless reviews
22
+ reviews.set_page start_page, end_page
23
+ reviews.set_from_date from_date
24
+ require 'yaml'
25
+ reviews.each do |item|
26
+ puts item.to_yaml
27
+ end
28
+
29
+ puts "Review Count: #{reviews.count}"
30
+ puts "Review Last Date: #{reviews.last[:date]}"
31
+ end
32
+
33
+ def create_reviews(store, app_id)
34
+ case store
35
+ when 'appstore'
36
+ AppStoreReviews.new app_id
37
+ when 'play'
38
+ PlayStoreReviews.new app_id
39
+ when 'tstore'
40
+ TstoreReviews.new app_id
41
+ end
42
+ end
43
+
44
+ def validate(argv)
45
+ return false if argv.size < 3
46
+ true
47
+ end
48
+
49
+ def print_usage
50
+ puts "USAGE: app-reviews-crawler appstore|play|tstore store_app_id from_date"
51
+ puts "example# app-reviews-crawler appstore 383844387 2012-03-26"
52
+ puts "example# app-reviews-crawler play com.thinkreals.pocketstyle2 2012-03-26"
53
+ puts "example# app-reviews-crawler tstore 0000033534 2012-03-26"
54
+ end
55
+ end
@@ -0,0 +1,2 @@
1
# Namespace of the app-reviews gem; this file adds nothing to it.
module AppReviews; end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
+ class AppStoreReviewPage
6
+ def initialize(text, page)
7
+ @doc = Nokogiri::XML(text)
8
+ @page = page
9
+ end
10
+
11
+ def items
12
+ path = "Document > View > ScrollView > VBoxView > View > MatrixView > VBoxView > VBoxView > VBoxView"
13
+ @doc.css(path).each do |link|
14
+ begin
15
+ review = parse link
16
+ next if review.nil?
17
+ break unless yield review
18
+ rescue Exception => e
19
+ print_parse_error e, link
20
+ end
21
+ end
22
+ end
23
+
24
+ def last_page
25
+ @doc.css("MatrixView > VBoxView > VBoxView > HBoxView:nth-child(2) > TextView > SetFontStyle").each do |link|
26
+ link.content.split(' ').last.to_i
27
+ end
28
+ end
29
+
30
+ private
31
+ def parse(link)
32
+ node = link.css('TextView > SetFontStyle')
33
+ date = node[2].content.gsub("\n", '').strip
34
+ index = date.rindex('- ')
35
+ return if index.nil?
36
+ index += 1
37
+ date = date[index..-1].strip
38
+ name_el = node.css('GotoURL > b')
39
+ title_el = node[0].css('b')
40
+ name = name_el.first.content.strip
41
+ title = title_el.first.content
42
+ text = node[3].content.strip
43
+ node = link.css('HBoxView > HBoxView > HBoxView')
44
+ rating = node.attr('alt').value
45
+
46
+ {
47
+ title: title,
48
+ name: name,
49
+ text: text,
50
+ rating: rating,
51
+ date: date,
52
+ }
53
+ end
54
+
55
+ def print_parse_error(e, link)
56
+ puts e
57
+ puts e.backtrace
58
+ puts "page: #{@page}"
59
+ puts link
60
+ end
61
+ end
62
+
@@ -0,0 +1,45 @@
1
+ require 'base_app_reviews'
2
+ require 'app_store_review_page'
3
+ require 'httpclient'
4
+ require 'open-uri'
5
+
6
+ class AppStoreReviews
7
+ include BaseAppReviews
8
+
9
+ def each
10
+ unless @list.nil?
11
+ return @list.each do |item|
12
+ yield item
13
+ end
14
+ end
15
+
16
+ @list = []
17
+ country_codes = [143441, 143466, 143463]
18
+ country_codes.each do |country_code|
19
+ get_reviews(country_code) do |review|
20
+ @list << review
21
+ yield review
22
+ end
23
+ end
24
+ end
25
+
26
+ private
27
+ def get_reviews(country)
28
+ (@start_page..@end_page).each do |page|
29
+ url = "http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewContentsUserReviews?sortOrdering=4&onlyLatestVersion=false&sortAscending=true&pageNumber=#{(page - 1)}&type=Purple+Software&id=#{@app_id}"
30
+
31
+ f = open(url, "User-Agent" => "iTunes-iPhone/2.2 (2)", "X-Apple-Store-Front" => "#{country}-1")
32
+ # File.new('review.xml', 'w').puts f.read
33
+ # exit
34
+ review_page = AppStoreReviewPage.new f.read, page
35
+ break unless review_page.items do |item|
36
+ return false if Date.parse(item[:date]) < @from_date
37
+ yield item
38
+ true
39
+ end
40
+
41
+ break if review_page.last_page < page + 1
42
+ end
43
+ end
44
+ end
45
+
@@ -0,0 +1,30 @@
1
+ module BaseAppReviews
2
+ include Enumerable
3
+
4
+ def initialize(app_id)
5
+ @app_id = app_id
6
+ @list = nil
7
+ end
8
+
9
+ def set_last_date(date)
10
+ @last_date = date
11
+ end
12
+
13
+ def last_date
14
+ @last_date
15
+ end
16
+
17
+ def set_page(start_page, end_page)
18
+ @start_page = start_page
19
+ @end_page = end_page
20
+ end
21
+
22
+ def set_from_date(date)
23
+ @from_date = date
24
+ end
25
+
26
+ def last
27
+ @list.last
28
+ end
29
+ end
30
+
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ require 'rexml/document'
3
+ include REXML
4
+
5
+ class PlayStoreReviewPage
6
+ def initialize(text, page)
7
+ result = JSON(text[5..-1])
8
+ html = result['htmlContent'].gsub('<hr>', '<hr />')
9
+ @xml = Document.new("<body>#{html}</body>")
10
+ @page = page
11
+ end
12
+
13
+ def items
14
+ @xml.root.elements.each do |item|
15
+ next if item.elements.size < 1
16
+ begin
17
+ name_el = item.elements['span'].elements['strong']
18
+ next unless name_el
19
+ date = item.elements[2, 'span'].text.sub('님이', '').strip
20
+
21
+ title = item.elements['div'].elements['h4'].text.strip
22
+ if item.elements['p']
23
+ text = item.elements['p'].text
24
+ else
25
+ text = nil
26
+ end
27
+ text = text.strip if text
28
+ name = name_el.text.strip
29
+ rating = item.elements['div'].elements['div'].attribute('title').value.strip
30
+ review = {
31
+ title: title,
32
+ text: text,
33
+ name: name,
34
+ rating: rating,
35
+ date: date,
36
+ }
37
+ break unless yield review
38
+ rescue Exception => e
39
+ print_error e, item
40
+ end
41
+ end
42
+ end
43
+
44
+ def print_error(e, item)
45
+ puts e
46
+ puts e.backtrace
47
+ puts "page: #{@page}"
48
+ puts item
49
+ end
50
+ end
51
+
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+ require 'httpclient'
3
+ require 'play_store_review_page'
4
+
5
+ class PlayStoreReviews
6
+ include BaseAppReviews
7
+
8
+ def each
9
+ unless @list.nil?
10
+ return @list.each do |item|
11
+ yield item
12
+ end
13
+ end
14
+
15
+ @list = []
16
+ url = "https://play.google.com/store/getreviews"
17
+ params = {
18
+ id: @app_id,
19
+ reviewSortOrder: 0,
20
+ reviewType: 1,
21
+ pageNum: 0,
22
+ }
23
+
24
+ (@start_page..@end_page).each do |page|
25
+ params[:pageNum] = page - 1
26
+ client = HTTPClient.new
27
+ content = client.post_content(url, params.to_query)
28
+ review_page = PlayStoreReviewPage.new content, page
29
+ break unless review_page.items do |item|
30
+ return false if Date.strptime(item[:date], '%Y년 %m월 %d일') < @from_date
31
+ @list << item
32
+ yield item
33
+ true
34
+ end
35
+ end
36
+ end
37
+ end
38
+
@@ -0,0 +1,40 @@
1
+ require 'rexml/document'
2
+ include REXML
3
+
4
+ class TstoreReviewPage
5
+ def initialize(content, page)
6
+ content = content.gsub(/<textarea[^<]+<\/textarea>/, '')
7
+ content = content.gsub('gif""', 'gif"')
8
+ xml = Document.new(content)
9
+ @table = xml.root.elements['body'].elements[2, 'form'].elements[2, 'div'].elements['table']
10
+ end
11
+
12
+ def items
13
+ @table.elements.each do |item|
14
+ begin
15
+ td = item.elements['td']
16
+ next unless td
17
+ date = td.elements['p'].elements['span'].text.strip
18
+ name = td.elements['p'].elements[2, 'strong'].text.strip
19
+ text = td.elements[2, 'p'].elements['span'].elements['div'].text.gsub('&nbsp;', ' ').strip
20
+ review = {
21
+ text: text,
22
+ name: name,
23
+ date: date,
24
+ }
25
+ break unless yield review
26
+ rescue Exception => e
27
+ print_error e, item
28
+ end
29
+ end
30
+ end
31
+
32
+ private
33
+ def print_error(e, item)
34
+ puts e
35
+ puts e.backtrace
36
+ puts "page: #{@page}"
37
+ puts item
38
+ end
39
+ end
40
+
@@ -0,0 +1,37 @@
1
+ require 'httpclient'
2
+ require 'tstore_review_page'
3
+
4
+ class TstoreReviews
5
+ include BaseAppReviews
6
+
7
+ def each
8
+ unless @list.nil?
9
+ return @list.each do |item|
10
+ yield item
11
+ end
12
+ end
13
+
14
+ @list = []
15
+ url = "http://www.tstore.co.kr/userpoc/multi/popReply.omp"
16
+ params = {
17
+ prodId: @app_id,
18
+ currentPage: 0,
19
+ flag: 'L',
20
+ replyType: 0,
21
+ }
22
+
23
+ (@start_page..@end_page).each do |page|
24
+ params[:currentPage] = page
25
+ client = HTTPClient.new
26
+ content = client.post_content(url, params.to_query)
27
+ review_page = TstoreReviewPage.new content, page
28
+ break unless review_page.items do |item|
29
+ return false if Date.strptime(item[:date], '%Y-%m-%d') < @from_date
30
+ @list << item
31
+ yield item
32
+ true
33
+ end
34
+ end
35
+ end
36
+ end
37
+
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: app-reviews
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mu-ik Jeon
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-29 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Mobile App Review Crawler
15
+ email:
16
+ - muikor@gmail.com
17
+ executables:
18
+ - app-reviews-crawler
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.md
26
+ - Rakefile
27
+ - app-reviews.gemspec
28
+ - bin/app-reviews-crawler
29
+ - lib/app-reviews-crawler.rb
30
+ - lib/app-reviews.rb
31
+ - lib/app-reviews/version.rb
32
+ - lib/app_store_review_page.rb
33
+ - lib/app_store_reviews.rb
34
+ - lib/base_app_reviews.rb
35
+ - lib/play_store_review_page.rb
36
+ - lib/play_store_reviews.rb
37
+ - lib/tstore_review_page.rb
38
+ - lib/tstore_reviews.rb
39
+ homepage: ''
40
+ licenses: []
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.19
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: mobile app review crawler
63
+ test_files: []