app-reviews 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/app-reviews.gemspec +17 -0
- data/bin/app-reviews-crawler +5 -0
- data/lib/app-reviews/version.rb +3 -0
- data/lib/app-reviews-crawler.rb +55 -0
- data/lib/app-reviews.rb +2 -0
- data/lib/app_store_review_page.rb +62 -0
- data/lib/app_store_reviews.rb +45 -0
- data/lib/base_app_reviews.rb +30 -0
- data/lib/play_store_review_page.rb +51 -0
- data/lib/play_store_reviews.rb +38 -0
- data/lib/tstore_review_page.rb +40 -0
- data/lib/tstore_reviews.rb +37 -0
- metadata +63 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Mu-ik Jeon
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# AppReviews
|
2
|
+
|
3
|
+
A mobile app review crawler: it collects user reviews for an app from the Apple App Store, Google Play or T store.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'app-reviews'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install app-reviews
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
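Run the bundled crawler as `app-reviews-crawler appstore|play|tstore store_app_id from_date`, for example `app-reviews-crawler appstore 383844387 2012-03-26`.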
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/app-reviews.gemspec
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/app-reviews/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
  gem.name          = "app-reviews"
  gem.version       = AppReviews::VERSION
  gem.authors       = ["Mu-ik Jeon"]
  gem.email         = ["muikor@gmail.com"]
  gem.description   = %q{Mobile App Review Crawler}
  gem.summary       = %q{mobile app review crawler}
  gem.homepage      = ""
  # The bundled LICENSE file is the MIT license.
  gem.licenses      = ["MIT"]

  # File lists come from git, so the gem has to be built from a git checkout.
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  # Libraries that the files under lib/ require at runtime
  # (nokogiri, httpclient, active_support/core_ext).
  gem.add_runtime_dependency "nokogiri"
  gem.add_runtime_dependency "httpclient"
  gem.add_runtime_dependency "activesupport"
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "app-reviews/version"
|
3
|
+
require 'app_store_reviews'
|
4
|
+
require 'play_store_reviews'
|
5
|
+
require 'tstore_reviews'
|
6
|
+
require 'active_support/core_ext'
|
7
|
+
|
8
|
+
# Command-line front end: crawls the reviews of one app from one store,
# prints every review as YAML and finishes with a short summary.
class AppReviewsCrawler
  # Runs a crawl described by command-line style arguments.
  #
  # Prints the usage text instead of crawling when fewer than three arguments
  # are given, when the date cannot be parsed, or when the store is unknown.
  #
  # @param argv [Array<String>] store name, store app id and from-date
  #   (for example "2012-03-26"); additional entries are ignored
  # @return [nil]
  def execute(argv)
    return print_usage unless validate argv
    store, app_id, from_date_str = argv
    start_page = 1
    end_page = 10_000
    from_date = parse_from_date(from_date_str)
    return print_usage unless from_date
    puts "store: #{store}"
    puts "app_id: #{app_id}"
    puts "from_date: #{from_date}"
    puts "page: #{start_page} ~ #{end_page}"

    reviews = create_reviews(store, app_id)
    return print_usage unless reviews
    reviews.set_page start_page, end_page
    reviews.set_from_date from_date
    # Loaded here on purpose: YAML is only needed once reviews are printed.
    require 'yaml'
    reviews.each do |item|
      puts item.to_yaml
    end

    puts "Review Count: #{reviews.count}"
    # A crawl can legitimately find nothing; there is no last review then.
    last_review = reviews.last
    puts "Review Last Date: #{last_review[:date]}" if last_review
  end

  # Builds the review collection for the given store.
  #
  # @param store [String] 'appstore', 'play' or 'tstore'
  # @param app_id [String] identifier of the app inside that store
  # @return [Object, nil] the store specific collection, nil for an unknown store
  def create_reviews(store, app_id)
    case store
    when 'appstore'
      AppStoreReviews.new app_id
    when 'play'
      PlayStoreReviews.new app_id
    when 'tstore'
      TstoreReviews.new app_id
    end
  end

  # @param argv [Array<String>] raw arguments
  # @return [Boolean] true when at least three arguments are present
  def validate(argv)
    return false if argv.size < 3
    true
  end

  # Prints the usage text with one example per supported store.
  def print_usage
    puts "USAGE: app-reviews-crawler appstore|play|tstore store_app_id from_date"
    puts "example# app-reviews-crawler appstore 383844387 2012-03-26"
    puts "example# app-reviews-crawler play com.thinkreals.pocketstyle2 2012-03-26"
    puts "example# app-reviews-crawler tstore 0000033534 2012-03-26"
  end

  private

  # Parses the from-date argument.
  #
  # @param text [String] date as typed by the user
  # @return [Date, nil] the parsed date, nil when the text is not a date
  def parse_from_date(text)
    Date.parse(text)
  rescue ArgumentError
    nil
  end
end
|
data/lib/app-reviews.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'rexml/document'
|
3
|
+
include REXML
|
4
|
+
|
5
|
+
# One page of the App Store review XML; extracts the reviews and the number
# of the last available page.
class AppStoreReviewPage
  # CSS path of the nodes that hold one review each.
  REVIEW_PATH = "Document > View > ScrollView > VBoxView > View > MatrixView > VBoxView > VBoxView > VBoxView"
  # CSS path of the pagination text; its last word is read as the page count.
  LAST_PAGE_PATH = "MatrixView > VBoxView > VBoxView > HBoxView:nth-child(2) > TextView > SetFontStyle"

  # @param text [String] XML of the review page
  # @param page [Integer] page number, only used in error output
  def initialize(text, page)
    @doc = Nokogiri::XML(text)
    @page = page
  end

  # Yields every review of the page as a Hash with the keys
  # :title, :name, :text, :rating and :date.
  #
  # Iteration stops as soon as the block returns a falsy value. Nodes that
  # cannot be parsed are reported on stdout and skipped.
  #
  # @return [Object, nil] nil when the block stopped the iteration, otherwise
  #   the (truthy) result of iterating all nodes
  def items
    @doc.css(REVIEW_PATH).each do |link|
      begin
        review = parse link
        next if review.nil?
        break unless yield review
      rescue StandardError => e
        # StandardError only: Interrupt and SystemExit must still end the crawl.
        print_parse_error e, link
      end
    end
  end

  # Number of the last review page according to the pagination text.
  #
  # The previous version returned the result of `each` (the node collection)
  # instead of a number.
  #
  # @return [Integer] the page count, 0 when the page has no pagination text
  def last_page
    counts = @doc.css(LAST_PAGE_PATH).map do |link|
      link.content.split(' ').last.to_i
    end
    counts.max || 0
  end

  private

  # Turns one review node into a Hash.
  #
  # @param link [Object] node matched by REVIEW_PATH
  # @return [Hash, nil] the review, nil when the node carries no "- " separated
  #   date and therefore is presumably not a review
  def parse(link)
    node = link.css('TextView > SetFontStyle')
    date = node[2].content.gsub("\n", '').strip
    index = date.rindex('- ')
    return if index.nil?
    # Keep only what follows the last "- ".
    date = date[(index + 1)..-1].strip
    name = node.css('GotoURL > b').first.content.strip
    title = node[0].css('b').first.content
    text = node[3].content.strip
    rating = link.css('HBoxView > HBoxView > HBoxView').attr('alt').value

    {
      title: title,
      name: name,
      text: text,
      rating: rating,
      date: date,
    }
  end

  # Prints the error, its backtrace, the page number and the offending node.
  def print_parse_error(e, link)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts link
  end
end
|
62
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'base_app_reviews'
|
2
|
+
require 'app_store_review_page'
|
3
|
+
require 'httpclient'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
# Reviews of one app in the Apple App Store, collected from several
# storefronts. The first iteration downloads the reviews; later iterations
# replay the cached list.
class AppStoreReviews
  include BaseAppReviews

  # Yields every review (a Hash, see AppStoreReviewPage#items).
  def each
    unless @list.nil?
      return @list.each do |item|
        yield item
      end
    end

    @list = []
    # Values sent in the X-Apple-Store-Front header, one crawl per value.
    country_codes = [143441, 143466, 143463]
    country_codes.each do |country_code|
      get_reviews(country_code) do |review|
        @list << review
        yield review
      end
    end
  end

  private

  # Downloads the review pages of one storefront and yields each review.
  #
  # Stops at the first review older than @from_date, when a page ends the
  # iteration early, or after the last page reported by the store.
  #
  # @param country [Integer] storefront id for the X-Apple-Store-Front header
  # @return [false, Object] false when an older review ended the crawl
  def get_reviews(country)
    headers = {
      "User-Agent" => "iTunes-iPhone/2.2 (2)",
      "X-Apple-Store-Front" => "#{country}-1",
    }

    (@start_page..@end_page).each do |page|
      url = "http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewContentsUserReviews?sortOrdering=4&onlyLatestVersion=false&sortAscending=true&pageNumber=#{(page - 1)}&type=Purple+Software&id=#{@app_id}"

      # OpenURI::OpenRead#read opens the URL, reads it and closes the stream.
      # The former Kernel#open call left the stream open and no longer accepts
      # URLs on Ruby 3.
      xml = URI.parse(url).read(headers)
      review_page = AppStoreReviewPage.new xml, page
      break unless review_page.items do |item|
        # `return` leaves get_reviews as a whole, not only this block.
        return false if Date.parse(item[:date]) < @from_date
        yield item
        true
      end

      break if review_page.last_page < page + 1
    end
  end
end
|
45
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Behaviour shared by the store specific review collections: crawl settings
# (page range, oldest accepted date) and access to the cached review list.
#
# Including classes provide `each`, which is expected to fill @list.
module BaseAppReviews
  include Enumerable

  # @return [Object, nil] value stored with #set_last_date, nil when unset
  attr_reader :last_date

  # @param app_id [String] identifier of the app inside the store
  def initialize(app_id)
    @app_id = app_id
    # nil means "not crawled yet".
    @list = nil
  end

  # @param date [Object] value later returned by #last_date
  def set_last_date(date)
    @last_date = date
  end

  # @param start_page [Integer] first page to crawl
  # @param end_page [Integer] last page to crawl
  def set_page(start_page, end_page)
    @start_page = start_page
    @end_page = end_page
  end

  # @param date [Date] reviews older than this date end the crawl
  def set_from_date(date)
    @from_date = date
  end

  # Last review of the cached list.
  #
  # @return [Object, nil] nil when the list is empty or when nothing has been
  #   crawled yet (this used to raise NoMethodError)
  def last
    @list.last if @list
  end
end
|
30
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'rexml/document'
|
3
|
+
include REXML
|
4
|
+
|
5
|
+
# JSON() is used below; required here so the class also loads on its own.
require 'json'

# One page of the Google Play review response: a JSON document whose
# 'htmlContent' entry holds the reviews as HTML.
class PlayStoreReviewPage
  # @param text [String] raw response body
  # @param page [Integer] page number, only used in error output
  def initialize(text, page)
    # The first five characters are skipped before parsing.
    # NOTE(review): presumably a non-JSON guard prefix sent by the server - confirm.
    result = JSON(text[5..-1])
    # Close the <hr> tags so that the fragment is well-formed XML.
    html = result['htmlContent'].gsub('<hr>', '<hr />')
    @xml = REXML::Document.new("<body>#{html}</body>")
    @page = page
  end

  # Yields every review of the page as a Hash with the keys
  # :title, :text, :name, :rating and :date.
  #
  # Iteration stops as soon as the block returns a falsy value. Elements that
  # cannot be parsed are reported on stdout and skipped.
  #
  # @return [Object, nil] nil when the block stopped the iteration, otherwise
  #   the (truthy) result of iterating all elements
  def items
    @xml.root.elements.each do |item|
      # Elements without children (for example <hr />) are separators.
      next if item.elements.size < 1
      begin
        name_el = item.elements['span'].elements['strong']
        next unless name_el
        date = item.elements[2, 'span'].text.sub('님이', '').strip

        title = item.elements['div'].elements['h4'].text.strip
        # The review body is optional.
        body_el = item.elements['p']
        text = body_el ? body_el.text : nil
        text = text.strip if text
        name = name_el.text.strip
        rating = item.elements['div'].elements['div'].attribute('title').value.strip
        review = {
          title: title,
          text: text,
          name: name,
          rating: rating,
          date: date,
        }
        break unless yield review
      rescue StandardError => e
        # StandardError only: Interrupt and SystemExit must still end the crawl.
        print_error e, item
      end
    end
  end

  # Prints the error, its backtrace, the page number and the offending element.
  def print_error(e, item)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts item
  end
end
|
51
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'httpclient'
|
3
|
+
require 'play_store_review_page'
|
4
|
+
|
5
|
+
# Reviews of one app in Google Play. The first iteration downloads the
# reviews page by page; later iterations replay the cached list.
class PlayStoreReviews
  include BaseAppReviews

  # Yields every review (a Hash, see PlayStoreReviewPage#items) and stops at
  # the first review older than @from_date.
  def each
    return @list.each { |review| yield review } unless @list.nil?

    @list = []
    endpoint = "https://play.google.com/store/getreviews"

    (@start_page..@end_page).each do |page|
      # The service counts pages from zero.
      form = {
        id: @app_id,
        reviewSortOrder: 0,
        reviewType: 1,
        pageNum: page - 1,
      }
      response = HTTPClient.new.post_content(endpoint, form.to_query)
      review_page = PlayStoreReviewPage.new(response, page)

      keep_going = review_page.items do |item|
        # `return` leaves #each as a whole, not only this block.
        return false if Date.strptime(item[:date], '%Y년 %m월 %d일') < @from_date
        @list << item
        yield item
        true
      end
      break unless keep_going
    end
  end
end
|
38
|
+
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
include REXML
|
3
|
+
|
4
|
+
# One page of the T store review popup; extracts the reviews from the
# review table of the page.
class TstoreReviewPage
  # @param content [String] markup of the page
  # @param page [Integer] page number, only used in error output
  def initialize(content, page)
    # Remove markup the XML parser cannot handle: textarea elements and a
    # doubled quote after "gif".
    content = content.gsub(/<textarea[^<]+<\/textarea>/, '')
    content = content.gsub('gif""', 'gif"')
    xml = REXML::Document.new(content)
    @table = xml.root.elements['body'].elements[2, 'form'].elements[2, 'div'].elements['table']
    # Was never stored before, so error output always showed an empty page.
    @page = page
  end

  # Yields every review of the page as a Hash with the keys
  # :text, :name and :date.
  #
  # Iteration stops as soon as the block returns a falsy value. Rows without
  # a <td> are skipped; rows that cannot be parsed are reported on stdout.
  #
  # @return [Object, nil] nil when the block stopped the iteration, otherwise
  #   the (truthy) result of iterating all rows
  def items
    @table.elements.each do |item|
      begin
        td = item.elements['td']
        next unless td
        date = td.elements['p'].elements['span'].text.strip
        name = td.elements['p'].elements[2, 'strong'].text.strip
        # NOTE(review): the replaced character looked like a plain space in the
        # reviewed copy (a no-op); assumed to be a non-breaking space and
        # written as an escape - confirm.
        text = td.elements[2, 'p'].elements['span'].elements['div'].text.gsub("\u00A0", ' ').strip
        review = {
          text: text,
          name: name,
          date: date,
        }
        break unless yield review
      rescue StandardError => e
        # StandardError only: Interrupt and SystemExit must still end the crawl.
        print_error e, item
      end
    end
  end

  private

  # Prints the error, its backtrace, the page number and the offending row.
  def print_error(e, item)
    puts e
    puts e.backtrace
    puts "page: #{@page}"
    puts item
  end
end
|
40
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
require 'tstore_review_page'
|
3
|
+
|
4
|
+
# Reviews of one app in the T store. The first iteration downloads the
# reviews page by page; later iterations replay the cached list.
class TstoreReviews
  include BaseAppReviews

  # Yields every review (a Hash, see TstoreReviewPage#items) and stops at the
  # first review older than @from_date.
  def each
    return @list.each { |review| yield review } unless @list.nil?

    @list = []
    endpoint = "http://www.tstore.co.kr/userpoc/multi/popReply.omp"

    (@start_page..@end_page).each do |page|
      form = {
        prodId: @app_id,
        currentPage: page,
        flag: 'L',
        replyType: 0,
      }
      response = HTTPClient.new.post_content(endpoint, form.to_query)
      review_page = TstoreReviewPage.new(response, page)

      keep_going = review_page.items do |item|
        # `return` leaves #each as a whole, not only this block.
        return false if Date.strptime(item[:date], '%Y-%m-%d') < @from_date
        @list << item
        yield item
        true
      end
      break unless keep_going
    end
  end
end
|
37
|
+
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: app-reviews
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mu-ik Jeon
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-29 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: Mobile App Review Crawler
|
15
|
+
email:
|
16
|
+
- muikor@gmail.com
|
17
|
+
executables:
|
18
|
+
- app-reviews-crawler
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- .gitignore
|
23
|
+
- Gemfile
|
24
|
+
- LICENSE
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- app-reviews.gemspec
|
28
|
+
- bin/app-reviews-crawler
|
29
|
+
- lib/app-reviews-crawler.rb
|
30
|
+
- lib/app-reviews.rb
|
31
|
+
- lib/app-reviews/version.rb
|
32
|
+
- lib/app_store_review_page.rb
|
33
|
+
- lib/app_store_reviews.rb
|
34
|
+
- lib/base_app_reviews.rb
|
35
|
+
- lib/play_store_review_page.rb
|
36
|
+
- lib/play_store_reviews.rb
|
37
|
+
- lib/tstore_review_page.rb
|
38
|
+
- lib/tstore_reviews.rb
|
39
|
+
homepage: ''
|
40
|
+
licenses: []
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.8.19
|
60
|
+
signing_key:
|
61
|
+
specification_version: 3
|
62
|
+
summary: mobile app review crawler
|
63
|
+
test_files: []
|