googleplay_dev_scraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/ChangeLog +40 -0
- data/Gemfile +3 -0
- data/README.ja.md +172 -0
- data/README.md +135 -0
- data/Rakefile +7 -0
- data/bin/googleplay_dev_scraper +122 -0
- data/dot.googleplay_dev_scraper +22 -0
- data/googleplay_dev_scraper.gemspec +25 -0
- data/lib/googleplay_dev_scraper.rb +4 -0
- data/lib/googleplay_dev_scraper/scraper.rb +172 -0
- data/lib/googleplay_dev_scraper/scraper_base.rb +66 -0
- data/lib/googleplay_dev_scraper/scraper_config.rb +71 -0
- data/lib/googleplay_dev_scraper/version.rb +3 -0
- data/spec/scraper_config_spec.rb +66 -0
- data/spec/scraper_spec.rb +39 -0
- data/spec/spec_helper.rb +34 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b6aefe94f964073e9d527321688dc59f193ee6de
|
4
|
+
data.tar.gz: 816f696b633e74030b30d283db45e972cc064cf0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5450bde67ec8fe15a7574e7432f6153963da771f3ae6fdea3958709a488456313a35d8c42b4a8fee1c5ca92e54c98f9dda9c1e9097012926d33c858a35051ede
|
7
|
+
data.tar.gz: 6a82afc021d2a482db58286b4204d0234671512cf8a88d5e743eb425c2331dcd65cac04f9f0b830652453d88446cda2fe85cf70ec46e418cdd9287e834297fb7
|
data/.gitignore
ADDED
data/ChangeLog
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
2013/7/18 : Takuya Murakami
|
2
|
+
|
3
|
+
* ver 1.0.0
|
4
|
+
* gem name changed to 'googleplay_dev_scraper'
|
5
|
+
* use DateTime class for some api
|
6
|
+
* remove google checkout specific apis (payouts, order_detail, auto_deliver)
|
7
|
+
|
8
|
+
2013/6/21 : Takuya Murakami
|
9
|
+
|
10
|
+
* add 'wallet_orders' command
|
11
|
+
|
12
|
+
2013/3/30 : Takuya Murakami
|
13
|
+
|
14
|
+
* ver 0.2.2
|
15
|
+
* Fixed URL for sales report for v2 API.
|
16
|
+
* Refactoring
|
17
|
+
|
18
|
+
2013/3/30 : Philipp.Sandhaus@cewecolor.de
|
19
|
+
|
20
|
+
* Fixed URL for app statistic download
|
21
|
+
|
22
|
+
2012/12/13 : Takuya Murakami
|
23
|
+
|
24
|
+
* ver 0.2.1
|
25
|
+
* Use YAML for configuration file format.
|
26
|
+
* Refactoring
|
27
|
+
|
28
|
+
2012/12/13 : Takuya Murakami
|
29
|
+
|
30
|
+
* ver 0.1.3
|
31
|
+
* Change project name : 'Google Play Scraper'
|
32
|
+
* Support gem
|
33
|
+
|
34
|
+
2012/12/13 : Takuya Murakami
|
35
|
+
|
36
|
+
* Use Bundler to install mechanize
|
37
|
+
|
38
|
+
2012/07/10 : Takuya Murakami
|
39
|
+
|
40
|
+
* Add support to download application statistics
|
data/Gemfile
ADDED
data/README.ja.md
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
Android 開発者向け Google Play / Google Wallet Scraper
|
2
|
+
======================================================
|
3
|
+
|
4
|
+
はじめに
|
5
|
+
========
|
6
|
+
|
7
|
+
このツールは、Google Play デベロッパーコンソール、
|
8
|
+
および Google Wallet で提供される販売者向けの売上
|
9
|
+
レポートなどの CSV ファイルを自動でダウンロードするための
|
10
|
+
ツールです。
|
11
|
+
|
12
|
+
Google Play デベロッパーコンソールからは以下のものをダウンロードできます。
|
13
|
+
|
14
|
+
* 販売レポート
|
15
|
+
* 予想販売レポート
|
16
|
+
* アプリ統計情報
|
17
|
+
|
18
|
+
Google Wallet Merchant Center からは以下のものをダウンロードできます。
|
19
|
+
|
20
|
+
* オーダー一覧
|
21
|
+
|
22
|
+
売上の集計をするなり、経理システムにぶち込むなり、お好きにどうぞ。
|
23
|
+
|
24
|
+
|
25
|
+
必要システム
|
26
|
+
============
|
27
|
+
|
28
|
+
以下のものが必要です。
|
29
|
+
|
30
|
+
* Ruby 1.9.3以上
|
31
|
+
* RubyGems
|
32
|
+
|
33
|
+
以下のようにしてインストールします。
|
34
|
+
|
35
|
+
$ gem install googleplay_dev_scraper
|
36
|
+
|
37
|
+
|
38
|
+
設定
|
39
|
+
====
|
40
|
+
|
41
|
+
設定ファイルを ~/.googleplay_dev_scraper に YAML フォーマットで作成してください。
|
42
|
+
以下にサンプルを示します。
|
43
|
+
|
44
|
+
Google Play メールアドレスとパスワード、デベロッパIDを設定してください。
|
45
|
+
(素のパスワードを設定するのでアクセス権には注意)
|
46
|
+
|
47
|
+
デベロッパID は、developer console にログインした後の URL 末尾の
|
48
|
+
dev_acc=... の数字です。
|
49
|
+
|
50
|
+
```
|
51
|
+
# GooglePlay dev scraper config file sample (YAML format)
|
52
|
+
#
|
53
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
54
|
+
# ./.googleplay_dev_scraper.
|
55
|
+
#
|
56
|
+
# WARNING: This file contains password, be careful
|
57
|
+
# of file permission.
|
58
|
+
|
59
|
+
# Your E-mail address to login google play
|
60
|
+
email: foo@example.com
|
61
|
+
|
62
|
+
# Your password to login google play
|
63
|
+
password: "Your Password"
|
64
|
+
|
65
|
+
# Developer account ID
|
66
|
+
# You can find your developer account ID in the URL
|
67
|
+
# after 'dev_acc=...' when login the developer console.
|
68
|
+
dev_acc: "12345678901234567890"
|
69
|
+
|
70
|
+
# Proxy host and port number (if needed)
|
71
|
+
#proxy_host: proxy.example.com
|
72
|
+
#proxy_port: 8080
|
73
|
+
```
|
74
|
+
|
75
|
+
なお、設定値はコマンドラインで与えることもできます。詳細は
|
76
|
+
--help オプションで確認してください。
|
77
|
+
|
78
|
+
|
79
|
+
使い方
|
80
|
+
======
|
81
|
+
|
82
|
+
売上レポート取得
|
83
|
+
----------------
|
84
|
+
|
85
|
+
2011年10月の売上を取得する場合は以下のようにします。
|
86
|
+
結果は標準出力に出力されます。
|
87
|
+
|
88
|
+
$ googleplay_dev_scraper sales 2011 10
|
89
|
+
|
90
|
+
また推定売上レポートもダウンロードできます。
|
91
|
+
|
92
|
+
$ googleplay_dev_scraper estimated 2011 10
|
93
|
+
|
94
|
+
|
95
|
+
オーダー一覧取得
|
96
|
+
----------------
|
97
|
+
|
98
|
+
オーダーの一覧を取得します。
|
99
|
+
開始日と終了日を指定します。時刻は日本時間で指定。
|
100
|
+
|
101
|
+
$ googleplay_dev_scraper orders "2011-08-01 00:00:00" "2011-09-30 23:59:59"
|
102
|
+
|
103
|
+
|
104
|
+
アプリケーション統計情報取得
|
105
|
+
----------------------------
|
106
|
+
|
107
|
+
Developer Console の統計情報 CSV エクスポートと同じものを得ます。
|
108
|
+
対象となるアプリのパッケージ名と、開始日/終了日を指定してください。
|
109
|
+
|
110
|
+
$ googleplay_dev_scraper appstats your.package.name 20120101 20120630 > stat.zip
|
111
|
+
|
112
|
+
ZIP ファイルが標準出力に出力されるので、リダイレクトでファイルに
|
113
|
+
落としてください。
|
114
|
+
|
115
|
+
|
116
|
+
API の利用
|
117
|
+
==========
|
118
|
+
|
119
|
+
例:
|
120
|
+
|
121
|
+
```
|
122
|
+
require 'googleplay_dev_scraper'
|
123
|
+
|
124
|
+
scraper = GooglePlayDevScraper::Scraper.new
|
125
|
+
|
126
|
+
# set config (Note: config file is not read via API access)
|
127
|
+
scraper.config.email = "foo@example.com"
|
128
|
+
scraper.config.password = "YOUR_PASSWORD"
|
129
|
+
scraper.config.dev_acc = "1234567890"
|
130
|
+
|
131
|
+
# get sales report / estimated sales report
|
132
|
+
puts scraper.get_sales_report(2012, 11)
|
133
|
+
puts scraper.get_estimated_sales_report(2012, 12)
|
134
|
+
|
135
|
+
# get orders
|
136
|
+
puts scraper.get_order_list(DateTime.parse("2012-11-01"), DateTime.parse("2012-11-30"))
|
137
|
+
```
|
138
|
+
|
139
|
+
内部動作とか
|
140
|
+
============
|
141
|
+
|
142
|
+
Mechanize を使って Web サイトに自動アクセスし、フォームを叩いて
|
143
|
+
CSV を入手するだけです。
|
144
|
+
|
145
|
+
本体は scraper.rb です。ソース見れば何やってるかはわかると思います。
|
146
|
+
Rails アプリの中で使うとか、お好きにどうぞ。
|
147
|
+
|
148
|
+
|
149
|
+
ライセンス
|
150
|
+
==========
|
151
|
+
|
152
|
+
Public domain 扱いとします。
|
153
|
+
|
154
|
+
|
155
|
+
免責事項
|
156
|
+
========
|
157
|
+
|
158
|
+
* 無保証です。
|
159
|
+
* Google 側のサイトの作りが変わったら当然動作しなくなります。
|
160
|
+
* Google から怒られても責任は取りません。
|
161
|
+
* 動かなくても文句言わない。自分で直すように。
|
162
|
+
* 直したら修正を送るなり pull request するなりしてくれると嬉しい。
|
163
|
+
|
164
|
+
|
165
|
+
ひとりごと
|
166
|
+
==========
|
167
|
+
|
168
|
+
* Google さん、Android 向けの Google Wallet API (オーダー一覧とか)解放してくれるとすごく嬉しいのですが、、、
|
169
|
+
|
170
|
+
---
|
171
|
+
'13/7/18
|
172
|
+
Takuya Murakami, E-mail: tmurakam at tmurakam.org
|
data/README.md
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
Google Play / Google Wallet Scraper for Android Developers
|
2
|
+
==========================================================
|
3
|
+
|
4
|
+
Introduction
|
5
|
+
============
|
6
|
+
|
7
|
+
This tool is designed to download CSV report files from
|
8
|
+
Google Play developer console and google wallet.
|
9
|
+
|
10
|
+
It can download the following CSV files from the Google Play
|
11
|
+
developer console
|
12
|
+
|
13
|
+
* Sales report (monthly report)
|
14
|
+
* Estimated sales report
|
15
|
+
* Application statistics
|
16
|
+
|
17
|
+
It can download the following CSV files from Google Wallet.
|
18
|
+
|
19
|
+
* Order list (almost realtime)
|
20
|
+
|
21
|
+
You don't need any merchant key, because this tool scrapes
|
22
|
+
google play / wallet website.
|
23
|
+
|
24
|
+
Requirements/Installation
|
25
|
+
=========================
|
26
|
+
|
27
|
+
* Ruby >=1.9.3
|
28
|
+
* RubyGems
|
29
|
+
|
30
|
+
To install:
|
31
|
+
|
32
|
+
$ gem install googleplay_dev_scraper
|
33
|
+
|
34
|
+
Configuration
|
35
|
+
=============
|
36
|
+
|
37
|
+
Create configuration file at ~/.googleplay_dev_scraper,
|
38
|
+
or ./.googleplay_dev_scraper in YAML format.
|
39
|
+
|
40
|
+
```
|
41
|
+
# GooglePlay scraper config file sample (YAML format)
|
42
|
+
#
|
43
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
44
|
+
# ./.googleplay_dev_scraper.
|
45
|
+
#
|
46
|
+
# WARNING: This file contains password, be careful
|
47
|
+
# of file permission.
|
48
|
+
|
49
|
+
# Your E-mail address to login google play
|
50
|
+
email: foo@example.com
|
51
|
+
|
52
|
+
# Your password to login google play
|
53
|
+
password: "Your Password"
|
54
|
+
|
55
|
+
# Developer account ID
|
56
|
+
# You can find your developer account ID in the URL
|
57
|
+
# after 'dev_acc=...' when login the developer console.
|
58
|
+
dev_acc: "12345678901234567890"
|
59
|
+
|
60
|
+
# Proxy host and port number (if needed)
|
61
|
+
#proxy_host: proxy.example.com
|
62
|
+
#proxy_port: 8080
|
63
|
+
```
|
64
|
+
|
65
|
+
You can specify configuration parameters with command line
|
66
|
+
options. See details with --help option.
|
67
|
+
|
68
|
+
How to use
|
69
|
+
==========
|
70
|
+
|
71
|
+
Get sales report
|
72
|
+
----------------
|
73
|
+
|
74
|
+
To download sales report for October 2011:
|
75
|
+
|
76
|
+
$ googleplay_dev_scraper sales 2011 10
|
77
|
+
|
78
|
+
Or you can download estimated report too:
|
79
|
+
|
80
|
+
$ googleplay_dev_scraper estimated 2011 10
|
81
|
+
|
82
|
+
Get order report
|
83
|
+
----------------
|
84
|
+
|
85
|
+
To download order report, specify start and end time as:
|
86
|
+
|
87
|
+
$ googleplay_dev_scraper orders "2011-08-01 00:00:00" "2011-09-30 23:59:59"
|
88
|
+
|
89
|
+
Get application statistics
|
90
|
+
--------------------------
|
91
|
+
|
92
|
+
Export application statistics in CSV format.
|
93
|
+
Specify application package name and start/end date.
|
94
|
+
|
95
|
+
$ googleplay_dev_scraper appstats your.package.name 20120101 20120630 > stat.zip
|
96
|
+
|
97
|
+
Note: You must redirect output to zip file!
|
98
|
+
|
99
|
+
API usage
|
100
|
+
=========
|
101
|
+
|
102
|
+
Example:
|
103
|
+
|
104
|
+
```
|
105
|
+
require 'googleplay_dev_scraper'
|
106
|
+
|
107
|
+
scraper = GooglePlayDevScraper::Scraper.new
|
108
|
+
|
109
|
+
# set config (Note: config file is not read via API access)
|
110
|
+
scraper.config.email = "foo@example.com"
|
111
|
+
scraper.config.password = "YOUR_PASSWORD"
|
112
|
+
scraper.config.dev_acc = "1234567890"
|
113
|
+
|
114
|
+
# get sales report / estimated sales report
|
115
|
+
puts scraper.get_sales_report(2012, 11)
|
116
|
+
puts scraper.get_estimated_sales_report(2012, 12)
|
117
|
+
|
118
|
+
# get orders
|
119
|
+
puts scraper.get_order_list(DateTime.parse("2012-11-01 00:00:00"), DateTime.parse("2012-11-30T23:59:59"))
|
120
|
+
```
|
121
|
+
|
122
|
+
License
|
123
|
+
=======
|
124
|
+
|
125
|
+
Public domain
|
126
|
+
|
127
|
+
|
128
|
+
Disclaimer
|
129
|
+
==========
|
130
|
+
|
131
|
+
* NO WARRANTY
|
132
|
+
|
133
|
+
---
|
134
|
+
'13/7/18
|
135
|
+
Takuya Murakami, E-mail: tmurakam at tmurakam.org
|
data/Rakefile
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# design time...
|
5
|
+
lib = File.expand_path('../lib', File.dirname(__FILE__))
|
6
|
+
$LOAD_PATH.unshift(lib)
|
7
|
+
|
8
|
+
require 'optparse'
|
9
|
+
require 'googleplay_dev_scraper'
|
10
|
+
|
11
|
+
Version = GooglePlayDevScraper::VERSION
|
12
|
+
|
13
|
+
module GooglePlayDevScraper
  #
  # Command line front end: loads the configuration, parses the command
  # line options and dispatches the requested sub-command to a Scraper.
  #
  class Tool
    def initialize
      @scraper = Scraper.new

      @show_details = false
      @auto_archive = false
    end

    # Entry point.
    #
    # Loads the configuration, parses the options (removing them from ARGV)
    # and runs the command named by the first remaining argument. A missing
    # or unknown command prints the usage text and exits with status 1.
    def main
      @scraper.config.load_config
      parse_arguments

      usage if ARGV.empty?

      case ARGV.shift
      when "sales"         then sales
      when "estimated"     then estimated
      when "orders"        then orders
      when "appstats"      then appstats
      when "wallet_orders" then wallet_orders
      else usage
      end
    end

    # Print the usage text to STDERR and terminate with exit status 1.
    def usage
      STDERR.puts "Usage: #{$0} [options] command [arguments...]"
      STDERR.puts " #{$0} --help"
      STDERR.puts " Commands:"
      STDERR.puts " sales <year> <month> Get monthly sales report"
      STDERR.puts " estimated <year> <month> Get estimated sales report"
      STDERR.puts " orders <start_date> <end_date> Get order list"
      STDERR.puts " appstats <package_name> <startDay> <endDay> Get app stats (in zip file)"
      STDERR.puts " wallet_orders Get order list from the wallet page"
      exit 1
    end

    # Parse the command line options in ARGV (destructively) and store the
    # given values in the scraper configuration.
    def parse_arguments
      config = @scraper.config

      OptionParser.new do |opt|
        opt.on('-u email', 'set email address') { |v| config.email = v }
        opt.on('-p password', 'set password') { |v| config.password = v }
        opt.on('-a dev_acc', 'set dev_acc') { |v| config.dev_acc = v }

        opt.on('-P host:port', 'Set HTTP proxy/port') do |v|
          config.proxy_host, config.proxy_port = v.split(':')
        end
        opt.on('-v', '--version', 'Show version') do
          puts Version
          exit 0
        end
        opt.parse!(ARGV)
      end
    end

    # sales <year> <month> : print the monthly sales report
    def sales
      year, month = year_and_month

      puts @scraper.get_sales_report(year, month)
    end

    # estimated <year> <month> : print the estimated sales report
    def estimated
      year, month = year_and_month

      puts @scraper.get_estimated_sales_report(year, month)
    end

    # orders <start_date> <end_date> : print the order list
    def orders
      usage if ARGV.size < 2

      start_time, end_time = ARGV.first(2).map { |arg| DateTime.parse(arg) }

      puts @scraper.get_order_list(start_time, end_time)
    end

    # wallet_orders : print the orders listed on the wallet page
    def wallet_orders
      puts @scraper.get_wallet_orders
    end

    # appstats <package_name> <startDay> <endDay> : write the statistics zip
    def appstats
      usage if ARGV.size < 3

      package, start_day, end_day = ARGV

      # The result is binary zip data: write the raw bytes. `puts` could
      # append a newline and is not binary-safe on every platform.
      $stdout.binmode
      $stdout.write(@scraper.get_appstats(package, start_day, end_day))
    end

    private

    # Read <year> <month> from ARGV as base-10 integers.
    #
    # Parsing explicitly in base 10 accepts zero-padded values such as "08",
    # which would otherwise be rejected as invalid octal when the raw string
    # is formatted with %d. Missing or non-numeric arguments print the usage
    # text and exit.
    def year_and_month
      usage if ARGV.size < 2

      begin
        ARGV.first(2).map { |arg| Integer(arg, 10) }
      rescue ArgumentError
        usage
      end
    end
  end
end
|
119
|
+
|
120
|
+
tool = GooglePlayDevScraper::Tool.new
|
121
|
+
tool.main
|
122
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# GooglePlay scraper config file sample (YAML format)
|
2
|
+
#
|
3
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
4
|
+
# ./.googleplay_dev_scraper.
|
5
|
+
#
|
6
|
+
# WARNING: This file contains password, be careful
|
7
|
+
# of file permission.
|
8
|
+
|
9
|
+
# Your E-mail address to login google play
|
10
|
+
email: foo@example.com
|
11
|
+
|
12
|
+
# Your password to login google play
|
13
|
+
password: "Your Password"
|
14
|
+
|
15
|
+
# Developer account ID
|
16
|
+
# You can find your developer account ID in the URL
|
17
|
+
# after 'dev_acc=...' when login the developer console.
|
18
|
+
dev_acc: "12345678901234567890"
|
19
|
+
|
20
|
+
# Proxy host and port number (if needed)
|
21
|
+
#proxy_host: proxy.example.com
|
22
|
+
#proxy_port: 8080
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'googleplay_dev_scraper/version'
|
5
|
+
|
6
|
+
# Gem specification for googleplay_dev_scraper.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "googleplay_dev_scraper"
  spec.version  = GooglePlayDevScraper::VERSION
  spec.authors  = ["Takuya Murakami"]
  spec.email    = ["tmurakam@tmurakam.org"]
  spec.homepage = "https://github.com/tmurakam/googleplay_dev_scraper"

  # Descriptions
  spec.summary     = "Scraper for Google Play developer console and Google wallet"
  spec.description = "Scraping and download CSV data from Google Play developer console and Google Wallet."

  # Packaged files: everything tracked by git; executables are the files
  # under bin/, test files are the ones under test/, spec/ or features/.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependency
  spec.add_dependency('mechanize', '>= 2.5.0')

  # Development dependencies
  %w[rspec rake rdoc].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
require 'date'
|
12
|
+
|
13
|
+
module GooglePlayDevScraper
  #
  # Scraper for the Google Play developer console and the Google Wallet
  # merchant pages. Page fetching (and login) is delegated to
  # ScraperBase#try_get.
  #
  class Scraper < ScraperBase
    # Body of the page most recently fetched by the agent, tagged (in place)
    # as UTF-8.
    def body_string
      @agent.page.body.force_encoding("UTF-8")
    end

    # Get sales report (report_type = payout_report)
    # [year]
    #   Year (ex. 2012), Integer or decimal String
    # [month]
    #   Month (1 - 12), Integer or decimal String ("08" is accepted)
    # [Return]
    #   CSV string
    #
    def get_sales_report(year, month)
      try_get(sales_report_url('payout_report', year, month))

      body_string
    end

    # Get estimated sales report (report_type = sales_report)
    #
    # [year]
    #   Year (ex. 2012), Integer or decimal String
    # [month]
    #   Month (1 - 12), Integer or decimal String ("08" is accepted)
    # [Return]
    #   CSV string
    #
    def get_estimated_sales_report(year, month)
      try_get(sales_report_url('sales_report', year, month))

      body_string
    end

    # Get order list
    #
    # [start_time]
    #   start time (DateTime)
    # [end_time]
    #   end time (DateTime)
    # [Return]
    #   CSV string, or nil when the merchant page URL does not contain the
    #   bcid-/oid-/cid- path segments needed to build the download URL
    def get_order_list(start_time, end_time)
      # unix time in ms
      start_ut = start_time.to_time.to_i * 1000
      end_ut = end_time.to_time.to_i * 1000

      try_get("https://wallet.google.com/merchant/pages/")
      ids = %r{(bcid-[^/]+)/(oid-[^/]+)/(cid-[^/]+)/}.match(@agent.page.uri.path)
      return nil unless ids

      bcid, oid, cid = ids.captures

      # You can check the URL with your browser.
      # (download csv file, and check download history with chrome/firefox)
      try_get("https://wallet.google.com/merchant/pages/" \
              "#{bcid}/#{oid}/#{cid}" \
              "/purchaseorderdownload?startTime=#{start_ut}" \
              "&endTime=#{end_ut}")
      body_string
    end

    # Get application statistics CSV in zip
    #
    # [package]
    #   package name
    # [start_day]
    #   start date (yyyyMMdd)
    # [end_day]
    #   end date (yyyyMMdd)
    # [Return]
    #   application statistics zip data (raw page body)
    #
    def get_appstats(package, start_day, end_day)
      # NOTE: the value also carries the "met" (metrics) query parameter.
      dim = "overall,country,language,os_version,device,app_version,carrier&met=active_device_installs,daily_device_installs,daily_device_uninstalls,daily_device_upgrades,active_user_installs,total_user_installs,daily_user_installs,daily_user_uninstalls,daily_avg_rating,total_avg_rating"
      url = "https://play.google.com/apps/publish/v2/statistics/download" \
            "?package=#{package}" \
            "&sd=#{start_day}&ed=#{end_day}" \
            "&dim=#{dim}"

      STDERR.puts "URL = #{url}"
      try_get(url)
      @agent.page.body
    end

    # dump CSV (util)
    #
    # Treats the first row as the header row and prints every other row to
    # standard output as "header : value" lines followed by a blank line.
    def dump_csv(csv_string)
      headers = nil
      CSV.parse(csv_string) do |row|
        if headers.nil?
          headers = row
          next
        end

        row.each_with_index do |column, i|
          puts "#{headers[i]} : #{column}"
        end
        puts
      end
    end

    #
    # Get order list from wallet html page
    #
    # [Return]
    #   CSV string, one line per order row:
    #   order id, date, description, status, total.
    #   Fields are CSV-quoted when needed, so values containing commas
    #   (e.g. totals with thousands separators) stay in a single column.
    #
    def get_wallet_orders
      try_get("https://wallet.google.com/merchant/pages/")
      doc = Nokogiri::HTML(body_string)

      lines = doc.xpath("//tr[@class='orderRow']").map do |order_row|
        date = desc = total = status = nil

        order_row.children.each do |cell|
          case cell['class']
          when /wallet-date-column/
            date = cell.content
          when /wallet-description-column/
            desc = cell.content
          when /wallet-total-column/
            total = cell.content
          when /wallet-status-column/
            # the status text is the title attribute of the first child
            icon = cell.children.first
            status = icon['title'] unless icon.nil?
          end
        end

        CSV.generate_line([order_row['id'], date, desc, status, total])
      end

      lines.join
    end

    private

    # Build the sales report download URL for the given report type.
    def sales_report_url(report_type, year, month)
      sprintf('https://play.google.com/apps/publish/v2/salesreport/download?report_date=%04d_%02d&report_type=%s&dev_acc=%s',
              decimal(year), decimal(month), report_type, @config.dev_acc)
    end

    # Convert String arguments to Integer in base 10, so that zero-padded
    # values such as "08" are not rejected as invalid octal by sprintf's %d.
    # Non-String values are passed through unchanged.
    def decimal(value)
      value.is_a?(String) ? Integer(value, 10) : value
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
module GooglePlayDevScraper
  #
  # Common base of the scrapers: holds the Mechanize agent and the
  # configuration, and logs in to the Google account when a fetched page
  # redirects to the login form.
  #
  class ScraperBase
    attr_accessor :agent, :config

    def initialize
      @config = ScraperConfig.new
      @agent = nil
    end

    # Create the Mechanize agent if there is none yet, then apply the proxy
    # settings when a non-empty proxy host is configured.
    def setup
      #Mechanize.log = Logger.new("mechanize.log")
      #Mechanize.log.level = Logger::INFO

      @agent ||= Mechanize.new

      proxy_host = @config.proxy_host
      return unless proxy_host && proxy_host.length >= 1

      @agent.set_proxy(proxy_host, @config.proxy_port)
    end

    # Fetch +url+ with the agent, logging in first when needed.
    #
    # If the request lands on the Google ServiceLogin page, the login form
    # is filled with the configured email/password and submitted, then
    # +url+ is requested again.
    #
    # Raises RuntimeError when the login form is missing or when the agent
    # is still on accounts.google.com after submitting it.
    def try_get(url)
      setup unless @agent

      # try to get
      @agent.get(url)

      # login needed?
      landed = @agent.page.uri
      at_login_page = landed.host == "accounts.google.com" && landed.path == "/ServiceLogin"
      # already login-ed
      return unless at_login_page

      # do login
      login_form = @agent.page.forms.find { |candidate| candidate.form_node['id'] == "gaia_loginform" }
      raise 'No login form' unless login_form

      login_form.field_with(:name => "Email").value = @config.email
      login_form.field_with(:name => "Passwd").value = @config.password
      login_form.click_button

      if @agent.page.uri.host == "accounts.google.com"
        STDERR.puts "login failed? : uri = " + @agent.page.uri.to_s
        raise 'Google login failed'
      end

      # retry get
      @agent.get(url)
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay dev Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
module GooglePlayDevScraper
  #
  # Configurations
  #
  # Values can be assigned directly through the accessors or read from YAML
  # configuration files. Values read from YAML never replace a value that
  # is already set.
  #
  class ScraperConfig
    # Google account
    attr_accessor :email

    # Password to login google account
    attr_accessor :password

    # developer account ID
    attr_accessor :dev_acc

    # HTTP proxy host
    attr_accessor :proxy_host

    # HTTP proxy port
    attr_accessor :proxy_port

    def initialize
      @email = nil
      @password = nil
      @dev_acc = nil
      @proxy_host = nil
      @proxy_port = nil
    end

    # Load the configuration files.
    #
    # [path]
    #   optional explicit configuration file, tried first
    #
    # The files are tried in order: +path+, ./.googleplay_dev_scraper and
    # ~/.googleplay_dev_scraper. Files that do not exist are skipped; for
    # each setting the first file that provides it wins.
    def load_config(path = nil)
      config_files = [path, ".googleplay_dev_scraper", "#{ENV['HOME']}/.googleplay_dev_scraper"]

      config_files.each do |file|
        load_config_file(file)
      end
    end

    # Load one configuration file; does nothing when +file+ is nil or does
    # not exist. On a YAML syntax error or any other error while reading,
    # prints a message to STDERR and exits with status 1.
    def load_config_file(file)
      # File.exist? : the File.exists? alias was removed in Ruby 3.2
      return unless file && File.exist?(file)

      File.open(file) do |f|
        begin
          read_config(f.read)
        rescue Psych::SyntaxError
          STDERR.puts "Error: configuration file syntax: #{file}"
          exit 1
        rescue
          STDERR.puts "Error: load configuration file: #{file}"
          exit 1
        end
      end
    end

    # Read settings from a YAML string.
    #
    # [data]
    #   YAML text whose top level is a mapping with the keys email,
    #   password, dev_acc, proxy_host and proxy_port (all optional)
    #
    # Documents whose top level is not a mapping (empty, scalar, sequence)
    # are ignored. Settings that already have a value are kept.
    def read_config(data)
      h = YAML.load(data)
      return unless h.is_a?(Hash)

      @email ||= h['email']
      @password ||= h['password']
      @dev_acc ||= h['dev_acc']
      @proxy_host ||= h['proxy_host']
      @proxy_port ||= h['proxy_port']
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Specs for ScraperConfig: YAML parsing and configuration file search order.
describe GooglePlayDevScraper::ScraperConfig do
  before do
    @config = GooglePlayDevScraper::ScraperConfig.new
  end

  context "read_config" do
    it "read empty config" do
      @config.read_config("# no data\n")

      expect(@config.email).to be_nil
      expect(@config.password).to be_nil
      expect(@config.dev_acc).to be_nil
      expect(@config.proxy_host).to be_nil
      expect(@config.proxy_port).to be_nil
    end

    it "read normal config" do
      yaml = "email: EMAIL\n" \
             "password: PASSWORD\n" \
             "dev_acc: DEV_ACC\n" \
             "proxy_host: PROXY_HOST\n" \
             "proxy_port: PROXY_PORT\n"
      @config.read_config(yaml)

      expect(@config.email).to eq("EMAIL")
      expect(@config.password).to eq("PASSWORD")
      expect(@config.dev_acc).to eq("DEV_ACC")
      expect(@config.proxy_host).to eq("PROXY_HOST")
      expect(@config.proxy_port).to eq("PROXY_PORT")
    end
  end

  context "load_config" do
    before do
      # make mock: record the files load_config asks for instead of
      # reading them
      def @config.load_config_file(file)
        @config_files ||= []
        @config_files.push(file)
      end

      def @config.config_files
        @config_files
      end
    end

    it "without path" do
      @config.load_config

      expect(@config.config_files).to eq(
        [nil, ".googleplay_dev_scraper", ENV['HOME'] + "/.googleplay_dev_scraper"]
      )
    end

    it "with path" do
      @config.load_config("/some/path")

      expect(@config.config_files).to eq(
        ["/some/path", ".googleplay_dev_scraper", ENV['HOME'] + "/.googleplay_dev_scraper"]
      )
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Specs for Scraper, run against ScraperMock so no network access happens.
describe GooglePlayDevScraper::Scraper do
  before do
    @scraper = ScraperMock.new

    @dev_acc = "1234567890"
    @scraper.config.dev_acc = @dev_acc
  end

  context "Setup" do
    it "setup without proxy" do
      @scraper.setup

      expect(@scraper.agent.proxy_addr).to be_nil
      expect(@scraper.agent.proxy_port).to be_nil
    end

    it "setup with proxy" do
      @scraper.config.proxy_host = "proxy.example.com"
      @scraper.config.proxy_port = 12345

      @scraper.setup

      expect(@scraper.agent.proxy_addr).to eq("proxy.example.com")
      expect(@scraper.agent.proxy_port).to eq(12345)
    end
  end

  context "get sales report" do
    it "normal access" do
      @scraper.get_sales_report(2012, 11)

      expect(@scraper.accessed_url).to eq(
        "https://play.google.com/apps/publish/v2/salesreport/download?report_date=2012_11&report_type=payout_report&dev_acc=#{@dev_acc}"
      )
    end
  end
end
|
39
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'googleplay_dev_scraper'
|
4
|
+
|
5
|
+
# Scraper test double: uses a MechanizeMock agent and records the requested
# URL instead of fetching it.
class ScraperMock < GooglePlayDevScraper::Scraper
  # URL passed to the most recent #try_get call
  attr_reader :accessed_url

  def initialize
    super
    self.agent = MechanizeMock.new
  end

  # Runs the regular setup, then remembers +url+ without any network access.
  def try_get(url)
    setup
    @accessed_url = url
  end
end
|
18
|
+
|
19
|
+
# Mechanize subclass whose current page is always a canned PageMock.
class MechanizeMock < Mechanize
  def initialize
    super
    @page_mock = PageMock.new
  end

  # The canned page created in the constructor.
  def page
    @page_mock
  end
end
|
29
|
+
|
30
|
+
# Minimal stand-in for a fetched page: only #body is provided.
class PageMock
  # Canned page content.
  #
  # Returns a fresh String on every call, because Scraper#body_string
  # changes the encoding of the returned body in place.
  def body
    "BODY".dup
  end
end
|
metadata
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: googleplay_dev_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Takuya Murakami
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.5.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.5.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rdoc
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Scraping and download CSV data from Google Play developer console and
|
70
|
+
Google Wallet.
|
71
|
+
email:
|
72
|
+
- tmurakam@tmurakam.org
|
73
|
+
executables:
|
74
|
+
- googleplay_dev_scraper
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- .gitignore
|
79
|
+
- ChangeLog
|
80
|
+
- Gemfile
|
81
|
+
- README.ja.md
|
82
|
+
- README.md
|
83
|
+
- Rakefile
|
84
|
+
- bin/googleplay_dev_scraper
|
85
|
+
- dot.googleplay_dev_scraper
|
86
|
+
- googleplay_dev_scraper.gemspec
|
87
|
+
- lib/googleplay_dev_scraper.rb
|
88
|
+
- lib/googleplay_dev_scraper/scraper.rb
|
89
|
+
- lib/googleplay_dev_scraper/scraper_base.rb
|
90
|
+
- lib/googleplay_dev_scraper/scraper_config.rb
|
91
|
+
- lib/googleplay_dev_scraper/version.rb
|
92
|
+
- spec/scraper_config_spec.rb
|
93
|
+
- spec/scraper_spec.rb
|
94
|
+
- spec/spec_helper.rb
|
95
|
+
homepage: https://github.com/tmurakam/googleplay_dev_scraper
|
96
|
+
licenses: []
|
97
|
+
metadata: {}
|
98
|
+
post_install_message:
|
99
|
+
rdoc_options: []
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - '>='
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
requirements: []
|
113
|
+
rubyforge_project:
|
114
|
+
rubygems_version: 2.0.5
|
115
|
+
signing_key:
|
116
|
+
specification_version: 4
|
117
|
+
summary: Scraper for Google Play developer console and Google wallet
|
118
|
+
test_files:
|
119
|
+
- spec/scraper_config_spec.rb
|
120
|
+
- spec/scraper_spec.rb
|
121
|
+
- spec/spec_helper.rb
|