googleplay_dev_scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/ChangeLog +40 -0
- data/Gemfile +3 -0
- data/README.ja.md +172 -0
- data/README.md +135 -0
- data/Rakefile +7 -0
- data/bin/googleplay_dev_scraper +122 -0
- data/dot.googleplay_dev_scraper +22 -0
- data/googleplay_dev_scraper.gemspec +25 -0
- data/lib/googleplay_dev_scraper.rb +4 -0
- data/lib/googleplay_dev_scraper/scraper.rb +172 -0
- data/lib/googleplay_dev_scraper/scraper_base.rb +66 -0
- data/lib/googleplay_dev_scraper/scraper_config.rb +71 -0
- data/lib/googleplay_dev_scraper/version.rb +3 -0
- data/spec/scraper_config_spec.rb +66 -0
- data/spec/scraper_spec.rb +39 -0
- data/spec/spec_helper.rb +34 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: b6aefe94f964073e9d527321688dc59f193ee6de
|
4
|
+
data.tar.gz: 816f696b633e74030b30d283db45e972cc064cf0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5450bde67ec8fe15a7574e7432f6153963da771f3ae6fdea3958709a488456313a35d8c42b4a8fee1c5ca92e54c98f9dda9c1e9097012926d33c858a35051ede
|
7
|
+
data.tar.gz: 6a82afc021d2a482db58286b4204d0234671512cf8a88d5e743eb425c2331dcd65cac04f9f0b830652453d88446cda2fe85cf70ec46e418cdd9287e834297fb7
|
data/.gitignore
ADDED
data/ChangeLog
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
2013/7/18 : Takuya Murakami
|
2
|
+
|
3
|
+
* ver 1.0.0
|
4
|
+
* gem name changed to 'googleplay_dev_scraper'
|
5
|
+
* use DateTime class for some api
|
6
|
+
* remove google checkout specific apis (payouts, order_detail, auto_deliver)
|
7
|
+
|
8
|
+
2013/6/21 : Takuya Murakami
|
9
|
+
|
10
|
+
* add 'wallet_orders' command
|
11
|
+
|
12
|
+
2013/3/30 : Takuya Murakami
|
13
|
+
|
14
|
+
* ver 0.2.2
|
15
|
+
* Fixed URL for sales report for v2 API.
|
16
|
+
* Refactoring
|
17
|
+
|
18
|
+
2013/3/30 : Philipp.Sandhaus@cewecolor.de
|
19
|
+
|
20
|
+
* Fixed URL for app statistic download
|
21
|
+
|
22
|
+
2012/12/13 : Takuya Murakami
|
23
|
+
|
24
|
+
* ver 0.2.1
|
25
|
+
* Use YAML for configuration file format.
|
26
|
+
* Refactoring
|
27
|
+
|
28
|
+
2012/12/13 : Takuya Murakami
|
29
|
+
|
30
|
+
* ver 0.1.3
|
31
|
+
* Change project name : 'Google Play Scraper'
|
32
|
+
* Support gem
|
33
|
+
|
34
|
+
2012/12/13 : Takuya Murakami
|
35
|
+
|
36
|
+
* Use Bundler to install mechanize
|
37
|
+
|
38
|
+
2012/07/10 : Takuya Murakami
|
39
|
+
|
40
|
+
* Add support to download application statistics
|
data/Gemfile
ADDED
data/README.ja.md
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
Android 開発者向け Google Play / Google Wallet Scraper
|
2
|
+
======================================================
|
3
|
+
|
4
|
+
はじめに
|
5
|
+
========
|
6
|
+
|
7
|
+
このツールは、Google Play デベロッパーコンソール、
|
8
|
+
および Google Wallet で提供される販売者向けの売上
|
9
|
+
レポートなどの CSV ファイルを自動でダウンロードするための
|
10
|
+
ツールです。
|
11
|
+
|
12
|
+
Google Play デベロッパーコンソールからは以下のものをダウンロードできます。
|
13
|
+
|
14
|
+
* 販売レポート
|
15
|
+
* 予想販売レポート
|
16
|
+
* アプリ統計情報
|
17
|
+
|
18
|
+
Google Wallet Merchant Center からは以下のものをダウンロードできます。
|
19
|
+
|
20
|
+
* オーダー一覧
|
21
|
+
|
22
|
+
売上の集計をするなり、経理システムにぶち込むなり、お好きにどうぞ。
|
23
|
+
|
24
|
+
|
25
|
+
必要システム
|
26
|
+
============
|
27
|
+
|
28
|
+
以下のものが必要です。
|
29
|
+
|
30
|
+
* Ruby 1.9.3以上
|
31
|
+
* RubyGems
|
32
|
+
|
33
|
+
以下のようにしてインストールします。
|
34
|
+
|
35
|
+
$ gem install googleplay_dev_scraper
|
36
|
+
|
37
|
+
|
38
|
+
設定
|
39
|
+
====
|
40
|
+
|
41
|
+
設定ファイルを ~/.googleplay_dev_scraper に YAML フォーマットで作成してください。
|
42
|
+
以下にサンプルを示します。
|
43
|
+
|
44
|
+
Google Play メールアドレスとパスワード、デベロッパIDを設定してください。
|
45
|
+
(素のパスワードを設定するのでアクセス権には注意)
|
46
|
+
|
47
|
+
デベロッパID は、developer console にログインした後の URL 末尾の
|
48
|
+
dev_acc=... の数字です。
|
49
|
+
|
50
|
+
```
|
51
|
+
# GooglePlay dev scraper config file sample (YAML format)
|
52
|
+
#
|
53
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
54
|
+
# ./.googleplay_dev_scraper.
|
55
|
+
#
|
56
|
+
# WARNING: This file contains password, be careful
|
57
|
+
# of file permission.
|
58
|
+
|
59
|
+
# Your E-mail address to login google play
|
60
|
+
email: foo@example.com
|
61
|
+
|
62
|
+
# Your password to login google play
|
63
|
+
password: "Your Password"
|
64
|
+
|
65
|
+
# Developer account ID
|
66
|
+
# You can find your developer account ID in the URL
|
67
|
+
# after 'dev_acc=...' when logged in to the developer console.
|
68
|
+
dev_acc: "12345678901234567890"
|
69
|
+
|
70
|
+
# Proxy host and port number (if needed)
|
71
|
+
#proxy_host: proxy.example.com
|
72
|
+
#proxy_port: 8080
|
73
|
+
```
|
74
|
+
|
75
|
+
なお、設定値はコマンドラインで与えることもできます。詳細は
|
76
|
+
--help オプションで確認してください。
|
77
|
+
|
78
|
+
|
79
|
+
使い方
|
80
|
+
======
|
81
|
+
|
82
|
+
売上レポート取得
|
83
|
+
----------------
|
84
|
+
|
85
|
+
2011年10月の売上を取得する場合は以下のようにします。
|
86
|
+
結果は標準出力に出力されます。
|
87
|
+
|
88
|
+
$ googleplay_dev_scraper sales 2011 10
|
89
|
+
|
90
|
+
また推定売上レポートもダウンロードできます。
|
91
|
+
|
92
|
+
$ googleplay_dev_scraper estimated 2011 10
|
93
|
+
|
94
|
+
|
95
|
+
オーダー一覧取得
|
96
|
+
----------------
|
97
|
+
|
98
|
+
オーダーの一覧を取得します。
|
99
|
+
開始日と終了日を指定します。時刻は日本時間で指定。
|
100
|
+
|
101
|
+
$ googleplay_dev_scraper orders "2011-08-01 00:00:00" "2011-09-30 23:59:59"
|
102
|
+
|
103
|
+
|
104
|
+
アプリケーション統計情報取得
|
105
|
+
----------------------------
|
106
|
+
|
107
|
+
Developer Console の統計情報 CSV エクスポートと同じものを得ます。
|
108
|
+
対象となるアプリのパッケージ名と、開始日/終了日を指定してください。
|
109
|
+
|
110
|
+
$ googleplay_dev_scraper appstats your.package.name 20120101 20120630 > stat.zip
|
111
|
+
|
112
|
+
ZIP ファイルが標準出力に出力されるので、リダイレクトでファイルに
|
113
|
+
落としてください。
|
114
|
+
|
115
|
+
|
116
|
+
API の利用
|
117
|
+
==========
|
118
|
+
|
119
|
+
例:
|
120
|
+
|
121
|
+
```
|
122
|
+
require 'googleplay_dev_scraper'
|
123
|
+
|
124
|
+
scraper = GooglePlayDevScraper::Scraper.new
|
125
|
+
|
126
|
+
# set config (Note: config file is not read via API access)
|
127
|
+
scraper.config.email = "foo@example.com"
|
128
|
+
scraper.config.password = "YOUR_PASSWORD"
|
129
|
+
scraper.config.dev_acc = "1234567890"
|
130
|
+
|
131
|
+
# get sales report / estimated sales report
|
132
|
+
puts scraper.get_sales_report(2012, 11)
|
133
|
+
puts scraper.get_estimated_sales_report(2012, 12)
|
134
|
+
|
135
|
+
# get orders
|
136
|
+
puts scraper.get_order_list(DateTime.parse("2012-11-01"), DateTime.parse("2012-11-30"))
|
137
|
+
```
|
138
|
+
|
139
|
+
内部動作とか
|
140
|
+
============
|
141
|
+
|
142
|
+
Mechanize を使って Web サイトに自動アクセスし、フォームを叩いて
|
143
|
+
CSV を入手するだけです。
|
144
|
+
|
145
|
+
本体は scraper.rb です。ソース見れば何やってるかはわかると思います。
|
146
|
+
Rails アプリの中で使うとか、お好きにどうぞ。
|
147
|
+
|
148
|
+
|
149
|
+
ライセンス
|
150
|
+
==========
|
151
|
+
|
152
|
+
Public domain 扱いとします。
|
153
|
+
|
154
|
+
|
155
|
+
免責事項
|
156
|
+
========
|
157
|
+
|
158
|
+
* 無保証です。
|
159
|
+
* Google 側のサイトの作りが変わったら当然動作しなくなります。
|
160
|
+
* Google から怒られても責任は取りません。
|
161
|
+
* 動かなくても文句言わない。自分で直すように。
|
162
|
+
* 直したら修正を送るなり pull request するなりしてくれると嬉しい。
|
163
|
+
|
164
|
+
|
165
|
+
ひとりごと
|
166
|
+
==========
|
167
|
+
|
168
|
+
* Google さん、Android 向けの Google Wallet API (オーダー一覧とか)解放してくれるとすごく嬉しいのですが、、、
|
169
|
+
|
170
|
+
---
|
171
|
+
'13/7/18
|
172
|
+
Takuya Murakami, E-mail: tmurakam at tmurakam.org
|
data/README.md
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
Google Play / Google Wallet Scraper for Android Developers
|
2
|
+
==========================================================
|
3
|
+
|
4
|
+
Introduction
|
5
|
+
============
|
6
|
+
|
7
|
+
This tool is designed to download CSV report files from
|
8
|
+
the Google Play developer console and Google Wallet.
|
9
|
+
|
10
|
+
It can download the following CSV files from the Google Play
|
11
|
+
developer console:
|
12
|
+
|
13
|
+
* Sales report (monthly report)
|
14
|
+
* Estimated sales report
|
15
|
+
* Application statistics
|
16
|
+
|
17
|
+
It can download the following CSV files from Google Wallet:
|
18
|
+
|
19
|
+
* Order list (almost realtime)
|
20
|
+
|
21
|
+
You don't need any merchant key, because this tool scrapes
|
22
|
+
the Google Play / Google Wallet websites.
|
23
|
+
|
24
|
+
Requirements/Installation
|
25
|
+
=========================
|
26
|
+
|
27
|
+
* Ruby >=1.9.3
|
28
|
+
* RubyGems
|
29
|
+
|
30
|
+
To install:
|
31
|
+
|
32
|
+
$ gem install googleplay_dev_scraper
|
33
|
+
|
34
|
+
Configuration
|
35
|
+
=============
|
36
|
+
|
37
|
+
Create configuration file at ~/.googleplay_dev_scraper,
|
38
|
+
or ./.googleplay_dev_scraper in YAML format.
|
39
|
+
|
40
|
+
```
|
41
|
+
# GooglePlay scraper config file sample (YAML format)
|
42
|
+
#
|
43
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
44
|
+
# ./.googleplay_dev_scraper.
|
45
|
+
#
|
46
|
+
# WARNING: This file contains password, be careful
|
47
|
+
# of file permission.
|
48
|
+
|
49
|
+
# Your E-mail address to login google play
|
50
|
+
email: foo@example.com
|
51
|
+
|
52
|
+
# Your password to login google play
|
53
|
+
password: "Your Password"
|
54
|
+
|
55
|
+
# Developer account ID
|
56
|
+
# You can find your developer account ID in the URL
|
57
|
+
# after 'dev_acc=...' when logged in to the developer console.
|
58
|
+
dev_acc: "12345678901234567890"
|
59
|
+
|
60
|
+
# Proxy host and port number (if needed)
|
61
|
+
#proxy_host: proxy.example.com
|
62
|
+
#proxy_port: 8080
|
63
|
+
```
|
64
|
+
|
65
|
+
You can specify configuration parameters with command line
|
66
|
+
options. See details with --help option.
|
67
|
+
|
68
|
+
How to use
|
69
|
+
==========
|
70
|
+
|
71
|
+
Get sales report
|
72
|
+
----------------
|
73
|
+
|
74
|
+
To download sales report for October 2011:
|
75
|
+
|
76
|
+
$ googleplay_dev_scraper sales 2011 10
|
77
|
+
|
78
|
+
You can also download the estimated sales report:
|
79
|
+
|
80
|
+
$ googleplay_dev_scraper estimated 2011 10
|
81
|
+
|
82
|
+
Get order report
|
83
|
+
----------------
|
84
|
+
|
85
|
+
To download order report, specify start and end time as:
|
86
|
+
|
87
|
+
$ googleplay_dev_scraper orders "2011-08-01 00:00:00" "2011-09-30 23:59:59"
|
88
|
+
|
89
|
+
Get application statistics
|
90
|
+
--------------------------
|
91
|
+
|
92
|
+
Export application statistics in CSV format.
|
93
|
+
Specify application package name and start/end date.
|
94
|
+
|
95
|
+
$ googleplay_dev_scraper appstats your.package.name 20120101 20120630 > stat.zip
|
96
|
+
|
97
|
+
Note: You must redirect output to zip file!
|
98
|
+
|
99
|
+
API usage
|
100
|
+
=========
|
101
|
+
|
102
|
+
Example:
|
103
|
+
|
104
|
+
```
|
105
|
+
require 'googleplay_dev_scraper'
|
106
|
+
|
107
|
+
scraper = GooglePlayDevScraper::Scraper.new
|
108
|
+
|
109
|
+
# set config (Note: config file is not read via API access)
|
110
|
+
scraper.config.email = "foo@example.com"
|
111
|
+
scraper.config.password = "YOUR_PASSWORD"
|
112
|
+
scraper.config.dev_acc = "1234567890"
|
113
|
+
|
114
|
+
# get sales report / estimated sales report
|
115
|
+
puts scraper.get_sales_report(2012, 11)
|
116
|
+
puts scraper.get_estimated_sales_report(2012, 12)
|
117
|
+
|
118
|
+
# get orders
|
119
|
+
puts scraper.get_order_list(DateTime.parse("2012-11-01 00:00:00"), DateTime.parse("2012-11-30T23:59:59"))
|
120
|
+
```
|
121
|
+
|
122
|
+
License
|
123
|
+
=======
|
124
|
+
|
125
|
+
Public domain
|
126
|
+
|
127
|
+
|
128
|
+
Disclaimer
|
129
|
+
==========
|
130
|
+
|
131
|
+
* NO WARRANTY
|
132
|
+
|
133
|
+
---
|
134
|
+
'13/7/18
|
135
|
+
Takuya Murakami, E-mail: tmurakam at tmurakam.org
|
data/Rakefile
ADDED
data/bin/googleplay_dev_scraper
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# design time...
|
5
|
+
lib = File.expand_path('../lib', File.dirname(__FILE__))
|
6
|
+
$LOAD_PATH.unshift(lib)
|
7
|
+
|
8
|
+
require 'optparse'
|
9
|
+
require 'googleplay_dev_scraper'
|
10
|
+
|
11
|
+
Version = GooglePlayDevScraper::VERSION
|
12
|
+
|
13
|
+
module GooglePlayDevScraper
|
14
|
+
class Tool
|
15
|
+
def initialize
|
16
|
+
@scraper = Scraper.new
|
17
|
+
|
18
|
+
@show_details = false
|
19
|
+
@auto_archive = false
|
20
|
+
end
|
21
|
+
|
22
|
+
def main
|
23
|
+
@scraper.config.load_config
|
24
|
+
parse_arguments
|
25
|
+
|
26
|
+
usage if ARGV.size < 1
|
27
|
+
|
28
|
+
command = ARGV.shift
|
29
|
+
case command
|
30
|
+
when "sales"
|
31
|
+
sales
|
32
|
+
when "estimated"
|
33
|
+
estimated
|
34
|
+
when "orders"
|
35
|
+
orders
|
36
|
+
when "appstats"
|
37
|
+
appstats
|
38
|
+
when "wallet_orders"
|
39
|
+
wallet_orders
|
40
|
+
else
|
41
|
+
usage
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def usage
|
46
|
+
STDERR.puts "Usage: #{$0} [options] command [arguments...]"
|
47
|
+
STDERR.puts " #{$0} --help"
|
48
|
+
STDERR.puts " Commands:"
|
49
|
+
STDERR.puts " sales <year> <month> Get monthly sales report"
|
50
|
+
STDERR.puts " estimated <year> <month> Get estimated sales report"
|
51
|
+
STDERR.puts " orders <start_date> <end_date> Get order list"
|
52
|
+
STDERR.puts " appstats <package_name> <startDay> <endDay> Get app stats (in zip file)"
|
53
|
+
exit 1
|
54
|
+
end
|
55
|
+
|
56
|
+
def parse_arguments
|
57
|
+
config = @scraper.config
|
58
|
+
|
59
|
+
opts = OptionParser.new do |opt|
|
60
|
+
opt.on('-u email', 'set email address') {|v| config.email = v}
|
61
|
+
opt.on('-p password', 'set password') {|v| config.password = v}
|
62
|
+
opt.on('-a dev_acc', 'set dev_acc') {|v| config.dev_acc = v}
|
63
|
+
|
64
|
+
opt.on('-P host:port', 'Set HTTP proxy/port') {|v|
|
65
|
+
a = v.split(/:/)
|
66
|
+
config.proxy_host = a[0]
|
67
|
+
config.proxy_port = a[1]
|
68
|
+
}
|
69
|
+
opt.on('-v', '--version', 'Show version') {
|
70
|
+
puts Version
|
71
|
+
exit 0
|
72
|
+
}
|
73
|
+
opt.parse!(ARGV)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def sales
|
78
|
+
usage if (ARGV.size < 2)
|
79
|
+
|
80
|
+
year = ARGV[0]
|
81
|
+
month = ARGV[1]
|
82
|
+
|
83
|
+
puts @scraper.get_sales_report(year, month)
|
84
|
+
end
|
85
|
+
|
86
|
+
def estimated
|
87
|
+
usage if (ARGV.size < 2)
|
88
|
+
|
89
|
+
year = ARGV[0]
|
90
|
+
month = ARGV[1]
|
91
|
+
|
92
|
+
puts @scraper.get_estimated_sales_report(year, month)
|
93
|
+
end
|
94
|
+
|
95
|
+
# get daily orders
|
96
|
+
def orders
|
97
|
+
usage if (ARGV.size < 2)
|
98
|
+
|
99
|
+
startdate = DateTime.parse(ARGV[0])
|
100
|
+
enddate = DateTime.parse(ARGV[1])
|
101
|
+
|
102
|
+
puts @scraper.get_order_list(startdate, enddate)
|
103
|
+
end
|
104
|
+
|
105
|
+
def wallet_orders
|
106
|
+
puts @scraper.get_wallet_orders
|
107
|
+
end
|
108
|
+
|
109
|
+
def appstats
|
110
|
+
usage if ARGV.size < 3
|
111
|
+
package = ARGV[0]
|
112
|
+
startDay = ARGV[1]
|
113
|
+
endDay = ARGV[2]
|
114
|
+
|
115
|
+
puts @scraper.get_appstats(package, startDay, endDay)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
tool = GooglePlayDevScraper::Tool.new
|
121
|
+
tool.main
|
122
|
+
|
data/dot.googleplay_dev_scraper
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# GooglePlay scraper config file sample (YAML format)
|
2
|
+
#
|
3
|
+
# Place this content to your ~/.googleplay_dev_scraper or
|
4
|
+
# ./.googleplay_dev_scraper.
|
5
|
+
#
|
6
|
+
# WARNING: This file contains password, be careful
|
7
|
+
# of file permission.
|
8
|
+
|
9
|
+
# Your E-mail address to login google play
|
10
|
+
email: foo@example.com
|
11
|
+
|
12
|
+
# Your password to login google play
|
13
|
+
password: "Your Password"
|
14
|
+
|
15
|
+
# Developer account ID
|
16
|
+
# You can find your developer account ID in the URL
|
17
|
+
# after 'dev_acc=...' when logged in to the developer console.
|
18
|
+
dev_acc: "12345678901234567890"
|
19
|
+
|
20
|
+
# Proxy host and port number (if needed)
|
21
|
+
#proxy_host: proxy.example.com
|
22
|
+
#proxy_port: 8080
|
data/googleplay_dev_scraper.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'googleplay_dev_scraper/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
|
7
|
+
gem.name = "googleplay_dev_scraper"
|
8
|
+
gem.version = GooglePlayDevScraper::VERSION
|
9
|
+
gem.authors = ["Takuya Murakami"]
|
10
|
+
gem.email = ["tmurakam@tmurakam.org"]
|
11
|
+
gem.description = %q{Scraping and download CSV data from Google Play developer console and Google Wallet.}
|
12
|
+
gem.summary = %q{Scraper for Google Play developer console and Google wallet}
|
13
|
+
gem.homepage = "https://github.com/tmurakam/googleplay_dev_scraper"
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($/)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
|
+
gem.require_paths = ["lib"]
|
19
|
+
|
20
|
+
gem.add_dependency('mechanize', '>= 2.5.0')
|
21
|
+
|
22
|
+
gem.add_development_dependency 'rspec'
|
23
|
+
gem.add_development_dependency 'rake'
|
24
|
+
gem.add_development_dependency 'rdoc'
|
25
|
+
end
|
data/lib/googleplay_dev_scraper/scraper.rb
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
require 'date'
|
12
|
+
|
13
|
+
module GooglePlayDevScraper
|
14
|
+
#
|
15
|
+
# Google Play and google checkout scraper
|
16
|
+
#
|
17
|
+
class Scraper < ScraperBase
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
super
|
21
|
+
end
|
22
|
+
|
23
|
+
def body_string
|
24
|
+
@agent.page.body.force_encoding("UTF-8")
|
25
|
+
end
|
26
|
+
|
27
|
+
# Get sales report (report_type = payout_report)
|
28
|
+
# [year]
|
29
|
+
# Year (ex. 2012)
|
30
|
+
# [month]
|
31
|
+
# Month (1 - 12)
|
32
|
+
# [Return]
|
33
|
+
# CSV string
|
34
|
+
#
|
35
|
+
def get_sales_report(year, month)
|
36
|
+
#url = sprintf('https://play.google.com/apps/publish/salesreport/download?report_date=%04d_%02d&report_type=payout_report&dev_acc=%s', year, month, @config.dev_acc)
|
37
|
+
url = sprintf('https://play.google.com/apps/publish/v2/salesreport/download?report_date=%04d_%02d&report_type=payout_report&dev_acc=%s', year, month, @config.dev_acc)
|
38
|
+
try_get(url)
|
39
|
+
|
40
|
+
body_string
|
41
|
+
end
|
42
|
+
|
43
|
+
# Get estimated sales report (report_type = sales_report)
|
44
|
+
#
|
45
|
+
# [year]
|
46
|
+
# Year (ex. 2012)
|
47
|
+
# [month]
|
48
|
+
# Month (1 - 12)
|
49
|
+
# [Return]
|
50
|
+
# CSV string
|
51
|
+
#
|
52
|
+
def get_estimated_sales_report(year, month)
|
53
|
+
#https://play.google.com/apps/publish/v2/salesreport/download?report_date=2013_03&report_type=sales_report&dev_acc=09924472108471074593
|
54
|
+
url = sprintf('https://play.google.com/apps/publish/v2/salesreport/download?report_date=%04d_%02d&report_type=sales_report&dev_acc=%s', year, month, @config.dev_acc)
|
55
|
+
try_get(url)
|
56
|
+
|
57
|
+
body_string
|
58
|
+
end
|
59
|
+
|
60
|
+
# Get order list
|
61
|
+
#
|
62
|
+
# [start_time]
|
63
|
+
# start time (DateTime)
|
64
|
+
# [end_time]
|
65
|
+
# end time (DateTime)
|
66
|
+
# [Return]
|
67
|
+
# CSV string
|
68
|
+
def get_order_list(start_time, end_time)
|
69
|
+
# unix time in ms
|
70
|
+
start_ut = start_time.to_time.to_i * 1000
|
71
|
+
end_ut = end_time.to_time.to_i * 1000
|
72
|
+
|
73
|
+
try_get("https://wallet.google.com/merchant/pages/")
|
74
|
+
if @agent.page.uri.path =~ /(bcid-[^\/]+)\/(oid-[^\/]+)\/(cid-[^\/]+)\//
|
75
|
+
bcid = $1
|
76
|
+
oid = $2
|
77
|
+
cid = $3
|
78
|
+
|
79
|
+
# You can check the URL with your browser.
|
80
|
+
# (download csv file, and check download history with chrome/firefox)
|
81
|
+
try_get("https://wallet.google.com/merchant/pages/" +
|
82
|
+
bcid + "/" + oid + "/" + cid +
|
83
|
+
"/purchaseorderdownload?startTime=#{start_ut}" +
|
84
|
+
"&endTime=#{end_ut}")
|
85
|
+
body_string
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# Get application statistics CSV in zip
|
90
|
+
#
|
91
|
+
# [package]
|
92
|
+
# package name
|
93
|
+
# [start_day]
|
94
|
+
# start date (yyyyMMdd)
|
95
|
+
# [end_day]
|
96
|
+
# end date (yyyyMMdd)
|
97
|
+
# [Return]
|
98
|
+
# application statistics zip data
|
99
|
+
#
|
100
|
+
def get_appstats(package, start_day, end_day)
|
101
|
+
dim = "overall,country,language,os_version,device,app_version,carrier&met=active_device_installs,daily_device_installs,daily_device_uninstalls,daily_device_upgrades,active_user_installs,total_user_installs,daily_user_installs,daily_user_uninstalls,daily_avg_rating,total_avg_rating"
|
102
|
+
url = "https://play.google.com/apps/publish/v2/statistics/download"
|
103
|
+
url += "?package=#{package}"
|
104
|
+
url += "&sd=#{start_day}&ed=#{end_day}"
|
105
|
+
url += "&dim=#{dim}"
|
106
|
+
#url += "&dev_acc=#{@config.dev_acc}"
|
107
|
+
|
108
|
+
STDERR.puts "URL = #{url}"
|
109
|
+
try_get(url)
|
110
|
+
@agent.page.body
|
111
|
+
end
|
112
|
+
|
113
|
+
# dump CSV (util)
|
114
|
+
def dump_csv(csv_string)
|
115
|
+
headers = nil
|
116
|
+
CSV.parse(csv_string) do |row|
|
117
|
+
unless headers
|
118
|
+
headers = row
|
119
|
+
next
|
120
|
+
end
|
121
|
+
|
122
|
+
i = 0
|
123
|
+
row.each do |column|
|
124
|
+
puts "#{headers[i]} : #{column}"
|
125
|
+
i = i + 1
|
126
|
+
end
|
127
|
+
puts
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
#
|
132
|
+
# Get order list from wallet html page
|
133
|
+
#
|
134
|
+
def get_wallet_orders
|
135
|
+
try_get("https://wallet.google.com/merchant/pages/")
|
136
|
+
html = body_string
|
137
|
+
|
138
|
+
doc = Nokogiri::HTML(html)
|
139
|
+
|
140
|
+
#doc.xpath("//table[@id='purchaseOrderListTable']")
|
141
|
+
|
142
|
+
result = ""
|
143
|
+
|
144
|
+
doc.xpath("//tr[@class='orderRow']").each do |e|
|
145
|
+
order_id = e['id']
|
146
|
+
|
147
|
+
date = nil
|
148
|
+
desc = nil
|
149
|
+
total = nil
|
150
|
+
status = nil
|
151
|
+
|
152
|
+
e.children.each do |e2|
|
153
|
+
case e2['class']
|
154
|
+
when /wallet-date-column/
|
155
|
+
date = e2.content
|
156
|
+
when /wallet-description-column/
|
157
|
+
desc = e2.content
|
158
|
+
when /wallet-total-column/
|
159
|
+
total = e2.content
|
160
|
+
when /wallet-status-column/
|
161
|
+
e3 = e2.children.first
|
162
|
+
status = e3['title'] unless e3.nil?
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
result += [order_id, date, desc, status, total].join(",") + "\n"
|
167
|
+
end
|
168
|
+
|
169
|
+
result
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
data/lib/googleplay_dev_scraper/scraper_base.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
module GooglePlayDevScraper
|
13
|
+
class ScraperBase
|
14
|
+
attr_accessor :agent
|
15
|
+
attr_accessor :config
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@agent = nil
|
19
|
+
@config = ScraperConfig.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def setup
|
23
|
+
#Mechanize.log = Logger.new("mechanize.log")
|
24
|
+
#Mechanize.log.level = Logger::INFO
|
25
|
+
|
26
|
+
unless @agent
|
27
|
+
@agent = Mechanize.new
|
28
|
+
end
|
29
|
+
if @config.proxy_host && @config.proxy_host.length >= 1
|
30
|
+
@agent.set_proxy(@config.proxy_host, @config.proxy_port)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def try_get(url)
|
35
|
+
unless @agent
|
36
|
+
setup
|
37
|
+
end
|
38
|
+
|
39
|
+
# try to get
|
40
|
+
@agent.get(url)
|
41
|
+
|
42
|
+
# login needed?
|
43
|
+
if @agent.page.uri.host != "accounts.google.com" || @agent.page.uri.path != "/ServiceLogin"
|
44
|
+
# already logged in
|
45
|
+
return
|
46
|
+
end
|
47
|
+
|
48
|
+
# do login
|
49
|
+
form = @agent.page.forms.find {|f| f.form_node['id'] == "gaia_loginform"}
|
50
|
+
unless form
|
51
|
+
raise 'No login form'
|
52
|
+
end
|
53
|
+
form.field_with(:name => "Email").value = @config.email
|
54
|
+
form.field_with(:name => "Passwd").value = @config.password
|
55
|
+
form.click_button
|
56
|
+
|
57
|
+
if @agent.page.uri.host == "accounts.google.com"
|
58
|
+
STDERR.puts "login failed? : uri = " + @agent.page.uri.to_s
|
59
|
+
raise 'Google login failed'
|
60
|
+
end
|
61
|
+
|
62
|
+
# retry get
|
63
|
+
@agent.get(url)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/googleplay_dev_scraper/scraper_config.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# = GooglePlay dev Scraper
|
5
|
+
# Author:: Takuya Murakami
|
6
|
+
# License:: Public domain
|
7
|
+
|
8
|
+
require 'mechanize'
|
9
|
+
require 'csv'
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
module GooglePlayDevScraper
|
13
|
+
#
|
14
|
+
# Configurations
|
15
|
+
#
|
16
|
+
class ScraperConfig
|
17
|
+
# Google account
|
18
|
+
attr_accessor :email
|
19
|
+
|
20
|
+
# Password to login google account
|
21
|
+
attr_accessor :password
|
22
|
+
|
23
|
+
# developer account ID
|
24
|
+
attr_accessor :dev_acc
|
25
|
+
|
26
|
+
# HTTP proxy host
|
27
|
+
attr_accessor :proxy_host
|
28
|
+
|
29
|
+
# HTTP proxy port
|
30
|
+
attr_accessor :proxy_port
|
31
|
+
|
32
|
+
def initialize
|
33
|
+
@dev_acc = nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def load_config(path = nil)
|
37
|
+
config_files = [ path, ".googleplay_dev_scraper", "#{ENV['HOME']}/.googleplay_dev_scraper" ]
|
38
|
+
|
39
|
+
config_files.each do |file|
|
40
|
+
load_config_file(file)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def load_config_file(file)
|
45
|
+
if file && File.exists?(file)
|
46
|
+
open(file) do |f|
|
47
|
+
begin
|
48
|
+
read_config(f.read)
|
49
|
+
rescue Psych::SyntaxError => e
|
50
|
+
STDERR.puts "Error: configuration file syntax: #{file}"
|
51
|
+
exit 1
|
52
|
+
rescue
|
53
|
+
STDERR.puts "Error: load configuration file: #{file}"
|
54
|
+
exit 1
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def read_config(data)
|
61
|
+
h = YAML.load(data)
|
62
|
+
if h
|
63
|
+
@email ||= h['email']
|
64
|
+
@password ||= h['password']
|
65
|
+
@dev_acc ||= h['dev_acc']
|
66
|
+
@proxy_host ||= h['proxy_host']
|
67
|
+
@proxy_port ||= h['proxy_port']
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/spec/scraper_config_spec.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe GooglePlayDevScraper::ScraperConfig do
|
5
|
+
before do
|
6
|
+
@config = GooglePlayDevScraper::ScraperConfig.new
|
7
|
+
end
|
8
|
+
|
9
|
+
context "read_config" do
|
10
|
+
it "read empty config" do
|
11
|
+
yaml = <<EOF
|
12
|
+
# no data
|
13
|
+
EOF
|
14
|
+
@config.read_config(yaml)
|
15
|
+
|
16
|
+
@config.email.should be_nil
|
17
|
+
@config.password.should be_nil
|
18
|
+
@config.dev_acc.should be_nil
|
19
|
+
@config.proxy_host.should be_nil
|
20
|
+
@config.proxy_port.should be_nil
|
21
|
+
end
|
22
|
+
|
23
|
+
it "read normal config" do
|
24
|
+
yaml = <<EOF
|
25
|
+
email: EMAIL
|
26
|
+
password: PASSWORD
|
27
|
+
dev_acc: DEV_ACC
|
28
|
+
proxy_host: PROXY_HOST
|
29
|
+
proxy_port: PROXY_PORT
|
30
|
+
EOF
|
31
|
+
@config.read_config(yaml)
|
32
|
+
|
33
|
+
@config.email.should == "EMAIL"
|
34
|
+
@config.password.should == "PASSWORD"
|
35
|
+
@config.dev_acc.should == "DEV_ACC"
|
36
|
+
@config.proxy_host.should == "PROXY_HOST"
|
37
|
+
@config.proxy_port.should == "PROXY_PORT"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "load_config" do
|
42
|
+
before do
|
43
|
+
# make mock
|
44
|
+
def @config.load_config_file(file)
|
45
|
+
@config_files ||= Array.new
|
46
|
+
@config_files.push(file)
|
47
|
+
end
|
48
|
+
|
49
|
+
def @config.config_files
|
50
|
+
@config_files
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
it "without path" do
|
55
|
+
@config.load_config
|
56
|
+
a = @config.config_files.should ==
|
57
|
+
[ nil, ".googleplay_dev_scraper", ENV['HOME'] + "/.googleplay_dev_scraper"]
|
58
|
+
end
|
59
|
+
|
60
|
+
it "with path" do
|
61
|
+
@config.load_config("/some/path")
|
62
|
+
a = @config.config_files.should ==
|
63
|
+
[ "/some/path", ".googleplay_dev_scraper", ENV['HOME'] + "/.googleplay_dev_scraper"]
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/spec/scraper_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe GooglePlayDevScraper::Scraper do
|
5
|
+
before do
|
6
|
+
@scraper = ScraperMock.new
|
7
|
+
|
8
|
+
@dev_acc = "1234567890"
|
9
|
+
@scraper.config.dev_acc = @dev_acc
|
10
|
+
end
|
11
|
+
|
12
|
+
context "Setup" do
|
13
|
+
it "setup without proxy" do
|
14
|
+
@scraper.setup
|
15
|
+
|
16
|
+
@scraper.agent.proxy_addr.should be_nil
|
17
|
+
@scraper.agent.proxy_port.should be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
it "setup with proxy" do
|
21
|
+
@scraper.config.proxy_host = "proxy.example.com"
|
22
|
+
@scraper.config.proxy_port = 12345
|
23
|
+
|
24
|
+
@scraper.setup
|
25
|
+
|
26
|
+
@scraper.agent.proxy_addr.should == "proxy.example.com"
|
27
|
+
@scraper.agent.proxy_port.should == 12345
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
context "get sales report" do
|
32
|
+
it "normal access" do
|
33
|
+
@scraper.get_sales_report(2012, 11)
|
34
|
+
@scraper.accessed_url.should == "https://play.google.com/apps/publish/v2/salesreport/download?report_date=2012_11&report_type=payout_report&dev_acc=#{@dev_acc}"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'googleplay_dev_scraper'
|
4
|
+
|
5
|
+
class ScraperMock < GooglePlayDevScraper::Scraper
|
6
|
+
attr_reader :accessed_url
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
super
|
10
|
+
@agent = MechanizeMock.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def try_get(url)
|
14
|
+
setup
|
15
|
+
@accessed_url = url
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
class MechanizeMock < Mechanize
|
20
|
+
def initialize
|
21
|
+
super
|
22
|
+
@page_mock = PageMock.new
|
23
|
+
end
|
24
|
+
|
25
|
+
def page
|
26
|
+
@page_mock
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class PageMock
|
31
|
+
def body
|
32
|
+
return "BODY"
|
33
|
+
end
|
34
|
+
end
|
metadata
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: googleplay_dev_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Takuya Murakami
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.5.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.5.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rdoc
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Scraping and download CSV data from Google Play developer console and
|
70
|
+
Google Wallet.
|
71
|
+
email:
|
72
|
+
- tmurakam@tmurakam.org
|
73
|
+
executables:
|
74
|
+
- googleplay_dev_scraper
|
75
|
+
extensions: []
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- .gitignore
|
79
|
+
- ChangeLog
|
80
|
+
- Gemfile
|
81
|
+
- README.ja.md
|
82
|
+
- README.md
|
83
|
+
- Rakefile
|
84
|
+
- bin/googleplay_dev_scraper
|
85
|
+
- dot.googleplay_dev_scraper
|
86
|
+
- googleplay_dev_scraper.gemspec
|
87
|
+
- lib/googleplay_dev_scraper.rb
|
88
|
+
- lib/googleplay_dev_scraper/scraper.rb
|
89
|
+
- lib/googleplay_dev_scraper/scraper_base.rb
|
90
|
+
- lib/googleplay_dev_scraper/scraper_config.rb
|
91
|
+
- lib/googleplay_dev_scraper/version.rb
|
92
|
+
- spec/scraper_config_spec.rb
|
93
|
+
- spec/scraper_spec.rb
|
94
|
+
- spec/spec_helper.rb
|
95
|
+
homepage: https://github.com/tmurakam/googleplay_dev_scraper
|
96
|
+
licenses: []
|
97
|
+
metadata: {}
|
98
|
+
post_install_message:
|
99
|
+
rdoc_options: []
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - '>='
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
requirements: []
|
113
|
+
rubyforge_project:
|
114
|
+
rubygems_version: 2.0.5
|
115
|
+
signing_key:
|
116
|
+
specification_version: 4
|
117
|
+
summary: Scraper for Google Play developer console and Google wallet
|
118
|
+
test_files:
|
119
|
+
- spec/scraper_config_spec.rb
|
120
|
+
- spec/scraper_spec.rb
|
121
|
+
- spec/spec_helper.rb
|