embulk-input-google_analytics 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f461f4cc05a23ba6ed9b53e23c826fc69b7c82c4
4
+ data.tar.gz: d73dea5ede7fb7ce0b348409808e2a7036392fa7
5
+ SHA512:
6
+ metadata.gz: 5d014c2caf49c2b73c325433cb55580581184634b1a4b7dbff9d7030278f8b569b2bcf5d934e0638f92b8114ac3a0f6e1dd8df7fff0747ff4d51ab855af534ec
7
+ data.tar.gz: 3f36eed0bb7b8926d7aeecb7902eb0631e6a31f86e91e81f041b2e97a5894abb1a9bc47329b1acd9d96d80ed482a6915dfce4561fdb406ad37b2c6ed582d8d42
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /coverage/
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 - 2016-07-07
2
+
3
+ The first release!!
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ [![CircleCI](https://circleci.com/gh/treasure-data/embulk-input-google_analytics/tree/master.svg?style=svg)](https://circleci.com/gh/treasure-data/embulk-input-google_analytics/tree/master)
2
+ [![Code Climate](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/gpa.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics)
3
+ [![Test Coverage](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/coverage.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/coverage)
4
+ [![Issue Count](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/issue_count.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics)
5
+ [![Gem Version](https://badge.fury.io/rb/embulk-input-google_analytics.svg)](https://badge.fury.io/rb/embulk-input-google_analytics)
6
+
7
+ # Google Analytics input plugin for Embulk
8
+
9
+ Embulk input plugin for Google Analytics reports.
10
+
11
+ ## Configuration
12
+
13
+ - **json_key_content**: Content of the Google service account JSON key (string, required). See the example config below.
14
+ - **view_id**: View ID for target data. You can find it on the [Google Analytics page](https://lucidpress.zendesk.com/hc/en-us/articles/207335356-Find-your-Google-Analytics-Tracking-ID-View-ID) (you need permission to access the Admin page) (string, required)
15
+ - **time_series**: Time-series dimension for the report. Only `ga:dateHour` or `ga:date` is accepted (string, required)
16
+ - **dimensions**: Target dimensions (array, default: `[]` )
17
+ - **metrics**: Target metrics (array, default: `[]` )
18
+ - **start_date**: Target report start date (string, default: [7 days ago](https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet#reportrequest))
19
+ - **end_date**: Target report end date (string, default: [1 day ago](https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet#reportrequest))
20
+
21
+ ## Example
22
+
23
+ ```yaml
24
+ in:
25
+ type: google_analytics
26
+ json_key_content: |
27
+ {
28
+ "type": "service_account",
29
+ "project_id": "....",
30
+ "private_key_id": "....",
31
+ "private_key": "-----BEGIN PRIVATE KEY-----\n..........................\n-----END PRIVATE KEY-----\n",
32
+ "client_email": ".....",
33
+ "client_id": ".........",
34
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
35
+ "token_uri": "https://accounts.google.com/o/oauth2/token",
36
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
37
+ "client_x509_cert_url": ".........."
38
+ }
39
+ view_id: 123111111
40
+ time_series: "ga:dateHour" # hourly basis
41
+
42
+ # https://developers.google.com/analytics/devguides/reporting/core/dimsmets
43
+ dimensions:
44
+ - "ga:browser"
45
+ metrics:
46
+ - "ga:visits"
47
+ - "ga:pageviews"
48
+
49
+ start_date: "2016-06-27"
50
+ end_date: "2016-06-28"
51
+ ```
52
+
53
+
54
+ ## Build
55
+
56
+ ```
57
+ $ rake
58
+ ```
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :test
4
+
5
+ desc "Run tests"
6
+ task :test do
7
+ ruby("--debug", "test/run-test.rb", "--use-color=yes", "--collector=dir")
8
+ end
9
+
10
+ desc "Run tests with coverage"
11
+ task :cov do
12
+ ENV["COVERAGE"] = "1"
13
+ ruby("--debug", "test/run-test.rb", "--use-color=yes", "--collector=dir")
14
+ end
15
+
data/circle.yml ADDED
@@ -0,0 +1,23 @@
1
+ machine:
2
+ java:
3
+ version: oraclejdk8
4
+ ruby:
5
+ version: jruby-9.0.4.0
6
+ environment:
7
+ JRUBY_OPTS: "-Xcli.debug=true --debug"
8
+
9
+ dependencies:
10
+ pre:
11
+ - ruby -v
12
+ - bundle -v
13
+ - grep bundler <<< "$(gem list)" || gem i bundler --no-document
14
+ - bundle -v
15
+
16
+ test:
17
+ override:
18
+ - bundle exec rake cov
19
+
20
+ general:
21
+ artifacts:
22
+ - "coverage"
23
+
@@ -0,0 +1,29 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-input-google_analytics"
4
+ spec.version = "0.1.0"
5
+ spec.authors = ["uu59"]
6
+ spec.summary = "Google Analytics input plugin for Embulk"
7
+ spec.description = "Loads records from Google Analytics."
8
+ spec.email = ["k@uu59.org"]
9
+ spec.licenses = ["MIT"]
10
+ spec.homepage = "https://github.com/treasure-data/embulk-input-google_analytics"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ spec.add_dependency "httpclient"
17
+ spec.add_dependency "google-api-client", "~> 0.9"
18
+ spec.add_dependency "signet"
19
+ spec.add_dependency "activesupport" # for Time.zone.parse
20
+
21
+ spec.add_development_dependency 'embulk', ['>= 0.8.9']
22
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
23
+ spec.add_development_dependency 'rake', ['>= 10.0']
24
+ spec.add_development_dependency 'test-unit'
25
+ spec.add_development_dependency 'test-unit-rr'
26
+ spec.add_development_dependency 'simplecov'
27
+ spec.add_development_dependency "codeclimate-test-reporter"
28
+ spec.add_development_dependency "pry"
29
+ end
@@ -0,0 +1,160 @@
1
+ require "active_support/core_ext/time"
2
+ require "google/apis/analyticsreporting_v4"
3
+ require "google/apis/analytics_v3"
4
+
5
+ module Embulk
6
+ module Input
7
+ module GoogleAnalytics
8
+ class Client
9
+ attr_reader :task
10
+
11
+ def initialize(task, is_preview = false)
12
+ @task = task
13
+ @is_preview = is_preview
14
+ end
15
+
16
+ def preview?
17
+ @is_preview
18
+ end
19
+
20
+ def each_report_row(&block)
21
+ page_token = nil
22
+ Embulk.logger.info "view_id:#{view_id} timezone has been set as '#{get_profile[:timezone]}'"
23
+
24
+ loop do
25
+ result = get_reports(page_token)
26
+ report = result.to_h[:reports].first
27
+
28
+ unless page_token
29
+ # display for first request only
30
+ Embulk.logger.info "Total: #{report[:data][:row_count]} rows. Fetched first response"
31
+ end
32
+
33
+ if !report[:data].has_key?(:rows)
34
+ Embulk.logger.warn "Result doesn't contain rows."
35
+ break
36
+ end
37
+
38
+ if report[:data][:rows].empty?
39
+ Embulk.logger.warn "Result has 0 rows."
40
+ break
41
+ end
42
+
43
+ dimensions = report[:column_header][:dimensions]
44
+ metrics = report[:column_header][:metric_header][:metric_header_entries].map{|m| m[:name]}
45
+ report[:data][:rows].each do |row|
46
+ dim = dimensions.zip(row[:dimensions]).to_h
47
+ met = metrics.zip(row[:metrics].first[:values]).to_h
48
+ format_row = dim.merge(met)
49
+ time = format_row[task["time_series"]]
50
+ format_row[task["time_series"]] = time_parse_with_profile_timezone(time)
51
+ block.call format_row
52
+ end
53
+
54
+ break if preview?
55
+
56
+ unless page_token = report[:next_page_token]
57
+ break
58
+ end
59
+ Embulk.logger.info "Fetching report with page_token: #{page_token}"
60
+ end
61
+ end
62
+
63
+ def get_profile
64
+ @profile ||=
65
+ begin
66
+ profile = get_all_profiles.to_h[:items].find do |prof|
67
+ prof[:id] == view_id
68
+ end
69
+
70
+ unless profile
71
+ raise Embulk::ConfigError.new("Can't find view_id:#{view_id} profile via Google Analytics API.")
72
+ end
73
+
74
+ profile
75
+ end
76
+ end
77
+
78
+ def get_all_profiles
79
+ service = Google::Apis::AnalyticsV3::AnalyticsService.new
80
+ service.authorization = auth
81
+
82
+ Embulk.logger.debug "Fetching profile from API"
83
+ service.list_profiles("~all", "~all")
84
+ end
85
+
86
+ def time_parse_with_profile_timezone(time_string)
87
+ date_format =
88
+ case task["time_series"]
89
+ when "ga:dateHour"
90
+ "%Y%m%d%H"
91
+ when "ga:date"
92
+ "%Y%m%d"
93
+ end
94
+ parts = Date._strptime(time_string, date_format)
95
+
96
+ orig_timezone = Time.zone
97
+ Time.zone = get_profile[:timezone]
98
+ Time.zone.local(*parts.values_at(:year, :mon, :mday, :hour)).to_time
99
+ ensure
100
+ Time.zone = orig_timezone
101
+ end
102
+
103
+ def get_reports(page_token = nil)
104
+ # https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet
105
+ service = Google::Apis::AnalyticsreportingV4::AnalyticsReportingService.new
106
+ service.authorization = auth
107
+
108
+ request = Google::Apis::AnalyticsreportingV4::GetReportsRequest.new
109
+ request.report_requests = build_report_request(page_token)
110
+
111
+ Embulk.logger.info "Query to Core Report API: #{request.to_json}"
112
+ service.batch_get_reports request
113
+ end
114
+
115
+ def get_columns_list
116
+ # https://developers.google.com/analytics/devguides/reporting/metadata/v3/reference/metadata/columns/list
117
+ service = Google::Apis::AnalyticsV3::AnalyticsService.new
118
+ service.authorization = auth
119
+ service.list_metadata_columns("ga").to_h[:items]
120
+ end
121
+
122
+ def build_report_request(page_token = nil)
123
+ query = {
124
+ view_id: view_id,
125
+ dimensions: [{name: task["time_series"]}] + task["dimensions"].map{|d| {name: d}},
126
+ metrics: task["metrics"].map{|m| {expression: m}},
127
+ include_empty_rows: true,
128
+ page_size: preview? ? 10 : 10000,
129
+ }
130
+
131
+ if task["start_date"] || task["end_date"]
132
+ query[:date_ranges] = [{
133
+ start_date: task["start_date"],
134
+ end_date: task["end_date"],
135
+ }]
136
+ end
137
+
138
+ if page_token
139
+ query[:page_token] = page_token
140
+ end
141
+
142
+ [query]
143
+ end
144
+
145
+ def view_id
146
+ task["view_id"]
147
+ end
148
+
149
+ def auth
150
+ Google::Auth::ServiceAccountCredentials.make_creds(
151
+ json_key_io: StringIO.new(task["json_key_content"]),
152
+ scope: "https://www.googleapis.com/auth/analytics.readonly"
153
+ )
154
+ rescue => e
155
+ raise ConfigError.new(e.message)
156
+ end
157
+ end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,101 @@
1
+ module Embulk
2
+ module Input
3
+ module GoogleAnalytics
4
+ class Plugin < InputPlugin
5
+ ::Embulk::Plugin.register_input("google_analytics", self)
6
+
7
+ # https://developers.google.com/analytics/devguides/reporting/core/dimsmets
8
+
9
+ def self.transaction(config, &control)
10
+ task = task_from_config(config)
11
+ unless %w(ga:date ga:dateHour).include?(task["time_series"])
12
+ raise ConfigError.new("Unknown time_series '#{task["time_series"]}'. Use 'ga:dateHour' or 'ga:date'")
13
+ end
14
+ columns_list = Client.new(task).get_columns_list
15
+
16
+ columns = columns_from_task(task).map do |col_name|
17
+ col_info = columns_list.find{|col| col[:id] == col_name}
18
+ raise ConfigError.new("Unknown metric/dimension '#{col_name}'") unless col_info
19
+
20
+ col_type =
21
+ case col_info[:attributes][:dataType]
22
+ when "STRING"
23
+ :string
24
+ when "INTEGER"
25
+ :long
26
+ when "PERCENT", "FLOAT", "CURRENCY"
27
+ :double
28
+ when "TIME"
29
+ :timestamp
30
+ end
31
+
32
+ # time_series column should be timestamp
33
+ if col_name == task["time_series"]
34
+ col_type = :timestamp
35
+ end
36
+ Column.new(nil, canonicalize_column_name(col_name), col_type)
37
+ end
38
+
39
+ resume(task, columns, 1, &control)
40
+ end
41
+
42
+ def self.resume(task, columns, count, &control)
43
+ task_reports = yield(task, columns, count)
44
+
45
+ next_config_diff = {}
46
+ return next_config_diff
47
+ end
48
+
49
+ def self.task_from_config(config)
50
+ json_key_content = config.param("json_key_content", :string)
51
+ {
52
+ "json_key_content" => json_key_content,
53
+ "view_id" => config.param("view_id", :string),
54
+ "dimensions" => config.param("dimensions", :array, default: []),
55
+ "metrics" => config.param("metrics", :array, default: []),
56
+ "time_series" => config.param("time_series", :string),
57
+ "start_date" => config.param("start_date", :string, default: nil),
58
+ "end_date" => config.param("end_date", :string, default: nil),
59
+ }
60
+ end
61
+
62
+ def self.columns_from_task(task)
63
+ [
64
+ task["time_series"],
65
+ task["dimensions"],
66
+ task["metrics"],
67
+ ].flatten.uniq
68
+ end
69
+
70
+ def self.canonicalize_column_name(name)
71
+ # ga:dateHour -> date_hour
72
+ name.gsub(/^ga:/, "").gsub(/[A-Z]+/, "_\\0").gsub(/^_/, "").downcase
73
+ end
74
+
75
+ def init
76
+ end
77
+
78
+ def run
79
+ client = Client.new(task, preview?)
80
+ columns = self.class.columns_from_task(task)
81
+
82
+ client.each_report_row do |row|
83
+ values = row.values_at(*columns)
84
+ page_builder.add values
85
+ end
86
+ page_builder.finish
87
+
88
+ task_report = {}
89
+ return task_report
90
+ end
91
+
92
+ def preview?
93
+ org.embulk.spi.Exec.isPreview()
94
+ rescue java.lang.NullPointerException
95
+ false
96
+ end
97
+
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,9 @@
1
+ require "embulk/input/google_analytics/client"
2
+ require "embulk/input/google_analytics/plugin"
3
+
4
+ module Embulk
5
+ module Input
6
+ module GoogleAnalytics
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,290 @@
1
+ require "embulk"
2
+ Embulk.setup
3
+
4
+ require "embulk/input/google_analytics"
5
+ require "override_assert_raise"
6
+ require "fixture_helper"
7
+
8
+ module Embulk
9
+ module Input
10
+ module GoogleAnalytics
11
+ class TestClient < Test::Unit::TestCase
12
+ include OverrideAssertRaise
13
+ include FixtureHelper
14
+
15
+ sub_test_case "get_profile" do
16
+ setup do
17
+ conf = valid_config["in"]
18
+ @task = task(embulk_config(conf))
19
+ @client = Client.new(@task)
20
+ end
21
+
22
+ test "find view_id profile" do
23
+ target_profile = {
24
+ id: @task["view_id"],
25
+ }
26
+
27
+ mock(@client).get_all_profiles do
28
+ {
29
+ items: [
30
+ { id: 1 },
31
+ target_profile,
32
+ { id: 2 },
33
+ ]
34
+ }
35
+ end
36
+
37
+ assert_equal target_profile, @client.get_profile
38
+ end
39
+
40
+ test "raise ConfigError when view_id is not found" do
41
+ mock(@client).get_all_profiles do
42
+ {
43
+ items: [
44
+ { id: 1 },
45
+ { id: 2 },
46
+ ]
47
+ }
48
+ end
49
+
50
+ assert_raise(Embulk::ConfigError) do
51
+ @client.get_profile
52
+ end
53
+ end
54
+ end
55
+
56
+ sub_test_case "build_report_request" do
57
+ setup do
58
+ conf = valid_config["in"]
59
+ @task = task(embulk_config(conf))
60
+ @client = Client.new(@task)
61
+ end
62
+
63
+ test "page_token = nil" do
64
+ req = @client.build_report_request
65
+ expected = [
66
+ {
67
+ view_id: "101111111",
68
+ dimensions: [
69
+ {name: "ga:dateHour"}, {name: "ga:browser"}
70
+ ],
71
+ metrics: [
72
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
73
+ ],
74
+ include_empty_rows: true,
75
+ page_size: 10000
76
+ }
77
+ ]
78
+ assert_equal expected, req
79
+ end
80
+
81
+ test "page_token = 123" do
82
+ req = @client.build_report_request(123)
83
+ expected = [
84
+ {
85
+ view_id: "101111111",
86
+ dimensions: [
87
+ {name: "ga:dateHour"}, {name: "ga:browser"}
88
+ ],
89
+ metrics: [
90
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
91
+ ],
92
+ include_empty_rows: true,
93
+ page_size: 10000,
94
+ page_token: 123
95
+ }
96
+ ]
97
+ assert_equal expected, req
98
+ end
99
+
100
+ test "date range given" do
101
+ conf = valid_config["in"]
102
+ conf["start_date"] = "2000-01-01"
103
+ conf["end_date"] = "2000-01-07"
104
+ task = task(embulk_config(conf))
105
+ client = Client.new(task)
106
+ req = client.build_report_request
107
+
108
+ expected = [
109
+ {
110
+ view_id: "101111111",
111
+ dimensions: [
112
+ {name: "ga:dateHour"}, {name: "ga:browser"}
113
+ ],
114
+ metrics: [
115
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
116
+ ],
117
+ include_empty_rows: true,
118
+ page_size: 10000,
119
+ date_ranges: [
120
+ {
121
+ start_date: conf["start_date"],
122
+ end_date: conf["end_date"],
123
+ }
124
+ ]
125
+ }
126
+ ]
127
+ assert_equal expected, req
128
+ end
129
+ end
130
+
131
+ sub_test_case "time_parse_with_profile_timezone" do
132
+ setup do
133
+ conf = valid_config["in"]
134
+ conf["time_series"] = time_series
135
+ @client = Client.new(task(embulk_config(conf)))
136
+ end
137
+
138
+ sub_test_case "dateHour" do
139
+ setup do
140
+ stub(@client).get_profile { {timezone: "America/Los_Angeles" } }
141
+ end
142
+
143
+ test "in dst" do
144
+ time = @client.time_parse_with_profile_timezone("2016060122")
145
+ assert_equal Time.parse("2016-06-01 22:00:00 -07:00"), time
146
+ end
147
+
148
+ test "not in dst" do
149
+ time = @client.time_parse_with_profile_timezone("2016010122")
150
+ assert_equal Time.parse("2016-01-01 22:00:00 -08:00"), time
151
+ end
152
+
153
+ def time_series
154
+ "ga:dateHour"
155
+ end
156
+ end
157
+
158
+ sub_test_case "date" do
159
+ setup do
160
+ stub(@client).get_profile { {timezone: "America/Los_Angeles" } }
161
+ end
162
+
163
+ test "in dst" do
164
+ time = @client.time_parse_with_profile_timezone("20160601")
165
+ assert_equal Time.parse("2016-06-01 00:00:00 PDT"), time
166
+ end
167
+
168
+ test "not in dst" do
169
+ time = @client.time_parse_with_profile_timezone("2016010122")
170
+ assert_equal Time.parse("2016-01-01 00:00:00 PST"), time
171
+ end
172
+
173
+ def time_series
174
+ "ga:date"
175
+ end
176
+ end
177
+ end
178
+
179
+ sub_test_case "auth" do
180
+ setup do
181
+ conf = valid_config["in"]
182
+ @client = Client.new(task(embulk_config(conf)))
183
+ end
184
+
185
+ test "raise ConfigError when auth failed" do
186
+ stub(Google::Auth::ServiceAccountCredentials).make_creds { raise "some error" }
187
+ assert_raise(Embulk::ConfigError) do
188
+ @client.auth
189
+ end
190
+ end
191
+ end
192
+
193
+ sub_test_case "each_report_row" do
194
+ setup do
195
+ conf = valid_config["in"]
196
+ @client = Client.new(task(embulk_config(conf)))
197
+ stub(@client).get_profile { {timezone: "Asia/Tokyo"} }
198
+ @logger = Logger.new(File::NULL)
199
+ stub(Embulk).logger { @logger }
200
+ end
201
+
202
+ test "without pagination" do
203
+ stub(@client).get_reports { report }
204
+ fetched_rows = []
205
+ @client.each_report_row do |row|
206
+ fetched_rows << row
207
+ end
208
+
209
+ expected = [
210
+ {
211
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060120"),
212
+ "ga:browser" => "curl",
213
+ "ga:visits" => "1",
214
+ "ga:pageviews" => "1",
215
+ },
216
+ {
217
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060121"),
218
+ "ga:browser" => "curl",
219
+ "ga:visits" => "2",
220
+ "ga:pageviews" => "2",
221
+ },
222
+ {
223
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060122"),
224
+ "ga:browser" => "curl",
225
+ "ga:visits" => "3",
226
+ "ga:pageviews" => "3",
227
+ },
228
+ ]
229
+ assert_equal expected, fetched_rows
230
+ end
231
+
232
+ test "with pagination" do
233
+ next_page_token = "10000"
234
+ mock(@client).get_reports(nil) { report_with_pages }
235
+ mock(@client).get_reports(next_page_token) { report }
236
+ fetched_rows = []
237
+ @client.each_report_row do |row|
238
+ fetched_rows << row
239
+ end
240
+ assert_equal 6, fetched_rows.length
241
+ end
242
+
243
+ sub_test_case "logger" do
244
+ test "with empty rows" do
245
+ response = report.dup
246
+ response[:reports].first[:data][:rows] = []
247
+ response[:reports].first[:data][:row_count] = 0
248
+ stub(@client).get_reports { response }
249
+
250
+ mock(@logger).warn("Result has 0 rows.")
251
+ @client.each_report_row {}
252
+ end
253
+
254
+ test "without rows" do
255
+ response = report.dup
256
+ response[:reports].first[:data].delete(:rows)
257
+ stub(@client).get_reports { response }
258
+
259
+ mock(@logger).warn("Result doesn't contain rows.")
260
+ @client.each_report_row {}
261
+ end
262
+ end
263
+
264
+ def report_with_pages
265
+ response = report.dup
266
+ response[:reports].first[:next_page_token] = "10000"
267
+ response
268
+ end
269
+
270
+ def report
271
+ json = fixture_read("reports.json")
272
+ JSON.parse(json, symbolize_names: true)
273
+ end
274
+ end
275
+
276
+ def task(config)
277
+ Plugin.task_from_config(config)
278
+ end
279
+
280
+ def valid_config
281
+ fixture_load("valid.yml")
282
+ end
283
+
284
+ def embulk_config(hash)
285
+ Embulk::DataSource.new(hash)
286
+ end
287
+ end
288
+ end
289
+ end
290
+ end
@@ -0,0 +1,214 @@
1
+ require "embulk"
2
+ Embulk.setup
3
+
4
+ require "embulk/input/google_analytics"
5
+ require "override_assert_raise"
6
+ require "fixture_helper"
7
+
8
+ module Embulk
9
+ module Input
10
+ module GoogleAnalytics
11
+ class TestPlugin < Test::Unit::TestCase
12
+ include OverrideAssertRaise
13
+ include FixtureHelper
14
+
15
+ sub_test_case ".transaction" do
16
+ setup do
17
+ any_instance_of(Client) do |klass|
18
+ stub(klass).get_columns_list do
19
+ [
20
+ {id: "ga:dateHour", attributes: {dataType: "STRING"}},
21
+ {id: "ga:date", attributes: {dataType: "STRING"}},
22
+ {id: "ga:browser", attributes: {dataType: "STRING"}},
23
+ {id: "ga:visits", attributes: {dataType: "INTEGER"}},
24
+ {id: "ga:pageviews", attributes: {dataType: "INTEGER"}},
25
+ ]
26
+ end
27
+ end
28
+ end
29
+
30
+ test "not raised exception" do
31
+ stub(Plugin).resume { Hash.new }
32
+ assert_nothing_raised do
33
+ Plugin.transaction(embulk_config(valid_config["in"]))
34
+ end
35
+ end
36
+
37
+ test "assemble expected columns" do
38
+ columns = [
39
+ Embulk::Column.new(nil, "date_hour", :timestamp),
40
+ Embulk::Column.new(nil, "browser", :string),
41
+ Embulk::Column.new(nil, "visits", :long),
42
+ Embulk::Column.new(nil, "pageviews", :long),
43
+ ]
44
+ mock(Plugin).resume(anything, columns, 1)
45
+ Plugin.transaction(embulk_config(valid_config["in"]))
46
+ end
47
+
48
+ sub_test_case "raise error when unknown column given" do
49
+ setup { stub(Plugin).resume { Hash.new } }
50
+
51
+ test "for dimensions" do
52
+ conf = valid_config["in"]
53
+ conf["dimensions"] << unknown_col_name
54
+ assert_raise(Embulk::ConfigError.new(expected_message)) do
55
+ Plugin.transaction(embulk_config(conf))
56
+ end
57
+ end
58
+
59
+ test "for metrics" do
60
+ conf = valid_config["in"]
61
+ conf["metrics"] << unknown_col_name
62
+ assert_raise(Embulk::ConfigError.new(expected_message)) do
63
+ Plugin.transaction(embulk_config(conf))
64
+ end
65
+ end
66
+
67
+ test "for time_series" do
68
+ conf = valid_config["in"]
69
+ conf["time_series"] = unknown_col_name
70
+ message = "Unknown time_series 'ga:foooooo'. Use 'ga:dateHour' or 'ga:date'"
71
+ assert_raise(Embulk::ConfigError.new(message)) do
72
+ Plugin.transaction(embulk_config(conf))
73
+ end
74
+ end
75
+
76
+ def unknown_col_name
77
+ "ga:foooooo"
78
+ end
79
+
80
+ def expected_message
81
+ "Unknown metric/dimension '#{unknown_col_name}'"
82
+ end
83
+ end
84
+
85
+ sub_test_case "type conversion" do
86
+ setup do
87
+ any_instance_of(Client) do |klass|
88
+ stub(klass).get_columns_list do
89
+ [
90
+ {id: "ga:dateHour", attributes: {dataType: "STRING"}},
91
+ {id: "ga:itemsPerPurchase", attributes: {dataType: "FLOAT"}},
92
+ {id: "ga:visits", attributes: {dataType: "INTEGER"}},
93
+ {id: "ga:CPM", attributes: {dataType: "CURRENCY"}},
94
+ {id: "ga:CTR", attributes: {dataType: "PERCENT"}},
95
+ {id: "ga:sessionDuration", attributes: {dataType: "TIME"}},
96
+ ]
97
+ end
98
+ end
99
+ end
100
+
101
+ test "Convert Embulk data types" do
102
+ conf = valid_config["in"]
103
+ conf["dimensions"] = []
104
+ conf["metrics"] = [
105
+ "ga:sessionDuration",
106
+ "ga:CPM",
107
+ "ga:CTR",
108
+ "ga:visits",
109
+ "ga:itemsPerPurchase",
110
+ ]
111
+ expected_columns = [
112
+ Column.new(nil, "date_hour", :timestamp),
113
+ Column.new(nil, "session_duration", :timestamp),
114
+ Column.new(nil, "cpm", :double),
115
+ Column.new(nil, "ctr", :double),
116
+ Column.new(nil, "visits", :long),
117
+ Column.new(nil, "items_per_purchase", :double),
118
+ ]
119
+
120
+ mock(Plugin).resume(anything, expected_columns, anything)
121
+ Plugin.transaction(embulk_config(conf))
122
+ end
123
+ end
124
+ end
125
+
126
+ sub_test_case ".run" do
127
+ sub_test_case "returned value should be added into page_builder" do
128
+ setup do
129
+ @page_builder = Object.new
130
+ conf = valid_config["in"]
131
+ conf["time_series"] = time_series
132
+ @plugin = Plugin.new(embulk_config(conf), nil, nil, @page_builder)
133
+ end
134
+
135
+ sub_test_case "time_series: 'ga:dateHour'" do
136
+ def time_series
137
+ "ga:dateHour"
138
+ end
139
+
140
+ test "HH:00:00 time given" do
141
+ Time.zone = "America/Los_Angeles"
142
+ time = Time.zone.parse("2016-06-01 12:00:00").to_time
143
+ any_instance_of(Client) do |klass|
144
+ stub(klass).each_report_row do |block|
145
+ row = {
146
+ "ga:dateHour" => time,
147
+ "ga:browser" => "wget",
148
+ "ga:visits" => 3,
149
+ "ga:pageviews" => 4,
150
+ }
151
+ block.call row
152
+ end
153
+ end
154
+
155
+ mock(@page_builder).add([time, "wget", 3, 4])
156
+ mock(@page_builder).finish
157
+ @plugin.run
158
+ end
159
+ end
160
+
161
+ sub_test_case "time_series: 'ga:date'" do
162
+ def time_series
163
+ "ga:date"
164
+ end
165
+
166
+ test "00:00:00 time given" do
167
+ Time.zone = "America/Los_Angeles"
168
+ time = Time.zone.parse("2016-06-01 00:00:00").to_time
169
+ any_instance_of(Client) do |klass|
170
+ stub(klass).each_report_row do |block|
171
+ row = {
172
+ "ga:date" => time,
173
+ "ga:browser" => "wget",
174
+ "ga:visits" => 3,
175
+ "ga:pageviews" => 4,
176
+ }
177
+ block.call row
178
+ end
179
+ end
180
+
181
+ mock(@page_builder).add([time, "wget", 3, 4])
182
+ mock(@page_builder).finish
183
+ @plugin.run
184
+ end
185
+ end
186
+ end
187
+ end
188
+
189
+ sub_test_case "canonicalize_column_name" do
190
+ data do
191
+ [
192
+ ["typical", ["ga:dateHour", "date_hour"]],
193
+ ["all capital", ["ga:CPM", "cpm"]],
194
+ ["capitals with word", ["ga:goalXXValue", "goal_xxvalue"]],
195
+ ["ID", ["ga:adwordsCustomerID", "adwords_customer_id"]],
196
+ ["word + capitals", ["ga:dcmCTR", "dcm_ctr"]],
197
+ ]
198
+ end
199
+ test "converting" do |(target, expected)|
200
+ assert_equal expected, Plugin.canonicalize_column_name(target)
201
+ end
202
+ end
203
+
204
+ def valid_config
205
+ fixture_load("valid.yml")
206
+ end
207
+
208
+ def embulk_config(hash)
209
+ Embulk::DataSource.new(hash)
210
+ end
211
+ end
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,16 @@
1
+ require "yaml"
2
+ require "pathname"
3
+
4
+ module FixtureHelper
5
+ def fixture_dir
6
+ Pathname.new(__FILE__).dirname.join("fixtures")
7
+ end
8
+
9
+ def fixture_read(name)
10
+ fixture_dir.join(name).read
11
+ end
12
+
13
+ def fixture_load(name)
14
+ YAML.load fixture_read(name)
15
+ end
16
+ end
@@ -0,0 +1,71 @@
1
+ {
2
+ "reports": [
3
+ {
4
+ "column_header": {
5
+ "dimensions": [
6
+ "ga:dateHour",
7
+ "ga:browser"
8
+ ],
9
+ "metric_header": {
10
+ "metric_header_entries": [
11
+ {
12
+ "type": "INTEGER",
13
+ "name": "ga:visits"
14
+ },
15
+ {
16
+ "type": "INTEGER",
17
+ "name": "ga:pageviews"
18
+ }
19
+ ]
20
+ }
21
+ },
22
+ "data": {
23
+ "row_count": 3,
24
+ "rows": [
25
+ {
26
+ "metrics": [
27
+ {
28
+ "values": [
29
+ "1",
30
+ "1"
31
+ ]
32
+ }
33
+ ],
34
+ "dimensions": [
35
+ "2016060120",
36
+ "curl"
37
+ ]
38
+ },
39
+ {
40
+ "metrics": [
41
+ {
42
+ "values": [
43
+ "2",
44
+ "2"
45
+ ]
46
+ }
47
+ ],
48
+ "dimensions": [
49
+ "2016060121",
50
+ "curl"
51
+ ]
52
+ },
53
+ {
54
+ "metrics": [
55
+ {
56
+ "values": [
57
+ "3",
58
+ "3"
59
+ ]
60
+ }
61
+ ],
62
+ "dimensions": [
63
+ "2016060122",
64
+ "curl"
65
+ ]
66
+ }
67
+ ]
68
+ }
69
+ }
70
+ ]
71
+ }
@@ -0,0 +1,17 @@
1
+ in:
2
+ type: google_analytics
3
+ json_key_content: |
4
+ {
5
+ "type": "service_account"
6
+ }
7
+ view_id: 101111111
8
+ time_series: "ga:dateHour"
9
+
10
+ dimensions:
11
+ - "ga:browser"
12
+ metrics:
13
+ - "ga:visits"
14
+ - "ga:pageviews"
15
+
16
+ out:
17
+ type: stdout
@@ -0,0 +1,24 @@
1
+ module OverrideAssertRaise
2
+   # NOTE: Embulk 0.7.1+ requires ConfigError to be raised as `ConfigError.new("message")`.
3
+   # The original `assert_raise` method can't catch that, but `begin .. rescue` can,
4
+   # so we override assert_raise as below.
5
+ def assert_raise(expected_class_or_instance = StandardError, &block)
6
+ begin
7
+ expected_class = expected_class_or_instance.is_a?(Class) ? expected_class_or_instance : expected_class_or_instance.class
8
+ block.call
9
+ assert_equal expected_class, nil
10
+ rescue ::Test::Unit::AssertionFailedError => e
11
+ # failed assert raises this Error and that extends StandardError, so rescue it first
12
+ raise e
13
+ rescue expected_class => e
14
+ # https://github.com/test-unit/test-unit/issues/94
15
+ if expected_class_or_instance == expected_class
16
+ assert true # passed
17
+ else
18
+ assert_equal expected_class_or_instance.message, e.message
19
+ end
20
+ rescue => e
21
+ assert_equal(expected_class, e.class) # not expected one raised
22
+ end
23
+ end
24
+ end
data/test/run-test.rb ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
4
+ lib_dir = File.join(base_dir, "lib")
5
+ test_dir = File.join(base_dir, "test")
6
+
7
+ require "test-unit"
8
+ require "test/unit/rr"
9
+
10
+ $LOAD_PATH.unshift(lib_dir)
11
+ $LOAD_PATH.unshift(test_dir)
12
+
13
+ ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"
14
+
15
+ if ENV["COVERAGE"]
16
+ if ENV["CI"]
17
+ require "codeclimate-test-reporter"
18
+ CodeClimate::TestReporter.start
19
+ else
20
+ require 'simplecov'
21
+ SimpleCov.start 'test_frameworks'
22
+ end
23
+ end
24
+
25
+ exit Test::Unit::AutoRunner.run(true, test_dir)
metadata ADDED
@@ -0,0 +1,238 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-google_analytics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - uu59
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ name: httpclient
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '0.9'
33
+ name: google-api-client
34
+ prerelease: false
35
+ type: :runtime
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ name: signet
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ name: activesupport
62
+ prerelease: false
63
+ type: :runtime
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.8.9
75
+ name: embulk
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 0.8.9
83
+ - !ruby/object:Gem::Dependency
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 1.10.6
89
+ name: bundler
90
+ prerelease: false
91
+ type: :development
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: 1.10.6
97
+ - !ruby/object:Gem::Dependency
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '10.0'
103
+ name: rake
104
+ prerelease: false
105
+ type: :development
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '10.0'
111
+ - !ruby/object:Gem::Dependency
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ name: test-unit
118
+ prerelease: false
119
+ type: :development
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ name: test-unit-rr
132
+ prerelease: false
133
+ type: :development
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ name: simplecov
146
+ prerelease: false
147
+ type: :development
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ name: codeclimate-test-reporter
160
+ prerelease: false
161
+ type: :development
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: '0'
173
+ name: pry
174
+ prerelease: false
175
+ type: :development
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ description: Loads records from Google Analytics.
182
+ email:
183
+ - k@uu59.org
184
+ executables: []
185
+ extensions: []
186
+ extra_rdoc_files: []
187
+ files:
188
+ - ".gitignore"
189
+ - ".ruby-version"
190
+ - CHANGELOG.md
191
+ - Gemfile
192
+ - LICENSE.txt
193
+ - README.md
194
+ - Rakefile
195
+ - circle.yml
196
+ - embulk-input-google_analytics.gemspec
197
+ - lib/embulk/input/google_analytics.rb
198
+ - lib/embulk/input/google_analytics/client.rb
199
+ - lib/embulk/input/google_analytics/plugin.rb
200
+ - test/embulk/input/google_analytics/test_client.rb
201
+ - test/embulk/input/google_analytics/test_plugin.rb
202
+ - test/fixture_helper.rb
203
+ - test/fixtures/reports.json
204
+ - test/fixtures/valid.yml
205
+ - test/override_assert_raise.rb
206
+ - test/run-test.rb
207
+ homepage: https://github.com/treasure-data/embulk-input-google_analytics
208
+ licenses:
209
+ - MIT
210
+ metadata: {}
211
+ post_install_message:
212
+ rdoc_options: []
213
+ require_paths:
214
+ - lib
215
+ required_ruby_version: !ruby/object:Gem::Requirement
216
+ requirements:
217
+ - - ">="
218
+ - !ruby/object:Gem::Version
219
+ version: '0'
220
+ required_rubygems_version: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ version: '0'
225
+ requirements: []
226
+ rubyforge_project:
227
+ rubygems_version: 2.4.8
228
+ signing_key:
229
+ specification_version: 4
230
+ summary: Google Analytics input plugin for Embulk
231
+ test_files:
232
+ - test/embulk/input/google_analytics/test_client.rb
233
+ - test/embulk/input/google_analytics/test_plugin.rb
234
+ - test/fixture_helper.rb
235
+ - test/fixtures/reports.json
236
+ - test/fixtures/valid.yml
237
+ - test/override_assert_raise.rb
238
+ - test/run-test.rb