embulk-input-google_analytics 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f461f4cc05a23ba6ed9b53e23c826fc69b7c82c4
4
+ data.tar.gz: d73dea5ede7fb7ce0b348409808e2a7036392fa7
5
+ SHA512:
6
+ metadata.gz: 5d014c2caf49c2b73c325433cb55580581184634b1a4b7dbff9d7030278f8b569b2bcf5d934e0638f92b8114ac3a0f6e1dd8df7fff0747ff4d51ab855af534ec
7
+ data.tar.gz: 3f36eed0bb7b8926d7aeecb7902eb0631e6a31f86e91e81f041b2e97a5894abb1a9bc47329b1acd9d96d80ed482a6915dfce4561fdb406ad37b2c6ed582d8d42
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
6
+ /coverage/
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.0.4.0
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 - 2016-07-07
2
+
3
+ The first release!!
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ [![CircleCI](https://circleci.com/gh/treasure-data/embulk-input-google_analytics/tree/master.svg?style=svg)](https://circleci.com/gh/treasure-data/embulk-input-google_analytics/tree/master)
2
+ [![Code Climate](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/gpa.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics)
3
+ [![Test Coverage](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/coverage.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/coverage)
4
+ [![Issue Count](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics/badges/issue_count.svg)](https://codeclimate.com/github/treasure-data/embulk-input-google_analytics)
5
+ [![Gem Version](https://badge.fury.io/rb/embulk-input-google_analytics.svg)](https://badge.fury.io/rb/embulk-input-google_analytics)
6
+
7
+ # Google Analytics input plugin for Embulk
8
+
9
+ Embulk input plugin for Google Analytics reports.
10
+
11
+ ## Configuration
12
+
13
+ - **json_key_content**: Content of the Google service account JSON key file. See the example config below. (string, required)
14
+ - **view_id**: View ID of the target data. You can find it on the [Google Analytics Admin page](https://lucidpress.zendesk.com/hc/en-us/articles/207335356-Find-your-Google-Analytics-Tracking-ID-View-ID) (you need permission to access the Admin page). (string, required)
15
+ - **time_series**: Time-series dimension of the report. Must be either `ga:dateHour` or `ga:date`. (string, required)
16
+ - **dimensions**: Target dimensions (array, default: `[]` )
17
+ - **metrics**: Target metrics (array, default: `[]` )
18
+ - **start_date**: Target report start date (string, default: [7 days ago](https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet#reportrequest))
19
+ - **end_date**: Target report end date (string, default: [1 day ago](https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet#reportrequest))
20
+
21
+ ## Example
22
+
23
+ ```yaml
24
+ in:
25
+ type: google_analytics
26
+ json_key_content: |
27
+ {
28
+ "type": "service_account",
29
+ "project_id": "....",
30
+ "private_key_id": "....",
31
+ "private_key": "-----BEGIN PRIVATE KEY-----\n..........................\n-----END PRIVATE KEY-----\n",
32
+ "client_email": ".....",
33
+ "client_id": ".........",
34
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
35
+ "token_uri": "https://accounts.google.com/o/oauth2/token",
36
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
37
+ "client_x509_cert_url": ".........."
38
+ }
39
+ view_id: 123111111
40
+ time_series: "ga:dateHour" # hourly basis
41
+
42
+ # https://developers.google.com/analytics/devguides/reporting/core/dimsmets
43
+ dimensions:
44
+ - "ga:browser"
45
+ metrics:
46
+ - "ga:visits"
47
+ - "ga:pageviews"
48
+
49
+ start_date: "2016-06-27"
50
+ end_date: "2016-06-28"
51
+ ```
52
+
53
+
54
+ ## Build
55
+
56
+ ```
57
+ $ rake
58
+ ```
data/Rakefile ADDED
@@ -0,0 +1,15 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :test
4
+
5
+ desc "Run tests"
6
+ task :test do
7
+ ruby("--debug", "test/run-test.rb", "--use-color=yes", "--collector=dir")
8
+ end
9
+
10
+ desc "Run tests with coverage"
11
+ task :cov do
12
+ ENV["COVERAGE"] = "1"
13
+ ruby("--debug", "test/run-test.rb", "--use-color=yes", "--collector=dir")
14
+ end
15
+
data/circle.yml ADDED
@@ -0,0 +1,23 @@
1
+ machine:
2
+ java:
3
+ version: oraclejdk8
4
+ ruby:
5
+ version: jruby-9.0.4.0
6
+ environment:
7
+ JRUBY_OPTS: "-Xcli.debug=true --debug"
8
+
9
+ dependencies:
10
+ pre:
11
+ - ruby -v
12
+ - bundle -v
13
+ - grep bundler <<< "$(gem list)" || gem i bundler --no-document
14
+ - bundle -v
15
+
16
+ test:
17
+ override:
18
+ - bundle exec rake cov
19
+
20
+ general:
21
+ artifacts:
22
+ - "coverage"
23
+
@@ -0,0 +1,29 @@
1
+
2
+ Gem::Specification.new do |spec|
3
+ spec.name = "embulk-input-google_analytics"
4
+ spec.version = "0.1.0"
5
+ spec.authors = ["uu59"]
6
+ spec.summary = "Google Analytics input plugin for Embulk"
7
+ spec.description = "Loads records from Google Analytics."
8
+ spec.email = ["k@uu59.org"]
9
+ spec.licenses = ["MIT"]
10
+ spec.homepage = "https://github.com/treasure-data/embulk-input-google_analytics"
11
+
12
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
13
+ spec.test_files = spec.files.grep(%r{^(test|spec)/})
14
+ spec.require_paths = ["lib"]
15
+
16
+ spec.add_dependency "httpclient"
17
+ spec.add_dependency "google-api-client", "~> 0.9"
18
+ spec.add_dependency "signet"
19
+ spec.add_dependency "activesupport" # for Time.zone.parse
20
+
21
+ spec.add_development_dependency 'embulk', ['>= 0.8.9']
22
+ spec.add_development_dependency 'bundler', ['>= 1.10.6']
23
+ spec.add_development_dependency 'rake', ['>= 10.0']
24
+ spec.add_development_dependency 'test-unit'
25
+ spec.add_development_dependency 'test-unit-rr'
26
+ spec.add_development_dependency 'simplecov'
27
+ spec.add_development_dependency "codeclimate-test-reporter"
28
+ spec.add_development_dependency "pry"
29
+ end
@@ -0,0 +1,160 @@
1
+ require "active_support/core_ext/time"
2
+ require "google/apis/analyticsreporting_v4"
3
+ require "google/apis/analytics_v3"
4
+
5
+ module Embulk
6
+ module Input
7
+ module GoogleAnalytics
8
# Client for the Google Analytics APIs used by this plugin.
#
# Report rows are fetched through the Reporting API v4 service
# (batch_get_reports); the view's profile (used for its timezone) and the
# column metadata are fetched through the Analytics API v3 service. Every
# request is authorized with the JSON key held in task["json_key_content"].
class Client
  # Relative dates sent when only one end of the date range is configured.
  # They mirror the defaults documented in the README ("7 days ago" and
  # "1 day ago"), so a half-specified range never carries a nil date.
  DEFAULT_START_DATE = "7daysAgo".freeze
  DEFAULT_END_DATE = "yesterday".freeze

  attr_reader :task

  # @param task [Hash] plugin task with string keys ("view_id",
  #   "time_series", "dimensions", "metrics", "start_date", "end_date",
  #   "json_key_content")
  # @param is_preview [Boolean] true when running under `embulk preview`
  def initialize(task, is_preview = false)
    @task = task
    @is_preview = is_preview
  end

  # @return [Boolean] whether this client was created for a preview run
  def preview?
    @is_preview
  end

  # Fetches the report page by page and calls the block once per row.
  #
  # Each row is passed as a Hash keyed by the dimension/metric name
  # (e.g. "ga:browser"); the time_series entry is converted to a Time in
  # the profile's timezone. Iteration stops when the response has no rows,
  # after the first page in preview mode, or when no next_page_token is
  # returned.
  def each_report_row(&block)
    page_token = nil
    Embulk.logger.info "view_id:#{view_id} timezone has been set as '#{get_profile[:timezone]}'"

    loop do
      result = get_reports(page_token)
      report = result.to_h[:reports].first

      unless page_token
        # display for first request only
        Embulk.logger.info "Total: #{report[:data][:row_count]} rows. Fetched first response"
      end

      unless report[:data].has_key?(:rows)
        Embulk.logger.warn "Result doesn't contain rows."
        break
      end

      if report[:data][:rows].empty?
        Embulk.logger.warn "Result has 0 rows."
        break
      end

      dimensions = report[:column_header][:dimensions]
      metrics = report[:column_header][:metric_header][:metric_header_entries].map{|m| m[:name]}
      report[:data][:rows].each do |row|
        dim = dimensions.zip(row[:dimensions]).to_h
        # Only the first metrics entry is read; a single date range is requested.
        met = metrics.zip(row[:metrics].first[:values]).to_h
        format_row = dim.merge(met)
        time = format_row[task["time_series"]]
        format_row[task["time_series"]] = time_parse_with_profile_timezone(time)
        block.call format_row
      end

      # A preview only needs the first (small) page.
      break if preview?

      page_token = report[:next_page_token]
      break unless page_token
      Embulk.logger.info "Fetching report with page_token: #{page_token}"
    end
  end

  # Returns the profile whose :id equals the configured view_id (memoized).
  #
  # @raise [Embulk::ConfigError] when no profile matches view_id
  def get_profile
    @profile ||=
      begin
        profile = get_all_profiles.to_h[:items].find do |prof|
          prof[:id] == view_id
        end

        unless profile
          raise Embulk::ConfigError.new("Can't find view_id:#{view_id} profile via Google Analytics API.")
        end

        profile
      end
  end

  # Lists profiles through the Analytics v3 service, passing "~all" for
  # both arguments of list_profiles.
  def get_all_profiles
    service = Google::Apis::AnalyticsV3::AnalyticsService.new
    service.authorization = auth

    Embulk.logger.debug "Fetching profile from API"
    service.list_profiles("~all", "~all")
  end

  # Parses a ga:date / ga:dateHour value as a local time in the profile's
  # timezone.
  #
  # @param time_string [String] "YYYYMMDDHH" for ga:dateHour, "YYYYMMDD"
  #   for ga:date
  # @return [Time]
  # @raise [ArgumentError] when time_string does not match the format
  def time_parse_with_profile_timezone(time_string)
    date_format =
      case task["time_series"]
      when "ga:dateHour"
        "%Y%m%d%H"
      when "ga:date"
        "%Y%m%d"
      end
    # Date._strptime returns nil (rather than raising) on a mismatch.
    parts = Date._strptime(time_string, date_format)
    unless parts
      raise ArgumentError.new("Can't parse '#{time_string}' as #{task["time_series"]} (#{date_format})")
    end

    # Time.zone is process-global state: remember it before switching and
    # restore it only once it has actually been captured, so a failure
    # above can never reset the global zone to nil.
    orig_timezone = Time.zone
    begin
      Time.zone = get_profile[:timezone]
      # :hour is absent (nil) for ga:date, which yields midnight.
      Time.zone.local(*parts.values_at(:year, :mon, :mday, :hour)).to_time
    ensure
      Time.zone = orig_timezone
    end
  end

  # Requests one page of the report.
  #
  # @param page_token [String, nil] token of the page to fetch, nil for
  #   the first page
  def get_reports(page_token = nil)
    # https://developers.google.com/analytics/devguides/reporting/core/v4/rest/v4/reports/batchGet
    service = Google::Apis::AnalyticsreportingV4::AnalyticsReportingService.new
    service.authorization = auth

    request = Google::Apis::AnalyticsreportingV4::GetReportsRequest.new
    request.report_requests = build_report_request(page_token)

    Embulk.logger.info "Query to Core Report API: #{request.to_json}"
    service.batch_get_reports request
  end

  # Returns the :items of the "ga" metadata column list.
  def get_columns_list
    # https://developers.google.com/analytics/devguides/reporting/metadata/v3/reference/metadata/columns/list
    service = Google::Apis::AnalyticsV3::AnalyticsService.new
    service.authorization = auth
    service.list_metadata_columns("ga").to_h[:items]
  end

  # Builds the report_requests payload (an Array holding a single query).
  #
  # The time_series dimension always comes first. date_ranges is only
  # included when start_date and/or end_date is configured; if just one
  # of them is set, the other falls back to DEFAULT_START_DATE /
  # DEFAULT_END_DATE instead of being sent as nil.
  #
  # @param page_token [String, nil] added to the query when given
  # @return [Array<Hash>]
  def build_report_request(page_token = nil)
    query = {
      view_id: view_id,
      dimensions: [{name: task["time_series"]}] + task["dimensions"].map{|d| {name: d}},
      metrics: task["metrics"].map{|m| {expression: m}},
      include_empty_rows: true,
      page_size: preview? ? 10 : 10000,
    }

    if task["start_date"] || task["end_date"]
      query[:date_ranges] = [{
        start_date: task["start_date"] || DEFAULT_START_DATE,
        end_date: task["end_date"] || DEFAULT_END_DATE,
      }]
    end

    query[:page_token] = page_token if page_token

    [query]
  end

  # @return [String] the configured view ID
  def view_id
    task["view_id"]
  end

  # Builds read-only service account credentials from json_key_content.
  #
  # @raise [ConfigError] wrapping any error raised while building them
  def auth
    Google::Auth::ServiceAccountCredentials.make_creds(
      json_key_io: StringIO.new(task["json_key_content"]),
      scope: "https://www.googleapis.com/auth/analytics.readonly"
    )
  rescue => e
    raise ConfigError.new(e.message)
  end
end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,101 @@
1
+ module Embulk
2
+ module Input
3
+ module GoogleAnalytics
4
# Embulk input plugin, registered under the name "google_analytics".
#
# .transaction validates the config and builds the output schema from the
# column metadata; #run streams report rows from Client into page_builder.
class Plugin < InputPlugin
  ::Embulk::Plugin.register_input("google_analytics", self)

  # https://developers.google.com/analytics/devguides/reporting/core/dimsmets

  # Embulk column type for each dataType found in the column metadata.
  COLUMN_TYPES = {
    "STRING" => :string,
    "INTEGER" => :long,
    "PERCENT" => :double,
    "FLOAT" => :double,
    "CURRENCY" => :double,
    "TIME" => :timestamp,
  }.freeze

  # Validates the config, resolves every requested column against the
  # metadata list and hands the resulting schema to .resume.
  #
  # @raise [ConfigError] for an unsupported time_series, an unknown
  #   metric/dimension, or a column whose dataType has no Embulk mapping
  def self.transaction(config, &control)
    task = task_from_config(config)
    unless %w(ga:date ga:dateHour).include?(task["time_series"])
      raise ConfigError.new("Unknown time_series '#{task["time_series"]}'. Use 'ga:dateHour' or 'ga:date'")
    end
    columns_list = Client.new(task).get_columns_list

    columns = columns_from_task(task).map do |col_name|
      col_info = columns_list.find{|col| col[:id] == col_name}
      raise ConfigError.new("Unknown metric/dimension '#{col_name}'") unless col_info

      col_type =
        if col_name == task["time_series"]
          # time_series column should be timestamp
          :timestamp
        else
          data_type = col_info[:attributes][:dataType]
          # Fail with a readable message instead of building a Column
          # whose type is nil.
          COLUMN_TYPES.fetch(data_type) do
            raise ConfigError.new("Unsupported data type '#{data_type}' for '#{col_name}'")
          end
        end
      Column.new(nil, canonicalize_column_name(col_name), col_type)
    end

    resume(task, columns, 1, &control)
  end

  # Yields the task, schema and task count to the given block and returns
  # an empty config diff.
  def self.resume(task, columns, count, &control)
    yield(task, columns, count)

    next_config_diff = {}
    return next_config_diff
  end

  # Reads the plugin parameters from config into a plain Hash.
  # json_key_content, view_id and time_series are required; dimensions and
  # metrics default to []; start_date and end_date default to nil.
  def self.task_from_config(config)
    json_key_content = config.param("json_key_content", :string)
    {
      "json_key_content" => json_key_content,
      "view_id" => config.param("view_id", :string),
      "dimensions" => config.param("dimensions", :array, default: []),
      "metrics" => config.param("metrics", :array, default: []),
      "time_series" => config.param("time_series", :string),
      "start_date" => config.param("start_date", :string, default: nil),
      "end_date" => config.param("end_date", :string, default: nil),
    }
  end

  # Column names in output order: time_series, dimensions, then metrics,
  # with duplicates removed.
  def self.columns_from_task(task)
    [
      task["time_series"],
      task["dimensions"],
      task["metrics"],
    ].flatten.uniq
  end

  # Converts an API column id into a snake_case column name by dropping
  # the "ga:" prefix and prefixing each run of capitals with "_".
  def self.canonicalize_column_name(name)
    # ga:dateHour -> date_hour
    name.gsub(/^ga:/, "").gsub(/[A-Z]+/, "_\\0").gsub(/^_/, "").downcase
  end

  def init
  end

  # Adds one record per report row to page_builder, with values ordered
  # as in columns_from_task, then finishes the page builder.
  #
  # @return [Hash] an empty task report
  def run
    client = Client.new(task, preview?)
    columns = self.class.columns_from_task(task)

    client.each_report_row do |row|
      values = row.values_at(*columns)
      page_builder.add values
    end
    page_builder.finish

    task_report = {}
    return task_report
  end

  # Whether Embulk reports a preview run; a NullPointerException raised by
  # Exec.isPreview is treated as "not a preview".
  def preview?
    org.embulk.spi.Exec.isPreview()
  rescue java.lang.NullPointerException
    false
  end

end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,9 @@
1
+ require "embulk/input/google_analytics/client"
2
+ require "embulk/input/google_analytics/plugin"
3
+
4
+ module Embulk
5
+ module Input
6
+ module GoogleAnalytics
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,290 @@
1
+ require "embulk"
2
+ Embulk.setup
3
+
4
+ require "embulk/input/google_analytics"
5
+ require "override_assert_raise"
6
+ require "fixture_helper"
7
+
8
+ module Embulk
9
+ module Input
10
+ module GoogleAnalytics
11
+ class TestClient < Test::Unit::TestCase
12
+ include OverrideAssertRaise
13
+ include FixtureHelper
14
+
15
+ sub_test_case "get_profile" do
16
+ setup do
17
+ conf = valid_config["in"]
18
+ @task = task(embulk_config(conf))
19
+ @client = Client.new(@task)
20
+ end
21
+
22
+ test "find view_id profile" do
23
+ target_profile = {
24
+ id: @task["view_id"],
25
+ }
26
+
27
+ mock(@client).get_all_profiles do
28
+ {
29
+ items: [
30
+ { id: 1 },
31
+ target_profile,
32
+ { id: 2 },
33
+ ]
34
+ }
35
+ end
36
+
37
+ assert_equal target_profile, @client.get_profile
38
+ end
39
+
40
+ test "raise ConfigError when view_id is not found" do
41
+ mock(@client).get_all_profiles do
42
+ {
43
+ items: [
44
+ { id: 1 },
45
+ { id: 2 },
46
+ ]
47
+ }
48
+ end
49
+
50
+ assert_raise(Embulk::ConfigError) do
51
+ @client.get_profile
52
+ end
53
+ end
54
+ end
55
+
56
+ sub_test_case "build_report_request" do
57
+ setup do
58
+ conf = valid_config["in"]
59
+ @task = task(embulk_config(conf))
60
+ @client = Client.new(@task)
61
+ end
62
+
63
+ test "page_token = nil" do
64
+ req = @client.build_report_request
65
+ expected = [
66
+ {
67
+ view_id: "101111111",
68
+ dimensions: [
69
+ {name: "ga:dateHour"}, {name: "ga:browser"}
70
+ ],
71
+ metrics: [
72
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
73
+ ],
74
+ include_empty_rows: true,
75
+ page_size: 10000
76
+ }
77
+ ]
78
+ assert_equal expected, req
79
+ end
80
+
81
+ test "page_token = 123" do
82
+ req = @client.build_report_request(123)
83
+ expected = [
84
+ {
85
+ view_id: "101111111",
86
+ dimensions: [
87
+ {name: "ga:dateHour"}, {name: "ga:browser"}
88
+ ],
89
+ metrics: [
90
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
91
+ ],
92
+ include_empty_rows: true,
93
+ page_size: 10000,
94
+ page_token: 123
95
+ }
96
+ ]
97
+ assert_equal expected, req
98
+ end
99
+
100
+ test "date range given" do
101
+ conf = valid_config["in"]
102
+ conf["start_date"] = "2000-01-01"
103
+ conf["end_date"] = "2000-01-07"
104
+ task = task(embulk_config(conf))
105
+ client = Client.new(task)
106
+ req = client.build_report_request
107
+
108
+ expected = [
109
+ {
110
+ view_id: "101111111",
111
+ dimensions: [
112
+ {name: "ga:dateHour"}, {name: "ga:browser"}
113
+ ],
114
+ metrics: [
115
+ {expression: "ga:visits"}, {expression: "ga:pageviews"}
116
+ ],
117
+ include_empty_rows: true,
118
+ page_size: 10000,
119
+ date_ranges: [
120
+ {
121
+ start_date: conf["start_date"],
122
+ end_date: conf["end_date"],
123
+ }
124
+ ]
125
+ }
126
+ ]
127
+ assert_equal expected, req
128
+ end
129
+ end
130
+
131
+ sub_test_case "time_parse_with_profile_timezone" do
132
+ setup do
133
+ conf = valid_config["in"]
134
+ conf["time_series"] = time_series
135
+ @client = Client.new(task(embulk_config(conf)))
136
+ end
137
+
138
+ sub_test_case "dateHour" do
139
+ setup do
140
+ stub(@client).get_profile { {timezone: "America/Los_Angeles" } }
141
+ end
142
+
143
+ test "in dst" do
144
+ time = @client.time_parse_with_profile_timezone("2016060122")
145
+ assert_equal Time.parse("2016-06-01 22:00:00 -07:00"), time
146
+ end
147
+
148
+ test "not in dst" do
149
+ time = @client.time_parse_with_profile_timezone("2016010122")
150
+ assert_equal Time.parse("2016-01-01 22:00:00 -08:00"), time
151
+ end
152
+
153
+ def time_series
154
+ "ga:dateHour"
155
+ end
156
+ end
157
+
158
+ sub_test_case "date" do
159
+ setup do
160
+ stub(@client).get_profile { {timezone: "America/Los_Angeles" } }
161
+ end
162
+
163
+ test "in dst" do
164
+ time = @client.time_parse_with_profile_timezone("20160601")
165
+ assert_equal Time.parse("2016-06-01 00:00:00 PDT"), time
166
+ end
167
+
168
test "not in dst" do
  # ga:date values are 8 digits (YYYYMMDD). The 10-digit dateHour string
  # used here before only passed because its trailing "22" was left
  # unparsed by the "%Y%m%d" format.
  time = @client.time_parse_with_profile_timezone("20160101")
  assert_equal Time.parse("2016-01-01 00:00:00 PST"), time
end
172
+
173
+ def time_series
174
+ "ga:date"
175
+ end
176
+ end
177
+ end
178
+
179
+ sub_test_case "auth" do
180
+ setup do
181
+ conf = valid_config["in"]
182
+ @client = Client.new(task(embulk_config(conf)))
183
+ end
184
+
185
+ test "raise ConfigError when auth failed" do
186
+ stub(Google::Auth::ServiceAccountCredentials).make_creds { raise "some error" }
187
+ assert_raise(Embulk::ConfigError) do
188
+ @client.auth
189
+ end
190
+ end
191
+ end
192
+
193
+ sub_test_case "each_report_row" do
194
+ setup do
195
+ conf = valid_config["in"]
196
+ @client = Client.new(task(embulk_config(conf)))
197
+ stub(@client).get_profile { {timezone: "Asia/Tokyo"} }
198
+ @logger = Logger.new(File::NULL)
199
+ stub(Embulk).logger { @logger }
200
+ end
201
+
202
+ test "without pagination" do
203
+ stub(@client).get_reports { report }
204
+ fetched_rows = []
205
+ @client.each_report_row do |row|
206
+ fetched_rows << row
207
+ end
208
+
209
+ expected = [
210
+ {
211
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060120"),
212
+ "ga:browser" => "curl",
213
+ "ga:visits" => "1",
214
+ "ga:pageviews" => "1",
215
+ },
216
+ {
217
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060121"),
218
+ "ga:browser" => "curl",
219
+ "ga:visits" => "2",
220
+ "ga:pageviews" => "2",
221
+ },
222
+ {
223
+ "ga:dateHour" => @client.time_parse_with_profile_timezone("2016060122"),
224
+ "ga:browser" => "curl",
225
+ "ga:visits" => "3",
226
+ "ga:pageviews" => "3",
227
+ },
228
+ ]
229
+ assert_equal expected, fetched_rows
230
+ end
231
+
232
+ test "with pagination" do
233
+ next_page_token = "10000"
234
+ mock(@client).get_reports(nil) { report_with_pages }
235
+ mock(@client).get_reports(next_page_token) { report }
236
+ fetched_rows = []
237
+ @client.each_report_row do |row|
238
+ fetched_rows << row
239
+ end
240
+ assert_equal 6, fetched_rows.length
241
+ end
242
+
243
+ sub_test_case "logger" do
244
+ test "with empty rows" do
245
+ response = report.dup
246
+ response[:reports].first[:data][:rows] = []
247
+ response[:reports].first[:data][:row_count] = 0
248
+ stub(@client).get_reports { response }
249
+
250
+ mock(@logger).warn("Result has 0 rows.")
251
+ @client.each_report_row {}
252
+ end
253
+
254
+ test "without rows" do
255
+ response = report.dup
256
+ response[:reports].first[:data].delete(:rows)
257
+ stub(@client).get_reports { response }
258
+
259
+ mock(@logger).warn("Result doesn't contain rows.")
260
+ @client.each_report_row {}
261
+ end
262
+ end
263
+
264
+ def report_with_pages
265
+ response = report.dup
266
+ response[:reports].first[:next_page_token] = "10000"
267
+ response
268
+ end
269
+
270
+ def report
271
+ json = fixture_read("reports.json")
272
+ JSON.parse(json, symbolize_names: true)
273
+ end
274
+ end
275
+
276
+ def task(config)
277
+ Plugin.task_from_config(config)
278
+ end
279
+
280
+ def valid_config
281
+ fixture_load("valid.yml")
282
+ end
283
+
284
+ def embulk_config(hash)
285
+ Embulk::DataSource.new(hash)
286
+ end
287
+ end
288
+ end
289
+ end
290
+ end
@@ -0,0 +1,214 @@
1
+ require "embulk"
2
+ Embulk.setup
3
+
4
+ require "embulk/input/google_analytics"
5
+ require "override_assert_raise"
6
+ require "fixture_helper"
7
+
8
+ module Embulk
9
+ module Input
10
+ module GoogleAnalytics
11
+ class TestPlugin < Test::Unit::TestCase
12
+ include OverrideAssertRaise
13
+ include FixtureHelper
14
+
15
+ sub_test_case ".transaction" do
16
+ setup do
17
+ any_instance_of(Client) do |klass|
18
+ stub(klass).get_columns_list do
19
+ [
20
+ {id: "ga:dateHour", attributes: {dataType: "STRING"}},
21
+ {id: "ga:date", attributes: {dataType: "STRING"}},
22
+ {id: "ga:browser", attributes: {dataType: "STRING"}},
23
+ {id: "ga:visits", attributes: {dataType: "INTEGER"}},
24
+ {id: "ga:pageviews", attributes: {dataType: "INTEGER"}},
25
+ ]
26
+ end
27
+ end
28
+ end
29
+
30
+ test "not raised exception" do
31
+ stub(Plugin).resume { Hash.new }
32
+ assert_nothing_raised do
33
+ Plugin.transaction(embulk_config(valid_config["in"]))
34
+ end
35
+ end
36
+
37
+ test "assemble expected columns" do
38
+ columns = [
39
+ Embulk::Column.new(nil, "date_hour", :timestamp),
40
+ Embulk::Column.new(nil, "browser", :string),
41
+ Embulk::Column.new(nil, "visits", :long),
42
+ Embulk::Column.new(nil, "pageviews", :long),
43
+ ]
44
+ mock(Plugin).resume(anything, columns, 1)
45
+ Plugin.transaction(embulk_config(valid_config["in"]))
46
+ end
47
+
48
+ sub_test_case "raise error when unknown column given" do
49
+ setup { stub(Plugin).resume { Hash.new } }
50
+
51
+ test "for dimensions" do
52
+ conf = valid_config["in"]
53
+ conf["dimensions"] << unknown_col_name
54
+ assert_raise(Embulk::ConfigError.new(expected_message)) do
55
+ Plugin.transaction(embulk_config(conf))
56
+ end
57
+ end
58
+
59
+ test "for metrics" do
60
+ conf = valid_config["in"]
61
+ conf["metrics"] << unknown_col_name
62
+ assert_raise(Embulk::ConfigError.new(expected_message)) do
63
+ Plugin.transaction(embulk_config(conf))
64
+ end
65
+ end
66
+
67
+ test "for time_series" do
68
+ conf = valid_config["in"]
69
+ conf["time_series"] = unknown_col_name
70
+ message = "Unknown time_series 'ga:foooooo'. Use 'ga:dateHour' or 'ga:date'"
71
+ assert_raise(Embulk::ConfigError.new(message)) do
72
+ Plugin.transaction(embulk_config(conf))
73
+ end
74
+ end
75
+
76
+ def unknown_col_name
77
+ "ga:foooooo"
78
+ end
79
+
80
+ def expected_message
81
+ "Unknown metric/dimension '#{unknown_col_name}'"
82
+ end
83
+ end
84
+
85
+ sub_test_case "type conversion" do
86
+ setup do
87
+ any_instance_of(Client) do |klass|
88
+ stub(klass).get_columns_list do
89
+ [
90
+ {id: "ga:dateHour", attributes: {dataType: "STRING"}},
91
+ {id: "ga:itemsPerPurchase", attributes: {dataType: "FLOAT"}},
92
+ {id: "ga:visits", attributes: {dataType: "INTEGER"}},
93
+ {id: "ga:CPM", attributes: {dataType: "CURRENCY"}},
94
+ {id: "ga:CTR", attributes: {dataType: "PERCENT"}},
95
+ {id: "ga:sessionDuration", attributes: {dataType: "TIME"}},
96
+ ]
97
+ end
98
+ end
99
+ end
100
+
101
+ test "Convert Embulk data types" do
102
+ conf = valid_config["in"]
103
+ conf["dimensions"] = []
104
+ conf["metrics"] = [
105
+ "ga:sessionDuration",
106
+ "ga:CPM",
107
+ "ga:CTR",
108
+ "ga:visits",
109
+ "ga:itemsPerPurchase",
110
+ ]
111
+ expected_columns = [
112
+ Column.new(nil, "date_hour", :timestamp),
113
+ Column.new(nil, "session_duration", :timestamp),
114
+ Column.new(nil, "cpm", :double),
115
+ Column.new(nil, "ctr", :double),
116
+ Column.new(nil, "visits", :long),
117
+ Column.new(nil, "items_per_purchase", :double),
118
+ ]
119
+
120
+ mock(Plugin).resume(anything, expected_columns, anything)
121
+ Plugin.transaction(embulk_config(conf))
122
+ end
123
+ end
124
+ end
125
+
126
+ sub_test_case ".run" do
127
+ sub_test_case "returned value should be added into page_builder" do
128
+ setup do
129
+ @page_builder = Object.new
130
+ conf = valid_config["in"]
131
+ conf["time_series"] = time_series
132
+ @plugin = Plugin.new(embulk_config(conf), nil, nil, @page_builder)
133
+ end
134
+
135
+ sub_test_case "time_series: 'ga:dateHour'" do
136
+ def time_series
137
+ "ga:dateHour"
138
+ end
139
+
140
+ test "HH:00:00 time given" do
141
+ Time.zone = "America/Los_Angeles"
142
+ time = Time.zone.parse("2016-06-01 12:00:00").to_time
143
+ any_instance_of(Client) do |klass|
144
+ stub(klass).each_report_row do |block|
145
+ row = {
146
+ "ga:dateHour" => time,
147
+ "ga:browser" => "wget",
148
+ "ga:visits" => 3,
149
+ "ga:pageviews" => 4,
150
+ }
151
+ block.call row
152
+ end
153
+ end
154
+
155
+ mock(@page_builder).add([time, "wget", 3, 4])
156
+ mock(@page_builder).finish
157
+ @plugin.run
158
+ end
159
+ end
160
+
161
+ sub_test_case "time_series: 'ga:date'" do
162
+ def time_series
163
+ "ga:date"
164
+ end
165
+
166
+ test "00:00:00 time given" do
167
+ Time.zone = "America/Los_Angeles"
168
+ time = Time.zone.parse("2016-06-01 00:00:00").to_time
169
+ any_instance_of(Client) do |klass|
170
+ stub(klass).each_report_row do |block|
171
+ row = {
172
+ "ga:date" => time,
173
+ "ga:browser" => "wget",
174
+ "ga:visits" => 3,
175
+ "ga:pageviews" => 4,
176
+ }
177
+ block.call row
178
+ end
179
+ end
180
+
181
+ mock(@page_builder).add([time, "wget", 3, 4])
182
+ mock(@page_builder).finish
183
+ @plugin.run
184
+ end
185
+ end
186
+ end
187
+ end
188
+
189
+ sub_test_case "canonicalize_column_name" do
190
+ data do
191
+ [
192
+ ["typical", ["ga:dateHour", "date_hour"]],
193
+ ["all capital", ["ga:CPM", "cpm"]],
194
+ ["capitals with word", ["ga:goalXXValue", "goal_xxvalue"]],
195
+ ["ID", ["ga:adwordsCustomerID", "adwords_customer_id"]],
196
+ ["word + capitals", ["ga:dcmCTR", "dcm_ctr"]],
197
+ ]
198
+ end
199
+ test "converting" do |(target, expected)|
200
+ assert_equal expected, Plugin.canonicalize_column_name(target)
201
+ end
202
+ end
203
+
204
+ def valid_config
205
+ fixture_load("valid.yml")
206
+ end
207
+
208
+ def embulk_config(hash)
209
+ Embulk::DataSource.new(hash)
210
+ end
211
+ end
212
+ end
213
+ end
214
+ end
@@ -0,0 +1,16 @@
1
+ require "yaml"
2
+ require "pathname"
3
+
4
# Test helpers for reading files from the "fixtures" directory that sits
# next to this file.
module FixtureHelper
  # @return [Pathname] the fixtures directory
  def fixture_dir
    Pathname.new(File.dirname(__FILE__)).join("fixtures")
  end

  # @param name [String] file name relative to the fixtures directory
  # @return [String] raw file content
  def fixture_read(name)
    fixture_path = fixture_dir.join(name)
    fixture_path.read
  end

  # @param name [String] YAML file name relative to the fixtures directory
  # @return [Object] the parsed YAML document
  def fixture_load(name)
    raw = fixture_read(name)
    YAML.load raw
  end
end
@@ -0,0 +1,71 @@
1
+ {
2
+ "reports": [
3
+ {
4
+ "column_header": {
5
+ "dimensions": [
6
+ "ga:dateHour",
7
+ "ga:browser"
8
+ ],
9
+ "metric_header": {
10
+ "metric_header_entries": [
11
+ {
12
+ "type": "INTEGER",
13
+ "name": "ga:visits"
14
+ },
15
+ {
16
+ "type": "INTEGER",
17
+ "name": "ga:pageviews"
18
+ }
19
+ ]
20
+ }
21
+ },
22
+ "data": {
23
+ "row_count": 3,
24
+ "rows": [
25
+ {
26
+ "metrics": [
27
+ {
28
+ "values": [
29
+ "1",
30
+ "1"
31
+ ]
32
+ }
33
+ ],
34
+ "dimensions": [
35
+ "2016060120",
36
+ "curl"
37
+ ]
38
+ },
39
+ {
40
+ "metrics": [
41
+ {
42
+ "values": [
43
+ "2",
44
+ "2"
45
+ ]
46
+ }
47
+ ],
48
+ "dimensions": [
49
+ "2016060121",
50
+ "curl"
51
+ ]
52
+ },
53
+ {
54
+ "metrics": [
55
+ {
56
+ "values": [
57
+ "3",
58
+ "3"
59
+ ]
60
+ }
61
+ ],
62
+ "dimensions": [
63
+ "2016060122",
64
+ "curl"
65
+ ]
66
+ }
67
+ ]
68
+ }
69
+ }
70
+ ]
71
+ }
@@ -0,0 +1,17 @@
1
+ in:
2
+ type: google_analytics
3
+ json_key_content: |
4
+ {
5
+ "type": "service_account"
6
+ }
7
+ view_id: 101111111
8
+ time_series: "ga:dateHour"
9
+
10
+ dimensions:
11
+ - "ga:browser"
12
+ metrics:
13
+ - "ga:visits"
14
+ - "ga:pageviews"
15
+
16
+ out:
17
+ type: stdout
@@ -0,0 +1,24 @@
1
module OverrideAssertRaise
  # NOTE: Embulk 0.7.1+ required to raise ConfigError to do as `ConfigError.new("message")`,
  # original `assert_raise` method can't catch that, but `begin .. rescue` can.
  # So we override assert_raise as below.
  #
  # Asserts that the block raises the expected exception.
  #
  # @param expected_class_or_instance [Class, Exception] an exception
  #   class, or an exception instance whose class AND message must match
  # @return [Exception] the exception raised by the block, when it matched
  def assert_raise(expected_class_or_instance = StandardError, &block)
    expected_class = expected_class_or_instance.is_a?(Class) ? expected_class_or_instance : expected_class_or_instance.class

    begin
      block.call
    rescue ::Test::Unit::AssertionFailedError => e
      # failed assert raises this Error and that extends StandardError, so rescue it first
      raise e
    rescue expected_class => e
      # https://github.com/test-unit/test-unit/issues/94
      if expected_class_or_instance == expected_class
        assert true # passed
      else
        assert_equal expected_class_or_instance.message, e.message
      end
      e
    rescue => e
      assert_equal(expected_class, e.class) # not expected one raised
    else
      # Runs only when the block raised nothing; a failure raised here is
      # not caught by the rescue clauses above.
      assert(false, "<#{expected_class}> exception was expected but nothing was raised")
    end
  end
end
data/test/run-test.rb ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ base_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
4
+ lib_dir = File.join(base_dir, "lib")
5
+ test_dir = File.join(base_dir, "test")
6
+
7
+ require "test-unit"
8
+ require "test/unit/rr"
9
+
10
+ $LOAD_PATH.unshift(lib_dir)
11
+ $LOAD_PATH.unshift(test_dir)
12
+
13
+ ENV["TEST_UNIT_MAX_DIFF_TARGET_STRING_SIZE"] ||= "5000"
14
+
15
+ if ENV["COVERAGE"]
16
+ if ENV["CI"]
17
+ require "codeclimate-test-reporter"
18
+ CodeClimate::TestReporter.start
19
+ else
20
+ require 'simplecov'
21
+ SimpleCov.start 'test_frameworks'
22
+ end
23
+ end
24
+
25
+ exit Test::Unit::AutoRunner.run(true, test_dir)
metadata ADDED
@@ -0,0 +1,238 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-google_analytics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - uu59
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ name: httpclient
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '0.9'
33
+ name: google-api-client
34
+ prerelease: false
35
+ type: :runtime
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ name: signet
48
+ prerelease: false
49
+ type: :runtime
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ name: activesupport
62
+ prerelease: false
63
+ type: :runtime
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 0.8.9
75
+ name: embulk
76
+ prerelease: false
77
+ type: :development
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 0.8.9
83
+ - !ruby/object:Gem::Dependency
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 1.10.6
89
+ name: bundler
90
+ prerelease: false
91
+ type: :development
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: 1.10.6
97
+ - !ruby/object:Gem::Dependency
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '10.0'
103
+ name: rake
104
+ prerelease: false
105
+ type: :development
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '10.0'
111
+ - !ruby/object:Gem::Dependency
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ name: test-unit
118
+ prerelease: false
119
+ type: :development
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ name: test-unit-rr
132
+ prerelease: false
133
+ type: :development
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ name: simplecov
146
+ prerelease: false
147
+ type: :development
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ">="
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ name: codeclimate-test-reporter
160
+ prerelease: false
161
+ type: :development
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - ">="
171
+ - !ruby/object:Gem::Version
172
+ version: '0'
173
+ name: pry
174
+ prerelease: false
175
+ type: :development
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ description: Loads records from Google Analytics.
182
+ email:
183
+ - k@uu59.org
184
+ executables: []
185
+ extensions: []
186
+ extra_rdoc_files: []
187
+ files:
188
+ - ".gitignore"
189
+ - ".ruby-version"
190
+ - CHANGELOG.md
191
+ - Gemfile
192
+ - LICENSE.txt
193
+ - README.md
194
+ - Rakefile
195
+ - circle.yml
196
+ - embulk-input-google_analytics.gemspec
197
+ - lib/embulk/input/google_analytics.rb
198
+ - lib/embulk/input/google_analytics/client.rb
199
+ - lib/embulk/input/google_analytics/plugin.rb
200
+ - test/embulk/input/google_analytics/test_client.rb
201
+ - test/embulk/input/google_analytics/test_plugin.rb
202
+ - test/fixture_helper.rb
203
+ - test/fixtures/reports.json
204
+ - test/fixtures/valid.yml
205
+ - test/override_assert_raise.rb
206
+ - test/run-test.rb
207
+ homepage: https://github.com/treasure-data/embulk-input-google_analytics
208
+ licenses:
209
+ - MIT
210
+ metadata: {}
211
+ post_install_message:
212
+ rdoc_options: []
213
+ require_paths:
214
+ - lib
215
+ required_ruby_version: !ruby/object:Gem::Requirement
216
+ requirements:
217
+ - - ">="
218
+ - !ruby/object:Gem::Version
219
+ version: '0'
220
+ required_rubygems_version: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ version: '0'
225
+ requirements: []
226
+ rubyforge_project:
227
+ rubygems_version: 2.4.8
228
+ signing_key:
229
+ specification_version: 4
230
+ summary: Google Analytics input plugin for Embulk
231
+ test_files:
232
+ - test/embulk/input/google_analytics/test_client.rb
233
+ - test/embulk/input/google_analytics/test_plugin.rb
234
+ - test/fixture_helper.rb
235
+ - test/fixtures/reports.json
236
+ - test/fixtures/valid.yml
237
+ - test/override_assert_raise.rb
238
+ - test/run-test.rb