embulk-input-mixpanel 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +4 -0
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +113 -106
- data/lib/embulk/input/mixpanel_api/client.rb +46 -10
- data/lib/range_generator.rb +79 -0
- data/lib/timezone_validator.rb +15 -0
- data/test/embulk/input/mixpanel_api/test_client.rb +65 -0
- data/test/embulk/input/test_mixpanel.rb +86 -30
- data/test/test_range_generator.rb +94 -0
- data/test/test_timezone_validator.rb +24 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48c99d14bae13fa0257e070bc5b99d444679392a
|
4
|
+
data.tar.gz: 8327dd45de51b2dab42754f212e5c6861621de7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ecb93dd7bade9b667d94a5f5cea247d6f46b1e1ad87d6046f6377f9495456e40f91b97a44a1440cbd04b8d4bbb15a2a223fbed7612469e08e427e32bc4e94db9
|
7
|
+
data.tar.gz: 94e3728eed0ce17d7178557556077f1d37049881567715523955c9888560d4886d774440206570cbf463822f2f82f19b2d216aa293bd8be1d1f0b27d3ea566a1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.3.3 - 2015-10-29
|
2
|
+
|
3
|
+
* [enhancement] Exponential backoff retry [#31](https://github.com/treasure-data/embulk-input-mixpanel/pull/31)
|
4
|
+
* [enhancement] Treat unguessed columns [#30](https://github.com/treasure-data/embulk-input-mixpanel/pull/30)
|
5
|
+
* [enhancement] Loosely guess [#27](https://github.com/treasure-data/embulk-input-mixpanel/pull/27)
|
6
|
+
* [maintenance] Refactor [#26](https://github.com/treasure-data/embulk-input-mixpanel/pull/26)
|
7
|
+
|
1
8
|
## 0.3.2 - 2015-10-06
|
2
9
|
|
3
10
|
* [enhancement] Support embulk 0.7 [#25](https://github.com/treasure-data/embulk-input-mixpanel/pull/25)
|
data/README.md
CHANGED
@@ -38,9 +38,13 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
|
|
38
38
|
- NOTE: Mixpanel API supports to export data from at least 2 days before to at most the previous day.
|
39
39
|
- **fetch_days**: Count of days range for exporting (integer, optional, default: from_date - (today - 1))
|
40
40
|
- NOTE: Mixpanel doesn't support to from_date > today - 2
|
41
|
+
- **fetch_unknown_columns**: Whether this plugin also fetches unknown columns, i.e. columns not listed in `columns` in the config (boolean, optional, default: true)
|
42
|
+
- NOTE: If true, an `unknown_columns` column is created and filled with the unknown columns' data as a JSON string.
|
41
43
|
- **event**: The event or events to filter data (array, optional, default: nil)
|
42
44
|
- **where**: Expression to filter data (c.f. https://mixpanel.com/docs/api-documentation/data-export-api#segmentation-expressions) (string, optional, default: nil)
|
43
45
|
- **bucket**: The data bucket to filter data (string, optional, default: nil)
|
46
|
+
- **retry_initial_wait_sec**: Initial wait in seconds for exponential backoff (integer, default: 1)
|
47
|
+
- **retry_limit**: Maximum number of retries (integer, default: 5)
|
44
48
|
|
45
49
|
## Example
|
46
50
|
|
@@ -1,5 +1,7 @@
|
|
1
1
|
require "tzinfo"
|
2
2
|
require "embulk/input/mixpanel_api/client"
|
3
|
+
require "range_generator"
|
4
|
+
require "timezone_validator"
|
3
5
|
|
4
6
|
module Embulk
|
5
7
|
module Input
|
@@ -7,6 +9,7 @@ module Embulk
|
|
7
9
|
Plugin.register_input("mixpanel", self)
|
8
10
|
|
9
11
|
GUESS_RECORDS_COUNT = 10
|
12
|
+
NOT_PROPERTY_COLUMN = "event".freeze
|
10
13
|
|
11
14
|
# NOTE: It takes long time to fetch data between from_date to
|
12
15
|
# to_date by one API request. So this plugin fetches data
|
@@ -14,68 +17,36 @@ module Embulk
|
|
14
17
|
SLICE_DAYS_COUNT = 7
|
15
18
|
|
16
19
|
def self.transaction(config, &control)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
# generated by from_date and yeasterday.
|
37
|
-
dates = from_date..(Date.today - 1)
|
38
|
-
elsif days < 1
|
39
|
-
raise ConfigError.new "days '#{days}' is invalid. Please specify bigger number than 0."
|
40
|
-
else
|
41
|
-
# When 'days' is specified in config file and it is satisfied,
|
42
|
-
# so it is used for dates.
|
43
|
-
dates = from_date..(from_date + days - 1)
|
44
|
-
end
|
45
|
-
|
46
|
-
target_dates = dates.find_all {|date| date < Date.today}
|
47
|
-
|
48
|
-
Embulk.logger.info "Try to fetch data from #{target_dates.first} to #{target_dates.last}"
|
49
|
-
|
50
|
-
overtimes = dates.to_a - target_dates
|
51
|
-
unless overtimes.empty?
|
52
|
-
Embulk.logger.warn "These dates are too early access, ignored them: from #{overtimes.first} to #{overtimes.last}"
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
task[:dates] = target_dates.map {|date| date.to_s}
|
57
|
-
|
58
|
-
task[:api_key] = config.param(:api_key, :string)
|
59
|
-
task[:api_secret] = config.param(:api_secret, :string)
|
60
|
-
task[:timezone] = config.param(:timezone, :string)
|
61
|
-
|
62
|
-
begin
|
63
|
-
# raises exception if timezone is invalid string
|
64
|
-
TZInfo::Timezone.get(task[:timezone])
|
65
|
-
rescue => e
|
66
|
-
Embulk.logger.error "'#{task[:timezone]}' is invalid timezone"
|
67
|
-
raise ConfigError.new e.message
|
68
|
-
end
|
20
|
+
timezone = config.param(:timezone, :string)
|
21
|
+
TimezoneValidator.new(timezone).validate
|
22
|
+
|
23
|
+
from_date = config.param(:from_date, :string, default: (Date.today - 2).to_s)
|
24
|
+
fetch_days = config.param(:fetch_days, :integer, default: nil)
|
25
|
+
range = RangeGenerator.new(from_date, fetch_days).generate_range
|
26
|
+
Embulk.logger.info "Try to fetch data from #{range.first} to #{range.last}"
|
27
|
+
|
28
|
+
task = {
|
29
|
+
params: export_params(config),
|
30
|
+
dates: range,
|
31
|
+
timezone: timezone,
|
32
|
+
api_key: config.param(:api_key, :string),
|
33
|
+
api_secret: config.param(:api_secret, :string),
|
34
|
+
schema: config.param(:columns, :array),
|
35
|
+
fetch_unknown_columns: config.param(:fetch_unknown_columns, :bool, default: true),
|
36
|
+
retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
|
37
|
+
retry_limit: config.param(:retry_limit, :integer, default: 5),
|
38
|
+
}
|
69
39
|
|
70
|
-
columns = []
|
71
|
-
task[:schema] = config.param(:columns, :array)
|
72
|
-
task[:schema].each do |column|
|
40
|
+
columns = task[:schema].map do |column|
|
73
41
|
name = column["name"]
|
74
42
|
type = column["type"].to_sym
|
75
43
|
|
76
|
-
|
44
|
+
Column.new(nil, name, type, column["format"])
|
77
45
|
end
|
78
46
|
|
47
|
+
# for unknown columns
|
48
|
+
columns << Column.new(nil, "unknown_columns", :string)
|
49
|
+
|
79
50
|
resume(task, columns, 1, &control)
|
80
51
|
end
|
81
52
|
|
@@ -94,36 +65,15 @@ module Embulk
|
|
94
65
|
def self.guess(config)
|
95
66
|
client = MixpanelApi::Client.new(config.param(:api_key, :string), config.param(:api_secret, :string))
|
96
67
|
|
97
|
-
|
98
|
-
|
99
|
-
from_date = Date.parse(from_date_str)
|
68
|
+
range = guess_range(config)
|
69
|
+
Embulk.logger.info "Guessing schema using #{range.first}..#{range.last} records"
|
100
70
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
# NOTE: to_date is yeasterday if from_date..Date.Today doesn't have
|
106
|
-
# more SLICE_DAYS_COUNT days.
|
107
|
-
to_date = [from_date + SLICE_DAYS_COUNT, Date.today - 1].min
|
108
|
-
|
109
|
-
params = export_params(config)
|
110
|
-
params = params.merge(
|
111
|
-
from_date: from_date.to_s,
|
112
|
-
to_date: to_date.to_s,
|
71
|
+
params = export_params(config).merge(
|
72
|
+
from_date: range.first,
|
73
|
+
to_date: range.last,
|
113
74
|
)
|
114
75
|
|
115
|
-
|
116
|
-
sample_records = records.first(GUESS_RECORDS_COUNT)
|
117
|
-
properties = Guess::SchemaGuess.from_hash_records(sample_records.map{|r| r["properties"]})
|
118
|
-
columns = properties.map do |col|
|
119
|
-
result = {
|
120
|
-
name: col.name,
|
121
|
-
type: col.type,
|
122
|
-
}
|
123
|
-
result[:format] = col.format if col.format
|
124
|
-
result
|
125
|
-
end
|
126
|
-
columns.unshift(name: "event", type: :string)
|
76
|
+
columns = guess_from_records(client.export(params))
|
127
77
|
return {"columns" => columns}
|
128
78
|
end
|
129
79
|
|
@@ -134,33 +84,18 @@ module Embulk
|
|
134
84
|
@timezone = task[:timezone]
|
135
85
|
@schema = task[:schema]
|
136
86
|
@dates = task[:dates]
|
87
|
+
@fetch_unknown_columns = task[:fetch_unknown_columns]
|
137
88
|
end
|
138
89
|
|
139
90
|
def run
|
140
|
-
client = MixpanelApi::Client.new(@api_key, @api_secret)
|
141
91
|
@dates.each_slice(SLICE_DAYS_COUNT) do |dates|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
)
|
150
|
-
|
151
|
-
records = client.export(params)
|
152
|
-
|
153
|
-
records.each do |record|
|
154
|
-
values = @schema.map do |column|
|
155
|
-
case column["name"]
|
156
|
-
when "event"
|
157
|
-
record["event"]
|
158
|
-
when "time"
|
159
|
-
time = record["properties"]["time"]
|
160
|
-
adjust_timezone(time)
|
161
|
-
else
|
162
|
-
record["properties"][column["name"]]
|
163
|
-
end
|
92
|
+
Embulk.logger.info "Fetching data from #{dates.first} to #{dates.last} ..."
|
93
|
+
|
94
|
+
fetch(dates).each do |record|
|
95
|
+
values = extract_values(record)
|
96
|
+
if @fetch_unknown_columns
|
97
|
+
unknown_values = extract_unknown_values(record)
|
98
|
+
values << unknown_values.to_json
|
164
99
|
end
|
165
100
|
page_builder.add(values)
|
166
101
|
end
|
@@ -176,6 +111,50 @@ module Embulk
|
|
176
111
|
|
177
112
|
private
|
178
113
|
|
114
|
+
# Builds one output row for +record+: one value per configured column,
# in the same order as the schema.
def extract_values(record)
  column_names = @schema.map { |column| column["name"] }
  column_names.map { |name| extract_value(record, name) }
end
|
119
|
+
|
120
|
+
# Looks up the value of column +name+ in a Mixpanel +record+.
#
# The event name lives at the top level of the record; every other column is
# read from record["properties"]. The "time" property is passed through
# adjust_timezone before being returned.
def extract_value(record, name)
  return record[NOT_PROPERTY_COLUMN] if name == NOT_PROPERTY_COLUMN

  properties = record["properties"]
  return adjust_timezone(properties["time"]) if name == "time"

  properties[name]
end
|
131
|
+
|
132
|
+
# Collects the values of every key in +record+ that is not a configured
# column. Candidate keys are the record's property names plus the event
# column. Logs a warning listing the unknown keys when there are any.
#
# Returns a Hash of unknown key => extracted value (empty when nothing is
# unknown).
def extract_unknown_values(record)
  configured = @schema.map { |column| column["name"] }
  candidates = record["properties"].keys + [NOT_PROPERTY_COLUMN]
  extras = candidates - configured

  Embulk.logger.warn("Unknown columns exists in record: #{extras.join(', ')}") unless extras.empty?

  extras.each_with_object({}) do |key, collected|
    collected[key] = extract_value(record, key)
  end
end
|
146
|
+
|
147
|
+
# Exports the records for one slice of +dates+.
#
# The first and last entries of +dates+ become the "from_date"/"to_date"
# request parameters, merged over @params. The export goes through
# Client#export_with_retry using the retry settings stored in the task.
def fetch(dates)
  date_window = {"from_date" => dates.first, "to_date" => dates.last}
  request_params = @params.merge(date_window)

  MixpanelApi::Client.new(@api_key, @api_secret).export_with_retry(
    request_params,
    task[:retry_initial_wait_sec],
    task[:retry_limit]
  )
end
|
157
|
+
|
179
158
|
def adjust_timezone(epoch)
|
180
159
|
# Adjust timezone offset to get UTC time
|
181
160
|
# c.f. https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel#export
|
@@ -203,6 +182,34 @@ module Embulk
|
|
203
182
|
bucket: config.param(:bucket, :string, default: nil),
|
204
183
|
}
|
205
184
|
end
|
185
|
+
|
186
|
+
# Start date used for guessing when the config gives none (or gives an
# unusable one): SLICE_DAYS_COUNT + 1 days before today.
def self.default_guess_start_date
  days_back = SLICE_DAYS_COUNT + 1
  Date.today - days_back
end
|
189
|
+
|
190
|
+
# Determines which dates to sample when guessing the schema.
#
# Uses from_date / fetch_days from +config+ (defaulting to
# default_guess_start_date and SLICE_DAYS_COUNT). When that yields no usable
# dates, falls back to default_guess_start_date..yesterday.
#
# Always returns an Array of date strings, so callers get the same element
# type from both the normal path and the fallback path.
def self.guess_range(config)
  from_date = config.param(:from_date, :string, default: default_guess_start_date.to_s)
  fetch_days = config.param(:fetch_days, :integer, default: SLICE_DAYS_COUNT)
  range = RangeGenerator.new(from_date, fetch_days).generate_range
  return range unless range.empty?

  # Fallback: stringify the dates to match what RangeGenerator returns.
  (default_guess_start_date..(Date.today - 1)).map(&:to_s)
end
|
199
|
+
|
200
|
+
# Guesses column definitions from the first GUESS_RECORDS_COUNT +records+.
#
# Only each record's "properties" hash is fed to the schema guesser. The
# event column is not a property, so it is prepended as a string column.
# Returns an Array of {name:, type:} hashes, with :format when one was guessed.
def self.guess_from_records(records)
  properties = records.first(GUESS_RECORDS_COUNT).map { |record| record["properties"] }

  guessed = Guess::SchemaGuess.from_hash_records(properties).map do |col|
    entry = {name: col.name, type: col.type}
    entry[:format] = col.format if col.format
    entry
  end

  [{name: NOT_PROPERTY_COLUMN, type: :string}] + guessed
end
|
206
213
|
end
|
207
214
|
|
208
215
|
end
|
@@ -15,32 +15,68 @@ module Embulk
|
|
15
15
|
@api_secret = api_secret
|
16
16
|
end
|
17
17
|
|
18
|
+
# Same as #export, but the HTTP request is wrapped in with_retry using the
# given initial wait (seconds) and retry limit. Returns the enumerator built
# by response_to_enum from the response body.
def export_with_retry(params = {}, retry_initial_wait_sec, retry_limit)
  response_to_enum(
    with_retry(retry_initial_wait_sec, retry_limit) { request(params) }
  )
end
|
25
|
+
|
18
26
|
# Performs a single export request (no retry) and returns the enumerator
# built by response_to_enum from the response body.
def export(params = {})
  response_to_enum(request(params))
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
# Wraps a raw export response body in an Enumerator that yields one parsed
# JSON value per line of the body. Parsing happens when the enumerator is
# consumed, not when it is created.
def response_to_enum(response_body)
  Enumerator.new do |yielder|
    response_body.lines.each do |line|
      # TODO: raise Embulk::DataError when invalid json given for Embulk 0.7+
      yielder.yield(JSON.parse(line))
    end
  end
end
|
41
|
+
|
42
|
+
# Sends one signed export request and returns the raw response body.
#
# The caller's +params+ hash is left untouched: signing is done on a copy.
# This matters for retries, where the same hash is passed in again. A :sig
# left over from a previous attempt would otherwise be folded into the new
# signature, and a stale :expire would be reused.
#
# Raises ConfigError for 4xx responses and RuntimeError for 5xx responses,
# both carrying the response body as the message.
def request(params)
  # https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel
  params = params.dup
  # The signature must be computed over the request parameters only.
  params.delete(:sig)
  params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
  params[:sig] = signature(params)
  Embulk.logger.debug "Export param: #{params.to_s}"

  response = httpclient.get(ENDPOINT_EXPORT, params)
  Embulk.logger.debug "response code: #{response.code}"
  case response.code
  when 400..499
    raise ConfigError.new response.body
  when 500..599
    raise RuntimeError, response.body
  end
  response.body
end
|
34
58
|
|
35
|
-
|
36
|
-
|
37
|
-
|
59
|
+
# Runs the given block, retrying on failure with exponential backoff.
#
# initial_wait - seconds to sleep before the first retry; doubled after
#                every retry.
# retry_limit  - maximum number of retries before the last error is re-raised.
#
# Embulk::ConfigError is never retried. Returns the block's value.
def with_retry(initial_wait, retry_limit, &block)
  attempts = 0
  delay = initial_wait
  begin
    yield
  rescue Embulk::ConfigError => e # TODO: rescue Embulk::DataError for Embulk 0.7+
    # Don't retry
    raise e
  rescue => e
    unless attempts < retry_limit
      Embulk.logger.error "'#{e}(#{e.class})' error occured and reached retry limit (#{retry_limit} times)"
      raise e
    end

    attempts += 1
    Embulk.logger.warn "Retrying after #{delay} seconds [#{attempts}/#{retry_limit}] '#{e}(#{e.class})' error occured"
    sleep delay
    delay *= 2
    retry
  end
end
|
41
79
|
|
42
|
-
private
|
43
|
-
|
44
80
|
def signature(params)
|
45
81
|
# https://mixpanel.com/docs/api-documentation/data-export-api#auth-implementation
|
46
82
|
sorted_keys = params.keys.map(&:to_s).sort
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require "date"

# Builds the list of dates to fetch, starting at a given date and covering
# either a fixed number of days or everything up to yesterday.
#
# Only dates strictly before today are returned. Requested dates from today
# onward are dropped with a warning, and a start date after yesterday yields
# an empty list (also with a warning).
class RangeGenerator
  attr_reader :from_date_str, :fetch_days

  # from_date_str - start date, as a String accepted by Date.parse.
  # fetch_days    - number of days to cover (Integer >= 1), or nil to cover
  #                 from the start date through yesterday.
  def initialize(from_date_str, fetch_days)
    @from_date_str = from_date_str
    @fetch_days = fetch_days
  end

  # Returns the dates to fetch as an Array of Date#to_s strings.
  #
  # Raises Embulk::ConfigError when from_date_str is not a valid date or
  # fetch_days is less than 1.
  def generate_range
    validate
    show_warnings
    range_only_past.map(&:to_s)
  end

  private

  # Parsed start date. Memoised so the string is parsed only once.
  def from_date
    @from_date ||= Date.parse(from_date_str)
  end

  def validate
    begin
      from_date
    rescue ArgumentError, TypeError # invalid date (TypeError: not a String)
      raise Embulk::ConfigError.new "from_date '#{from_date_str}' is invalid date"
    end

    if fetch_days && fetch_days < 1
      # `days` only allowed nil or positive number
      raise Embulk::ConfigError.new "fetch_days '#{fetch_days}' is invalid. Please specify bigger number than 0."
    end
  end

  def show_warnings
    if from_date_too_early?
      Embulk.logger.warn "Mixpanel allow 2 days before to from_date, so no data is input."
    end

    if overdays?
      # The ignored dates run from today through the last requested date.
      Embulk.logger.warn "These dates are too early access, ignored them: from #{today} to #{requested_end}"
    end
  end

  # Last date the caller asked for, before clamping to the past.
  def requested_end
    fetch_days ? from_date + fetch_days - 1 : yesterday
  end

  # Requested dates that are strictly before today. The range is clamped to
  # yesterday up front so a huge fetch_days never materialises future dates.
  def range_only_past
    return [] if from_date_too_early?

    (from_date..[requested_end, yesterday].min).to_a
  end

  # True when the requested range extends to today or later.
  def overdays?
    !from_date_too_early? && requested_end > yesterday
  end

  def from_date_too_early?
    from_date > yesterday
  end

  def yesterday
    today - 1
  end

  def today
    @today ||= Date.today
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Checks that a timezone identifier is known to TZInfo.
class TimezoneValidator
  def initialize(timezone)
    @timezone = timezone
  end

  # Looks the timezone up via TZInfo. Any error from the lookup is logged and
  # re-raised as Embulk::ConfigError carrying the original message.
  def validate
    # raises exception if timezone is invalid string
    TZInfo::Timezone.get(@timezone)
  rescue => lookup_error
    Embulk.logger.error "'#{@timezone}' is invalid timezone"
    raise Embulk::ConfigError.new lookup_error.message
  end
end
|
@@ -89,6 +89,71 @@ module Embulk
|
|
89
89
|
end
|
90
90
|
end
|
91
91
|
|
92
|
+
class ExportRetryTest < self
|
93
|
+
def setup
|
94
|
+
@httpclient = HTTPClient.new
|
95
|
+
@client = Client.new(API_KEY, API_SECRET)
|
96
|
+
@retry_initial_wait_sec = 1
|
97
|
+
@retry_limit = 3
|
98
|
+
stub_client
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_retry_with_500
|
102
|
+
stub_response(failure_response(500))
|
103
|
+
|
104
|
+
@retry_limit.times do |n|
|
105
|
+
mock(@client).sleep(@retry_initial_wait_sec * (2**n))
|
106
|
+
end
|
107
|
+
mock(Embulk.logger).warn(/retry/i).times(@retry_limit)
|
108
|
+
mock(Embulk.logger).error(/retry/i).once
|
109
|
+
|
110
|
+
assert_raise do
|
111
|
+
@client.export_with_retry(params, @retry_initial_wait_sec, @retry_limit)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def test_retry_with_timeout
|
116
|
+
@httpclient.connect_timeout = 0.000000000000000000001
|
117
|
+
|
118
|
+
@retry_limit.times do |n|
|
119
|
+
mock(@client).sleep(@retry_initial_wait_sec * (2**n))
|
120
|
+
end
|
121
|
+
mock(Embulk.logger).warn(/retry/i).times(@retry_limit)
|
122
|
+
mock(Embulk.logger).error(/retry/i).once
|
123
|
+
|
124
|
+
assert_raise(HTTPClient::TimeoutError) do
|
125
|
+
@client.export_with_retry(params, @retry_initial_wait_sec, @retry_limit)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_not_retry_with_401
|
130
|
+
@httpclient.test_loopback_http_response << "HTTP/1.1 401\r\n\r\n"
|
131
|
+
mock(Embulk.logger).warn(/retry/i).never
|
132
|
+
mock(Embulk.logger).error(/retry/i).never
|
133
|
+
|
134
|
+
assert_raise(Embulk::ConfigError) do
|
135
|
+
@client.export_with_retry(params, 0, 1).each do |record|
|
136
|
+
record
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_not_retry_with_invalid_json
|
142
|
+
omit "Embulk 0.6 or earlier has no DataError (enable to uncomment below code)"
|
143
|
+
|
144
|
+
# @httpclient.test_loopback_http_response << "HTTP/1.1 200\r\n\r\ninvalid json"
|
145
|
+
# mock(Embulk.logger).warn(/retry/i).never
|
146
|
+
# mock(Embulk.logger).error(/retry/i).never
|
147
|
+
|
148
|
+
# assert_raise(Embulk::DataError) do
|
149
|
+
# @client.export_with_retry(params, 0, 1).each do |record|
|
150
|
+
# # DataError will raised in each block
|
151
|
+
# record
|
152
|
+
# end
|
153
|
+
# end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
92
157
|
private
|
93
158
|
|
94
159
|
def stub_client
|
@@ -50,6 +50,11 @@ module Embulk
|
|
50
50
|
end
|
51
51
|
|
52
52
|
class GuessTest < self
|
53
|
+
def setup
|
54
|
+
# Do nothing from parent
|
55
|
+
mute_warn
|
56
|
+
end
|
57
|
+
|
53
58
|
def test_from_date_old_date
|
54
59
|
config = {
|
55
60
|
type: "mixpanel",
|
@@ -58,9 +63,8 @@ module Embulk
|
|
58
63
|
from_date: FROM_DATE,
|
59
64
|
}
|
60
65
|
|
61
|
-
|
62
|
-
|
63
|
-
stub_export(from_date, to_date)
|
66
|
+
stub_export_all
|
67
|
+
mock(Embulk.logger).info(/^Guessing.*#{Regexp.escape FROM_DATE}\.\./)
|
64
68
|
|
65
69
|
actual = Mixpanel.guess(embulk_config(config))
|
66
70
|
assert_equal(expected, actual)
|
@@ -74,25 +78,25 @@ module Embulk
|
|
74
78
|
from_date: Date.today.to_s,
|
75
79
|
}
|
76
80
|
|
77
|
-
|
78
|
-
|
79
|
-
|
81
|
+
stub_export_all
|
82
|
+
mock(Embulk.logger).info(/Guessing.*#{Regexp.escape Mixpanel.default_guess_start_date.to_s}/)
|
83
|
+
|
84
|
+
Mixpanel.guess(embulk_config(config))
|
80
85
|
end
|
81
86
|
|
82
87
|
def test_from_date_yesterday
|
88
|
+
from_date = (Date.today - 1).to_s
|
83
89
|
config = {
|
84
90
|
type: "mixpanel",
|
85
91
|
api_key: API_KEY,
|
86
92
|
api_secret: API_SECRET,
|
87
|
-
from_date:
|
93
|
+
from_date: from_date,
|
88
94
|
}
|
89
95
|
|
90
|
-
|
91
|
-
|
92
|
-
stub_export(from_date, to_date)
|
96
|
+
stub_export_all
|
97
|
+
mock(Embulk.logger).info(/Guessing.*#{Regexp.escape from_date}/)
|
93
98
|
|
94
|
-
|
95
|
-
assert_equal(expected, actual)
|
99
|
+
Mixpanel.guess(embulk_config(config))
|
96
100
|
end
|
97
101
|
|
98
102
|
def test_no_from_date
|
@@ -102,31 +106,24 @@ module Embulk
|
|
102
106
|
api_secret: API_SECRET,
|
103
107
|
}
|
104
108
|
|
105
|
-
|
106
|
-
|
107
|
-
stub_export(from_date, to_date)
|
109
|
+
stub_export_all
|
110
|
+
mock(Embulk.logger).info(/Guessing.*#{Regexp.escape Mixpanel.default_guess_start_date.to_s}/)
|
108
111
|
|
109
|
-
|
110
|
-
assert_equal(expected, actual)
|
112
|
+
Mixpanel.guess(embulk_config(config))
|
111
113
|
end
|
112
114
|
|
113
115
|
private
|
114
116
|
|
115
|
-
def
|
116
|
-
params = {
|
117
|
-
api_key: API_KEY,
|
118
|
-
event: nil,
|
119
|
-
where: nil,
|
120
|
-
bucket: nil,
|
121
|
-
from_date: from_date.to_s,
|
122
|
-
to_date: to_date.to_s,
|
123
|
-
}
|
124
|
-
|
117
|
+
def stub_export_all
|
125
118
|
any_instance_of(MixpanelApi::Client) do |klass|
|
126
|
-
stub(klass).export(
|
119
|
+
stub(klass).export(anything) { records }
|
127
120
|
end
|
128
121
|
end
|
129
122
|
|
123
|
+
def mute_warn
|
124
|
+
stub(Embulk.logger).warn(anything) {}
|
125
|
+
end
|
126
|
+
|
130
127
|
def embulk_config(config)
|
131
128
|
DataSource[*config.to_a.flatten(1)]
|
132
129
|
end
|
@@ -336,9 +333,12 @@ module Embulk
|
|
336
333
|
end
|
337
334
|
|
338
335
|
def columns
|
339
|
-
|
336
|
+
unknown_columns = [Column.new(nil, "unknown_columns", :string)]
|
337
|
+
configured_columns = schema.map do |col|
|
340
338
|
Column.new(nil, col["name"], col["type"].to_sym)
|
341
339
|
end
|
340
|
+
|
341
|
+
configured_columns + unknown_columns
|
342
342
|
end
|
343
343
|
end
|
344
344
|
|
@@ -371,7 +371,7 @@ module Embulk
|
|
371
371
|
def setup_client
|
372
372
|
|
373
373
|
any_instance_of(MixpanelApi::Client) do |klass|
|
374
|
-
stub(klass).
|
374
|
+
stub(klass).request { records_raw_response }
|
375
375
|
end
|
376
376
|
end
|
377
377
|
|
@@ -415,6 +415,52 @@ module Embulk
|
|
415
415
|
@plugin.run
|
416
416
|
end
|
417
417
|
|
418
|
+
# Checks that #run appends the unconfigured columns of each record, encoded
# as a JSON string, after the configured column values.
class UnknownColumnsTest < self
  def setup
    super
    @page_builder = Object.new
    @plugin = Mixpanel.new(task, nil, nil, @page_builder)
  end

  def test_run
    # Mute the "Unknown columns" warning instead of emitting one.
    stub(Embulk.logger).warn(anything)
    stub(@plugin).preview? { false }

    # NOTE: Expect records are contained same record
    record = records.first
    properties = record["properties"]

    time = properties["time"]
    tz = TZInfo::Timezone.get(TIMEZONE)
    offset = tz.period_for_local(time, true).offset.utc_offset
    adjusted_time = time - offset

    # Configured columns (foo, time) first, then the unknown columns as JSON.
    added = [
      properties["foo"],
      adjusted_time,
      {"int" => properties["int"], "event" => record["event"]}.to_json
    ]

    mock(@page_builder).add(added).times(records.length * 2)
    mock(@page_builder).finish

    @plugin.run
  end

  private

  def task
    super.merge(schema: schema, fetch_unknown_columns: true)
  end

  # Deliberately omits "int" and "event" so they show up as unknown columns.
  def schema
    [
      {"name" => "foo", "type" => "long"},
      {"name" => "time", "type" => "long"},
    ]
  end
end
|
463
|
+
|
418
464
|
private
|
419
465
|
|
420
466
|
def timezone_offset_seconds
|
@@ -440,6 +486,9 @@ module Embulk
|
|
440
486
|
schema: schema,
|
441
487
|
dates: DATES.to_a.map(&:to_s),
|
442
488
|
params: Mixpanel.export_params(embulk_config),
|
489
|
+
fetch_unknown_columns: false,
|
490
|
+
retry_initial_wait_sec: 2,
|
491
|
+
retry_limit: 3,
|
443
492
|
}
|
444
493
|
end
|
445
494
|
|
@@ -456,6 +505,10 @@ module Embulk
|
|
456
505
|
] * 30
|
457
506
|
end
|
458
507
|
|
508
|
+
def records_raw_response
|
509
|
+
records.map(&:to_json).join("\n")
|
510
|
+
end
|
511
|
+
|
459
512
|
def record_epoch
|
460
513
|
1234567890
|
461
514
|
end
|
@@ -467,6 +520,9 @@ module Embulk
|
|
467
520
|
api_secret: API_SECRET,
|
468
521
|
from_date: FROM_DATE,
|
469
522
|
fetch_days: DAYS,
|
523
|
+
fetch_unknown_columns: false,
|
524
|
+
retry_initial_wait_sec: 2,
|
525
|
+
retry_limit: 3,
|
470
526
|
}
|
471
527
|
end
|
472
528
|
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "range_generator"
|
2
|
+
require "override_assert_raise"
|
3
|
+
|
4
|
+
class RangeGeneratorTest < Test::Unit::TestCase
|
5
|
+
include OverrideAssertRaise
|
6
|
+
|
7
|
+
class GenerateRangeTest < self
|
8
|
+
data do
|
9
|
+
{
|
10
|
+
from_date: ["aaaaaaaaa", 1],
|
11
|
+
fetch_days: ["2010-01-01", -9],
|
12
|
+
}
|
13
|
+
end
|
14
|
+
def test_invalid(args)
|
15
|
+
assert_raise(Embulk::ConfigError) do
|
16
|
+
generate_range(*args)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_all_days_past
|
21
|
+
days = 5
|
22
|
+
from = "2010-01-01"
|
23
|
+
expected_from = Date.parse(from)
|
24
|
+
expected_to = Date.parse("2010-01-05")
|
25
|
+
|
26
|
+
expected = (expected_from..expected_to).to_a.map{|date| date.to_s}
|
27
|
+
|
28
|
+
actual = RangeGenerator.new(from, days).generate_range
|
29
|
+
|
30
|
+
assert_equal(expected, actual)
|
31
|
+
end
|
32
|
+
|
33
|
+
class OverDaysTest < self
|
34
|
+
def setup
|
35
|
+
@from = Date.today - 5
|
36
|
+
@days = 10
|
37
|
+
@warn_message_regexp = /ignored them/
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_range_only_past
|
41
|
+
expected_to = Date.today - 1
|
42
|
+
expected = (@from..expected_to).to_a.map{|date| date.to_s}
|
43
|
+
|
44
|
+
stub(Embulk.logger).warn(@warn_message_regexp)
|
45
|
+
|
46
|
+
assert_equal(expected, generate_range)
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_warn
|
50
|
+
mock(Embulk.logger).warn(@warn_message_regexp)
|
51
|
+
|
52
|
+
generate_range
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def generate_range
|
58
|
+
super(@from.to_s, @days)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class FromDateEarlyTest < self
|
63
|
+
def setup
|
64
|
+
@from = Date.today + 5
|
65
|
+
@days = 10
|
66
|
+
@warn_message_regexp = /allow 2 days/
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_empty_range
|
70
|
+
stub(Embulk.logger).warn(@warn_message_regexp)
|
71
|
+
|
72
|
+
assert_equal([], generate_range)
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_warn
|
76
|
+
mock(Embulk.logger).warn(@warn_message_regexp)
|
77
|
+
|
78
|
+
generate_range
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def generate_range
|
84
|
+
super(@from.to_s, @days)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
def generate_range(from_date_str, fetch_days)
|
91
|
+
RangeGenerator.new(from_date_str, fetch_days).generate_range
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require "timezone_validator"
|
2
|
+
require "override_assert_raise"
|
3
|
+
|
4
|
+
# Tests for TimezoneValidator#validate.
class TimezoneValidatorTest < Test::Unit::TestCase
  include OverrideAssertRaise

  def test_valid
    valid_timezone = "Asia/Tokyo"

    assert_nothing_raised do
      TimezoneValidator.new(valid_timezone).validate
    end
  end

  def test_invalid
    invalid_timezone = "Asia/Tokyoooooooooooooo"

    # Escape the identifier so it is matched literally in the logged message.
    mock(Embulk.logger).error(/#{Regexp.escape(invalid_timezone)}/)

    assert_raise(Embulk::ConfigError) do
      TimezoneValidator.new(invalid_timezone).validate
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-10-
|
12
|
+
date: 2015-10-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -199,11 +199,15 @@ files:
|
|
199
199
|
- gemfiles/template.erb
|
200
200
|
- lib/embulk/input/mixpanel.rb
|
201
201
|
- lib/embulk/input/mixpanel_api/client.rb
|
202
|
+
- lib/range_generator.rb
|
203
|
+
- lib/timezone_validator.rb
|
202
204
|
- test/embulk/input/mixpanel_api/test_client.rb
|
203
205
|
- test/embulk/input/test_mixpanel.rb
|
204
206
|
- test/override_assert_raise.rb
|
205
207
|
- test/prepare_embulk.rb
|
206
208
|
- test/run-test.rb
|
209
|
+
- test/test_range_generator.rb
|
210
|
+
- test/test_timezone_validator.rb
|
207
211
|
homepage: https://github.com/treasure-data/embulk-input-mixpanel
|
208
212
|
licenses:
|
209
213
|
- Apache2
|
@@ -234,3 +238,5 @@ test_files:
|
|
234
238
|
- test/override_assert_raise.rb
|
235
239
|
- test/prepare_embulk.rb
|
236
240
|
- test/run-test.rb
|
241
|
+
- test/test_range_generator.rb
|
242
|
+
- test/test_timezone_validator.rb
|