embulk-input-mixpanel 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +28 -3
- data/lib/embulk/input/mixpanel_api/client.rb +20 -14
- data/lib/range_generator.rb +6 -10
- data/test/embulk/input/test_mixpanel.rb +11 -5
- data/test/test_range_generator.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8534316e5eae7127b70afc10d9b247e872643c32
|
4
|
+
data.tar.gz: e629adeb4d42e6386564bd13cfba3bce107ecd9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70287307438223546321af68362fc63fd56f592eb09d17d543d53986990109bb53b285a8467500617f7a8b99ab4991b6850a7b53c0df241282ee2259f5a8808d
|
7
|
+
data.tar.gz: f8e370285d393fada617ccc2d4dcb9d5cd54396cf47757a9af6f425bdfb83ac77c6354127171a6859939367698cd9b23e0915b8c954e557533140dcc1ea1525c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.5.2 - 2017-07-26
|
2
|
+
* [enhancement] Enable realtime data export [#47](https://github.com/treasure-data/embulk-input-mixpanel/pull/47)
|
3
|
+
* [maintenance] Fix incorrect error message [#49](https://github.com/treasure-data/embulk-input-mixpanel/pull/49)
|
4
|
+
|
1
5
|
## 0.5.1 - 2016-12-13
|
2
6
|
* Enable TCP Keepalive to protect from NAT [#48](https://github.com/treasure-data/embulk-input-mixpanel/pull/48)
|
3
7
|
|
@@ -51,6 +51,7 @@ module Embulk
|
|
51
51
|
fetch_custom_properties: config.param(:fetch_custom_properties, :bool, default: true),
|
52
52
|
retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
|
53
53
|
retry_limit: config.param(:retry_limit, :integer, default: 5),
|
54
|
+
latest_fetched_time: config.param(:latest_fetched_time, :integer, default: 0),
|
54
55
|
}
|
55
56
|
|
56
57
|
if task[:fetch_unknown_columns] && task[:fetch_custom_properties]
|
@@ -82,9 +83,12 @@ module Embulk
|
|
82
83
|
# NOTE: If this plugin supports to run by multi threads, this
|
83
84
|
# implementation is terrible.
|
84
85
|
task_report = task_reports.first
|
85
|
-
next_to_date = Date.parse(task_report[:to_date])
|
86
|
+
next_to_date = Date.parse(task_report[:to_date])
|
86
87
|
|
87
|
-
next_config_diff = {
|
88
|
+
next_config_diff = {
|
89
|
+
from_date: next_to_date.to_s,
|
90
|
+
latest_fetched_time: task_report[:latest_fetched_time],
|
91
|
+
}
|
88
92
|
return next_config_diff
|
89
93
|
end
|
90
94
|
|
@@ -133,13 +137,28 @@ module Embulk
|
|
133
137
|
|
134
138
|
def run
|
135
139
|
self.class.giveup_when_mixpanel_is_down
|
140
|
+
prev_latest_fetched_time = task[:latest_fetched_time] || 0
|
141
|
+
prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
|
142
|
+
current_latest_fetched_time = prev_latest_fetched_time
|
136
143
|
|
137
144
|
@dates.each_slice(SLICE_DAYS_COUNT) do |dates|
|
145
|
+
ignored_record_count = 0
|
138
146
|
unless preview?
|
139
147
|
Embulk.logger.info "Fetching data from #{dates.first} to #{dates.last} ..."
|
140
148
|
end
|
141
149
|
|
142
150
|
fetch(dates).each do |record|
|
151
|
+
record_time = record["properties"]["time"]
|
152
|
+
if record_time <= prev_latest_fetched_time
|
153
|
+
ignored_record_count += 1
|
154
|
+
next
|
155
|
+
end
|
156
|
+
|
157
|
+
current_latest_fetched_time= [
|
158
|
+
current_latest_fetched_time,
|
159
|
+
record_time,
|
160
|
+
].max
|
161
|
+
|
143
162
|
values = extract_values(record)
|
144
163
|
if @fetch_unknown_columns
|
145
164
|
unknown_values = extract_unknown_values(record)
|
@@ -151,12 +170,18 @@ module Embulk
|
|
151
170
|
page_builder.add(values)
|
152
171
|
end
|
153
172
|
|
173
|
+
if ignored_record_count > 0
|
174
|
+
Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
|
175
|
+
end
|
154
176
|
break if preview?
|
155
177
|
end
|
156
178
|
|
157
179
|
page_builder.finish
|
158
180
|
|
159
|
-
task_report = {
|
181
|
+
task_report = {
|
182
|
+
latest_fetched_time: current_latest_fetched_time,
|
183
|
+
to_date: @dates.last || Date.today - 1,
|
184
|
+
}
|
160
185
|
return task_report
|
161
186
|
end
|
162
187
|
|
@@ -100,18 +100,24 @@ module Embulk
|
|
100
100
|
set_signatures(params)
|
101
101
|
|
102
102
|
buf = ""
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
103
|
+
error_response = ''
|
104
|
+
response = httpclient.get(ENDPOINT_EXPORT, params) do |response, chunk|
|
105
|
+
# Only process data if response status is 200..299
|
106
|
+
if response.status/100 == 2
|
107
|
+
chunk.each_line do |line|
|
108
|
+
begin
|
109
|
+
record = JSON.parse(buf + line)
|
110
|
+
block.call record
|
111
|
+
buf = ""
|
112
|
+
rescue JSON::ParserError => e
|
113
|
+
buf << line
|
114
|
+
end
|
111
115
|
end
|
116
|
+
else
|
117
|
+
error_response << chunk
|
112
118
|
end
|
113
119
|
end
|
114
|
-
handle_error(response)
|
120
|
+
handle_error(response, error_response)
|
115
121
|
end
|
116
122
|
|
117
123
|
def request_small_dataset(params, range)
|
@@ -123,21 +129,21 @@ module Embulk
|
|
123
129
|
# cannot satisfied requested Range, get full body
|
124
130
|
res = httpclient.get(ENDPOINT_EXPORT, params)
|
125
131
|
end
|
126
|
-
handle_error(res)
|
132
|
+
handle_error(res,res.body)
|
127
133
|
response_to_enum(res.body)
|
128
134
|
end
|
129
135
|
|
130
|
-
def handle_error(response)
|
136
|
+
def handle_error(response, error_response)
|
131
137
|
Embulk.logger.debug "response code: #{response.code}"
|
132
138
|
case response.code
|
133
139
|
when 400..499
|
134
140
|
if response.code == 429
|
135
141
|
# [429] {"error": "too many export requests in progress for this project"}
|
136
|
-
raise RuntimeError.new("[#{response.code}] #{
|
142
|
+
raise RuntimeError.new("[#{response.code}] #{error_response} (will retry)")
|
137
143
|
end
|
138
|
-
raise ConfigError.new("[#{response.code}] #{
|
144
|
+
raise ConfigError.new("[#{response.code}] #{error_response}")
|
139
145
|
when 500..599
|
140
|
-
raise RuntimeError.new("[#{response.code}] #{
|
146
|
+
raise RuntimeError.new("[#{response.code}] #{error_response}")
|
141
147
|
end
|
142
148
|
end
|
143
149
|
|
data/lib/range_generator.rb
CHANGED
@@ -9,7 +9,7 @@ class RangeGenerator
|
|
9
9
|
def generate_range
|
10
10
|
validate
|
11
11
|
show_warnings
|
12
|
-
|
12
|
+
range_only_present.map{|date| date.to_s}
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
@@ -49,12 +49,12 @@ class RangeGenerator
|
|
49
49
|
if fetch_days
|
50
50
|
from_date..(from_date + fetch_days - 1)
|
51
51
|
else
|
52
|
-
from_date..
|
52
|
+
from_date..today
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
57
|
-
range.find_all{|date| date
|
56
|
+
def range_only_present
|
57
|
+
range.find_all{|date| date <= today}
|
58
58
|
end
|
59
59
|
|
60
60
|
def overdays?
|
@@ -62,15 +62,11 @@ class RangeGenerator
|
|
62
62
|
end
|
63
63
|
|
64
64
|
def overdays
|
65
|
-
range.to_a -
|
65
|
+
range.to_a - range_only_present.to_a
|
66
66
|
end
|
67
67
|
|
68
68
|
def from_date_too_early?
|
69
|
-
from_date >
|
70
|
-
end
|
71
|
-
|
72
|
-
def yesterday
|
73
|
-
today - 1
|
69
|
+
from_date > today
|
74
70
|
end
|
75
71
|
|
76
72
|
def today
|
@@ -72,12 +72,12 @@ module Embulk
|
|
72
72
|
assert_equal(expected, actual)
|
73
73
|
end
|
74
74
|
|
75
|
-
def
|
75
|
+
def test_from_date_future
|
76
76
|
config = {
|
77
77
|
type: "mixpanel",
|
78
78
|
api_key: API_KEY,
|
79
79
|
api_secret: API_SECRET,
|
80
|
-
from_date: Date.today.to_s,
|
80
|
+
from_date: (Date.today + 1).to_s,
|
81
81
|
}
|
82
82
|
|
83
83
|
stub_export_all
|
@@ -245,7 +245,7 @@ module Embulk
|
|
245
245
|
end
|
246
246
|
|
247
247
|
def target_dates
|
248
|
-
dates.find_all{|d| d
|
248
|
+
dates.find_all{|d| d <= Date.today}.map {|date| date.to_s}
|
249
249
|
end
|
250
250
|
|
251
251
|
def transaction_config
|
@@ -375,9 +375,9 @@ module Embulk
|
|
375
375
|
|
376
376
|
def test_resume
|
377
377
|
today = Date.today
|
378
|
-
control = proc { [{to_date: today.to_s}] }
|
378
|
+
control = proc { [{to_date: today.to_s, latest_fetched_time: 999}] }
|
379
379
|
actual = Mixpanel.resume(transaction_task, columns, 1, &control)
|
380
|
-
assert_equal({from_date: today.
|
380
|
+
assert_equal({from_date: today.to_s, latest_fetched_time: 999}, actual)
|
381
381
|
end
|
382
382
|
|
383
383
|
def control
|
@@ -504,6 +504,7 @@ module Embulk
|
|
504
504
|
fetch_custom_properties: false,
|
505
505
|
retry_initial_wait_sec: 0,
|
506
506
|
retry_limit: 3,
|
507
|
+
latest_fetched_time: 0,
|
507
508
|
}
|
508
509
|
end
|
509
510
|
end
|
@@ -564,6 +565,7 @@ module Embulk
|
|
564
565
|
added = [
|
565
566
|
record["event"],
|
566
567
|
record["properties"]["$specified"],
|
568
|
+
record["properties"]["time"] - 32400, # timezone adjust
|
567
569
|
custom_property_keys.map{|k| {k => record["properties"][k] }}.inject(&:merge)
|
568
570
|
]
|
569
571
|
|
@@ -583,6 +585,7 @@ module Embulk
|
|
583
585
|
{
|
584
586
|
"event" => "EV",
|
585
587
|
"properties" => {
|
588
|
+
"time" => 1000000,
|
586
589
|
"$os" => "Android",
|
587
590
|
"$specified" => "foo",
|
588
591
|
"$foobar" => "foobar",
|
@@ -594,6 +597,7 @@ module Embulk
|
|
594
597
|
[
|
595
598
|
{"name" => "event", "type" => "string"},
|
596
599
|
{"name" => "$specified", "type" => "string"},
|
600
|
+
{"name" => "time", "type" => "integer"},
|
597
601
|
]
|
598
602
|
end
|
599
603
|
end
|
@@ -674,6 +678,7 @@ module Embulk
|
|
674
678
|
fetch_custom_properties: false,
|
675
679
|
retry_initial_wait_sec: 2,
|
676
680
|
retry_limit: 3,
|
681
|
+
latest_fetched_time: 0,
|
677
682
|
}
|
678
683
|
end
|
679
684
|
|
@@ -709,6 +714,7 @@ module Embulk
|
|
709
714
|
fetch_custom_properties: false,
|
710
715
|
retry_initial_wait_sec: 2,
|
711
716
|
retry_limit: 3,
|
717
|
+
latest_fetched_time: 0,
|
712
718
|
}
|
713
719
|
end
|
714
720
|
|
@@ -37,8 +37,8 @@ class RangeGeneratorTest < Test::Unit::TestCase
|
|
37
37
|
@warn_message_regexp = /ignored them/
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
41
|
-
expected_to = Date.today
|
40
|
+
def test_range_only_present
|
41
|
+
expected_to = Date.today
|
42
42
|
expected = (@from..expected_to).to_a.map{|date| date.to_s}
|
43
43
|
|
44
44
|
stub(Embulk.logger).warn(@warn_message_regexp)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-07-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|