embulk-input-mixpanel 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +28 -3
- data/lib/embulk/input/mixpanel_api/client.rb +20 -14
- data/lib/range_generator.rb +6 -10
- data/test/embulk/input/test_mixpanel.rb +11 -5
- data/test/test_range_generator.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8534316e5eae7127b70afc10d9b247e872643c32
|
4
|
+
data.tar.gz: e629adeb4d42e6386564bd13cfba3bce107ecd9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70287307438223546321af68362fc63fd56f592eb09d17d543d53986990109bb53b285a8467500617f7a8b99ab4991b6850a7b53c0df241282ee2259f5a8808d
|
7
|
+
data.tar.gz: f8e370285d393fada617ccc2d4dcb9d5cd54396cf47757a9af6f425bdfb83ac77c6354127171a6859939367698cd9b23e0915b8c954e557533140dcc1ea1525c
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.5.2 - 2017-07-26
|
2
|
+
* [enhancement]Enable realtime data export[#47](https://github.com/treasure-data/embulk-input-mixpanel/pull/47)
|
3
|
+
* [maintenance]Fix incorrect error message[#49](https://github.com/treasure-data/embulk-input-mixpanel/pull/49)
|
4
|
+
|
1
5
|
## 0.5.1 - 2016-12-13
|
2
6
|
* Enable TCP Keepalive to protect from NAT [#48](https://github.com/treasure-data/embulk-input-mixpanel/pull/48)
|
3
7
|
|
@@ -51,6 +51,7 @@ module Embulk
|
|
51
51
|
fetch_custom_properties: config.param(:fetch_custom_properties, :bool, default: true),
|
52
52
|
retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
|
53
53
|
retry_limit: config.param(:retry_limit, :integer, default: 5),
|
54
|
+
latest_fetched_time: config.param(:latest_fetched_time, :integer, default: 0),
|
54
55
|
}
|
55
56
|
|
56
57
|
if task[:fetch_unknown_columns] && task[:fetch_custom_properties]
|
@@ -82,9 +83,12 @@ module Embulk
|
|
82
83
|
# NOTE: If this plugin supports to run by multi threads, this
|
83
84
|
# implementation is terrible.
|
84
85
|
task_report = task_reports.first
|
85
|
-
next_to_date = Date.parse(task_report[:to_date])
|
86
|
+
next_to_date = Date.parse(task_report[:to_date])
|
86
87
|
|
87
|
-
next_config_diff = {
|
88
|
+
next_config_diff = {
|
89
|
+
from_date: next_to_date.to_s,
|
90
|
+
latest_fetched_time: task_report[:latest_fetched_time],
|
91
|
+
}
|
88
92
|
return next_config_diff
|
89
93
|
end
|
90
94
|
|
@@ -133,13 +137,28 @@ module Embulk
|
|
133
137
|
|
134
138
|
def run
|
135
139
|
self.class.giveup_when_mixpanel_is_down
|
140
|
+
prev_latest_fetched_time = task[:latest_fetched_time] || 0
|
141
|
+
prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
|
142
|
+
current_latest_fetched_time = prev_latest_fetched_time
|
136
143
|
|
137
144
|
@dates.each_slice(SLICE_DAYS_COUNT) do |dates|
|
145
|
+
ignored_record_count = 0
|
138
146
|
unless preview?
|
139
147
|
Embulk.logger.info "Fetching data from #{dates.first} to #{dates.last} ..."
|
140
148
|
end
|
141
149
|
|
142
150
|
fetch(dates).each do |record|
|
151
|
+
record_time = record["properties"]["time"]
|
152
|
+
if record_time <= prev_latest_fetched_time
|
153
|
+
ignored_record_count += 1
|
154
|
+
next
|
155
|
+
end
|
156
|
+
|
157
|
+
current_latest_fetched_time= [
|
158
|
+
current_latest_fetched_time,
|
159
|
+
record_time,
|
160
|
+
].max
|
161
|
+
|
143
162
|
values = extract_values(record)
|
144
163
|
if @fetch_unknown_columns
|
145
164
|
unknown_values = extract_unknown_values(record)
|
@@ -151,12 +170,18 @@ module Embulk
|
|
151
170
|
page_builder.add(values)
|
152
171
|
end
|
153
172
|
|
173
|
+
if ignored_record_count > 0
|
174
|
+
Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
|
175
|
+
end
|
154
176
|
break if preview?
|
155
177
|
end
|
156
178
|
|
157
179
|
page_builder.finish
|
158
180
|
|
159
|
-
task_report = {
|
181
|
+
task_report = {
|
182
|
+
latest_fetched_time: current_latest_fetched_time,
|
183
|
+
to_date: @dates.last || Date.today - 1,
|
184
|
+
}
|
160
185
|
return task_report
|
161
186
|
end
|
162
187
|
|
@@ -100,18 +100,24 @@ module Embulk
|
|
100
100
|
set_signatures(params)
|
101
101
|
|
102
102
|
buf = ""
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
103
|
+
error_response = ''
|
104
|
+
response = httpclient.get(ENDPOINT_EXPORT, params) do |response, chunk|
|
105
|
+
# Only process data if response status is 200..299
|
106
|
+
if response.status/100 == 2
|
107
|
+
chunk.each_line do |line|
|
108
|
+
begin
|
109
|
+
record = JSON.parse(buf + line)
|
110
|
+
block.call record
|
111
|
+
buf = ""
|
112
|
+
rescue JSON::ParserError => e
|
113
|
+
buf << line
|
114
|
+
end
|
111
115
|
end
|
116
|
+
else
|
117
|
+
error_response << chunk
|
112
118
|
end
|
113
119
|
end
|
114
|
-
handle_error(response)
|
120
|
+
handle_error(response, error_response)
|
115
121
|
end
|
116
122
|
|
117
123
|
def request_small_dataset(params, range)
|
@@ -123,21 +129,21 @@ module Embulk
|
|
123
129
|
# cannot satisfied requested Range, get full body
|
124
130
|
res = httpclient.get(ENDPOINT_EXPORT, params)
|
125
131
|
end
|
126
|
-
handle_error(res)
|
132
|
+
handle_error(res,res.body)
|
127
133
|
response_to_enum(res.body)
|
128
134
|
end
|
129
135
|
|
130
|
-
def handle_error(response)
|
136
|
+
def handle_error(response, error_response)
|
131
137
|
Embulk.logger.debug "response code: #{response.code}"
|
132
138
|
case response.code
|
133
139
|
when 400..499
|
134
140
|
if response.code == 429
|
135
141
|
# [429] {"error": "too many export requests in progress for this project"}
|
136
|
-
raise RuntimeError.new("[#{response.code}] #{
|
142
|
+
raise RuntimeError.new("[#{response.code}] #{error_response} (will retry)")
|
137
143
|
end
|
138
|
-
raise ConfigError.new("[#{response.code}] #{
|
144
|
+
raise ConfigError.new("[#{response.code}] #{error_response}")
|
139
145
|
when 500..599
|
140
|
-
raise RuntimeError.new("[#{response.code}] #{
|
146
|
+
raise RuntimeError.new("[#{response.code}] #{error_response}")
|
141
147
|
end
|
142
148
|
end
|
143
149
|
|
data/lib/range_generator.rb
CHANGED
@@ -9,7 +9,7 @@ class RangeGenerator
|
|
9
9
|
def generate_range
|
10
10
|
validate
|
11
11
|
show_warnings
|
12
|
-
|
12
|
+
range_only_present.map{|date| date.to_s}
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
@@ -49,12 +49,12 @@ class RangeGenerator
|
|
49
49
|
if fetch_days
|
50
50
|
from_date..(from_date + fetch_days - 1)
|
51
51
|
else
|
52
|
-
from_date..yesterday
|
52
|
+
from_date..today
|
53
53
|
end
|
54
54
|
end
|
55
55
|
|
56
|
-
def
|
57
|
-
range.find_all{|date| date
|
56
|
+
def range_only_present
|
57
|
+
range.find_all{|date| date <= today}
|
58
58
|
end
|
59
59
|
|
60
60
|
def overdays?
|
@@ -62,15 +62,11 @@ class RangeGenerator
|
|
62
62
|
end
|
63
63
|
|
64
64
|
def overdays
|
65
|
-
range.to_a -
|
65
|
+
range.to_a - range_only_present.to_a
|
66
66
|
end
|
67
67
|
|
68
68
|
def from_date_too_early?
|
69
|
-
from_date > yesterday
|
70
|
-
end
|
71
|
-
|
72
|
-
def yesterday
|
73
|
-
today - 1
|
69
|
+
from_date > today
|
74
70
|
end
|
75
71
|
|
76
72
|
def today
|
@@ -72,12 +72,12 @@ module Embulk
|
|
72
72
|
assert_equal(expected, actual)
|
73
73
|
end
|
74
74
|
|
75
|
-
def
|
75
|
+
def test_from_date_future
|
76
76
|
config = {
|
77
77
|
type: "mixpanel",
|
78
78
|
api_key: API_KEY,
|
79
79
|
api_secret: API_SECRET,
|
80
|
-
from_date: Date.today.to_s,
|
80
|
+
from_date: (Date.today + 1).to_s,
|
81
81
|
}
|
82
82
|
|
83
83
|
stub_export_all
|
@@ -245,7 +245,7 @@ module Embulk
|
|
245
245
|
end
|
246
246
|
|
247
247
|
def target_dates
|
248
|
-
dates.find_all{|d| d
|
248
|
+
dates.find_all{|d| d <= Date.today}.map {|date| date.to_s}
|
249
249
|
end
|
250
250
|
|
251
251
|
def transaction_config
|
@@ -375,9 +375,9 @@ module Embulk
|
|
375
375
|
|
376
376
|
def test_resume
|
377
377
|
today = Date.today
|
378
|
-
control = proc { [{to_date: today.to_s}] }
|
378
|
+
control = proc { [{to_date: today.to_s, latest_fetched_time: 999}] }
|
379
379
|
actual = Mixpanel.resume(transaction_task, columns, 1, &control)
|
380
|
-
assert_equal({from_date: today.
|
380
|
+
assert_equal({from_date: today.to_s, latest_fetched_time: 999}, actual)
|
381
381
|
end
|
382
382
|
|
383
383
|
def control
|
@@ -504,6 +504,7 @@ module Embulk
|
|
504
504
|
fetch_custom_properties: false,
|
505
505
|
retry_initial_wait_sec: 0,
|
506
506
|
retry_limit: 3,
|
507
|
+
latest_fetched_time: 0,
|
507
508
|
}
|
508
509
|
end
|
509
510
|
end
|
@@ -564,6 +565,7 @@ module Embulk
|
|
564
565
|
added = [
|
565
566
|
record["event"],
|
566
567
|
record["properties"]["$specified"],
|
568
|
+
record["properties"]["time"] - 32400, # timezone adjust
|
567
569
|
custom_property_keys.map{|k| {k => record["properties"][k] }}.inject(&:merge)
|
568
570
|
]
|
569
571
|
|
@@ -583,6 +585,7 @@ module Embulk
|
|
583
585
|
{
|
584
586
|
"event" => "EV",
|
585
587
|
"properties" => {
|
588
|
+
"time" => 1000000,
|
586
589
|
"$os" => "Android",
|
587
590
|
"$specified" => "foo",
|
588
591
|
"$foobar" => "foobar",
|
@@ -594,6 +597,7 @@ module Embulk
|
|
594
597
|
[
|
595
598
|
{"name" => "event", "type" => "string"},
|
596
599
|
{"name" => "$specified", "type" => "string"},
|
600
|
+
{"name" => "time", "type" => "integer"},
|
597
601
|
]
|
598
602
|
end
|
599
603
|
end
|
@@ -674,6 +678,7 @@ module Embulk
|
|
674
678
|
fetch_custom_properties: false,
|
675
679
|
retry_initial_wait_sec: 2,
|
676
680
|
retry_limit: 3,
|
681
|
+
latest_fetched_time: 0,
|
677
682
|
}
|
678
683
|
end
|
679
684
|
|
@@ -709,6 +714,7 @@ module Embulk
|
|
709
714
|
fetch_custom_properties: false,
|
710
715
|
retry_initial_wait_sec: 2,
|
711
716
|
retry_limit: 3,
|
717
|
+
latest_fetched_time: 0,
|
712
718
|
}
|
713
719
|
end
|
714
720
|
|
@@ -37,8 +37,8 @@ class RangeGeneratorTest < Test::Unit::TestCase
|
|
37
37
|
@warn_message_regexp = /ignored them/
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
41
|
-
expected_to = Date.today
|
40
|
+
def test_range_only_present
|
41
|
+
expected_to = Date.today
|
42
42
|
expected = (@from..expected_to).to_a.map{|date| date.to_s}
|
43
43
|
|
44
44
|
stub(Embulk.logger).warn(@warn_message_regexp)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-07-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|