embulk-input-mixpanel 0.5.13 → 0.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +26 -13
- data/test/embulk/input/test_mixpanel.rb +1 -1
- data/test/test_range_generator.rb +1 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf14d1cd9b35cd1dbde2a915a31fa71b7c321fc3
|
4
|
+
data.tar.gz: 3bd9b09fd763e0b42537f88879146606dbd92d1d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c1b12301829f2d451013c0883a06d442fad5321c1b497d7d8761ffd6bd7313ada5fa2c2ede54752a56b2e17f0f324741a449831deb4112625d0200ba2448fd6
|
7
|
+
data.tar.gz: d7fd325a8a6f70c0ff61330f5f3f0f1733242f20f2c5349bd8491ebcfab9c78bbf1a6eb0e870027b5b9ffe61bb15f7fdc073dc979b30278853a9c3ade7894be3
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.5.14 - 2018-10-22
|
2
|
+
|
3
|
+
* [enhancement] Handle the wrong period during transition from standard to daylight saving time exception [#61](https://github.com/treasure-data/embulk-input-mixpanel/pull/61)
|
4
|
+
|
1
5
|
## 0.5.13 - 2018-10-04
|
2
6
|
|
3
7
|
* [enhancement] Limit number of returned records in guess and preview [#60](https://github.com/treasure-data/embulk-input-mixpanel/pull/60)
|
@@ -180,7 +180,13 @@ module Embulk
|
|
180
180
|
prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
|
181
181
|
current_latest_fetched_time = prev_latest_fetched_time
|
182
182
|
@dates.each_slice(task[:slice_range]) do |slice_dates|
|
183
|
-
|
183
|
+
ignored_fetched_record_count = 0
|
184
|
+
# There is the issue with Mixpanel time field during the transition from standard to daylight saving time
|
185
|
+
# in the US timezone i.e. 11 Mar 2018 2AM - 2:59AM, time within that period must not be existed,
|
186
|
+
# due to daylight saving, time will be forwarded 1 hour from 2AM to 3AM.
|
187
|
+
#
|
188
|
+
# All of records with wrong timezone will be ignored instead of throw exception out
|
189
|
+
ignored_wrong_daylight_tz_record_count = 0
|
184
190
|
unless preview?
|
185
191
|
Embulk.logger.info "Fetching data from #{slice_dates.first} to #{slice_dates.last} ..."
|
186
192
|
end
|
@@ -194,7 +200,7 @@ module Embulk
|
|
194
200
|
record_time = record["properties"][record_time_column]
|
195
201
|
if @incremental_column.nil?
|
196
202
|
if record_time <= prev_latest_fetched_time
|
197
|
-
|
203
|
+
ignored_fetched_record_count += 1
|
198
204
|
next
|
199
205
|
end
|
200
206
|
end
|
@@ -204,15 +210,19 @@ module Embulk
|
|
204
210
|
record_time,
|
205
211
|
].max
|
206
212
|
end
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
213
|
+
begin
|
214
|
+
values = extract_values(record)
|
215
|
+
if @fetch_unknown_columns
|
216
|
+
unknown_values = extract_unknown_values(record)
|
217
|
+
values << unknown_values.to_json
|
218
|
+
end
|
219
|
+
if task[:fetch_custom_properties]
|
220
|
+
values << collect_custom_properties(record)
|
221
|
+
end
|
222
|
+
page_builder.add(values)
|
223
|
+
rescue TZInfo::PeriodNotFound
|
224
|
+
ignored_wrong_daylight_tz_record_count += 1
|
214
225
|
end
|
215
|
-
page_builder.add(values)
|
216
226
|
end
|
217
227
|
rescue MixpanelApi::IncompleteExportResponseError
|
218
228
|
if !task[:allow_partial_import]
|
@@ -220,8 +230,11 @@ module Embulk
|
|
220
230
|
raise
|
221
231
|
end
|
222
232
|
end
|
223
|
-
if
|
224
|
-
Embulk.logger.warn "Skipped already loaded #{
|
233
|
+
if ignored_fetched_record_count > 0
|
234
|
+
Embulk.logger.warn "Skipped already loaded #{ignored_fetched_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
|
235
|
+
end
|
236
|
+
if ignored_wrong_daylight_tz_record_count > 0
|
237
|
+
Embulk.logger.warn "Skipped #{ignored_wrong_daylight_tz_record_count} records due to corrupted Mixpanel time transition from standard to daylight saving"
|
225
238
|
end
|
226
239
|
break if preview?
|
227
240
|
end
|
@@ -314,7 +327,7 @@ module Embulk
|
|
314
327
|
# Adjust timezone offset to get UTC time
|
315
328
|
# c.f. https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel#export
|
316
329
|
tz = TZInfo::Timezone.get(@timezone)
|
317
|
-
offset = tz.period_for_local(epoch, true).offset.utc_offset
|
330
|
+
offset = tz.period_for_local(epoch, true).offset.utc_total_offset
|
318
331
|
epoch - offset
|
319
332
|
end
|
320
333
|
|
@@ -798,7 +798,7 @@ module Embulk
|
|
798
798
|
|
799
799
|
time = properties["time"]
|
800
800
|
tz = TZInfo::Timezone.get(TIMEZONE)
|
801
|
-
offset = tz.period_for_local(time, true).offset.utc_offset
|
801
|
+
offset = tz.period_for_local(time, true).offset.utc_total_offset
|
802
802
|
adjusted_time = time - offset
|
803
803
|
|
804
804
|
added = [
|
@@ -5,7 +5,6 @@ require "active_support/core_ext/time"
|
|
5
5
|
class RangeGeneratorTest < Test::Unit::TestCase
|
6
6
|
include OverrideAssertRaise
|
7
7
|
DEFAULT_TIMEZONE = "America/Chicago"
|
8
|
-
DEFAULT_LOCAL = ActiveSupport::TimeZone["UTC"]
|
9
8
|
class GenerateRangeTest < self
|
10
9
|
data do
|
11
10
|
{
|
@@ -114,7 +113,7 @@ class RangeGeneratorTest < Test::Unit::TestCase
|
|
114
113
|
RangeGenerator.new(from_date_str, fetch_days, DEFAULT_TIMEZONE).generate_range
|
115
114
|
end
|
116
115
|
def today
|
117
|
-
|
116
|
+
ActiveSupport::TimeZone[DEFAULT_TIMEZONE].today
|
118
117
|
end
|
119
118
|
end
|
120
119
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.13
|
4
|
+
version: 0.5.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-10-
|
12
|
+
date: 2018-10-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|