embulk-input-mixpanel 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a00fda7cfbd7a835af6407887de6be6680178752
4
- data.tar.gz: a736976f532f0395b32031b12f86ac13982468a0
3
+ metadata.gz: 8534316e5eae7127b70afc10d9b247e872643c32
4
+ data.tar.gz: e629adeb4d42e6386564bd13cfba3bce107ecd9c
5
5
  SHA512:
6
- metadata.gz: cefb098dc10c716f228385ba644d1743b911b140cc96148188de0c1ab7ac8000ac262ae4a510949eb7de30f37d0219119438c90d19fd701d34c860bd367d507d
7
- data.tar.gz: 96e16695df615dacd4d59c563755c73de889e904a10a1dc29248c97a3ed5e40933535f5cf0bebeb7702bd56aefb84d904aed5b151ea4c5aad365c24c8e201438
6
+ metadata.gz: 70287307438223546321af68362fc63fd56f592eb09d17d543d53986990109bb53b285a8467500617f7a8b99ab4991b6850a7b53c0df241282ee2259f5a8808d
7
+ data.tar.gz: f8e370285d393fada617ccc2d4dcb9d5cd54396cf47757a9af6f425bdfb83ac77c6354127171a6859939367698cd9b23e0915b8c954e557533140dcc1ea1525c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.5.2 - 2017-07-26
2
+ * [enhancement] Enable realtime data export [#47](https://github.com/treasure-data/embulk-input-mixpanel/pull/47)
3
+ * [maintenance] Fix incorrect error message [#49](https://github.com/treasure-data/embulk-input-mixpanel/pull/49)
4
+
1
5
  ## 0.5.1 - 2016-12-13
2
6
  * Enable TCP Keepalive to protect from NAT [#48](https://github.com/treasure-data/embulk-input-mixpanel/pull/48)
3
7
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-mixpanel"
4
- spec.version = "0.5.1"
4
+ spec.version = "0.5.2"
5
5
  spec.authors = ["yoshihara", "uu59"]
6
6
  spec.summary = "Mixpanel input plugin for Embulk"
7
7
  spec.description = "Loads records from Mixpanel."
@@ -51,6 +51,7 @@ module Embulk
51
51
  fetch_custom_properties: config.param(:fetch_custom_properties, :bool, default: true),
52
52
  retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
53
53
  retry_limit: config.param(:retry_limit, :integer, default: 5),
54
+ latest_fetched_time: config.param(:latest_fetched_time, :integer, default: 0),
54
55
  }
55
56
 
56
57
  if task[:fetch_unknown_columns] && task[:fetch_custom_properties]
@@ -82,9 +83,12 @@ module Embulk
82
83
  # NOTE: If this plugin ever supports running with multiple threads, this
83
84
  # implementation is terrible.
84
85
  task_report = task_reports.first
85
- next_to_date = Date.parse(task_report[:to_date]).next
86
+ next_to_date = Date.parse(task_report[:to_date])
86
87
 
87
- next_config_diff = {from_date: next_to_date.to_s}
88
+ next_config_diff = {
89
+ from_date: next_to_date.to_s,
90
+ latest_fetched_time: task_report[:latest_fetched_time],
91
+ }
88
92
  return next_config_diff
89
93
  end
90
94
 
@@ -133,13 +137,28 @@ module Embulk
133
137
 
134
138
  def run
135
139
  self.class.giveup_when_mixpanel_is_down
140
+ prev_latest_fetched_time = task[:latest_fetched_time] || 0
141
+ prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
142
+ current_latest_fetched_time = prev_latest_fetched_time
136
143
 
137
144
  @dates.each_slice(SLICE_DAYS_COUNT) do |dates|
145
+ ignored_record_count = 0
138
146
  unless preview?
139
147
  Embulk.logger.info "Fetching data from #{dates.first} to #{dates.last} ..."
140
148
  end
141
149
 
142
150
  fetch(dates).each do |record|
151
+ record_time = record["properties"]["time"]
152
+ if record_time <= prev_latest_fetched_time
153
+ ignored_record_count += 1
154
+ next
155
+ end
156
+
157
+ current_latest_fetched_time= [
158
+ current_latest_fetched_time,
159
+ record_time,
160
+ ].max
161
+
143
162
  values = extract_values(record)
144
163
  if @fetch_unknown_columns
145
164
  unknown_values = extract_unknown_values(record)
@@ -151,12 +170,18 @@ module Embulk
151
170
  page_builder.add(values)
152
171
  end
153
172
 
173
+ if ignored_record_count > 0
174
+ Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
175
+ end
154
176
  break if preview?
155
177
  end
156
178
 
157
179
  page_builder.finish
158
180
 
159
- task_report = {to_date: @dates.last || (Date.today - 1)}
181
+ task_report = {
182
+ latest_fetched_time: current_latest_fetched_time,
183
+ to_date: @dates.last || Date.today - 1,
184
+ }
160
185
  return task_report
161
186
  end
162
187
 
@@ -100,18 +100,24 @@ module Embulk
100
100
  set_signatures(params)
101
101
 
102
102
  buf = ""
103
- response = httpclient.get(ENDPOINT_EXPORT, params) do |chunk|
104
- chunk.each_line do |line|
105
- begin
106
- record = JSON.parse(buf + line)
107
- block.call record
108
- buf = ""
109
- rescue JSON::ParserError => e
110
- buf << line
103
+ error_response = ''
104
+ response = httpclient.get(ENDPOINT_EXPORT, params) do |response, chunk|
105
+ # Only process data if response status is 200..299
106
+ if response.status/100 == 2
107
+ chunk.each_line do |line|
108
+ begin
109
+ record = JSON.parse(buf + line)
110
+ block.call record
111
+ buf = ""
112
+ rescue JSON::ParserError => e
113
+ buf << line
114
+ end
111
115
  end
116
+ else
117
+ error_response << chunk
112
118
  end
113
119
  end
114
- handle_error(response)
120
+ handle_error(response, error_response)
115
121
  end
116
122
 
117
123
  def request_small_dataset(params, range)
@@ -123,21 +129,21 @@ module Embulk
123
129
  # cannot satisfied requested Range, get full body
124
130
  res = httpclient.get(ENDPOINT_EXPORT, params)
125
131
  end
126
- handle_error(res)
132
+ handle_error(res,res.body)
127
133
  response_to_enum(res.body)
128
134
  end
129
135
 
130
- def handle_error(response)
136
+ def handle_error(response, error_response)
131
137
  Embulk.logger.debug "response code: #{response.code}"
132
138
  case response.code
133
139
  when 400..499
134
140
  if response.code == 429
135
141
  # [429] {"error": "too many export requests in progress for this project"}
136
- raise RuntimeError.new("[#{response.code}] #{response.body} (will retry)")
142
+ raise RuntimeError.new("[#{response.code}] #{error_response} (will retry)")
137
143
  end
138
- raise ConfigError.new("[#{response.code}] #{response.body}")
144
+ raise ConfigError.new("[#{response.code}] #{error_response}")
139
145
  when 500..599
140
- raise RuntimeError.new("[#{response.code}] #{response.body}")
146
+ raise RuntimeError.new("[#{response.code}] #{error_response}")
141
147
  end
142
148
  end
143
149
 
@@ -9,7 +9,7 @@ class RangeGenerator
9
9
  def generate_range
10
10
  validate
11
11
  show_warnings
12
- range_only_past.map{|date| date.to_s}
12
+ range_only_present.map{|date| date.to_s}
13
13
  end
14
14
 
15
15
  private
@@ -49,12 +49,12 @@ class RangeGenerator
49
49
  if fetch_days
50
50
  from_date..(from_date + fetch_days - 1)
51
51
  else
52
- from_date..yesterday
52
+ from_date..today
53
53
  end
54
54
  end
55
55
 
56
- def range_only_past
57
- range.find_all{|date| date < today}
56
+ def range_only_present
57
+ range.find_all{|date| date <= today}
58
58
  end
59
59
 
60
60
  def overdays?
@@ -62,15 +62,11 @@ class RangeGenerator
62
62
  end
63
63
 
64
64
  def overdays
65
- range.to_a - range_only_past.to_a
65
+ range.to_a - range_only_present.to_a
66
66
  end
67
67
 
68
68
  def from_date_too_early?
69
- from_date > yesterday
70
- end
71
-
72
- def yesterday
73
- today - 1
69
+ from_date > today
74
70
  end
75
71
 
76
72
  def today
@@ -72,12 +72,12 @@ module Embulk
72
72
  assert_equal(expected, actual)
73
73
  end
74
74
 
75
- def test_from_date_today
75
+ def test_from_date_future
76
76
  config = {
77
77
  type: "mixpanel",
78
78
  api_key: API_KEY,
79
79
  api_secret: API_SECRET,
80
- from_date: Date.today.to_s,
80
+ from_date: (Date.today + 1).to_s,
81
81
  }
82
82
 
83
83
  stub_export_all
@@ -245,7 +245,7 @@ module Embulk
245
245
  end
246
246
 
247
247
  def target_dates
248
- dates.find_all{|d| d < Date.today}.map {|date| date.to_s}
248
+ dates.find_all{|d| d <= Date.today}.map {|date| date.to_s}
249
249
  end
250
250
 
251
251
  def transaction_config
@@ -375,9 +375,9 @@ module Embulk
375
375
 
376
376
  def test_resume
377
377
  today = Date.today
378
- control = proc { [{to_date: today.to_s}] }
378
+ control = proc { [{to_date: today.to_s, latest_fetched_time: 999}] }
379
379
  actual = Mixpanel.resume(transaction_task, columns, 1, &control)
380
- assert_equal({from_date: today.next.to_s}, actual)
380
+ assert_equal({from_date: today.to_s, latest_fetched_time: 999}, actual)
381
381
  end
382
382
 
383
383
  def control
@@ -504,6 +504,7 @@ module Embulk
504
504
  fetch_custom_properties: false,
505
505
  retry_initial_wait_sec: 0,
506
506
  retry_limit: 3,
507
+ latest_fetched_time: 0,
507
508
  }
508
509
  end
509
510
  end
@@ -564,6 +565,7 @@ module Embulk
564
565
  added = [
565
566
  record["event"],
566
567
  record["properties"]["$specified"],
568
+ record["properties"]["time"] - 32400, # timezone adjust
567
569
  custom_property_keys.map{|k| {k => record["properties"][k] }}.inject(&:merge)
568
570
  ]
569
571
 
@@ -583,6 +585,7 @@ module Embulk
583
585
  {
584
586
  "event" => "EV",
585
587
  "properties" => {
588
+ "time" => 1000000,
586
589
  "$os" => "Android",
587
590
  "$specified" => "foo",
588
591
  "$foobar" => "foobar",
@@ -594,6 +597,7 @@ module Embulk
594
597
  [
595
598
  {"name" => "event", "type" => "string"},
596
599
  {"name" => "$specified", "type" => "string"},
600
+ {"name" => "time", "type" => "integer"},
597
601
  ]
598
602
  end
599
603
  end
@@ -674,6 +678,7 @@ module Embulk
674
678
  fetch_custom_properties: false,
675
679
  retry_initial_wait_sec: 2,
676
680
  retry_limit: 3,
681
+ latest_fetched_time: 0,
677
682
  }
678
683
  end
679
684
 
@@ -709,6 +714,7 @@ module Embulk
709
714
  fetch_custom_properties: false,
710
715
  retry_initial_wait_sec: 2,
711
716
  retry_limit: 3,
717
+ latest_fetched_time: 0,
712
718
  }
713
719
  end
714
720
 
@@ -37,8 +37,8 @@ class RangeGeneratorTest < Test::Unit::TestCase
37
37
  @warn_message_regexp = /ignored them/
38
38
  end
39
39
 
40
- def test_range_only_past
41
- expected_to = Date.today - 1
40
+ def test_range_only_present
41
+ expected_to = Date.today
42
42
  expected = (@from..expected_to).to_a.map{|date| date.to_s}
43
43
 
44
44
  stub(Embulk.logger).warn(@warn_message_regexp)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mixpanel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-12-13 00:00:00.000000000 Z
12
+ date: 2017-07-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement