embulk-input-mixpanel 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a00fda7cfbd7a835af6407887de6be6680178752
4
- data.tar.gz: a736976f532f0395b32031b12f86ac13982468a0
3
+ metadata.gz: 8534316e5eae7127b70afc10d9b247e872643c32
4
+ data.tar.gz: e629adeb4d42e6386564bd13cfba3bce107ecd9c
5
5
  SHA512:
6
- metadata.gz: cefb098dc10c716f228385ba644d1743b911b140cc96148188de0c1ab7ac8000ac262ae4a510949eb7de30f37d0219119438c90d19fd701d34c860bd367d507d
7
- data.tar.gz: 96e16695df615dacd4d59c563755c73de889e904a10a1dc29248c97a3ed5e40933535f5cf0bebeb7702bd56aefb84d904aed5b151ea4c5aad365c24c8e201438
6
+ metadata.gz: 70287307438223546321af68362fc63fd56f592eb09d17d543d53986990109bb53b285a8467500617f7a8b99ab4991b6850a7b53c0df241282ee2259f5a8808d
7
+ data.tar.gz: f8e370285d393fada617ccc2d4dcb9d5cd54396cf47757a9af6f425bdfb83ac77c6354127171a6859939367698cd9b23e0915b8c954e557533140dcc1ea1525c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.5.2 - 2017-07-26
2
+ * [enhancement]Enable realtime data export[#47](https://github.com/treasure-data/embulk-input-mixpanel/pull/47)
3
+ * [maintenance]Fix incorrect error message[#49](https://github.com/treasure-data/embulk-input-mixpanel/pull/49)
4
+
1
5
  ## 0.5.1 - 2016-12-13
2
6
  * Enable TCP Keepalive to protect from NAT [#48](https://github.com/treasure-data/embulk-input-mixpanel/pull/48)
3
7
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-mixpanel"
4
- spec.version = "0.5.1"
4
+ spec.version = "0.5.2"
5
5
  spec.authors = ["yoshihara", "uu59"]
6
6
  spec.summary = "Mixpanel input plugin for Embulk"
7
7
  spec.description = "Loads records from Mixpanel."
@@ -51,6 +51,7 @@ module Embulk
51
51
  fetch_custom_properties: config.param(:fetch_custom_properties, :bool, default: true),
52
52
  retry_initial_wait_sec: config.param(:retry_initial_wait_sec, :integer, default: 1),
53
53
  retry_limit: config.param(:retry_limit, :integer, default: 5),
54
+ latest_fetched_time: config.param(:latest_fetched_time, :integer, default: 0),
54
55
  }
55
56
 
56
57
  if task[:fetch_unknown_columns] && task[:fetch_custom_properties]
@@ -82,9 +83,12 @@ module Embulk
82
83
  # NOTE: If this plugin supports to run by multi threads, this
83
84
  # implementation is terrible.
84
85
  task_report = task_reports.first
85
- next_to_date = Date.parse(task_report[:to_date]).next
86
+ next_to_date = Date.parse(task_report[:to_date])
86
87
 
87
- next_config_diff = {from_date: next_to_date.to_s}
88
+ next_config_diff = {
89
+ from_date: next_to_date.to_s,
90
+ latest_fetched_time: task_report[:latest_fetched_time],
91
+ }
88
92
  return next_config_diff
89
93
  end
90
94
 
@@ -133,13 +137,28 @@ module Embulk
133
137
 
134
138
  def run
135
139
  self.class.giveup_when_mixpanel_is_down
140
+ prev_latest_fetched_time = task[:latest_fetched_time] || 0
141
+ prev_latest_fetched_time_format = Time.at(prev_latest_fetched_time).strftime("%F %T %z")
142
+ current_latest_fetched_time = prev_latest_fetched_time
136
143
 
137
144
  @dates.each_slice(SLICE_DAYS_COUNT) do |dates|
145
+ ignored_record_count = 0
138
146
  unless preview?
139
147
  Embulk.logger.info "Fetching data from #{dates.first} to #{dates.last} ..."
140
148
  end
141
149
 
142
150
  fetch(dates).each do |record|
151
+ record_time = record["properties"]["time"]
152
+ if record_time <= prev_latest_fetched_time
153
+ ignored_record_count += 1
154
+ next
155
+ end
156
+
157
+ current_latest_fetched_time= [
158
+ current_latest_fetched_time,
159
+ record_time,
160
+ ].max
161
+
143
162
  values = extract_values(record)
144
163
  if @fetch_unknown_columns
145
164
  unknown_values = extract_unknown_values(record)
@@ -151,12 +170,18 @@ module Embulk
151
170
  page_builder.add(values)
152
171
  end
153
172
 
173
+ if ignored_record_count > 0
174
+ Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
175
+ end
154
176
  break if preview?
155
177
  end
156
178
 
157
179
  page_builder.finish
158
180
 
159
- task_report = {to_date: @dates.last || (Date.today - 1)}
181
+ task_report = {
182
+ latest_fetched_time: current_latest_fetched_time,
183
+ to_date: @dates.last || Date.today - 1,
184
+ }
160
185
  return task_report
161
186
  end
162
187
 
@@ -100,18 +100,24 @@ module Embulk
100
100
  set_signatures(params)
101
101
 
102
102
  buf = ""
103
- response = httpclient.get(ENDPOINT_EXPORT, params) do |chunk|
104
- chunk.each_line do |line|
105
- begin
106
- record = JSON.parse(buf + line)
107
- block.call record
108
- buf = ""
109
- rescue JSON::ParserError => e
110
- buf << line
103
+ error_response = ''
104
+ response = httpclient.get(ENDPOINT_EXPORT, params) do |response, chunk|
105
+ # Only process data if response status is 200..299
106
+ if response.status/100 == 2
107
+ chunk.each_line do |line|
108
+ begin
109
+ record = JSON.parse(buf + line)
110
+ block.call record
111
+ buf = ""
112
+ rescue JSON::ParserError => e
113
+ buf << line
114
+ end
111
115
  end
116
+ else
117
+ error_response << chunk
112
118
  end
113
119
  end
114
- handle_error(response)
120
+ handle_error(response, error_response)
115
121
  end
116
122
 
117
123
  def request_small_dataset(params, range)
@@ -123,21 +129,21 @@ module Embulk
123
129
  # cannot satisfied requested Range, get full body
124
130
  res = httpclient.get(ENDPOINT_EXPORT, params)
125
131
  end
126
- handle_error(res)
132
+ handle_error(res,res.body)
127
133
  response_to_enum(res.body)
128
134
  end
129
135
 
130
- def handle_error(response)
136
+ def handle_error(response, error_response)
131
137
  Embulk.logger.debug "response code: #{response.code}"
132
138
  case response.code
133
139
  when 400..499
134
140
  if response.code == 429
135
141
  # [429] {"error": "too many export requests in progress for this project"}
136
- raise RuntimeError.new("[#{response.code}] #{response.body} (will retry)")
142
+ raise RuntimeError.new("[#{response.code}] #{error_response} (will retry)")
137
143
  end
138
- raise ConfigError.new("[#{response.code}] #{response.body}")
144
+ raise ConfigError.new("[#{response.code}] #{error_response}")
139
145
  when 500..599
140
- raise RuntimeError.new("[#{response.code}] #{response.body}")
146
+ raise RuntimeError.new("[#{response.code}] #{error_response}")
141
147
  end
142
148
  end
143
149
 
@@ -9,7 +9,7 @@ class RangeGenerator
9
9
  def generate_range
10
10
  validate
11
11
  show_warnings
12
- range_only_past.map{|date| date.to_s}
12
+ range_only_present.map{|date| date.to_s}
13
13
  end
14
14
 
15
15
  private
@@ -49,12 +49,12 @@ class RangeGenerator
49
49
  if fetch_days
50
50
  from_date..(from_date + fetch_days - 1)
51
51
  else
52
- from_date..yesterday
52
+ from_date..today
53
53
  end
54
54
  end
55
55
 
56
- def range_only_past
57
- range.find_all{|date| date < today}
56
+ def range_only_present
57
+ range.find_all{|date| date <= today}
58
58
  end
59
59
 
60
60
  def overdays?
@@ -62,15 +62,11 @@ class RangeGenerator
62
62
  end
63
63
 
64
64
  def overdays
65
- range.to_a - range_only_past.to_a
65
+ range.to_a - range_only_present.to_a
66
66
  end
67
67
 
68
68
  def from_date_too_early?
69
- from_date > yesterday
70
- end
71
-
72
- def yesterday
73
- today - 1
69
+ from_date > today
74
70
  end
75
71
 
76
72
  def today
@@ -72,12 +72,12 @@ module Embulk
72
72
  assert_equal(expected, actual)
73
73
  end
74
74
 
75
- def test_from_date_today
75
+ def test_from_date_future
76
76
  config = {
77
77
  type: "mixpanel",
78
78
  api_key: API_KEY,
79
79
  api_secret: API_SECRET,
80
- from_date: Date.today.to_s,
80
+ from_date: (Date.today + 1).to_s,
81
81
  }
82
82
 
83
83
  stub_export_all
@@ -245,7 +245,7 @@ module Embulk
245
245
  end
246
246
 
247
247
  def target_dates
248
- dates.find_all{|d| d < Date.today}.map {|date| date.to_s}
248
+ dates.find_all{|d| d <= Date.today}.map {|date| date.to_s}
249
249
  end
250
250
 
251
251
  def transaction_config
@@ -375,9 +375,9 @@ module Embulk
375
375
 
376
376
  def test_resume
377
377
  today = Date.today
378
- control = proc { [{to_date: today.to_s}] }
378
+ control = proc { [{to_date: today.to_s, latest_fetched_time: 999}] }
379
379
  actual = Mixpanel.resume(transaction_task, columns, 1, &control)
380
- assert_equal({from_date: today.next.to_s}, actual)
380
+ assert_equal({from_date: today.to_s, latest_fetched_time: 999}, actual)
381
381
  end
382
382
 
383
383
  def control
@@ -504,6 +504,7 @@ module Embulk
504
504
  fetch_custom_properties: false,
505
505
  retry_initial_wait_sec: 0,
506
506
  retry_limit: 3,
507
+ latest_fetched_time: 0,
507
508
  }
508
509
  end
509
510
  end
@@ -564,6 +565,7 @@ module Embulk
564
565
  added = [
565
566
  record["event"],
566
567
  record["properties"]["$specified"],
568
+ record["properties"]["time"] - 32400, # timezone adjust
567
569
  custom_property_keys.map{|k| {k => record["properties"][k] }}.inject(&:merge)
568
570
  ]
569
571
 
@@ -583,6 +585,7 @@ module Embulk
583
585
  {
584
586
  "event" => "EV",
585
587
  "properties" => {
588
+ "time" => 1000000,
586
589
  "$os" => "Android",
587
590
  "$specified" => "foo",
588
591
  "$foobar" => "foobar",
@@ -594,6 +597,7 @@ module Embulk
594
597
  [
595
598
  {"name" => "event", "type" => "string"},
596
599
  {"name" => "$specified", "type" => "string"},
600
+ {"name" => "time", "type" => "integer"},
597
601
  ]
598
602
  end
599
603
  end
@@ -674,6 +678,7 @@ module Embulk
674
678
  fetch_custom_properties: false,
675
679
  retry_initial_wait_sec: 2,
676
680
  retry_limit: 3,
681
+ latest_fetched_time: 0,
677
682
  }
678
683
  end
679
684
 
@@ -709,6 +714,7 @@ module Embulk
709
714
  fetch_custom_properties: false,
710
715
  retry_initial_wait_sec: 2,
711
716
  retry_limit: 3,
717
+ latest_fetched_time: 0,
712
718
  }
713
719
  end
714
720
 
@@ -37,8 +37,8 @@ class RangeGeneratorTest < Test::Unit::TestCase
37
37
  @warn_message_regexp = /ignored them/
38
38
  end
39
39
 
40
- def test_range_only_past
41
- expected_to = Date.today - 1
40
+ def test_range_only_present
41
+ expected_to = Date.today
42
42
  expected = (@from..expected_to).to_a.map{|date| date.to_s}
43
43
 
44
44
  stub(Embulk.logger).warn(@warn_message_regexp)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mixpanel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-12-13 00:00:00.000000000 Z
12
+ date: 2017-07-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement