embulk-input-mixpanel 0.5.9 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 71b5e68b20fb92e74d3a3210ee1700f90bdf26bc
4
- data.tar.gz: b29ffe1fdf6f9f759cb6d0790e1448da6fd34ec2
3
+ metadata.gz: 2db94c13e223960ee53abf712889a290728870d4
4
+ data.tar.gz: bae22b3a844dd7b9a3e0aa61cbb7c2c06846137f
5
5
  SHA512:
6
- metadata.gz: 527ebe18c1cf8a7e6f8ab553f4b272f1719c09c8df4f2a3530f2ba267b7d633056cb6641b9681117f4fb2497a1cf4265ede7cce75b5f6e947e24b5333b55fd5d
7
- data.tar.gz: 279e5750a346ed4b04c104418d56b7b7e7427d5a580ad1f7481e124b5ba893d4c370eb27577686db9fa11f6d1f18d513f22504441353a4547082356163a5d1be
6
+ metadata.gz: 72410d007e0d2035ecbeb1827d226c127fe62434f0e94cf3aa683ca4c1c6ba13c9107a1fdbc03d74dae00da7356034787f3da32417539a26efe0cf42f845278f
7
+ data.tar.gz: 65c4d423bd208e80a9d72f17d6289cf2369c7b740ecbd7397fafa219672b48ff7bdd59a58953051a4919645bc8f2963c64445edad25ea29eb8b2e490130f491f
@@ -1,3 +1,7 @@
1
+ ## 0.5.10 - 2017-12-03
2
+
3
+ * [enhancement] Detect errors returned by Mixpanel during import, and add an option to fail the Embulk job when an import error is encountered [#57](https://github.com/treasure-data/embulk-input-mixpanel/pull/57)
4
+
1
5
  ## 0.5.9 - 2017-11-10
2
6
 
3
7
  * [enhancement] Add upper limit delay to incremental column query [#56](https://github.com/treasure-data/embulk-input-mixpanel/pull/56)
data/README.md CHANGED
@@ -42,6 +42,8 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
42
42
  - **incremental_column**: Column to be added to the where query as a constraint for incremental time. Only data whose incremental_column timestamp is greater than the previous latest_fetched_time will be returned (string, optional, default: nil)
43
43
  - **back_fill_time**: Amount of time that will be subtracted from `from_date` to calculate the final `from_date` that will be used for the API request. This is due to Mixpanel caching data on user devices before sending it to the Mixpanel server (integer, optional, default: 5)
44
44
  - NOTE: Only has an effect when incremental is true and incremental_column is specified
45
+ - **incremental_column_upper_limit_delay_in_seconds**: When querying with an incremental column, the plugin locks the upper limit of the incremental column query to the job start time, in order to avoid issues with data that is committed while the job is running
46
+ ex: `where mp_processing_time <= job_start_time`. The upper limit is calculated as job_start_time minus this configuration parameter. This supports the case where Mixpanel has a delay in its processing (integer, optional, default: 0)
45
47
  - **fetch_unknown_columns**(deprecated): Whether this plugin fetches unknown (unconfigured in config) columns (boolean, optional, default: false)
46
48
  - NOTE: If true, an `unknown_columns` column is created and populated with the unknown columns' data.
47
49
  - **fetch_custom_properties**: Collect all custom properties into the `custom_properties` key. "Custom properties" are those not described in the Mixpanel documents [1](https://mixpanel.com/help/questions/articles/special-or-reserved-properties), [2](https://mixpanel.com/help/questions/articles/what-properties-do-mixpanels-libraries-store-by-default). (boolean, optional, default: true)
@@ -51,6 +53,7 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
51
53
  - **bucket**: The data bucket to filter data by (string, optional, default: nil)
52
54
  - **retry_initial_wait_sec**: Initial wait time in seconds for exponential backoff (integer, default: 1)
53
55
  - **retry_limit**: Maximum number of retries (integer, default: 5)
56
+ - **allow_partial_import**: Allow the plugin to continue when Mixpanel returns incomplete export data; if false, the job fails on such an error (boolean, default: true)
54
57
 
55
58
  ### `fetch_unknown_columns` and `fetch_custom_properties`
56
59
 
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "embulk-input-mixpanel"
3
- spec.version = "0.5.9"
3
+ spec.version = "0.5.10"
4
4
  spec.authors = ["yoshihara", "uu59"]
5
5
  spec.summary = "Mixpanel input plugin for Embulk"
6
6
  spec.description = "Loads records from Mixpanel."
@@ -1,6 +1,7 @@
1
1
  require "tzinfo"
2
2
  require "perfect_retry"
3
3
  require "embulk/input/mixpanel_api/client"
4
+ require "embulk/input/mixpanel_api/exceptions"
4
5
  require "range_generator"
5
6
  require "timezone_validator"
6
7
 
@@ -71,7 +72,8 @@ module Embulk
71
72
  incremental: incremental,
72
73
  slice_range: config.param(:slice_range, :integer, default: 7),
73
74
  job_start_time: job_start_time,
74
- incremental_column_upper_limit: incremental_column_upper_limit
75
+ incremental_column_upper_limit: incremental_column_upper_limit,
76
+ allow_partial_import: config.param(:allow_partial_import,:bool, default: true)
75
77
  }
76
78
 
77
79
  if !incremental_column.nil? && !latest_fetched_time.nil? && (incremental_column_upper_limit <= latest_fetched_time)
@@ -144,7 +146,7 @@ module Embulk
144
146
  PerfectRetry.new do |config|
145
147
  config.limit = task[:retry_limit]
146
148
  config.sleep = proc{|n| task[:retry_initial_wait_sec] * (2 * (n - 1)) }
147
- config.dont_rescues = [Embulk::ConfigError]
149
+ config.dont_rescues = [Embulk::ConfigError,MixpanelApi::IncompleteExportResponseError]
148
150
  config.rescues = [RuntimeError]
149
151
  config.log_level = nil
150
152
  config.logger = Embulk.logger
@@ -175,33 +177,40 @@ module Embulk
175
177
  Embulk.logger.info "Fetching data from #{slice_dates.first} to #{slice_dates.last} ..."
176
178
  end
177
179
  record_time_column=@incremental_column || DEFAULT_TIME_COLUMN
178
- fetch(slice_dates, prev_latest_fetched_time).each do |record|
179
- if @incremental
180
- if !record["properties"].include?(record_time_column)
181
- raise Embulk::ConfigError.new("Incremental column not exists in fetched data #{record_time_column}")
182
- end
183
- record_time = record["properties"][record_time_column]
184
- if @incremental_column.nil?
185
- if record_time <= prev_latest_fetched_time
186
- ignored_record_count += 1
187
- next
180
+ begin
181
+ fetch(slice_dates, prev_latest_fetched_time).each do |record|
182
+ if @incremental
183
+ if !record["properties"].include?(record_time_column)
184
+ raise Embulk::ConfigError.new("Incremental column not exists in fetched data #{record_time_column}")
185
+ end
186
+ record_time = record["properties"][record_time_column]
187
+ if @incremental_column.nil?
188
+ if record_time <= prev_latest_fetched_time
189
+ ignored_record_count += 1
190
+ next
191
+ end
188
192
  end
189
- end
190
193
 
191
- current_latest_fetched_time= [
192
- current_latest_fetched_time,
193
- record_time,
194
- ].max
195
- end
196
- values = extract_values(record)
197
- if @fetch_unknown_columns
198
- unknown_values = extract_unknown_values(record)
199
- values << unknown_values.to_json
194
+ current_latest_fetched_time= [
195
+ current_latest_fetched_time,
196
+ record_time,
197
+ ].max
198
+ end
199
+ values = extract_values(record)
200
+ if @fetch_unknown_columns
201
+ unknown_values = extract_unknown_values(record)
202
+ values << unknown_values.to_json
203
+ end
204
+ if task[:fetch_custom_properties]
205
+ values << collect_custom_properties(record)
206
+ end
207
+ page_builder.add(values)
200
208
  end
201
- if task[:fetch_custom_properties]
202
- values << collect_custom_properties(record)
209
+ rescue MixpanelApi::IncompleteExportResponseError
210
+ if !task[:allow_partial_import]
211
+ # re raise the exception if we don't allow partial import
212
+ raise
203
213
  end
204
- page_builder.add(values)
205
214
  end
206
215
  if ignored_record_count > 0
207
216
  Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
@@ -2,6 +2,7 @@ require "uri"
2
2
  require "digest/md5"
3
3
  require "json"
4
4
  require "httpclient"
5
+ require "embulk/input/mixpanel_api/exceptions"
5
6
 
6
7
  module Embulk
7
8
  module Input
@@ -118,6 +119,11 @@ module Embulk
118
119
  end
119
120
  end
120
121
  handle_error(response, error_response)
122
+ if !buf.empty?
123
+ # a non-empty buffer means the last JSON line is incomplete
124
+ Embulk.logger.error "Received incomplete data from Mixpanel, #{buf}"
125
+ raise MixpanelApi::IncompleteExportResponseError.new("Incomplete data received")
126
+ end
121
127
  end
122
128
 
123
129
  def request_small_dataset(params, range)
@@ -0,0 +1,7 @@
1
+ module Embulk
2
+ module Input
3
+ module MixpanelApi
4
+ class IncompleteExportResponseError < StandardError; end
5
+ end
6
+ end
7
+ end
@@ -1,4 +1,5 @@
1
1
  require "embulk/input/mixpanel_api/client"
2
+ require "embulk/input/mixpanel_api/exceptions"
2
3
  require "override_assert_raise"
3
4
 
4
5
  module Embulk
@@ -87,6 +88,33 @@ module Embulk
87
88
  assert_equal(dummy_responses, records)
88
89
  end
89
90
 
91
+ def test_export_partial_with_export_terminated_early
92
+ stub_client
93
+ stub(@client).set_signatures(anything) {}
94
+ stub_response(Struct.new(:code, :body).new(200, jsonl_dummy_responses+"\nexport terminated early"))
95
+
96
+ records = []
97
+ assert_raise MixpanelApi::IncompleteExportResponseError do
98
+ @client.export(params) do |record|
99
+ records << record
100
+ end
101
+ end
102
+ assert_equal(dummy_responses, records)
103
+ end
104
+
105
+ def test_export_partial_with_error_json
106
+ stub_client
107
+ stub(@client).set_signatures(anything) {}
108
+ stub_response(Struct.new(:code, :body).new(200, jsonl_dummy_responses+"\n{\"error\":"))
109
+ records = []
110
+ assert_raise MixpanelApi::IncompleteExportResponseError do
111
+ @client.export(params) do |record|
112
+ records << record
113
+ end
114
+ end
115
+ assert_equal(dummy_responses, records)
116
+ end
117
+
90
118
  def test_failure_with_400
91
119
  stub_client
92
120
  stub_response(failure_response(400))
@@ -595,7 +595,32 @@ module Embulk
595
595
 
596
596
  @plugin.run
597
597
  end
598
+ class PartialRunTest < self
599
+ def setup_client
600
+ any_instance_of(MixpanelApi::Client) do |klass|
601
+ stub(klass).request { raise MixpanelApi::IncompleteExportResponseError.new("Incomplete data received") }
602
+ end
603
+ end
604
+ def setup
605
+ setup_client()
606
+ @page_builder = Object.new
607
+ end
608
+
609
+ def test_run_with_allow_partial_false
610
+ @plugin = Mixpanel.new(task.merge(allow_partial_import: false), nil, nil, @page_builder)
611
+ stub(@plugin).preview? {false}
612
+ assert_raise MixpanelApi::IncompleteExportResponseError do
613
+ @plugin.run
614
+ end
615
+ end
598
616
 
617
+ def test_run_with_allow_partial_true
618
+ @plugin = Mixpanel.new(task.merge(allow_partial_import: true), nil, nil, @page_builder)
619
+ mock(@page_builder).finish
620
+ stub(@plugin).preview? {false}
621
+ @plugin.run
622
+ end
623
+ end
599
624
  class SliceRangeRunTest < self
600
625
 
601
626
  def test_default_slice_range
@@ -725,7 +750,6 @@ module Embulk
725
750
 
726
751
  @plugin.run
727
752
  end
728
-
729
753
  private
730
754
 
731
755
  def task
@@ -834,7 +858,8 @@ module Embulk
834
858
  latest_fetched_time: 0,
835
859
  slice_range: 7,
836
860
  job_start_time: JOB_START_TIME,
837
- incremental_column_upper_limit: (JOB_START_TIME - UPPER_LIMIT_DELAY * 1000)
861
+ incremental_column_upper_limit: (JOB_START_TIME - UPPER_LIMIT_DELAY * 1000),
862
+ allow_partial_import: true
838
863
  }
839
864
  end
840
865
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mixpanel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.9
4
+ version: 0.5.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,80 +9,79 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-11-10 00:00:00.000000000 Z
12
+ date: 2017-12-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: httpclient
16
15
  requirement: !ruby/object:Gem::Requirement
17
16
  requirements:
18
17
  - - ">="
19
18
  - !ruby/object:Gem::Version
20
19
  version: 2.8.3
20
+ name: httpclient
21
+ prerelease: false
22
+ type: :runtime
21
23
  version_requirements: !ruby/object:Gem::Requirement
22
24
  requirements:
23
25
  - - ">="
24
26
  - !ruby/object:Gem::Version
25
27
  version: 2.8.3
26
- prerelease: false
27
- type: :runtime
28
28
  - !ruby/object:Gem::Dependency
29
- name: tzinfo
30
29
  requirement: !ruby/object:Gem::Requirement
31
30
  requirements:
32
31
  - - ">="
33
32
  - !ruby/object:Gem::Version
34
33
  version: '0'
34
+ name: tzinfo
35
+ prerelease: false
36
+ type: :runtime
35
37
  version_requirements: !ruby/object:Gem::Requirement
36
38
  requirements:
37
39
  - - ">="
38
40
  - !ruby/object:Gem::Version
39
41
  version: '0'
40
- prerelease: false
41
- type: :runtime
42
42
  - !ruby/object:Gem::Dependency
43
- name: perfect_retry
44
43
  requirement: !ruby/object:Gem::Requirement
45
44
  requirements:
46
45
  - - "~>"
47
46
  - !ruby/object:Gem::Version
48
47
  version: '0.5'
48
+ name: perfect_retry
49
+ prerelease: false
50
+ type: :runtime
49
51
  version_requirements: !ruby/object:Gem::Requirement
50
52
  requirements:
51
53
  - - "~>"
52
54
  - !ruby/object:Gem::Version
53
55
  version: '0.5'
54
- prerelease: false
55
- type: :runtime
56
56
  - !ruby/object:Gem::Dependency
57
- name: bundler
58
57
  requirement: !ruby/object:Gem::Requirement
59
58
  requirements:
60
59
  - - "~>"
61
60
  - !ruby/object:Gem::Version
62
61
  version: '1.0'
62
+ name: bundler
63
+ prerelease: false
64
+ type: :development
63
65
  version_requirements: !ruby/object:Gem::Requirement
64
66
  requirements:
65
67
  - - "~>"
66
68
  - !ruby/object:Gem::Version
67
69
  version: '1.0'
68
- prerelease: false
69
- type: :development
70
70
  - !ruby/object:Gem::Dependency
71
- name: rake
72
71
  requirement: !ruby/object:Gem::Requirement
73
72
  requirements:
74
73
  - - ">="
75
74
  - !ruby/object:Gem::Version
76
75
  version: '10.0'
76
+ name: rake
77
+ prerelease: false
78
+ type: :development
77
79
  version_requirements: !ruby/object:Gem::Requirement
78
80
  requirements:
79
81
  - - ">="
80
82
  - !ruby/object:Gem::Version
81
83
  version: '10.0'
82
- prerelease: false
83
- type: :development
84
84
  - !ruby/object:Gem::Dependency
85
- name: embulk
86
85
  requirement: !ruby/object:Gem::Requirement
87
86
  requirements:
88
87
  - - ">="
@@ -91,6 +90,9 @@ dependencies:
91
90
  - - "<"
92
91
  - !ruby/object:Gem::Version
93
92
  version: '1.0'
93
+ name: embulk
94
+ prerelease: false
95
+ type: :development
94
96
  version_requirements: !ruby/object:Gem::Requirement
95
97
  requirements:
96
98
  - - ">="
@@ -99,78 +101,76 @@ dependencies:
99
101
  - - "<"
100
102
  - !ruby/object:Gem::Version
101
103
  version: '1.0'
102
- prerelease: false
103
- type: :development
104
104
  - !ruby/object:Gem::Dependency
105
- name: pry
106
105
  requirement: !ruby/object:Gem::Requirement
107
106
  requirements:
108
107
  - - ">="
109
108
  - !ruby/object:Gem::Version
110
109
  version: '0'
110
+ name: pry
111
+ prerelease: false
112
+ type: :development
111
113
  version_requirements: !ruby/object:Gem::Requirement
112
114
  requirements:
113
115
  - - ">="
114
116
  - !ruby/object:Gem::Version
115
117
  version: '0'
116
- prerelease: false
117
- type: :development
118
118
  - !ruby/object:Gem::Dependency
119
- name: test-unit
120
119
  requirement: !ruby/object:Gem::Requirement
121
120
  requirements:
122
121
  - - ">="
123
122
  - !ruby/object:Gem::Version
124
123
  version: '0'
124
+ name: test-unit
125
+ prerelease: false
126
+ type: :development
125
127
  version_requirements: !ruby/object:Gem::Requirement
126
128
  requirements:
127
129
  - - ">="
128
130
  - !ruby/object:Gem::Version
129
131
  version: '0'
130
- prerelease: false
131
- type: :development
132
132
  - !ruby/object:Gem::Dependency
133
- name: test-unit-rr
134
133
  requirement: !ruby/object:Gem::Requirement
135
134
  requirements:
136
135
  - - ">="
137
136
  - !ruby/object:Gem::Version
138
137
  version: '0'
138
+ name: test-unit-rr
139
+ prerelease: false
140
+ type: :development
139
141
  version_requirements: !ruby/object:Gem::Requirement
140
142
  requirements:
141
143
  - - ">="
142
144
  - !ruby/object:Gem::Version
143
145
  version: '0'
144
- prerelease: false
145
- type: :development
146
146
  - !ruby/object:Gem::Dependency
147
- name: codeclimate-test-reporter
148
147
  requirement: !ruby/object:Gem::Requirement
149
148
  requirements:
150
149
  - - "~>"
151
150
  - !ruby/object:Gem::Version
152
151
  version: '0.5'
152
+ name: codeclimate-test-reporter
153
+ prerelease: false
154
+ type: :development
153
155
  version_requirements: !ruby/object:Gem::Requirement
154
156
  requirements:
155
157
  - - "~>"
156
158
  - !ruby/object:Gem::Version
157
159
  version: '0.5'
158
- prerelease: false
159
- type: :development
160
160
  - !ruby/object:Gem::Dependency
161
- name: everyleaf-embulk_helper
162
161
  requirement: !ruby/object:Gem::Requirement
163
162
  requirements:
164
163
  - - ">="
165
164
  - !ruby/object:Gem::Version
166
165
  version: '0'
166
+ name: everyleaf-embulk_helper
167
+ prerelease: false
168
+ type: :development
167
169
  version_requirements: !ruby/object:Gem::Requirement
168
170
  requirements:
169
171
  - - ">="
170
172
  - !ruby/object:Gem::Version
171
173
  version: '0'
172
- prerelease: false
173
- type: :development
174
174
  description: Loads records from Mixpanel.
175
175
  email:
176
176
  - h.yoshihara@everyleaf.com
@@ -195,6 +195,7 @@ files:
195
195
  - gemfiles/template.erb
196
196
  - lib/embulk/input/mixpanel.rb
197
197
  - lib/embulk/input/mixpanel_api/client.rb
198
+ - lib/embulk/input/mixpanel_api/exceptions.rb
198
199
  - lib/range_generator.rb
199
200
  - lib/timezone_validator.rb
200
201
  - test/embulk/input/mixpanel_api/test_client.rb