embulk-input-mixpanel 0.5.9 → 0.5.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -0
- data/embulk-input-mixpanel.gemspec +1 -1
- data/lib/embulk/input/mixpanel.rb +34 -25
- data/lib/embulk/input/mixpanel_api/client.rb +6 -0
- data/lib/embulk/input/mixpanel_api/exceptions.rb +7 -0
- data/test/embulk/input/mixpanel_api/test_client.rb +28 -0
- data/test/embulk/input/test_mixpanel.rb +27 -2
- metadata +36 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2db94c13e223960ee53abf712889a290728870d4
|
4
|
+
data.tar.gz: bae22b3a844dd7b9a3e0aa61cbb7c2c06846137f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 72410d007e0d2035ecbeb1827d226c127fe62434f0e94cf3aa683ca4c1c6ba13c9107a1fdbc03d74dae00da7356034787f3da32417539a26efe0cf42f845278f
|
7
|
+
data.tar.gz: 65c4d423bd208e80a9d72f17d6289cf2369c7b740ecbd7397fafa219672b48ff7bdd59a58953051a4919645bc8f2963c64445edad25ea29eb8b2e490130f491f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
## 0.5.10 - 2017-12-03
|
2
|
+
|
3
|
+
* [enhancement] Add logic to detect errors from Mixpanel during import, and add an option to fail the Embulk job when an import error is encountered [#57](https://github.com/treasure-data/embulk-input-mixpanel/pull/57)
|
4
|
+
|
1
5
|
## 0.5.9 - 2017-11-10
|
2
6
|
|
3
7
|
* [enhancement] Add upper limit delay to incremental column query [#56](https://github.com/treasure-data/embulk-input-mixpanel/pull/56)
|
data/README.md
CHANGED
@@ -42,6 +42,8 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
|
|
42
42
|
- **incremental_column**: Column to be added to the where query as a constraint for incremental time. Only data whose incremental_column timestamp is greater than the previous latest_fetched_time will be returned (string, optional, default: nil)
|
43
43
|
- **back_fill_time**: Amount of time that will be subtracted from `from_date` to calculate the final `from_date` that will be used for the API request. This is due to Mixpanel caching data on user devices before sending it to the Mixpanel server (integer, optional, default: 5)
|
44
44
|
- NOTE: Only has an effect when incremental is true and incremental_column is specified
|
45
|
+
- **incremental_column_upper_limit_delay_in_seconds**: When querying with an incremental column, the plugin locks the upper limit of the incremental column query to the job start time, in order to avoid issues with data that is committed while the job is running
|
46
|
+
ex: `where mp_processing_time <= job_start_time`. The upper limit is calculated as job_start_time minus this configuration parameter. This supports the case where Mixpanel has a delay in its processing (integer, optional, default: 0)
|
45
47
|
- **fetch_unknown_columns**(deprecated): Whether this plugin fetches unknown (unconfigured in config) columns (boolean, optional, default: false)
|
46
48
|
- NOTE: If true, an `unknown_columns` column is created and the unknown columns' data is added to it.
|
47
49
|
- **fetch_custom_properties**: Put all custom properties into the `custom_properties` key. "Custom properties" are properties not described in the Mixpanel documents [1](https://mixpanel.com/help/questions/articles/special-or-reserved-properties), [2](https://mixpanel.com/help/questions/articles/what-properties-do-mixpanels-libraries-store-by-default). (boolean, optional, default: true)
|
@@ -51,6 +53,7 @@ To get it, you should log in mixpanel website, and click gear icon at the lower
|
|
51
53
|
- **bucket**: The data bucket to filter data (string, optional, default: nil)
|
52
54
|
- **retry_initial_wait_sec**: Initial wait time in seconds for exponential backoff (integer, default: 1)
|
53
55
|
- **retry_limit**: Number of times to retry (integer, default: 5)
|
56
|
+
- **allow_partial_import**: Allow plugin to skip errored import (boolean, default: true)
|
54
57
|
|
55
58
|
### `fetch_unknown_columns` and `fetch_custom_properties`
|
56
59
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require "tzinfo"
|
2
2
|
require "perfect_retry"
|
3
3
|
require "embulk/input/mixpanel_api/client"
|
4
|
+
require "embulk/input/mixpanel_api/exceptions"
|
4
5
|
require "range_generator"
|
5
6
|
require "timezone_validator"
|
6
7
|
|
@@ -71,7 +72,8 @@ module Embulk
|
|
71
72
|
incremental: incremental,
|
72
73
|
slice_range: config.param(:slice_range, :integer, default: 7),
|
73
74
|
job_start_time: job_start_time,
|
74
|
-
incremental_column_upper_limit: incremental_column_upper_limit
|
75
|
+
incremental_column_upper_limit: incremental_column_upper_limit,
|
76
|
+
allow_partial_import: config.param(:allow_partial_import,:bool, default: true)
|
75
77
|
}
|
76
78
|
|
77
79
|
if !incremental_column.nil? && !latest_fetched_time.nil? && (incremental_column_upper_limit <= latest_fetched_time)
|
@@ -144,7 +146,7 @@ module Embulk
|
|
144
146
|
PerfectRetry.new do |config|
|
145
147
|
config.limit = task[:retry_limit]
|
146
148
|
config.sleep = proc{|n| task[:retry_initial_wait_sec] * (2 * (n - 1)) }
|
147
|
-
config.dont_rescues = [Embulk::ConfigError]
|
149
|
+
config.dont_rescues = [Embulk::ConfigError,MixpanelApi::IncompleteExportResponseError]
|
148
150
|
config.rescues = [RuntimeError]
|
149
151
|
config.log_level = nil
|
150
152
|
config.logger = Embulk.logger
|
@@ -175,33 +177,40 @@ module Embulk
|
|
175
177
|
Embulk.logger.info "Fetching data from #{slice_dates.first} to #{slice_dates.last} ..."
|
176
178
|
end
|
177
179
|
record_time_column=@incremental_column || DEFAULT_TIME_COLUMN
|
178
|
-
|
179
|
-
|
180
|
-
if
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if
|
186
|
-
|
187
|
-
|
180
|
+
begin
|
181
|
+
fetch(slice_dates, prev_latest_fetched_time).each do |record|
|
182
|
+
if @incremental
|
183
|
+
if !record["properties"].include?(record_time_column)
|
184
|
+
raise Embulk::ConfigError.new("Incremental column not exists in fetched data #{record_time_column}")
|
185
|
+
end
|
186
|
+
record_time = record["properties"][record_time_column]
|
187
|
+
if @incremental_column.nil?
|
188
|
+
if record_time <= prev_latest_fetched_time
|
189
|
+
ignored_record_count += 1
|
190
|
+
next
|
191
|
+
end
|
188
192
|
end
|
189
|
-
end
|
190
193
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
194
|
+
current_latest_fetched_time= [
|
195
|
+
current_latest_fetched_time,
|
196
|
+
record_time,
|
197
|
+
].max
|
198
|
+
end
|
199
|
+
values = extract_values(record)
|
200
|
+
if @fetch_unknown_columns
|
201
|
+
unknown_values = extract_unknown_values(record)
|
202
|
+
values << unknown_values.to_json
|
203
|
+
end
|
204
|
+
if task[:fetch_custom_properties]
|
205
|
+
values << collect_custom_properties(record)
|
206
|
+
end
|
207
|
+
page_builder.add(values)
|
200
208
|
end
|
201
|
-
|
202
|
-
|
209
|
+
rescue MixpanelApi::IncompleteExportResponseError
|
210
|
+
if !task[:allow_partial_import]
|
211
|
+
# Re-raise the exception if partial import is not allowed
|
212
|
+
raise
|
203
213
|
end
|
204
|
-
page_builder.add(values)
|
205
214
|
end
|
206
215
|
if ignored_record_count > 0
|
207
216
|
Embulk.logger.warn "Skipped already loaded #{ignored_record_count} records. These record times are older or equal than previous fetched record time (#{prev_latest_fetched_time} @ #{prev_latest_fetched_time_format})."
|
@@ -2,6 +2,7 @@ require "uri"
|
|
2
2
|
require "digest/md5"
|
3
3
|
require "json"
|
4
4
|
require "httpclient"
|
5
|
+
require "embulk/input/mixpanel_api/exceptions"
|
5
6
|
|
6
7
|
module Embulk
|
7
8
|
module Input
|
@@ -118,6 +119,11 @@ module Embulk
|
|
118
119
|
end
|
119
120
|
end
|
120
121
|
handle_error(response, error_response)
|
122
|
+
if !buf.empty?
|
123
|
+
# A non-empty buffer means the last JSON line is incomplete
|
124
|
+
Embulk.logger.error "Received incomplete data from Mixpanel, #{buf}"
|
125
|
+
raise MixpanelApi::IncompleteExportResponseError.new("Incomplete data received")
|
126
|
+
end
|
121
127
|
end
|
122
128
|
|
123
129
|
def request_small_dataset(params, range)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require "embulk/input/mixpanel_api/client"
|
2
|
+
require "embulk/input/mixpanel_api/exceptions"
|
2
3
|
require "override_assert_raise"
|
3
4
|
|
4
5
|
module Embulk
|
@@ -87,6 +88,33 @@ module Embulk
|
|
87
88
|
assert_equal(dummy_responses, records)
|
88
89
|
end
|
89
90
|
|
91
|
+
def test_export_partial_with_export_terminated_early
|
92
|
+
stub_client
|
93
|
+
stub(@client).set_signatures(anything) {}
|
94
|
+
stub_response(Struct.new(:code, :body).new(200, jsonl_dummy_responses+"\nexport terminated early"))
|
95
|
+
|
96
|
+
records = []
|
97
|
+
assert_raise MixpanelApi::IncompleteExportResponseError do
|
98
|
+
@client.export(params) do |record|
|
99
|
+
records << record
|
100
|
+
end
|
101
|
+
end
|
102
|
+
assert_equal(dummy_responses, records)
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_export_partial_with_error_json
|
106
|
+
stub_client
|
107
|
+
stub(@client).set_signatures(anything) {}
|
108
|
+
stub_response(Struct.new(:code, :body).new(200, jsonl_dummy_responses+"\n{\"error\":"))
|
109
|
+
records = []
|
110
|
+
assert_raise MixpanelApi::IncompleteExportResponseError do
|
111
|
+
@client.export(params) do |record|
|
112
|
+
records << record
|
113
|
+
end
|
114
|
+
end
|
115
|
+
assert_equal(dummy_responses, records)
|
116
|
+
end
|
117
|
+
|
90
118
|
def test_failure_with_400
|
91
119
|
stub_client
|
92
120
|
stub_response(failure_response(400))
|
@@ -595,7 +595,32 @@ module Embulk
|
|
595
595
|
|
596
596
|
@plugin.run
|
597
597
|
end
|
598
|
+
class PartialRunTest < self
|
599
|
+
def setup_client
|
600
|
+
any_instance_of(MixpanelApi::Client) do |klass|
|
601
|
+
stub(klass).request { raise MixpanelApi::IncompleteExportResponseError.new("Incomplete data received") }
|
602
|
+
end
|
603
|
+
end
|
604
|
+
def setup
|
605
|
+
setup_client()
|
606
|
+
@page_builder = Object.new
|
607
|
+
end
|
608
|
+
|
609
|
+
def test_run_with_allow_partial_false
|
610
|
+
@plugin = Mixpanel.new(task.merge(allow_partial_import: false), nil, nil, @page_builder)
|
611
|
+
stub(@plugin).preview? {false}
|
612
|
+
assert_raise MixpanelApi::IncompleteExportResponseError do
|
613
|
+
@plugin.run
|
614
|
+
end
|
615
|
+
end
|
598
616
|
|
617
|
+
def test_run_with_allow_partial_true
|
618
|
+
@plugin = Mixpanel.new(task.merge(allow_partial_import: true), nil, nil, @page_builder)
|
619
|
+
mock(@page_builder).finish
|
620
|
+
stub(@plugin).preview? {false}
|
621
|
+
@plugin.run
|
622
|
+
end
|
623
|
+
end
|
599
624
|
class SliceRangeRunTest < self
|
600
625
|
|
601
626
|
def test_default_slice_range
|
@@ -725,7 +750,6 @@ module Embulk
|
|
725
750
|
|
726
751
|
@plugin.run
|
727
752
|
end
|
728
|
-
|
729
753
|
private
|
730
754
|
|
731
755
|
def task
|
@@ -834,7 +858,8 @@ module Embulk
|
|
834
858
|
latest_fetched_time: 0,
|
835
859
|
slice_range: 7,
|
836
860
|
job_start_time: JOB_START_TIME,
|
837
|
-
incremental_column_upper_limit: (JOB_START_TIME - UPPER_LIMIT_DELAY * 1000)
|
861
|
+
incremental_column_upper_limit: (JOB_START_TIME - UPPER_LIMIT_DELAY * 1000),
|
862
|
+
allow_partial_import: true
|
838
863
|
}
|
839
864
|
end
|
840
865
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,80 +9,79 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-12-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name: httpclient
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
16
|
requirements:
|
18
17
|
- - ">="
|
19
18
|
- !ruby/object:Gem::Version
|
20
19
|
version: 2.8.3
|
20
|
+
name: httpclient
|
21
|
+
prerelease: false
|
22
|
+
type: :runtime
|
21
23
|
version_requirements: !ruby/object:Gem::Requirement
|
22
24
|
requirements:
|
23
25
|
- - ">="
|
24
26
|
- !ruby/object:Gem::Version
|
25
27
|
version: 2.8.3
|
26
|
-
prerelease: false
|
27
|
-
type: :runtime
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name: tzinfo
|
30
29
|
requirement: !ruby/object:Gem::Requirement
|
31
30
|
requirements:
|
32
31
|
- - ">="
|
33
32
|
- !ruby/object:Gem::Version
|
34
33
|
version: '0'
|
34
|
+
name: tzinfo
|
35
|
+
prerelease: false
|
36
|
+
type: :runtime
|
35
37
|
version_requirements: !ruby/object:Gem::Requirement
|
36
38
|
requirements:
|
37
39
|
- - ">="
|
38
40
|
- !ruby/object:Gem::Version
|
39
41
|
version: '0'
|
40
|
-
prerelease: false
|
41
|
-
type: :runtime
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name: perfect_retry
|
44
43
|
requirement: !ruby/object:Gem::Requirement
|
45
44
|
requirements:
|
46
45
|
- - "~>"
|
47
46
|
- !ruby/object:Gem::Version
|
48
47
|
version: '0.5'
|
48
|
+
name: perfect_retry
|
49
|
+
prerelease: false
|
50
|
+
type: :runtime
|
49
51
|
version_requirements: !ruby/object:Gem::Requirement
|
50
52
|
requirements:
|
51
53
|
- - "~>"
|
52
54
|
- !ruby/object:Gem::Version
|
53
55
|
version: '0.5'
|
54
|
-
prerelease: false
|
55
|
-
type: :runtime
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
-
name: bundler
|
58
57
|
requirement: !ruby/object:Gem::Requirement
|
59
58
|
requirements:
|
60
59
|
- - "~>"
|
61
60
|
- !ruby/object:Gem::Version
|
62
61
|
version: '1.0'
|
62
|
+
name: bundler
|
63
|
+
prerelease: false
|
64
|
+
type: :development
|
63
65
|
version_requirements: !ruby/object:Gem::Requirement
|
64
66
|
requirements:
|
65
67
|
- - "~>"
|
66
68
|
- !ruby/object:Gem::Version
|
67
69
|
version: '1.0'
|
68
|
-
prerelease: false
|
69
|
-
type: :development
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
|
-
name: rake
|
72
71
|
requirement: !ruby/object:Gem::Requirement
|
73
72
|
requirements:
|
74
73
|
- - ">="
|
75
74
|
- !ruby/object:Gem::Version
|
76
75
|
version: '10.0'
|
76
|
+
name: rake
|
77
|
+
prerelease: false
|
78
|
+
type: :development
|
77
79
|
version_requirements: !ruby/object:Gem::Requirement
|
78
80
|
requirements:
|
79
81
|
- - ">="
|
80
82
|
- !ruby/object:Gem::Version
|
81
83
|
version: '10.0'
|
82
|
-
prerelease: false
|
83
|
-
type: :development
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
|
-
name: embulk
|
86
85
|
requirement: !ruby/object:Gem::Requirement
|
87
86
|
requirements:
|
88
87
|
- - ">="
|
@@ -91,6 +90,9 @@ dependencies:
|
|
91
90
|
- - "<"
|
92
91
|
- !ruby/object:Gem::Version
|
93
92
|
version: '1.0'
|
93
|
+
name: embulk
|
94
|
+
prerelease: false
|
95
|
+
type: :development
|
94
96
|
version_requirements: !ruby/object:Gem::Requirement
|
95
97
|
requirements:
|
96
98
|
- - ">="
|
@@ -99,78 +101,76 @@ dependencies:
|
|
99
101
|
- - "<"
|
100
102
|
- !ruby/object:Gem::Version
|
101
103
|
version: '1.0'
|
102
|
-
prerelease: false
|
103
|
-
type: :development
|
104
104
|
- !ruby/object:Gem::Dependency
|
105
|
-
name: pry
|
106
105
|
requirement: !ruby/object:Gem::Requirement
|
107
106
|
requirements:
|
108
107
|
- - ">="
|
109
108
|
- !ruby/object:Gem::Version
|
110
109
|
version: '0'
|
110
|
+
name: pry
|
111
|
+
prerelease: false
|
112
|
+
type: :development
|
111
113
|
version_requirements: !ruby/object:Gem::Requirement
|
112
114
|
requirements:
|
113
115
|
- - ">="
|
114
116
|
- !ruby/object:Gem::Version
|
115
117
|
version: '0'
|
116
|
-
prerelease: false
|
117
|
-
type: :development
|
118
118
|
- !ruby/object:Gem::Dependency
|
119
|
-
name: test-unit
|
120
119
|
requirement: !ruby/object:Gem::Requirement
|
121
120
|
requirements:
|
122
121
|
- - ">="
|
123
122
|
- !ruby/object:Gem::Version
|
124
123
|
version: '0'
|
124
|
+
name: test-unit
|
125
|
+
prerelease: false
|
126
|
+
type: :development
|
125
127
|
version_requirements: !ruby/object:Gem::Requirement
|
126
128
|
requirements:
|
127
129
|
- - ">="
|
128
130
|
- !ruby/object:Gem::Version
|
129
131
|
version: '0'
|
130
|
-
prerelease: false
|
131
|
-
type: :development
|
132
132
|
- !ruby/object:Gem::Dependency
|
133
|
-
name: test-unit-rr
|
134
133
|
requirement: !ruby/object:Gem::Requirement
|
135
134
|
requirements:
|
136
135
|
- - ">="
|
137
136
|
- !ruby/object:Gem::Version
|
138
137
|
version: '0'
|
138
|
+
name: test-unit-rr
|
139
|
+
prerelease: false
|
140
|
+
type: :development
|
139
141
|
version_requirements: !ruby/object:Gem::Requirement
|
140
142
|
requirements:
|
141
143
|
- - ">="
|
142
144
|
- !ruby/object:Gem::Version
|
143
145
|
version: '0'
|
144
|
-
prerelease: false
|
145
|
-
type: :development
|
146
146
|
- !ruby/object:Gem::Dependency
|
147
|
-
name: codeclimate-test-reporter
|
148
147
|
requirement: !ruby/object:Gem::Requirement
|
149
148
|
requirements:
|
150
149
|
- - "~>"
|
151
150
|
- !ruby/object:Gem::Version
|
152
151
|
version: '0.5'
|
152
|
+
name: codeclimate-test-reporter
|
153
|
+
prerelease: false
|
154
|
+
type: :development
|
153
155
|
version_requirements: !ruby/object:Gem::Requirement
|
154
156
|
requirements:
|
155
157
|
- - "~>"
|
156
158
|
- !ruby/object:Gem::Version
|
157
159
|
version: '0.5'
|
158
|
-
prerelease: false
|
159
|
-
type: :development
|
160
160
|
- !ruby/object:Gem::Dependency
|
161
|
-
name: everyleaf-embulk_helper
|
162
161
|
requirement: !ruby/object:Gem::Requirement
|
163
162
|
requirements:
|
164
163
|
- - ">="
|
165
164
|
- !ruby/object:Gem::Version
|
166
165
|
version: '0'
|
166
|
+
name: everyleaf-embulk_helper
|
167
|
+
prerelease: false
|
168
|
+
type: :development
|
167
169
|
version_requirements: !ruby/object:Gem::Requirement
|
168
170
|
requirements:
|
169
171
|
- - ">="
|
170
172
|
- !ruby/object:Gem::Version
|
171
173
|
version: '0'
|
172
|
-
prerelease: false
|
173
|
-
type: :development
|
174
174
|
description: Loads records from Mixpanel.
|
175
175
|
email:
|
176
176
|
- h.yoshihara@everyleaf.com
|
@@ -195,6 +195,7 @@ files:
|
|
195
195
|
- gemfiles/template.erb
|
196
196
|
- lib/embulk/input/mixpanel.rb
|
197
197
|
- lib/embulk/input/mixpanel_api/client.rb
|
198
|
+
- lib/embulk/input/mixpanel_api/exceptions.rb
|
198
199
|
- lib/range_generator.rb
|
199
200
|
- lib/timezone_validator.rb
|
200
201
|
- test/embulk/input/mixpanel_api/test_client.rb
|