embulk-input-mixpanel 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5cb5156064ae0192f6d1d2321a173cd12ef5fd3
|
4
|
+
data.tar.gz: 2a2f3a63e55035bb8dd96a8edf27eb45e546866d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48d7e7af0b5ad28fc030e7c1baaa4c34fd236b60fb34df7e8cc1b764825c143aadcab7725d4d78afe3b1ba0ffb05067418217872d79cc0a1d00af7440e7abd90
|
7
|
+
data.tar.gz: b001c794683c39d69ae6b29782988ee3c64ea17a9449d29fb7d86342b144854116116b1928946f33691eaaf0f3ed6d711f985fbe7e4a414c7579a3fd6baf38ec
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.4.4 - 2016-09-02
|
2
|
+
* [enhancement] Reduce memory usage by streaming processing [#42](https://github.com/treasure-data/embulk-input-mixpanel/pull/42)
|
3
|
+
|
1
4
|
## 0.4.3 - 2016-03-16
|
2
5
|
* [enhancement] Custom properties json [#40](https://github.com/treasure-data/embulk-input-mixpanel/pull/40)
|
3
6
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-mixpanel"
|
4
|
-
spec.version = "0.4.3"
|
4
|
+
spec.version = "0.4.4"
|
5
5
|
spec.authors = ["yoshihara", "uu59"]
|
6
6
|
spec.summary = "Mixpanel input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Mixpanel."
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.add_dependency 'httpclient'
|
17
17
|
spec.add_dependency 'tzinfo'
|
18
|
-
spec.add_dependency 'perfect_retry', ["~> 0.
|
18
|
+
spec.add_dependency 'perfect_retry', ["~> 0.5"]
|
19
19
|
spec.add_development_dependency 'bundler', ['~> 1.0']
|
20
20
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
21
21
|
spec.add_development_dependency 'embulk', ['>= 0.8.6', '< 1.0']
|
@@ -203,20 +203,22 @@ module Embulk
|
|
203
203
|
end
|
204
204
|
end
|
205
205
|
|
206
|
-
def fetch(dates)
|
206
|
+
def fetch(dates, &block)
|
207
207
|
from_date = dates.first
|
208
208
|
to_date = dates.last
|
209
209
|
params = @params.merge(
|
210
210
|
"from_date" => from_date,
|
211
211
|
"to_date" => to_date,
|
212
212
|
)
|
213
|
-
client = MixpanelApi::Client.new(@api_key, @api_secret)
|
213
|
+
client = MixpanelApi::Client.new(@api_key, @api_secret, @retryer)
|
214
214
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
client.export(params)
|
215
|
+
if preview?
|
216
|
+
client.export_for_small_dataset(params)
|
217
|
+
else
|
218
|
+
Enumerator.new do |y|
|
219
|
+
client.export(params) do |record|
|
220
|
+
y << record
|
221
|
+
end
|
220
222
|
end
|
221
223
|
end
|
222
224
|
end
|
@@ -14,6 +14,8 @@ module Embulk
|
|
14
14
|
PING_RETRY_WAIT = 2
|
15
15
|
SMALLSET_BYTE_RANGE = "0-#{5 * 1024 * 1024}"
|
16
16
|
|
17
|
+
attr_reader :retryer
|
18
|
+
|
17
19
|
def self.mixpanel_available?
|
18
20
|
retryer = PerfectRetry.new do |config|
|
19
21
|
config.limit = PING_RETRY_LIMIT
|
@@ -34,31 +36,42 @@ module Embulk
|
|
34
36
|
end
|
35
37
|
end
|
36
38
|
|
37
|
-
def initialize(api_key, api_secret)
|
39
|
+
def initialize(api_key, api_secret, retryer = nil)
|
38
40
|
@api_key = api_key
|
39
41
|
@api_secret = api_secret
|
42
|
+
@retryer = retryer || PerfectRetry.new do |config|
|
43
|
+
# for test
|
44
|
+
config.limit = 0
|
45
|
+
config.dont_rescues = [RuntimeError]
|
46
|
+
config.log_level = nil
|
47
|
+
config.logger = Embulk.logger
|
48
|
+
config.raise_original_error = true
|
49
|
+
end
|
40
50
|
end
|
41
51
|
|
42
|
-
def export(params = {})
|
43
|
-
|
44
|
-
|
52
|
+
def export(params = {}, &block)
|
53
|
+
retryer.with_retry do
|
54
|
+
request(params, &block)
|
55
|
+
end
|
45
56
|
end
|
46
57
|
|
47
|
-
def export_for_small_dataset(params = {}
|
48
|
-
|
49
|
-
|
50
|
-
|
58
|
+
def export_for_small_dataset(params = {})
|
59
|
+
try_to_dates = 5.times.map do |n|
|
60
|
+
# from_date + 1, from_date + 10, from_date + 100, ... so on
|
61
|
+
days = 1 * (10 ** n)
|
62
|
+
Date.parse(params["from_date"].to_s) + days
|
63
|
+
end
|
51
64
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
raise ConfigError.new "#{params["from_date"]} + #{days} days has no record. too old date?"
|
65
|
+
try_to_dates.each do |to_date|
|
66
|
+
params["to_date"] = to_date.strftime("%Y-%m-%d")
|
67
|
+
records = retryer.with_retry do
|
68
|
+
request_small_dataset(params, SMALLSET_BYTE_RANGE)
|
57
69
|
end
|
58
|
-
|
59
|
-
|
60
|
-
result
|
70
|
+
next if records.first.nil?
|
71
|
+
return records
|
61
72
|
end
|
73
|
+
|
74
|
+
raise ConfigError.new "#{params["from_date"]}..#{try_to_dates.last} has no record. too old date?"
|
62
75
|
end
|
63
76
|
|
64
77
|
private
|
@@ -72,34 +85,53 @@ module Embulk
|
|
72
85
|
end
|
73
86
|
end
|
74
87
|
|
75
|
-
def request(params,
|
88
|
+
def request(params, &block)
|
76
89
|
# https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel
|
77
|
-
params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
|
78
|
-
params[:sig] = signature(params)
|
79
90
|
Embulk.logger.debug "Export param: #{params.to_s}"
|
91
|
+
set_signatures(params)
|
80
92
|
|
81
|
-
|
82
|
-
response =
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
res
|
93
|
+
buf = ""
|
94
|
+
response = httpclient.get(ENDPOINT_EXPORT, params) do |chunk|
|
95
|
+
chunk.each_line do |line|
|
96
|
+
begin
|
97
|
+
record = JSON.parse(buf + line)
|
98
|
+
block.call record
|
99
|
+
buf = ""
|
100
|
+
rescue JSON::ParserError => e
|
101
|
+
buf << line
|
91
102
|
end
|
92
|
-
else
|
93
|
-
httpclient.get(ENDPOINT_EXPORT, params)
|
94
103
|
end
|
104
|
+
end
|
105
|
+
handle_error(response)
|
106
|
+
end
|
107
|
+
|
108
|
+
def request_small_dataset(params, range)
|
109
|
+
# guess/preview
|
110
|
+
# Try to fetch first `range` bytes
|
111
|
+
set_signatures(params)
|
112
|
+
res = httpclient.get(ENDPOINT_EXPORT, params, {"Range" => "bytes=#{range}"})
|
113
|
+
if res.code == 416
|
114
|
+
# cannot satisfied requested Range, get full body
|
115
|
+
res = httpclient.get(ENDPOINT_EXPORT, params)
|
116
|
+
end
|
117
|
+
handle_error(res)
|
118
|
+
response_to_enum(res.body)
|
119
|
+
end
|
120
|
+
|
121
|
+
def handle_error(response)
|
95
122
|
Embulk.logger.debug "response code: #{response.code}"
|
96
123
|
case response.code
|
97
124
|
when 400..499
|
98
|
-
raise ConfigError.new response.body
|
125
|
+
raise ConfigError.new("[#{response.code}] #{response.body}")
|
99
126
|
when 500..599
|
100
|
-
raise RuntimeError
|
127
|
+
raise RuntimeError.new("[#{response.code}] #{response.body}")
|
101
128
|
end
|
102
|
-
|
129
|
+
end
|
130
|
+
|
131
|
+
def set_signatures(params)
|
132
|
+
params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
|
133
|
+
params[:sig] = signature(params)
|
134
|
+
params
|
103
135
|
end
|
104
136
|
|
105
137
|
def signature(params)
|
@@ -121,6 +153,7 @@ module Embulk
|
|
121
153
|
client = HTTPClient.new
|
122
154
|
client.receive_timeout = TIMEOUT_SECONDS
|
123
155
|
client.default_header = {Accept: "application/json; charset=UTF-8"}
|
156
|
+
# client.debug_dev = STDERR
|
124
157
|
client
|
125
158
|
end
|
126
159
|
end
|
@@ -37,38 +37,17 @@ module Embulk
|
|
37
37
|
@httpclient = HTTPClient.new
|
38
38
|
end
|
39
39
|
|
40
|
-
def test_httpclient
|
41
|
-
stub_response(success_response)
|
42
|
-
mock(@client).httpclient { @httpclient }
|
43
|
-
|
44
|
-
@client.export(params)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_response_class
|
48
|
-
stub_client
|
49
|
-
stub_response(success_response)
|
50
|
-
|
51
|
-
actual = @client.export(params)
|
52
|
-
|
53
|
-
assert_equal(Enumerator, actual.class)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_http_request
|
57
|
-
stub_client
|
58
|
-
mock(@httpclient).get(Client::ENDPOINT_EXPORT, params) do
|
59
|
-
success_response
|
60
|
-
end
|
61
|
-
|
62
|
-
@client.export(params)
|
63
|
-
end
|
64
|
-
|
65
40
|
def test_success
|
66
41
|
stub_client
|
42
|
+
stub(@client).set_signatures(anything) {}
|
67
43
|
stub_response(success_response)
|
68
44
|
|
69
|
-
|
45
|
+
records = []
|
46
|
+
@client.export(params) do |record|
|
47
|
+
records << record
|
48
|
+
end
|
70
49
|
|
71
|
-
assert_equal(dummy_responses,
|
50
|
+
assert_equal(dummy_responses, records)
|
72
51
|
end
|
73
52
|
|
74
53
|
def test_failure_with_400
|
@@ -92,16 +71,17 @@ module Embulk
|
|
92
71
|
class ExportSmallDataset < self
|
93
72
|
def test_to_date_after_1_day
|
94
73
|
to = (Date.parse(params["from_date"]) + 1).to_s
|
95
|
-
mock(@client).
|
74
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to), Client::SMALLSET_BYTE_RANGE) { [:foo] }
|
96
75
|
|
97
76
|
@client.export_for_small_dataset(params)
|
98
77
|
end
|
99
78
|
|
100
79
|
def test_to_date_after_1_day_after_10_days_if_empty
|
80
|
+
stub_client
|
101
81
|
to1 = (Date.parse(params["from_date"]) + 1).to_s
|
102
82
|
to2 = (Date.parse(params["from_date"]) + 10).to_s
|
103
|
-
mock(@client).
|
104
|
-
mock(@client).
|
83
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to1), Client::SMALLSET_BYTE_RANGE) { [] }
|
84
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to2), Client::SMALLSET_BYTE_RANGE) { [:foo] }
|
105
85
|
|
106
86
|
@client.export_for_small_dataset(params)
|
107
87
|
end
|
@@ -122,9 +102,12 @@ module Embulk
|
|
122
102
|
end
|
123
103
|
|
124
104
|
def stub_response(response)
|
125
|
-
|
126
|
-
response
|
127
|
-
|
105
|
+
@httpclient.test_loopback_http_response << [
|
106
|
+
"HTTP/1.1 #{response.code}",
|
107
|
+
"Content-Type: application/json",
|
108
|
+
"",
|
109
|
+
response.body
|
110
|
+
].join("\r\n")
|
128
111
|
end
|
129
112
|
|
130
113
|
def success_response
|
@@ -510,17 +510,17 @@ module Embulk
|
|
510
510
|
|
511
511
|
class RunTest < self
|
512
512
|
def setup_client
|
513
|
-
|
514
513
|
any_instance_of(MixpanelApi::Client) do |klass|
|
515
|
-
stub(klass).
|
514
|
+
stub(klass).request_small_dataset { records_raw_response }
|
515
|
+
stub(klass).request { records }
|
516
516
|
end
|
517
517
|
end
|
518
518
|
|
519
519
|
def setup
|
520
520
|
super
|
521
|
-
|
522
521
|
@page_builder = Object.new
|
523
522
|
@plugin = Mixpanel.new(task, nil, nil, @page_builder)
|
523
|
+
stub(@plugin).fetch { records }
|
524
524
|
end
|
525
525
|
|
526
526
|
def test_preview
|
@@ -542,7 +542,7 @@ module Embulk
|
|
542
542
|
def test_timezone
|
543
543
|
stub(@plugin).preview? { false }
|
544
544
|
adjusted = record_epoch - timezone_offset_seconds
|
545
|
-
mock(@page_builder).add(["FOO", adjusted]).times(records.length * 2)
|
545
|
+
mock(@page_builder).add(["FOO", adjusted, "event"]).times(records.length * 2)
|
546
546
|
mock(@page_builder).finish
|
547
547
|
|
548
548
|
@plugin.run
|
@@ -600,13 +600,14 @@ module Embulk
|
|
600
600
|
|
601
601
|
class UnknownColumnsTest < self
|
602
602
|
def setup
|
603
|
-
super
|
604
603
|
@page_builder = Object.new
|
605
604
|
@plugin = Mixpanel.new(task, nil, nil, @page_builder)
|
605
|
+
stub(@plugin).fetch { records }
|
606
606
|
end
|
607
607
|
|
608
608
|
def test_run
|
609
|
-
Embulk.logger.warn
|
609
|
+
stub(Embulk.logger).warn
|
610
|
+
stub(Embulk.logger).info
|
610
611
|
stub(@plugin).preview? { false }
|
611
612
|
|
612
613
|
# NOTE: Expect records are contained same record
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-09-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
47
|
+
version: '0.5'
|
48
48
|
name: perfect_retry
|
49
49
|
prerelease: false
|
50
50
|
type: :runtime
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '0.
|
55
|
+
version: '0.5'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|