embulk-input-mixpanel 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5cb5156064ae0192f6d1d2321a173cd12ef5fd3
|
4
|
+
data.tar.gz: 2a2f3a63e55035bb8dd96a8edf27eb45e546866d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48d7e7af0b5ad28fc030e7c1baaa4c34fd236b60fb34df7e8cc1b764825c143aadcab7725d4d78afe3b1ba0ffb05067418217872d79cc0a1d00af7440e7abd90
|
7
|
+
data.tar.gz: b001c794683c39d69ae6b29782988ee3c64ea17a9449d29fb7d86342b144854116116b1928946f33691eaaf0f3ed6d711f985fbe7e4a414c7579a3fd6baf38ec
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.4.4 - 2016-09-02
|
2
|
+
* [enhancement] Reduce memory usage by streaming processing [#42](https://github.com/treasure-data/embulk-input-mixpanel/pull/42)
|
3
|
+
|
1
4
|
## 0.4.3 - 2016-03-16
|
2
5
|
* [enhancement] Custom properties json [#40](https://github.com/treasure-data/embulk-input-mixpanel/pull/40)
|
3
6
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-mixpanel"
|
4
|
-
spec.version = "0.4.3"
|
4
|
+
spec.version = "0.4.4"
|
5
5
|
spec.authors = ["yoshihara", "uu59"]
|
6
6
|
spec.summary = "Mixpanel input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Mixpanel."
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.add_dependency 'httpclient'
|
17
17
|
spec.add_dependency 'tzinfo'
|
18
|
-
spec.add_dependency 'perfect_retry', ["~> 0.
|
18
|
+
spec.add_dependency 'perfect_retry', ["~> 0.5"]
|
19
19
|
spec.add_development_dependency 'bundler', ['~> 1.0']
|
20
20
|
spec.add_development_dependency 'rake', ['>= 10.0']
|
21
21
|
spec.add_development_dependency 'embulk', ['>= 0.8.6', '< 1.0']
|
@@ -203,20 +203,22 @@ module Embulk
|
|
203
203
|
end
|
204
204
|
end
|
205
205
|
|
206
|
-
def fetch(dates)
|
206
|
+
def fetch(dates, &block)
|
207
207
|
from_date = dates.first
|
208
208
|
to_date = dates.last
|
209
209
|
params = @params.merge(
|
210
210
|
"from_date" => from_date,
|
211
211
|
"to_date" => to_date,
|
212
212
|
)
|
213
|
-
client = MixpanelApi::Client.new(@api_key, @api_secret)
|
213
|
+
client = MixpanelApi::Client.new(@api_key, @api_secret, @retryer)
|
214
214
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
client.export(params)
|
215
|
+
if preview?
|
216
|
+
client.export_for_small_dataset(params)
|
217
|
+
else
|
218
|
+
Enumerator.new do |y|
|
219
|
+
client.export(params) do |record|
|
220
|
+
y << record
|
221
|
+
end
|
220
222
|
end
|
221
223
|
end
|
222
224
|
end
|
@@ -14,6 +14,8 @@ module Embulk
|
|
14
14
|
PING_RETRY_WAIT = 2
|
15
15
|
SMALLSET_BYTE_RANGE = "0-#{5 * 1024 * 1024}"
|
16
16
|
|
17
|
+
attr_reader :retryer
|
18
|
+
|
17
19
|
def self.mixpanel_available?
|
18
20
|
retryer = PerfectRetry.new do |config|
|
19
21
|
config.limit = PING_RETRY_LIMIT
|
@@ -34,31 +36,42 @@ module Embulk
|
|
34
36
|
end
|
35
37
|
end
|
36
38
|
|
37
|
-
def initialize(api_key, api_secret)
|
39
|
+
def initialize(api_key, api_secret, retryer = nil)
|
38
40
|
@api_key = api_key
|
39
41
|
@api_secret = api_secret
|
42
|
+
@retryer = retryer || PerfectRetry.new do |config|
|
43
|
+
# for test
|
44
|
+
config.limit = 0
|
45
|
+
config.dont_rescues = [RuntimeError]
|
46
|
+
config.log_level = nil
|
47
|
+
config.logger = Embulk.logger
|
48
|
+
config.raise_original_error = true
|
49
|
+
end
|
40
50
|
end
|
41
51
|
|
42
|
-
def export(params = {})
|
43
|
-
|
44
|
-
|
52
|
+
def export(params = {}, &block)
|
53
|
+
retryer.with_retry do
|
54
|
+
request(params, &block)
|
55
|
+
end
|
45
56
|
end
|
46
57
|
|
47
|
-
def export_for_small_dataset(params = {}
|
48
|
-
|
49
|
-
|
50
|
-
|
58
|
+
def export_for_small_dataset(params = {})
|
59
|
+
try_to_dates = 5.times.map do |n|
|
60
|
+
# from_date + 1, from_date + 10, from_date + 100, ... so on
|
61
|
+
days = 1 * (10 ** n)
|
62
|
+
Date.parse(params["from_date"].to_s) + days
|
63
|
+
end
|
51
64
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
raise ConfigError.new "#{params["from_date"]} + #{days} days has no record. too old date?"
|
65
|
+
try_to_dates.each do |to_date|
|
66
|
+
params["to_date"] = to_date.strftime("%Y-%m-%d")
|
67
|
+
records = retryer.with_retry do
|
68
|
+
request_small_dataset(params, SMALLSET_BYTE_RANGE)
|
57
69
|
end
|
58
|
-
|
59
|
-
|
60
|
-
result
|
70
|
+
next if records.first.nil?
|
71
|
+
return records
|
61
72
|
end
|
73
|
+
|
74
|
+
raise ConfigError.new "#{params["from_date"]}..#{try_to_dates.last} has no record. too old date?"
|
62
75
|
end
|
63
76
|
|
64
77
|
private
|
@@ -72,34 +85,53 @@ module Embulk
|
|
72
85
|
end
|
73
86
|
end
|
74
87
|
|
75
|
-
def request(params,
|
88
|
+
def request(params, &block)
|
76
89
|
# https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel
|
77
|
-
params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
|
78
|
-
params[:sig] = signature(params)
|
79
90
|
Embulk.logger.debug "Export param: #{params.to_s}"
|
91
|
+
set_signatures(params)
|
80
92
|
|
81
|
-
|
82
|
-
response =
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
res
|
93
|
+
buf = ""
|
94
|
+
response = httpclient.get(ENDPOINT_EXPORT, params) do |chunk|
|
95
|
+
chunk.each_line do |line|
|
96
|
+
begin
|
97
|
+
record = JSON.parse(buf + line)
|
98
|
+
block.call record
|
99
|
+
buf = ""
|
100
|
+
rescue JSON::ParserError => e
|
101
|
+
buf << line
|
91
102
|
end
|
92
|
-
else
|
93
|
-
httpclient.get(ENDPOINT_EXPORT, params)
|
94
103
|
end
|
104
|
+
end
|
105
|
+
handle_error(response)
|
106
|
+
end
|
107
|
+
|
108
|
+
def request_small_dataset(params, range)
|
109
|
+
# guess/preview
|
110
|
+
# Try to fetch first `range` bytes
|
111
|
+
set_signatures(params)
|
112
|
+
res = httpclient.get(ENDPOINT_EXPORT, params, {"Range" => "bytes=#{range}"})
|
113
|
+
if res.code == 416
|
114
|
+
# cannot satisfied requested Range, get full body
|
115
|
+
res = httpclient.get(ENDPOINT_EXPORT, params)
|
116
|
+
end
|
117
|
+
handle_error(res)
|
118
|
+
response_to_enum(res.body)
|
119
|
+
end
|
120
|
+
|
121
|
+
def handle_error(response)
|
95
122
|
Embulk.logger.debug "response code: #{response.code}"
|
96
123
|
case response.code
|
97
124
|
when 400..499
|
98
|
-
raise ConfigError.new response.body
|
125
|
+
raise ConfigError.new("[#{response.code}] #{response.body}")
|
99
126
|
when 500..599
|
100
|
-
raise RuntimeError
|
127
|
+
raise RuntimeError.new("[#{response.code}] #{response.body}")
|
101
128
|
end
|
102
|
-
|
129
|
+
end
|
130
|
+
|
131
|
+
def set_signatures(params)
|
132
|
+
params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
|
133
|
+
params[:sig] = signature(params)
|
134
|
+
params
|
103
135
|
end
|
104
136
|
|
105
137
|
def signature(params)
|
@@ -121,6 +153,7 @@ module Embulk
|
|
121
153
|
client = HTTPClient.new
|
122
154
|
client.receive_timeout = TIMEOUT_SECONDS
|
123
155
|
client.default_header = {Accept: "application/json; charset=UTF-8"}
|
156
|
+
# client.debug_dev = STDERR
|
124
157
|
client
|
125
158
|
end
|
126
159
|
end
|
@@ -37,38 +37,17 @@ module Embulk
|
|
37
37
|
@httpclient = HTTPClient.new
|
38
38
|
end
|
39
39
|
|
40
|
-
def test_httpclient
|
41
|
-
stub_response(success_response)
|
42
|
-
mock(@client).httpclient { @httpclient }
|
43
|
-
|
44
|
-
@client.export(params)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_response_class
|
48
|
-
stub_client
|
49
|
-
stub_response(success_response)
|
50
|
-
|
51
|
-
actual = @client.export(params)
|
52
|
-
|
53
|
-
assert_equal(Enumerator, actual.class)
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_http_request
|
57
|
-
stub_client
|
58
|
-
mock(@httpclient).get(Client::ENDPOINT_EXPORT, params) do
|
59
|
-
success_response
|
60
|
-
end
|
61
|
-
|
62
|
-
@client.export(params)
|
63
|
-
end
|
64
|
-
|
65
40
|
def test_success
|
66
41
|
stub_client
|
42
|
+
stub(@client).set_signatures(anything) {}
|
67
43
|
stub_response(success_response)
|
68
44
|
|
69
|
-
|
45
|
+
records = []
|
46
|
+
@client.export(params) do |record|
|
47
|
+
records << record
|
48
|
+
end
|
70
49
|
|
71
|
-
assert_equal(dummy_responses,
|
50
|
+
assert_equal(dummy_responses, records)
|
72
51
|
end
|
73
52
|
|
74
53
|
def test_failure_with_400
|
@@ -92,16 +71,17 @@ module Embulk
|
|
92
71
|
class ExportSmallDataset < self
|
93
72
|
def test_to_date_after_1_day
|
94
73
|
to = (Date.parse(params["from_date"]) + 1).to_s
|
95
|
-
mock(@client).
|
74
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to), Client::SMALLSET_BYTE_RANGE) { [:foo] }
|
96
75
|
|
97
76
|
@client.export_for_small_dataset(params)
|
98
77
|
end
|
99
78
|
|
100
79
|
def test_to_date_after_1_day_after_10_days_if_empty
|
80
|
+
stub_client
|
101
81
|
to1 = (Date.parse(params["from_date"]) + 1).to_s
|
102
82
|
to2 = (Date.parse(params["from_date"]) + 10).to_s
|
103
|
-
mock(@client).
|
104
|
-
mock(@client).
|
83
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to1), Client::SMALLSET_BYTE_RANGE) { [] }
|
84
|
+
mock(@client).request_small_dataset(params.merge("to_date" => to2), Client::SMALLSET_BYTE_RANGE) { [:foo] }
|
105
85
|
|
106
86
|
@client.export_for_small_dataset(params)
|
107
87
|
end
|
@@ -122,9 +102,12 @@ module Embulk
|
|
122
102
|
end
|
123
103
|
|
124
104
|
def stub_response(response)
|
125
|
-
|
126
|
-
response
|
127
|
-
|
105
|
+
@httpclient.test_loopback_http_response << [
|
106
|
+
"HTTP/1.1 #{response.code}",
|
107
|
+
"Content-Type: application/json",
|
108
|
+
"",
|
109
|
+
response.body
|
110
|
+
].join("\r\n")
|
128
111
|
end
|
129
112
|
|
130
113
|
def success_response
|
@@ -510,17 +510,17 @@ module Embulk
|
|
510
510
|
|
511
511
|
class RunTest < self
|
512
512
|
def setup_client
|
513
|
-
|
514
513
|
any_instance_of(MixpanelApi::Client) do |klass|
|
515
|
-
stub(klass).
|
514
|
+
stub(klass).request_small_dataset { records_raw_response }
|
515
|
+
stub(klass).request { records }
|
516
516
|
end
|
517
517
|
end
|
518
518
|
|
519
519
|
def setup
|
520
520
|
super
|
521
|
-
|
522
521
|
@page_builder = Object.new
|
523
522
|
@plugin = Mixpanel.new(task, nil, nil, @page_builder)
|
523
|
+
stub(@plugin).fetch { records }
|
524
524
|
end
|
525
525
|
|
526
526
|
def test_preview
|
@@ -542,7 +542,7 @@ module Embulk
|
|
542
542
|
def test_timezone
|
543
543
|
stub(@plugin).preview? { false }
|
544
544
|
adjusted = record_epoch - timezone_offset_seconds
|
545
|
-
mock(@page_builder).add(["FOO", adjusted]).times(records.length * 2)
|
545
|
+
mock(@page_builder).add(["FOO", adjusted, "event"]).times(records.length * 2)
|
546
546
|
mock(@page_builder).finish
|
547
547
|
|
548
548
|
@plugin.run
|
@@ -600,13 +600,14 @@ module Embulk
|
|
600
600
|
|
601
601
|
class UnknownColumnsTest < self
|
602
602
|
def setup
|
603
|
-
super
|
604
603
|
@page_builder = Object.new
|
605
604
|
@plugin = Mixpanel.new(task, nil, nil, @page_builder)
|
605
|
+
stub(@plugin).fetch { records }
|
606
606
|
end
|
607
607
|
|
608
608
|
def test_run
|
609
|
-
Embulk.logger.warn
|
609
|
+
stub(Embulk.logger).warn
|
610
|
+
stub(Embulk.logger).info
|
610
611
|
stub(@plugin).preview? { false }
|
611
612
|
|
612
613
|
# NOTE: Expect records are contained same record
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mixpanel
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshihara
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-09-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
47
|
+
version: '0.5'
|
48
48
|
name: perfect_retry
|
49
49
|
prerelease: false
|
50
50
|
type: :runtime
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '0.
|
55
|
+
version: '0.5'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|