embulk-input-mixpanel 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 82bac0d9e5e93a4fc606b3d43f1e9cd35debf4b3
4
- data.tar.gz: 82e5c6323b3d9db15ff54589b572b642cb6d93e1
3
+ metadata.gz: f5cb5156064ae0192f6d1d2321a173cd12ef5fd3
4
+ data.tar.gz: 2a2f3a63e55035bb8dd96a8edf27eb45e546866d
5
5
  SHA512:
6
- metadata.gz: e606300ab082a98aa443432a0b4f4246c40a942ca7724bb68679d66a889fce60cb36eb95b7dcf59e7a35181bb3175efc785cb3710d7764b58d800209589a140d
7
- data.tar.gz: abf1f2528faf56e7c6e65d72d4f8e34a6beebda7f93de1eb16bd93130de2c4b647cf716053ec039a7ddd8b1fdbfa9c62864bb280334630b53af1f15db6137cd3
6
+ metadata.gz: 48d7e7af0b5ad28fc030e7c1baaa4c34fd236b60fb34df7e8cc1b764825c143aadcab7725d4d78afe3b1ba0ffb05067418217872d79cc0a1d00af7440e7abd90
7
+ data.tar.gz: b001c794683c39d69ae6b29782988ee3c64ea17a9449d29fb7d86342b144854116116b1928946f33691eaaf0f3ed6d711f985fbe7e4a414c7579a3fd6baf38ec
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.4.4 - 2016-09-02
2
+ * [enhancement] Reduce memory usage by streaming processing [#42](https://github.com/treasure-data/embulk-input-mixpanel/pull/42)
3
+
1
4
  ## 0.4.3 - 2016-03-16
2
5
  * [enhancement] Custom properties json [#40](https://github.com/treasure-data/embulk-input-mixpanel/pull/40)
3
6
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-mixpanel"
4
- spec.version = "0.4.3"
4
+ spec.version = "0.4.4"
5
5
  spec.authors = ["yoshihara", "uu59"]
6
6
  spec.summary = "Mixpanel input plugin for Embulk"
7
7
  spec.description = "Loads records from Mixpanel."
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
 
16
16
  spec.add_dependency 'httpclient'
17
17
  spec.add_dependency 'tzinfo'
18
- spec.add_dependency 'perfect_retry', ["~> 0.3"]
18
+ spec.add_dependency 'perfect_retry', ["~> 0.5"]
19
19
  spec.add_development_dependency 'bundler', ['~> 1.0']
20
20
  spec.add_development_dependency 'rake', ['>= 10.0']
21
21
  spec.add_development_dependency 'embulk', ['>= 0.8.6', '< 1.0']
@@ -203,20 +203,22 @@ module Embulk
203
203
  end
204
204
  end
205
205
 
206
- def fetch(dates)
206
+ def fetch(dates, &block)
207
207
  from_date = dates.first
208
208
  to_date = dates.last
209
209
  params = @params.merge(
210
210
  "from_date" => from_date,
211
211
  "to_date" => to_date,
212
212
  )
213
- client = MixpanelApi::Client.new(@api_key, @api_secret)
213
+ client = MixpanelApi::Client.new(@api_key, @api_secret, @retryer)
214
214
 
215
- @retryer.with_retry do
216
- if preview?
217
- client.export_for_small_dataset(params)
218
- else
219
- client.export(params)
215
+ if preview?
216
+ client.export_for_small_dataset(params)
217
+ else
218
+ Enumerator.new do |y|
219
+ client.export(params) do |record|
220
+ y << record
221
+ end
220
222
  end
221
223
  end
222
224
  end
@@ -14,6 +14,8 @@ module Embulk
14
14
  PING_RETRY_WAIT = 2
15
15
  SMALLSET_BYTE_RANGE = "0-#{5 * 1024 * 1024}"
16
16
 
17
+ attr_reader :retryer
18
+
17
19
  def self.mixpanel_available?
18
20
  retryer = PerfectRetry.new do |config|
19
21
  config.limit = PING_RETRY_LIMIT
@@ -34,31 +36,42 @@ module Embulk
34
36
  end
35
37
  end
36
38
 
37
- def initialize(api_key, api_secret)
39
+ def initialize(api_key, api_secret, retryer = nil)
38
40
  @api_key = api_key
39
41
  @api_secret = api_secret
42
+ @retryer = retryer || PerfectRetry.new do |config|
43
+ # for test
44
+ config.limit = 0
45
+ config.dont_rescues = [RuntimeError]
46
+ config.log_level = nil
47
+ config.logger = Embulk.logger
48
+ config.raise_original_error = true
49
+ end
40
50
  end
41
51
 
42
- def export(params = {})
43
- body = request(params)
44
- response_to_enum(body)
52
+ def export(params = {}, &block)
53
+ retryer.with_retry do
54
+ request(params, &block)
55
+ end
45
56
  end
46
57
 
47
- def export_for_small_dataset(params = {}, times = 0)
48
- days = (1 * (10 ** times))
49
- to_date = Date.parse(params["from_date"].to_s) + days
50
- params["to_date"] = to_date.strftime("%Y-%m-%d")
58
+ def export_for_small_dataset(params = {})
59
+ try_to_dates = 5.times.map do |n|
60
+ # from_date + 1, from_date + 10, from_date + 100, ... so on
61
+ days = 1 * (10 ** n)
62
+ Date.parse(params["from_date"].to_s) + days
63
+ end
51
64
 
52
- body = request(params, SMALLSET_BYTE_RANGE)
53
- result = response_to_enum(body)
54
- if result.first.nil?
55
- if times >= 5
56
- raise ConfigError.new "#{params["from_date"]} + #{days} days has no record. too old date?"
65
+ try_to_dates.each do |to_date|
66
+ params["to_date"] = to_date.strftime("%Y-%m-%d")
67
+ records = retryer.with_retry do
68
+ request_small_dataset(params, SMALLSET_BYTE_RANGE)
57
69
  end
58
- export_for_small_dataset(params, times + 1)
59
- else
60
- result
70
+ next if records.first.nil?
71
+ return records
61
72
  end
73
+
74
+ raise ConfigError.new "#{params["from_date"]}..#{try_to_dates.last} has no record. too old date?"
62
75
  end
63
76
 
64
77
  private
@@ -72,34 +85,53 @@ module Embulk
72
85
  end
73
86
  end
74
87
 
75
- def request(params, range = nil)
88
+ def request(params, &block)
76
89
  # https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel
77
- params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
78
- params[:sig] = signature(params)
79
90
  Embulk.logger.debug "Export param: #{params.to_s}"
91
+ set_signatures(params)
80
92
 
81
- headers = {}
82
- response =
83
- if range
84
- # guess/preview
85
- res = httpclient.get(ENDPOINT_EXPORT, params, {"Range" => "bytes=#{range}"})
86
- if res.code == 416
87
- # cannot satisfied requested Range, get full body
88
- httpclient.get(ENDPOINT_EXPORT, params)
89
- else
90
- res
93
+ buf = ""
94
+ response = httpclient.get(ENDPOINT_EXPORT, params) do |chunk|
95
+ chunk.each_line do |line|
96
+ begin
97
+ record = JSON.parse(buf + line)
98
+ block.call record
99
+ buf = ""
100
+ rescue JSON::ParserError => e
101
+ buf << line
91
102
  end
92
- else
93
- httpclient.get(ENDPOINT_EXPORT, params)
94
103
  end
104
+ end
105
+ handle_error(response)
106
+ end
107
+
108
+ def request_small_dataset(params, range)
109
+ # guess/preview
110
+ # Try to fetch first `range` bytes
111
+ set_signatures(params)
112
+ res = httpclient.get(ENDPOINT_EXPORT, params, {"Range" => "bytes=#{range}"})
113
+ if res.code == 416
114
+ # cannot satisfied requested Range, get full body
115
+ res = httpclient.get(ENDPOINT_EXPORT, params)
116
+ end
117
+ handle_error(res)
118
+ response_to_enum(res.body)
119
+ end
120
+
121
+ def handle_error(response)
95
122
  Embulk.logger.debug "response code: #{response.code}"
96
123
  case response.code
97
124
  when 400..499
98
- raise ConfigError.new response.body
125
+ raise ConfigError.new("[#{response.code}] #{response.body}")
99
126
  when 500..599
100
- raise RuntimeError, response.body
127
+ raise RuntimeError.new("[#{response.code}] #{response.body}")
101
128
  end
102
- response.body
129
+ end
130
+
131
+ def set_signatures(params)
132
+ params[:expire] ||= Time.now.to_i + TIMEOUT_SECONDS
133
+ params[:sig] = signature(params)
134
+ params
103
135
  end
104
136
 
105
137
  def signature(params)
@@ -121,6 +153,7 @@ module Embulk
121
153
  client = HTTPClient.new
122
154
  client.receive_timeout = TIMEOUT_SECONDS
123
155
  client.default_header = {Accept: "application/json; charset=UTF-8"}
156
+ # client.debug_dev = STDERR
124
157
  client
125
158
  end
126
159
  end
@@ -37,38 +37,17 @@ module Embulk
37
37
  @httpclient = HTTPClient.new
38
38
  end
39
39
 
40
- def test_httpclient
41
- stub_response(success_response)
42
- mock(@client).httpclient { @httpclient }
43
-
44
- @client.export(params)
45
- end
46
-
47
- def test_response_class
48
- stub_client
49
- stub_response(success_response)
50
-
51
- actual = @client.export(params)
52
-
53
- assert_equal(Enumerator, actual.class)
54
- end
55
-
56
- def test_http_request
57
- stub_client
58
- mock(@httpclient).get(Client::ENDPOINT_EXPORT, params) do
59
- success_response
60
- end
61
-
62
- @client.export(params)
63
- end
64
-
65
40
  def test_success
66
41
  stub_client
42
+ stub(@client).set_signatures(anything) {}
67
43
  stub_response(success_response)
68
44
 
69
- actual = @client.export(params)
45
+ records = []
46
+ @client.export(params) do |record|
47
+ records << record
48
+ end
70
49
 
71
- assert_equal(dummy_responses, actual.to_a)
50
+ assert_equal(dummy_responses, records)
72
51
  end
73
52
 
74
53
  def test_failure_with_400
@@ -92,16 +71,17 @@ module Embulk
92
71
  class ExportSmallDataset < self
93
72
  def test_to_date_after_1_day
94
73
  to = (Date.parse(params["from_date"]) + 1).to_s
95
- mock(@client).request(params.merge("to_date" => to), Client::SMALLSET_BYTE_RANGE) { jsonl_dummy_responses }
74
+ mock(@client).request_small_dataset(params.merge("to_date" => to), Client::SMALLSET_BYTE_RANGE) { [:foo] }
96
75
 
97
76
  @client.export_for_small_dataset(params)
98
77
  end
99
78
 
100
79
  def test_to_date_after_1_day_after_10_days_if_empty
80
+ stub_client
101
81
  to1 = (Date.parse(params["from_date"]) + 1).to_s
102
82
  to2 = (Date.parse(params["from_date"]) + 10).to_s
103
- mock(@client).request(params.merge("to_date" => to1), Client::SMALLSET_BYTE_RANGE) { "" }
104
- mock(@client).request(params.merge("to_date" => to2), Client::SMALLSET_BYTE_RANGE) { jsonl_dummy_responses }
83
+ mock(@client).request_small_dataset(params.merge("to_date" => to1), Client::SMALLSET_BYTE_RANGE) { [] }
84
+ mock(@client).request_small_dataset(params.merge("to_date" => to2), Client::SMALLSET_BYTE_RANGE) { [:foo] }
105
85
 
106
86
  @client.export_for_small_dataset(params)
107
87
  end
@@ -122,9 +102,12 @@ module Embulk
122
102
  end
123
103
 
124
104
  def stub_response(response)
125
- stub(@httpclient).get(Client::ENDPOINT_EXPORT, params) do
126
- response
127
- end
105
+ @httpclient.test_loopback_http_response << [
106
+ "HTTP/1.1 #{response.code}",
107
+ "Content-Type: application/json",
108
+ "",
109
+ response.body
110
+ ].join("\r\n")
128
111
  end
129
112
 
130
113
  def success_response
@@ -510,17 +510,17 @@ module Embulk
510
510
 
511
511
  class RunTest < self
512
512
  def setup_client
513
-
514
513
  any_instance_of(MixpanelApi::Client) do |klass|
515
- stub(klass).request { records_raw_response }
514
+ stub(klass).request_small_dataset { records_raw_response }
515
+ stub(klass).request { records }
516
516
  end
517
517
  end
518
518
 
519
519
  def setup
520
520
  super
521
-
522
521
  @page_builder = Object.new
523
522
  @plugin = Mixpanel.new(task, nil, nil, @page_builder)
523
+ stub(@plugin).fetch { records }
524
524
  end
525
525
 
526
526
  def test_preview
@@ -542,7 +542,7 @@ module Embulk
542
542
  def test_timezone
543
543
  stub(@plugin).preview? { false }
544
544
  adjusted = record_epoch - timezone_offset_seconds
545
- mock(@page_builder).add(["FOO", adjusted]).times(records.length * 2)
545
+ mock(@page_builder).add(["FOO", adjusted, "event"]).times(records.length * 2)
546
546
  mock(@page_builder).finish
547
547
 
548
548
  @plugin.run
@@ -600,13 +600,14 @@ module Embulk
600
600
 
601
601
  class UnknownColumnsTest < self
602
602
  def setup
603
- super
604
603
  @page_builder = Object.new
605
604
  @plugin = Mixpanel.new(task, nil, nil, @page_builder)
605
+ stub(@plugin).fetch { records }
606
606
  end
607
607
 
608
608
  def test_run
609
- Embulk.logger.warn(anything)
609
+ stub(Embulk.logger).warn
610
+ stub(Embulk.logger).info
610
611
  stub(@plugin).preview? { false }
611
612
 
612
613
  # NOTE: Expect records are contained same record
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mixpanel
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshihara
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-03-16 00:00:00.000000000 Z
12
+ date: 2016-09-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -44,7 +44,7 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0.3'
47
+ version: '0.5'
48
48
  name: perfect_retry
49
49
  prerelease: false
50
50
  type: :runtime
@@ -52,7 +52,7 @@ dependencies:
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: '0.3'
55
+ version: '0.5'
56
56
  - !ruby/object:Gem::Dependency
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements: