embulk-input-zendesk 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 873a74eb71d079359c4e2134e08bd0b9972477a6
4
- data.tar.gz: 6a8c70921ab79299a8e10d77dd9bcb034033384c
3
+ metadata.gz: e0911f65a242c33edcf81953f2546825701b2a4c
4
+ data.tar.gz: 34b91761ad01712a8ba0bb6a9c258d9555dd5a65
5
5
  SHA512:
6
- metadata.gz: baa602ab6ed0a7c33ee34fe7d200f9ea2680c1a41f5c2503ea94fc0e32f69b14571f2e45e7e85a548c06741daa63ccffa227953502add3e2503d76cbb9813f4d
7
- data.tar.gz: 6aae5b2d71285613c3e89bba77244385b2854afe60c4e12104f098954f7ad82ba30cd2604fa77f0e8483391fd996cc3094ccaaf89627495b00cf459a49b57a09
6
+ metadata.gz: 85e63afc0356899056627249dda852e1a9fe0af9de006335b86d89c01bc53ac97d93a8974f82c27ce69041ad22a40ca271e85d77853d7378cc425198657d6224
7
+ data.tar.gz: 01e8c2238dd8aa341f3560ed1c8079f0d68454e3a9964137483cedaf625bbec42a4c9bb6ae1cd3fefcdaa9fe4a68896b049150968efe8c4782ac06e943034d88
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.2.13 - 2019-01-14
2
+ * [enhancement] Add `dedup` option to avoid OOM when importing large datasets [#48](https://github.com/treasure-data/embulk-input-zendesk/pull/48)
3
+
1
4
  ## 0.2.12 - 2019-01-04
2
5
  * [enhancement] Fix performance issue [#47](https://github.com/treasure-data/embulk-input-zendesk/pull/47)
3
6
 
data/README.md CHANGED
@@ -31,7 +31,8 @@ Required Embulk version >= 0.8.1.
31
31
  - **start_time**: Start export from this time if present. (string, default: `null`)
32
32
  - **retry_limit**: Maximum number of retries (integer, default: 5)
33
33
  - **retry_initial_wait_sec**: Wait seconds for exponential backoff initial value (integer, default: 4)
34
- - **incremental**: If false, `start_time` in next.yml would not be updated that means you always fetch all of data from Zendesk with statically conditions. If true, `start_time` would be updated in next.yml. (bool, default: true)
34
+ - **incremental**: If false, `start_time` in next.yml is not updated, which means you always fetch all data from Zendesk with static conditions. If true, `start_time` is updated in next.yml. (bool, default: `true`)
35
+ - **dedup**: The Zendesk incremental API is not designed to protect against duplication. In order to de-dup records, the plugin has to cache fetched IDs in memory. If you're importing a large dataset (e.g. tens of millions of records), this can lead to an OOM error, depending on your configured heap size. In such cases, you can set this option to `false`, but keep in mind that the result may contain duplicated records. (bool, default: `true`)
35
36
  - **app_marketplace_integration_name**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the name of the integration.
36
37
  - **app_marketplace_org_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the Organization ID for your organization from the new developer portal.
37
38
  - **app_marketplace_app_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This is the “App ID” that will be assigned to you when you submit your app.
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.2.12"
4
+ spec.version = "0.2.13"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -82,23 +82,23 @@ module Embulk
82
82
  # they have both Incremental API and non-incremental API
83
83
  # 170717: `ticket_events` can use standard endpoint format now, ie. `<target>.json`
84
84
  %w(tickets ticket_events users organizations).each do |target|
85
- define_method(target) do |partial = true, start_time = 0, &block|
85
+ define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
86
86
  # Always use incremental_export. There is some difference between incremental_export and export.
87
- incremental_export("/api/v2/incremental/#{target}.json", target, start_time, Set.new, partial, &block)
87
+ incremental_export("/api/v2/incremental/#{target}.json", target, start_time, dedup, Set.new, partial, &block)
88
88
  end
89
89
  end
90
90
 
91
91
  # Ticket metrics will need to be export using both the non incremental and incremental on ticket
92
92
  # We provide support by filter out ticket_metrics with created at smaller than start time
93
93
  # while passing the incremental start time to the incremental ticket/ticket_metrics export
94
- define_method('ticket_metrics') do |partial = true, start_time = 0, &block|
94
+ define_method('ticket_metrics') do |partial = true, start_time = 0, dedup = true, &block|
95
95
  if partial
96
96
  # If partial export then we need to use the old end point. Since new end point return both ticket and
97
97
  # ticket metric with ticket come first so the current approach that cut off the response packet won't work
98
98
  # Since partial is only use for preview and guess so this should be fine
99
99
  export('/api/v2/ticket_metrics.json', 'ticket_metrics', &block)
100
100
  else
101
- incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, Set.new, partial, { include: 'metric_sets' }, &block)
101
+ incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, dedup, Set.new, partial, { include: 'metric_sets' }, &block)
102
102
  end
103
103
  end
104
104
 
@@ -175,7 +175,7 @@ module Embulk
175
175
  end
176
176
  end
177
177
 
178
- def incremental_export(path, key, start_time = 0, known_ids = Set.new, partial = true, query = {}, &block)
178
+ def incremental_export(path, key, start_time = 0, dedup = true, known_ids = Set.new, partial = true, query = {}, &block)
179
179
  if partial
180
180
  records = request_partial(path, query.merge(start_time: start_time)).first(5)
181
181
  records.uniq{|r| r["id"]}.each do |record|
@@ -184,6 +184,10 @@ module Embulk
184
184
  return
185
185
  end
186
186
 
187
+ if !dedup
188
+ Embulk.logger.warn("!!! You've selected to skip de-duplicating records, result may contain duplicated data !!!")
189
+ end
190
+
187
191
  execute_thread_pool do |pool|
188
192
  loop do
189
193
  start_fetching = Time.now
@@ -208,9 +212,11 @@ module Embulk
208
212
  # de-duplicated records.
209
213
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
210
214
  # https://github.com/zendesk/zendesk_api_client_rb/issues/251
211
- next if known_ids.include?(record["id"])
215
+ if dedup
216
+ next if known_ids.include?(record["id"])
217
+ known_ids << record["id"]
218
+ end
212
219
 
213
- known_ids << record["id"]
214
220
  pool.post { block.call record }
215
221
  actual_fetched += 1
216
222
  end
@@ -94,6 +94,7 @@ module Embulk
94
94
  retry_limit: config.param("retry_limit", :integer, default: 5),
95
95
  retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
96
96
  incremental: config.param("incremental", :bool, default: true),
97
+ dedup: config.param("dedup", :bool, default: true),
97
98
  schema: config.param(:columns, :array, default: []),
98
99
  includes: config.param(:includes, :array, default: []),
99
100
  app_marketplace_integration_name: config.param("app_marketplace_integration_name", :string, default: nil),
@@ -109,8 +110,11 @@ module Embulk
109
110
  def run
110
111
  method = task[:target]
111
112
  args = [preview?]
112
- if @start_time
113
- args << @start_time.to_i
113
+ args << (@start_time || 0).to_i
114
+
115
+ # de-dup may lead to OOM
116
+ if !task[:dedup].nil? && !task[:dedup]
117
+ args << false
114
118
  end
115
119
 
116
120
  mutex = Mutex.new
@@ -196,6 +196,28 @@ module Embulk
196
196
  assert_equal(2,counter.value)
197
197
  end
198
198
 
199
+ test "allows to fetch tickets metrics *with* duplicated" do
200
+ records = [
201
+ {"id" => 1, "ticket_id" => 100},
202
+ {"id" => 2, "ticket_id" => 200},
203
+ {"id" => 1, "ticket_id" => 100},
204
+ {"id" => 1, "ticket_id" => 100},
205
+ ]
206
+ @httpclient.test_loopback_http_response << [
207
+ "HTTP/1.1 200",
208
+ "Content-Type: application/json",
209
+ "",
210
+ {
211
+ metric_sets: records,
212
+ count: records.length,
213
+ }.to_json
214
+ ].join("\r\n")
215
+ counter = Concurrent::AtomicFixnum.new(0)
216
+ handler = proc {counter.increment}
217
+ client.ticket_metrics(false, 0, false, &handler)
218
+ assert_equal(4,counter.value)
219
+ end
220
+
199
221
  test "fetch ticket_metrics with next_page" do
200
222
  end_time = 1488535542
201
223
  response_1 = [
@@ -259,12 +281,12 @@ module Embulk
259
281
 
260
282
  sub_test_case "ticket_events" do
261
283
  test "invoke incremental_export when partial=true" do
262
- mock(client).incremental_export(anything, "ticket_events", anything, Set.new, true)
284
+ mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, true)
263
285
  client.ticket_events(true)
264
286
  end
265
287
 
266
288
  test "invoke incremental_export when partial=false" do
267
- mock(client).incremental_export(anything, "ticket_events", anything, Set.new, false)
289
+ mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, false)
268
290
  client.ticket_events(false)
269
291
  end
270
292
  end
@@ -340,7 +340,7 @@ module Embulk
340
340
 
341
341
  test "call tickets method instead of ticket_all" do
342
342
  mock(@client).export.never
343
- mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
343
+ mock(@client).incremental_export(anything, "tickets", anything, anything, anything, anything) { [] }
344
344
  mock(page_builder).finish
345
345
 
346
346
  @plugin.run
@@ -379,7 +379,7 @@ module Embulk
379
379
 
380
380
  test "call ticket_all method instead of tickets" do
381
381
  mock(@client).export.never
382
- mock(@client).incremental_export(anything, "tickets", 0, Set.new, false) { [] }
382
+ mock(@client).incremental_export(anything, "tickets", 0, true, Set.new, false) { [] }
383
383
  mock(page_builder).finish
384
384
 
385
385
  @plugin.run
@@ -544,7 +544,7 @@ module Embulk
544
544
  test "Nothing passed to client" do
545
545
  stub(page_builder).finish
546
546
 
547
- mock(@client).tickets(false)
547
+ mock(@client).tickets(false, 0)
548
548
  @plugin.run
549
549
  end
550
550
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2019-01-07 00:00:00.000000000 Z
13
+ date: 2019-01-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement