embulk-input-zendesk 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 873a74eb71d079359c4e2134e08bd0b9972477a6
4
- data.tar.gz: 6a8c70921ab79299a8e10d77dd9bcb034033384c
3
+ metadata.gz: e0911f65a242c33edcf81953f2546825701b2a4c
4
+ data.tar.gz: 34b91761ad01712a8ba0bb6a9c258d9555dd5a65
5
5
  SHA512:
6
- metadata.gz: baa602ab6ed0a7c33ee34fe7d200f9ea2680c1a41f5c2503ea94fc0e32f69b14571f2e45e7e85a548c06741daa63ccffa227953502add3e2503d76cbb9813f4d
7
- data.tar.gz: 6aae5b2d71285613c3e89bba77244385b2854afe60c4e12104f098954f7ad82ba30cd2604fa77f0e8483391fd996cc3094ccaaf89627495b00cf459a49b57a09
6
+ metadata.gz: 85e63afc0356899056627249dda852e1a9fe0af9de006335b86d89c01bc53ac97d93a8974f82c27ce69041ad22a40ca271e85d77853d7378cc425198657d6224
7
+ data.tar.gz: 01e8c2238dd8aa341f3560ed1c8079f0d68454e3a9964137483cedaf625bbec42a4c9bb6ae1cd3fefcdaa9fe4a68896b049150968efe8c4782ac06e943034d88
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.2.13 - 2019-01-14
2
+ * [enhancement] Add `dedup` option to avoid OOM when importing large datasets [#48](https://github.com/treasure-data/embulk-input-zendesk/pull/48)
3
+
1
4
  ## 0.2.12 - 2019-01-04
2
5
  * [enhancement] Fix performance issue [#47](https://github.com/treasure-data/embulk-input-zendesk/pull/47)
3
6
 
data/README.md CHANGED
@@ -31,7 +31,8 @@ Required Embulk version >= 0.8.1.
31
31
  - **start_time**: Start export from this time if present. (string, default: `null`)
32
32
  - **retry_limit**: Retry up to this many times (integer, default: 5)
33
33
  - **retry_initial_wait_sec**: Wait seconds for exponential backoff initial value (integer, default: 4)
34
- - **incremental**: If false, `start_time` in next.yml would not be updated that means you always fetch all of data from Zendesk with statically conditions. If true, `start_time` would be updated in next.yml. (bool, default: true)
34
+ - **incremental**: If false, `start_time` in next.yml would not be updated that means you always fetch all of data from Zendesk with statically conditions. If true, `start_time` would be updated in next.yml. (bool, default: `true`)
35
+ - **dedup**: The Zendesk incremental API is not designed to protect against duplication. In order to de-dup records, the plugin has to cache fetched IDs in memory. If you're importing a large dataset (e.g. tens of millions of records), this can lead to an OOM error, depending on your configured heap size. In such cases, you can set this option to `false`, but keep in mind that the result may contain duplicated records. (bool, default: `true`)
35
36
  - **app_marketplace_integration_name**: Invisible to user, only required to be a part of the Zendesk Apps Marketplace. This should be the name of the integration.
36
37
  - **app_marketplace_org_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the Organization ID for your organization from the new developer portal.
37
38
  - **app_marketplace_app_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This is the “App ID” that will be assigned to you when you submit your app.
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.2.12"
4
+ spec.version = "0.2.13"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -82,23 +82,23 @@ module Embulk
82
82
  # they have both Incremental API and non-incremental API
83
83
  # 170717: `ticket_events` can use standard endpoint format now, ie. `<target>.json`
84
84
  %w(tickets ticket_events users organizations).each do |target|
85
- define_method(target) do |partial = true, start_time = 0, &block|
85
+ define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
86
86
  # Always use incremental_export. There is some difference between incremental_export and export.
87
- incremental_export("/api/v2/incremental/#{target}.json", target, start_time, Set.new, partial, &block)
87
+ incremental_export("/api/v2/incremental/#{target}.json", target, start_time, dedup, Set.new, partial, &block)
88
88
  end
89
89
  end
90
90
 
91
91
  # Ticket metrics will need to be export using both the non incremental and incremental on ticket
92
92
  # We provide support by filter out ticket_metrics with created at smaller than start time
93
93
  # while passing the incremental start time to the incremental ticket/ticket_metrics export
94
- define_method('ticket_metrics') do |partial = true, start_time = 0, &block|
94
+ define_method('ticket_metrics') do |partial = true, start_time = 0, dedup = true, &block|
95
95
  if partial
96
96
  # If partial export then we need to use the old end point. Since new end point return both ticket and
97
97
  # ticket metric with ticket come first so the current approach that cut off the response packet won't work
98
98
  # Since partial is only use for preview and guess so this should be fine
99
99
  export('/api/v2/ticket_metrics.json', 'ticket_metrics', &block)
100
100
  else
101
- incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, Set.new, partial, { include: 'metric_sets' }, &block)
101
+ incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, dedup, Set.new, partial, { include: 'metric_sets' }, &block)
102
102
  end
103
103
  end
104
104
 
@@ -175,7 +175,7 @@ module Embulk
175
175
  end
176
176
  end
177
177
 
178
- def incremental_export(path, key, start_time = 0, known_ids = Set.new, partial = true, query = {}, &block)
178
+ def incremental_export(path, key, start_time = 0, dedup = true, known_ids = Set.new, partial = true, query = {}, &block)
179
179
  if partial
180
180
  records = request_partial(path, query.merge(start_time: start_time)).first(5)
181
181
  records.uniq{|r| r["id"]}.each do |record|
@@ -184,6 +184,10 @@ module Embulk
184
184
  return
185
185
  end
186
186
 
187
+ if !dedup
188
+ Embulk.logger.warn("!!! You've selected to skip de-duplicating records, result may contain duplicated data !!!")
189
+ end
190
+
187
191
  execute_thread_pool do |pool|
188
192
  loop do
189
193
  start_fetching = Time.now
@@ -208,9 +212,11 @@ module Embulk
208
212
  # de-duplicated records.
209
213
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
210
214
  # https://github.com/zendesk/zendesk_api_client_rb/issues/251
211
- next if known_ids.include?(record["id"])
215
+ if dedup
216
+ next if known_ids.include?(record["id"])
217
+ known_ids << record["id"]
218
+ end
212
219
 
213
- known_ids << record["id"]
214
220
  pool.post { block.call record }
215
221
  actual_fetched += 1
216
222
  end
@@ -94,6 +94,7 @@ module Embulk
94
94
  retry_limit: config.param("retry_limit", :integer, default: 5),
95
95
  retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
96
96
  incremental: config.param("incremental", :bool, default: true),
97
+ dedup: config.param("dedup", :bool, default: true),
97
98
  schema: config.param(:columns, :array, default: []),
98
99
  includes: config.param(:includes, :array, default: []),
99
100
  app_marketplace_integration_name: config.param("app_marketplace_integration_name", :string, default: nil),
@@ -109,8 +110,11 @@ module Embulk
109
110
  def run
110
111
  method = task[:target]
111
112
  args = [preview?]
112
- if @start_time
113
- args << @start_time.to_i
113
+ args << (@start_time || 0).to_i
114
+
115
+ # de-dup may lead to OOM
116
+ if !task[:dedup].nil? && !task[:dedup]
117
+ args << false
114
118
  end
115
119
 
116
120
  mutex = Mutex.new
@@ -196,6 +196,28 @@ module Embulk
196
196
  assert_equal(2,counter.value)
197
197
  end
198
198
 
199
+ test "allows to fetch tickets metrics *with* duplicated" do
200
+ records = [
201
+ {"id" => 1, "ticket_id" => 100},
202
+ {"id" => 2, "ticket_id" => 200},
203
+ {"id" => 1, "ticket_id" => 100},
204
+ {"id" => 1, "ticket_id" => 100},
205
+ ]
206
+ @httpclient.test_loopback_http_response << [
207
+ "HTTP/1.1 200",
208
+ "Content-Type: application/json",
209
+ "",
210
+ {
211
+ metric_sets: records,
212
+ count: records.length,
213
+ }.to_json
214
+ ].join("\r\n")
215
+ counter = Concurrent::AtomicFixnum.new(0)
216
+ handler = proc {counter.increment}
217
+ client.ticket_metrics(false, 0, false, &handler)
218
+ assert_equal(4,counter.value)
219
+ end
220
+
199
221
  test "fetch ticket_metrics with next_page" do
200
222
  end_time = 1488535542
201
223
  response_1 = [
@@ -259,12 +281,12 @@ module Embulk
259
281
 
260
282
  sub_test_case "ticket_events" do
261
283
  test "invoke incremental_export when partial=true" do
262
- mock(client).incremental_export(anything, "ticket_events", anything, Set.new, true)
284
+ mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, true)
263
285
  client.ticket_events(true)
264
286
  end
265
287
 
266
288
  test "invoke incremental_export when partial=false" do
267
- mock(client).incremental_export(anything, "ticket_events", anything, Set.new, false)
289
+ mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, false)
268
290
  client.ticket_events(false)
269
291
  end
270
292
  end
@@ -340,7 +340,7 @@ module Embulk
340
340
 
341
341
  test "call tickets method instead of ticket_all" do
342
342
  mock(@client).export.never
343
- mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
343
+ mock(@client).incremental_export(anything, "tickets", anything, anything, anything, anything) { [] }
344
344
  mock(page_builder).finish
345
345
 
346
346
  @plugin.run
@@ -379,7 +379,7 @@ module Embulk
379
379
 
380
380
  test "call ticket_all method instead of tickets" do
381
381
  mock(@client).export.never
382
- mock(@client).incremental_export(anything, "tickets", 0, Set.new, false) { [] }
382
+ mock(@client).incremental_export(anything, "tickets", 0, true, Set.new, false) { [] }
383
383
  mock(page_builder).finish
384
384
 
385
385
  @plugin.run
@@ -544,7 +544,7 @@ module Embulk
544
544
  test "Nothing passed to client" do
545
545
  stub(page_builder).finish
546
546
 
547
- mock(@client).tickets(false)
547
+ mock(@client).tickets(false, 0)
548
548
  @plugin.run
549
549
  end
550
550
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.12
4
+ version: 0.2.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2019-01-07 00:00:00.000000000 Z
13
+ date: 2019-01-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement