embulk-input-zendesk 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +2 -1
- data/embulk-input-zendesk.gemspec +1 -1
- data/lib/embulk/input/zendesk/client.rb +13 -7
- data/lib/embulk/input/zendesk/plugin.rb +6 -2
- data/test/embulk/input/zendesk/test_client.rb +24 -2
- data/test/embulk/input/zendesk/test_plugin.rb +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0911f65a242c33edcf81953f2546825701b2a4c
|
4
|
+
data.tar.gz: 34b91761ad01712a8ba0bb6a9c258d9555dd5a65
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85e63afc0356899056627249dda852e1a9fe0af9de006335b86d89c01bc53ac97d93a8974f82c27ce69041ad22a40ca271e85d77853d7378cc425198657d6224
|
7
|
+
data.tar.gz: 01e8c2238dd8aa341f3560ed1c8079f0d68454e3a9964137483cedaf625bbec42a4c9bb6ae1cd3fefcdaa9fe4a68896b049150968efe8c4782ac06e943034d88
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.2.13 - 2019-01-14
|
2
|
+
* [enhancement] Add `dedup` option, in order to avoid OOM when importing large dataset [#48](https://github.com/treasure-data/embulk-input-zendesk/pull/48)
|
3
|
+
|
1
4
|
## 0.2.12 - 2019-01-04
|
2
5
|
* [enhancement] Fix performance issue [#47](https://github.com/treasure-data/embulk-input-zendesk/pull/47)
|
3
6
|
|
data/README.md
CHANGED
@@ -31,7 +31,8 @@ Required Embulk version >= 0.8.1.
|
|
31
31
|
- **start_time**: Start export from this time if present. (string, default: `null`)
|
32
32
|
- **retry_limit**: Try to retry this times (integer, default: 5)
|
33
33
|
- **retry_initial_wait_sec**: Wait seconds for exponential backoff initial value (integer, default: 4)
|
34
|
-
- **incremental**:
|
34
|
+
- **incremental**: If false, `start_time` in next.yml would not be updated that means you always fetch all of data from Zendesk with statically conditions. If true, `start_time` would be updated in next.yml. (bool, default: `true`)
|
35
|
+
- **dedup**: Zendesk incremental API is not designed to protect against duplication. In order to de-dup records, plugin has to cache fetched IDs in memory. If you're importing a large dataset (eg. tens of millions of records), it can lead to OOM error, depends on your configured heap size. In such cases, you can set this option to `false`, but keep in mind that result may contain duplicated records. (bool, default: `true`)
|
35
36
|
- **app_marketplace_integration_name**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be used to name of the integration.
|
36
37
|
- **app_marketplace_org_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the Organization ID for your organization from the new developer portal.
|
37
38
|
- **app_marketplace_app_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This is the “App ID” that will be assigned to you when you submit your app.
|
data/embulk-input-zendesk.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-zendesk"
|
4
|
-
spec.version = "0.2.12"
|
4
|
+
spec.version = "0.2.13"
|
5
5
|
spec.authors = ["uu59", "muga", "sakama"]
|
6
6
|
spec.summary = "Zendesk input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Zendesk."
|
data/lib/embulk/input/zendesk/client.rb
CHANGED
@@ -82,23 +82,23 @@ module Embulk
|
|
82
82
|
# they have both Incremental API and non-incremental API
|
83
83
|
# 170717: `ticket_events` can use standard endpoint format now, ie. `<target>.json`
|
84
84
|
%w(tickets ticket_events users organizations).each do |target|
|
85
|
-
define_method(target) do |partial = true, start_time = 0, &block|
|
85
|
+
define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
|
86
86
|
# Always use incremental_export. There is some difference between incremental_export and export.
|
87
|
-
incremental_export("/api/v2/incremental/#{target}.json", target, start_time, Set.new, partial, &block)
|
87
|
+
incremental_export("/api/v2/incremental/#{target}.json", target, start_time, dedup, Set.new, partial, &block)
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
91
|
# Ticket metrics will need to be export using both the non incremental and incremental on ticket
|
92
92
|
# We provide support by filter out ticket_metrics with created at smaller than start time
|
93
93
|
# while passing the incremental start time to the incremental ticket/ticket_metrics export
|
94
|
-
define_method('ticket_metrics') do |partial = true, start_time = 0, &block|
|
94
|
+
define_method('ticket_metrics') do |partial = true, start_time = 0, dedup = true, &block|
|
95
95
|
if partial
|
96
96
|
# If partial export then we need to use the old end point. Since new end point return both ticket and
|
97
97
|
# ticket metric with ticket come first so the current approach that cut off the response packet won't work
|
98
98
|
# Since partial is only use for preview and guess so this should be fine
|
99
99
|
export('/api/v2/ticket_metrics.json', 'ticket_metrics', &block)
|
100
100
|
else
|
101
|
-
incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, Set.new, partial, { include: 'metric_sets' }, &block)
|
101
|
+
incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, dedup, Set.new, partial, { include: 'metric_sets' }, &block)
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
@@ -175,7 +175,7 @@ module Embulk
|
|
175
175
|
end
|
176
176
|
end
|
177
177
|
|
178
|
-
def incremental_export(path, key, start_time = 0, known_ids = Set.new, partial = true, query = {}, &block)
|
178
|
+
def incremental_export(path, key, start_time = 0, dedup = true, known_ids = Set.new, partial = true, query = {}, &block)
|
179
179
|
if partial
|
180
180
|
records = request_partial(path, query.merge(start_time: start_time)).first(5)
|
181
181
|
records.uniq{|r| r["id"]}.each do |record|
|
@@ -184,6 +184,10 @@ module Embulk
|
|
184
184
|
return
|
185
185
|
end
|
186
186
|
|
187
|
+
if !dedup
|
188
|
+
Embulk.logger.warn("!!! You've selected to skip de-duplicating records, result may contain duplicated data !!!")
|
189
|
+
end
|
190
|
+
|
187
191
|
execute_thread_pool do |pool|
|
188
192
|
loop do
|
189
193
|
start_fetching = Time.now
|
@@ -208,9 +212,11 @@ module Embulk
|
|
208
212
|
# de-duplicated records.
|
209
213
|
# https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
|
210
214
|
# https://github.com/zendesk/zendesk_api_client_rb/issues/251
|
211
|
-
|
215
|
+
if dedup
|
216
|
+
next if known_ids.include?(record["id"])
|
217
|
+
known_ids << record["id"]
|
218
|
+
end
|
212
219
|
|
213
|
-
known_ids << record["id"]
|
214
220
|
pool.post { block.call record }
|
215
221
|
actual_fetched += 1
|
216
222
|
end
|
data/lib/embulk/input/zendesk/plugin.rb
CHANGED
@@ -94,6 +94,7 @@ module Embulk
|
|
94
94
|
retry_limit: config.param("retry_limit", :integer, default: 5),
|
95
95
|
retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
|
96
96
|
incremental: config.param("incremental", :bool, default: true),
|
97
|
+
dedup: config.param("dedup", :bool, default: true),
|
97
98
|
schema: config.param(:columns, :array, default: []),
|
98
99
|
includes: config.param(:includes, :array, default: []),
|
99
100
|
app_marketplace_integration_name: config.param("app_marketplace_integration_name", :string, default: nil),
|
@@ -109,8 +110,11 @@ module Embulk
|
|
109
110
|
def run
|
110
111
|
method = task[:target]
|
111
112
|
args = [preview?]
|
112
|
-
|
113
|
-
|
113
|
+
args << (@start_time || 0).to_i
|
114
|
+
|
115
|
+
# de-dup may lead to OOM
|
116
|
+
if !task[:dedup].nil? && !task[:dedup]
|
117
|
+
args << false
|
114
118
|
end
|
115
119
|
|
116
120
|
mutex = Mutex.new
|
data/test/embulk/input/zendesk/test_client.rb
CHANGED
@@ -196,6 +196,28 @@ module Embulk
|
|
196
196
|
assert_equal(2,counter.value)
|
197
197
|
end
|
198
198
|
|
199
|
+
test "allows to fetch tickets metrics *with* duplicated" do
|
200
|
+
records = [
|
201
|
+
{"id" => 1, "ticket_id" => 100},
|
202
|
+
{"id" => 2, "ticket_id" => 200},
|
203
|
+
{"id" => 1, "ticket_id" => 100},
|
204
|
+
{"id" => 1, "ticket_id" => 100},
|
205
|
+
]
|
206
|
+
@httpclient.test_loopback_http_response << [
|
207
|
+
"HTTP/1.1 200",
|
208
|
+
"Content-Type: application/json",
|
209
|
+
"",
|
210
|
+
{
|
211
|
+
metric_sets: records,
|
212
|
+
count: records.length,
|
213
|
+
}.to_json
|
214
|
+
].join("\r\n")
|
215
|
+
counter = Concurrent::AtomicFixnum.new(0)
|
216
|
+
handler = proc {counter.increment}
|
217
|
+
client.ticket_metrics(false, 0, false, &handler)
|
218
|
+
assert_equal(4,counter.value)
|
219
|
+
end
|
220
|
+
|
199
221
|
test "fetch ticket_metrics with next_page" do
|
200
222
|
end_time = 1488535542
|
201
223
|
response_1 = [
|
@@ -259,12 +281,12 @@ module Embulk
|
|
259
281
|
|
260
282
|
sub_test_case "ticket_events" do
|
261
283
|
test "invoke incremental_export when partial=true" do
|
262
|
-
mock(client).incremental_export(anything, "ticket_events", anything, Set.new, true)
|
284
|
+
mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, true)
|
263
285
|
client.ticket_events(true)
|
264
286
|
end
|
265
287
|
|
266
288
|
test "invoke incremental_export when partial=false" do
|
267
|
-
mock(client).incremental_export(anything, "ticket_events", anything, Set.new, false)
|
289
|
+
mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, false)
|
268
290
|
client.ticket_events(false)
|
269
291
|
end
|
270
292
|
end
|
data/test/embulk/input/zendesk/test_plugin.rb
CHANGED
@@ -340,7 +340,7 @@ module Embulk
|
|
340
340
|
|
341
341
|
test "call tickets method instead of ticket_all" do
|
342
342
|
mock(@client).export.never
|
343
|
-
mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
|
343
|
+
mock(@client).incremental_export(anything, "tickets", anything, anything, anything, anything) { [] }
|
344
344
|
mock(page_builder).finish
|
345
345
|
|
346
346
|
@plugin.run
|
@@ -379,7 +379,7 @@ module Embulk
|
|
379
379
|
|
380
380
|
test "call ticket_all method instead of tickets" do
|
381
381
|
mock(@client).export.never
|
382
|
-
mock(@client).incremental_export(anything, "tickets", 0, Set.new, false) { [] }
|
382
|
+
mock(@client).incremental_export(anything, "tickets", 0, true, Set.new, false) { [] }
|
383
383
|
mock(page_builder).finish
|
384
384
|
|
385
385
|
@plugin.run
|
@@ -544,7 +544,7 @@ module Embulk
|
|
544
544
|
test "Nothing passed to client" do
|
545
545
|
stub(page_builder).finish
|
546
546
|
|
547
|
-
mock(@client).tickets(false)
|
547
|
+
mock(@client).tickets(false, 0)
|
548
548
|
@plugin.run
|
549
549
|
end
|
550
550
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-zendesk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uu59
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-01-04 00:00:00.000000000 Z
|
13
|
+
date: 2019-01-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|