embulk-input-zendesk 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +2 -1
- data/embulk-input-zendesk.gemspec +1 -1
- data/lib/embulk/input/zendesk/client.rb +13 -7
- data/lib/embulk/input/zendesk/plugin.rb +6 -2
- data/test/embulk/input/zendesk/test_client.rb +24 -2
- data/test/embulk/input/zendesk/test_plugin.rb +3 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0911f65a242c33edcf81953f2546825701b2a4c
|
4
|
+
data.tar.gz: 34b91761ad01712a8ba0bb6a9c258d9555dd5a65
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85e63afc0356899056627249dda852e1a9fe0af9de006335b86d89c01bc53ac97d93a8974f82c27ce69041ad22a40ca271e85d77853d7378cc425198657d6224
|
7
|
+
data.tar.gz: 01e8c2238dd8aa341f3560ed1c8079f0d68454e3a9964137483cedaf625bbec42a4c9bb6ae1cd3fefcdaa9fe4a68896b049150968efe8c4782ac06e943034d88
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 0.2.13 - 2019-01-14
|
2
|
+
* [enhancement] Add `dedup` option, in order to avoid OOM when importing large dataset [#48](https://github.com/treasure-data/embulk-input-zendesk/pull/48)
|
3
|
+
|
1
4
|
## 0.2.12 - 2019-01-04
|
2
5
|
* [enhancement] Fix performance issue [#47](https://github.com/treasure-data/embulk-input-zendesk/pull/47)
|
3
6
|
|
data/README.md
CHANGED
@@ -31,7 +31,8 @@ Required Embulk version >= 0.8.1.
|
|
31
31
|
- **start_time**: Start export from this time if present. (string, default: `null`)
|
32
32
|
- **retry_limit**: Retry up to this many times (integer, default: 5)
|
33
33
|
- **retry_initial_wait_sec**: Wait seconds for exponential backoff initial value (integer, default: 4)
|
34
|
-
- **incremental**:
|
34
|
+
- **incremental**: If false, `start_time` in next.yml is not updated, which means you always fetch all data from Zendesk with the same static conditions. If true, `start_time` is updated in next.yml. (bool, default: `true`)
|
35
|
+
- **dedup**: The Zendesk incremental API is not designed to protect against duplication. In order to de-dup records, the plugin has to cache fetched IDs in memory. If you're importing a large dataset (e.g. tens of millions of records), this can lead to an OOM error, depending on your configured heap size. In such cases, you can set this option to `false`, but keep in mind that the result may contain duplicated records. (bool, default: `true`)
|
35
36
|
- **app_marketplace_integration_name**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the name of the integration.
|
36
37
|
- **app_marketplace_org_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This should be the Organization ID for your organization from the new developer portal.
|
37
38
|
- **app_marketplace_app_id**: Invisible to user, only requires to be a part of the Zendesk Apps Marketplace. This is the “App ID” that will be assigned to you when you submit your app.
|
@@ -1,7 +1,7 @@
|
|
1
1
|
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "embulk-input-zendesk"
|
4
|
-
spec.version = "0.2.12"
|
4
|
+
spec.version = "0.2.13"
|
5
5
|
spec.authors = ["uu59", "muga", "sakama"]
|
6
6
|
spec.summary = "Zendesk input plugin for Embulk"
|
7
7
|
spec.description = "Loads records from Zendesk."
|
@@ -82,23 +82,23 @@ module Embulk
|
|
82
82
|
# they have both Incremental API and non-incremental API
|
83
83
|
# 170717: `ticket_events` can use standard endpoint format now, ie. `<target>.json`
|
84
84
|
%w(tickets ticket_events users organizations).each do |target|
|
85
|
-
define_method(target) do |partial = true, start_time = 0, &block|
|
85
|
+
define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
|
86
86
|
# Always use incremental_export. There is some difference between incremental_export and export.
|
87
|
-
incremental_export("/api/v2/incremental/#{target}.json", target, start_time, Set.new, partial, &block)
|
87
|
+
incremental_export("/api/v2/incremental/#{target}.json", target, start_time, dedup, Set.new, partial, &block)
|
88
88
|
end
|
89
89
|
end
|
90
90
|
|
91
91
|
# Ticket metrics will need to be export using both the non incremental and incremental on ticket
|
92
92
|
# We provide support by filter out ticket_metrics with created at smaller than start time
|
93
93
|
# while passing the incremental start time to the incremental ticket/ticket_metrics export
|
94
|
-
define_method('ticket_metrics') do |partial = true, start_time = 0, &block|
|
94
|
+
define_method('ticket_metrics') do |partial = true, start_time = 0, dedup = true, &block|
|
95
95
|
if partial
|
96
96
|
# If partial export then we need to use the old end point. Since new end point return both ticket and
|
97
97
|
# ticket metric with ticket come first so the current approach that cut off the response packet won't work
|
98
98
|
# Since partial is only use for preview and guess so this should be fine
|
99
99
|
export('/api/v2/ticket_metrics.json', 'ticket_metrics', &block)
|
100
100
|
else
|
101
|
-
incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, Set.new, partial, { include: 'metric_sets' }, &block)
|
101
|
+
incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, dedup, Set.new, partial, { include: 'metric_sets' }, &block)
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
@@ -175,7 +175,7 @@ module Embulk
|
|
175
175
|
end
|
176
176
|
end
|
177
177
|
|
178
|
-
def incremental_export(path, key, start_time = 0, known_ids = Set.new, partial = true, query = {}, &block)
|
178
|
+
def incremental_export(path, key, start_time = 0, dedup = true, known_ids = Set.new, partial = true, query = {}, &block)
|
179
179
|
if partial
|
180
180
|
records = request_partial(path, query.merge(start_time: start_time)).first(5)
|
181
181
|
records.uniq{|r| r["id"]}.each do |record|
|
@@ -184,6 +184,10 @@ module Embulk
|
|
184
184
|
return
|
185
185
|
end
|
186
186
|
|
187
|
+
if !dedup
|
188
|
+
Embulk.logger.warn("!!! You've selected to skip de-duplicating records, result may contain duplicated data !!!")
|
189
|
+
end
|
190
|
+
|
187
191
|
execute_thread_pool do |pool|
|
188
192
|
loop do
|
189
193
|
start_fetching = Time.now
|
@@ -208,9 +212,11 @@ module Embulk
|
|
208
212
|
# de-duplicated records.
|
209
213
|
# https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
|
210
214
|
# https://github.com/zendesk/zendesk_api_client_rb/issues/251
|
211
|
-
|
215
|
+
if dedup
|
216
|
+
next if known_ids.include?(record["id"])
|
217
|
+
known_ids << record["id"]
|
218
|
+
end
|
212
219
|
|
213
|
-
known_ids << record["id"]
|
214
220
|
pool.post { block.call record }
|
215
221
|
actual_fetched += 1
|
216
222
|
end
|
@@ -94,6 +94,7 @@ module Embulk
|
|
94
94
|
retry_limit: config.param("retry_limit", :integer, default: 5),
|
95
95
|
retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
|
96
96
|
incremental: config.param("incremental", :bool, default: true),
|
97
|
+
dedup: config.param("dedup", :bool, default: true),
|
97
98
|
schema: config.param(:columns, :array, default: []),
|
98
99
|
includes: config.param(:includes, :array, default: []),
|
99
100
|
app_marketplace_integration_name: config.param("app_marketplace_integration_name", :string, default: nil),
|
@@ -109,8 +110,11 @@ module Embulk
|
|
109
110
|
def run
|
110
111
|
method = task[:target]
|
111
112
|
args = [preview?]
|
112
|
-
|
113
|
-
|
113
|
+
args << (@start_time || 0).to_i
|
114
|
+
|
115
|
+
# de-dup may lead to OOM
|
116
|
+
if !task[:dedup].nil? && !task[:dedup]
|
117
|
+
args << false
|
114
118
|
end
|
115
119
|
|
116
120
|
mutex = Mutex.new
|
@@ -196,6 +196,28 @@ module Embulk
|
|
196
196
|
assert_equal(2,counter.value)
|
197
197
|
end
|
198
198
|
|
199
|
+
test "allows to fetch tickets metrics *with* duplicated" do
|
200
|
+
records = [
|
201
|
+
{"id" => 1, "ticket_id" => 100},
|
202
|
+
{"id" => 2, "ticket_id" => 200},
|
203
|
+
{"id" => 1, "ticket_id" => 100},
|
204
|
+
{"id" => 1, "ticket_id" => 100},
|
205
|
+
]
|
206
|
+
@httpclient.test_loopback_http_response << [
|
207
|
+
"HTTP/1.1 200",
|
208
|
+
"Content-Type: application/json",
|
209
|
+
"",
|
210
|
+
{
|
211
|
+
metric_sets: records,
|
212
|
+
count: records.length,
|
213
|
+
}.to_json
|
214
|
+
].join("\r\n")
|
215
|
+
counter = Concurrent::AtomicFixnum.new(0)
|
216
|
+
handler = proc {counter.increment}
|
217
|
+
client.ticket_metrics(false, 0, false, &handler)
|
218
|
+
assert_equal(4,counter.value)
|
219
|
+
end
|
220
|
+
|
199
221
|
test "fetch ticket_metrics with next_page" do
|
200
222
|
end_time = 1488535542
|
201
223
|
response_1 = [
|
@@ -259,12 +281,12 @@ module Embulk
|
|
259
281
|
|
260
282
|
sub_test_case "ticket_events" do
|
261
283
|
test "invoke incremental_export when partial=true" do
|
262
|
-
mock(client).incremental_export(anything, "ticket_events", anything, Set.new, true)
|
284
|
+
mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, true)
|
263
285
|
client.ticket_events(true)
|
264
286
|
end
|
265
287
|
|
266
288
|
test "invoke incremental_export when partial=false" do
|
267
|
-
mock(client).incremental_export(anything, "ticket_events", anything, Set.new, false)
|
289
|
+
mock(client).incremental_export(anything, "ticket_events", anything, true, Set.new, false)
|
268
290
|
client.ticket_events(false)
|
269
291
|
end
|
270
292
|
end
|
@@ -340,7 +340,7 @@ module Embulk
|
|
340
340
|
|
341
341
|
test "call tickets method instead of ticket_all" do
|
342
342
|
mock(@client).export.never
|
343
|
-
mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
|
343
|
+
mock(@client).incremental_export(anything, "tickets", anything, anything, anything, anything) { [] }
|
344
344
|
mock(page_builder).finish
|
345
345
|
|
346
346
|
@plugin.run
|
@@ -379,7 +379,7 @@ module Embulk
|
|
379
379
|
|
380
380
|
test "call ticket_all method instead of tickets" do
|
381
381
|
mock(@client).export.never
|
382
|
-
mock(@client).incremental_export(anything, "tickets", 0, Set.new, false) { [] }
|
382
|
+
mock(@client).incremental_export(anything, "tickets", 0, true, Set.new, false) { [] }
|
383
383
|
mock(page_builder).finish
|
384
384
|
|
385
385
|
@plugin.run
|
@@ -544,7 +544,7 @@ module Embulk
|
|
544
544
|
test "Nothing passed to client" do
|
545
545
|
stub(page_builder).finish
|
546
546
|
|
547
|
-
mock(@client).tickets(false)
|
547
|
+
mock(@client).tickets(false, 0)
|
548
548
|
@plugin.run
|
549
549
|
end
|
550
550
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-zendesk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.12
|
4
|
+
version: 0.2.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uu59
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-01-
|
13
|
+
date: 2019-01-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|