embulk-input-zendesk 0.2.14 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/.gitignore +9 -3
  3. data/.travis.yml +5 -44
  4. data/CHANGELOG.md +3 -0
  5. data/README.md +5 -5
  6. data/build.gradle +123 -0
  7. data/classpath/commons-codec-1.10.jar +0 -0
  8. data/classpath/commons-logging-1.2.jar +0 -0
  9. data/classpath/embulk-input-zendesk-0.3.0.jar +0 -0
  10. data/classpath/httpclient-4.5.6.jar +0 -0
  11. data/classpath/httpcore-4.4.10.jar +0 -0
  12. data/config/checkstyle/checkstyle.xml +128 -0
  13. data/config/checkstyle/default.xml +108 -0
  14. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  15. data/gradle/wrapper/gradle-wrapper.properties +5 -0
  16. data/gradlew +172 -0
  17. data/gradlew.bat +84 -0
  18. data/lib/embulk/guess/zendesk.rb +21 -0
  19. data/lib/embulk/input/zendesk.rb +3 -9
  20. data/src/main/java/org/embulk/input/zendesk/ZendeskInputPlugin.java +471 -0
  21. data/src/main/java/org/embulk/input/zendesk/clients/ZendeskRestClient.java +268 -0
  22. data/src/main/java/org/embulk/input/zendesk/models/AuthenticationMethod.java +23 -0
  23. data/src/main/java/org/embulk/input/zendesk/models/Target.java +46 -0
  24. data/src/main/java/org/embulk/input/zendesk/models/ZendeskException.java +25 -0
  25. data/src/main/java/org/embulk/input/zendesk/services/ZendeskSupportAPIService.java +109 -0
  26. data/src/main/java/org/embulk/input/zendesk/utils/ZendeskConstants.java +61 -0
  27. data/src/main/java/org/embulk/input/zendesk/utils/ZendeskDateUtils.java +51 -0
  28. data/src/main/java/org/embulk/input/zendesk/utils/ZendeskUtils.java +150 -0
  29. data/src/main/java/org/embulk/input/zendesk/utils/ZendeskValidatorUtils.java +92 -0
  30. data/src/test/java/org/embulk/input/zendesk/TestZendeskInputPlugin.java +232 -0
  31. data/src/test/java/org/embulk/input/zendesk/clients/TestZendeskRestClient.java +351 -0
  32. data/src/test/java/org/embulk/input/zendesk/services/TestZendeskSupportAPIService.java +172 -0
  33. data/src/test/java/org/embulk/input/zendesk/utils/TestZendeskDateUtils.java +36 -0
  34. data/src/test/java/org/embulk/input/zendesk/utils/TestZendeskUtil.java +160 -0
  35. data/src/test/java/org/embulk/input/zendesk/utils/TestZendeskValidatorUtils.java +138 -0
  36. data/src/test/java/org/embulk/input/zendesk/utils/ZendeskPluginTestRuntime.java +133 -0
  37. data/src/test/java/org/embulk/input/zendesk/utils/ZendeskTestHelper.java +63 -0
  38. data/src/test/resources/config/base.yml +14 -0
  39. data/src/test/resources/config/base_validator.yml +48 -0
  40. data/src/test/resources/config/incremental.yml +54 -0
  41. data/src/test/resources/config/non-incremental.yml +39 -0
  42. data/src/test/resources/config/util.yml +18 -0
  43. data/src/test/resources/data/client.json +293 -0
  44. data/src/test/resources/data/error_data.json +187 -0
  45. data/src/test/resources/data/expected/ticket_column.json +148 -0
  46. data/src/test/resources/data/expected/ticket_column_with_related_objects.json +152 -0
  47. data/src/test/resources/data/expected/ticket_fields_column.json +92 -0
  48. data/src/test/resources/data/expected/ticket_metrics_column.json +98 -0
  49. data/src/test/resources/data/ticket_fields.json +225 -0
  50. data/src/test/resources/data/ticket_metrics.json +397 -0
  51. data/src/test/resources/data/ticket_with_related_objects.json +67 -0
  52. data/src/test/resources/data/tickets.json +232 -0
  53. data/src/test/resources/data/tickets_continue.json +52 -0
  54. data/src/test/resources/data/util.json +19 -0
  55. data/src/test/resources/data/util_page.json +227 -0
  56. metadata +65 -221
  57. data/.ruby-version +0 -1
  58. data/.travis.yml.erb +0 -43
  59. data/Gemfile +0 -2
  60. data/Rakefile +0 -21
  61. data/embulk-input-zendesk.gemspec +0 -29
  62. data/gemfiles/embulk-0.8.0-latest +0 -4
  63. data/gemfiles/embulk-0.8.1 +0 -4
  64. data/gemfiles/embulk-latest +0 -4
  65. data/gemfiles/template.erb +0 -4
  66. data/lib/embulk/input/zendesk/client.rb +0 -434
  67. data/lib/embulk/input/zendesk/plugin.rb +0 -199
  68. data/test/capture_io.rb +0 -45
  69. data/test/embulk/input/zendesk/test_client.rb +0 -722
  70. data/test/embulk/input/zendesk/test_plugin.rb +0 -628
  71. data/test/fixture_helper.rb +0 -11
  72. data/test/fixtures/invalid_app_marketplace_lack_one_property.yml +0 -13
  73. data/test/fixtures/invalid_app_marketplace_lack_two_property.yml +0 -12
  74. data/test/fixtures/invalid_lack_username.yml +0 -9
  75. data/test/fixtures/invalid_unknown_auth.yml +0 -9
  76. data/test/fixtures/tickets.json +0 -44
  77. data/test/fixtures/valid_app_marketplace.yml +0 -14
  78. data/test/fixtures/valid_auth_basic.yml +0 -11
  79. data/test/fixtures/valid_auth_oauth.yml +0 -10
  80. data/test/fixtures/valid_auth_token.yml +0 -11
  81. data/test/override_assert_raise.rb +0 -21
  82. data/test/run-test.rb +0 -26
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "~> 0.8.0"
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "0.8.1"
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "> 0.8.1"
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org/'
2
- gemspec :path => '../'
3
-
4
- gem "embulk", "<%= version %>"
@@ -1,434 +0,0 @@
1
- require "strscan"
2
- require "httpclient"
3
- require 'concurrent'
4
- require 'set'
5
-
6
- module Embulk
7
- module Input
8
- module Zendesk
9
- class Client
10
- attr_reader :config
11
-
12
- PARTIAL_RECORDS_SIZE = 50
13
- PARTIAL_RECORDS_BYTE_SIZE = 50000
14
- AVAILABLE_INCREMENTAL_EXPORT = %w(tickets users organizations ticket_events ticket_metrics).freeze
15
- UNAVAILABLE_INCREMENTAL_EXPORT = %w(ticket_fields ticket_forms).freeze
16
- AVAILABLE_TARGETS = AVAILABLE_INCREMENTAL_EXPORT + UNAVAILABLE_INCREMENTAL_EXPORT
17
-
18
- def initialize(config)
19
- @config = config
20
- end
21
-
22
- def httpclient
23
- # multi-threading + retry can create a lot of instances, and each will keep connecting
24
- # re-using one instance across multiple threads helps to avoid cleanup code
25
- @httpclient ||=
26
- begin
27
- clnt = HTTPClient.new
28
- clnt.connect_timeout = 240 # default:60 is not enough for huge data
29
- clnt.receive_timeout = 240 # better change default receive_timeout too
30
- # httpclient.debug_dev = STDOUT
31
- set_auth(clnt)
32
- end
33
- end
34
-
35
- def create_pool
36
- Concurrent::ThreadPoolExecutor.new(
37
- min_threads: 10,
38
- max_threads: 100,
39
- max_queue: 10_000,
40
- fallback_policy: :caller_runs
41
- )
42
- end
43
-
44
- def validate_config
45
- validate_credentials
46
- validate_target
47
- validate_app_marketplace
48
- end
49
-
50
- def validate_credentials
51
- valid = case config[:auth_method]
52
- when "basic"
53
- config[:username] && config[:password]
54
- when "token"
55
- config[:username] && config[:token]
56
- when "oauth"
57
- config[:access_token]
58
- else
59
- raise Embulk::ConfigError.new("Unknown auth_method (#{config[:auth_method]}). Should pick one from 'basic', 'token' or 'oauth'.")
60
- end
61
-
62
- unless valid
63
- raise Embulk::ConfigError.new("Missing required credentials for #{config[:auth_method]}")
64
- end
65
- end
66
-
67
- def validate_target
68
- unless AVAILABLE_TARGETS.include?(config[:target])
69
- raise Embulk::ConfigError.new("target: '#{config[:target]}' is not supported. Supported targets are #{AVAILABLE_TARGETS.join(", ")}.")
70
- end
71
- end
72
-
73
- def validate_app_marketplace
74
- valid = config[:app_marketplace_integration_name] && config[:app_marketplace_org_id] && config[:app_marketplace_app_id]
75
- valid = valid || (!config[:app_marketplace_integration_name] && !config[:app_marketplace_org_id] && !config[:app_marketplace_app_id])
76
-
77
- unless valid
78
- raise Embulk::ConfigError.new("All of app_marketplace_integration_name, app_marketplace_org_id, app_marketplace_app_id are required to fill out for Apps Marketplace API header")
79
- end
80
- end
81
-
82
- # they have both Incremental API and non-incremental API
83
- # 170717: `ticket_events` can use standard endpoint format now, ie. `<target>.json`
84
- %w(tickets ticket_events users organizations).each do |target|
85
- define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
86
- # Always use incremental_export. There is some difference between incremental_export and export.
87
- incremental_export("/api/v2/incremental/#{target}.json", target, start_time, dedup, Set.new, partial, &block)
88
- end
89
- end
90
-
91
- # Ticket metrics need to be exported using both the non-incremental and the incremental ticket export
92
- # We support this by filtering out ticket_metrics whose created-at time is earlier than the start time
93
- # while passing the incremental start time to the incremental ticket/ticket_metrics export
94
- define_method('ticket_metrics') do |partial = true, start_time = 0, dedup = true, &block|
95
- if partial
96
- # For a partial export we need to use the old endpoint, since the new endpoint returns both tickets and
97
- # ticket metrics, with tickets coming first, so the current approach of cutting off the response packet won't work.
98
- # Partial is only used for preview and guess, so this should be fine.
99
- export('/api/v2/ticket_metrics.json', 'ticket_metrics', &block)
100
- else
101
- incremental_export('/api/v2/incremental/tickets.json', 'metric_sets', start_time, dedup, Set.new, partial, { include: 'metric_sets' }, &block)
102
- end
103
- end
104
-
105
- # they have non-incremental API only
106
- UNAVAILABLE_INCREMENTAL_EXPORT.each do |target|
107
- define_method(target) do |partial = true, start_time = 0, dedup = true, &block|
108
- path = "/api/v2/#{target}.json"
109
- if partial
110
- export(path, target, &block)
111
- else
112
- export_parallel(path, target, start_time, dedup, false, &block)
113
- end
114
- end
115
- end
116
-
117
- def fetch_subresource(record_id, base, target)
118
- Embulk.logger.info "Fetching subresource #{target} of #{base}:#{record_id}"
119
- response = request("/api/v2/#{base}/#{record_id}/#{target}.json")
120
- return [] if response.status == 404
121
-
122
- begin
123
- data = JSON.parse(response.body)
124
- data[target]
125
- rescue => e
126
- raise Embulk::DataError.new(e)
127
- end
128
- end
129
-
130
- private
131
-
132
- def export_parallel(path, key, start_time = 0, dedup = true, paging = true, &block)
133
- per_page = 100 # 100 is maximum https://developer.zendesk.com/rest_api/docs/core/introduction#pagination
134
- first_response = request(path, false, per_page: per_page, page: 1)
135
- first_fetched = JSON.parse(first_response.body)
136
- total_count = first_fetched["count"]
137
- last_page_num = (total_count / per_page.to_f).ceil
138
- Embulk.logger.info "#{key} records=#{total_count} last_page=#{paging ? last_page_num : 1}"
139
-
140
- handler = lambda { |records| records.each { |r| block.call r } }
141
- handler.call(dedup ? first_fetched[key].uniq { |r| r['id'] } : first_fetched[key])
142
-
143
- # stop if endpoints have no pagination, ie. API returns all records
144
- # `ticket_fields`, `ticket_forms`
145
- if paging
146
- execute_thread_pool do |pool|
147
- (2..last_page_num).each do |page|
148
- pool.post do
149
- response = request(path, false, per_page: per_page, page: page)
150
- fetched_records = extract_records_from_response(response, key)
151
- Embulk.logger.info "Fetched #{key} on page=#{page} >>> size: #{fetched_records.length}"
152
- handler.call(dedup ? fetched_records.uniq { |r| r['id'] } : fetched_records)
153
- end
154
- end
155
- end
156
- end
157
-
158
- nil # this is necessary different with incremental_export
159
- end
160
-
161
- def export(path, key, page = 1, &block)
162
- per_page = PARTIAL_RECORDS_SIZE
163
- Embulk.logger.info("Fetching #{path} with page=#{page} (partial)")
164
-
165
- response = request(path, true, per_page: per_page, page: page)
166
-
167
- begin
168
- data = JSON.parse(response.body)
169
- raise "Invalid data format: #{key} must be array" unless data.key?(key) && data[key].is_a?(Array)
170
- rescue => e
171
- raise Embulk::DataError.new(e)
172
- end
173
-
174
- data[key].each do |record|
175
- block.call record
176
- end
177
- end
178
-
179
- def incremental_export(path, key, start_time = 0, dedup = true, known_ids = Set.new, partial = true, query = {}, &block)
180
- if partial
181
- records = request_partial(path, query.merge(start_time: start_time)).first(5)
182
- records.uniq{|r| r["id"]}.each do |record|
183
- block.call record
184
- end
185
- return
186
- end
187
-
188
- if !dedup
189
- Embulk.logger.warn("!!! You've selected to skip de-duplicating records, result may contain duplicated data !!!")
190
- end
191
-
192
- execute_thread_pool do |pool|
193
- loop do
194
- start_fetching = Time.now
195
- response = request(path, false, query.merge(start_time: start_time))
196
- actual_fetched = 0
197
- data = JSON.parse(response.body)
198
- # no key found in response occasionally => retry
199
- raise TempError, "No '#{key}' found in JSON response" unless data.key? key
200
- data[key].each do |record|
201
- # https://developer.zendesk.com/rest_api/docs/core/incremental_export#excluding-system-updates
202
- # "generated_timestamp" is updated whenever Zendesk makes an internal change
203
- # "updated_at" is updated when the ticket data itself was changed
204
- # The start_time query parameter is processed by Zendesk against generated_timestamp,
205
- # but it was calculated from the record's updated_at time.
206
- # So a record that has not changed since the previous import can reappear because of Zendesk-internal changes.
207
- # We ignore records that have updated_at <= start_time
208
- if start_time && record["generated_timestamp"] && record["updated_at"]
209
- updated_at = Time.parse(record["updated_at"])
210
- next if updated_at <= Time.at(start_time)
211
- end
212
-
213
- # de-duplicated records.
214
- # https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
215
- # https://github.com/zendesk/zendesk_api_client_rb/issues/251
216
- if dedup
217
- next if known_ids.include?(record["id"])
218
- known_ids << record["id"]
219
- end
220
-
221
- pool.post { block.call record }
222
- actual_fetched += 1
223
- end
224
- Embulk.logger.info "Fetched #{actual_fetched} records from start_time:#{start_time} (#{Time.at(start_time)}) within #{Time.now.to_i - start_fetching.to_i} seconds"
225
- start_time = data["end_time"]
226
-
227
- # NOTE: If count is less than 1000, then stop paginating.
228
- # Otherwise, use the next_page URL to get the next page of results.
229
- # https://developer.zendesk.com/rest_api/docs/core/incremental_export#pagination
230
- break data if data["count"] < 1000
231
- end
232
- end
233
- end
234
-
235
- def extract_records_from_response(response, key)
236
- begin
237
- data = JSON.parse(response.body)
238
- data[key]
239
- rescue => e
240
- raise Embulk::DataError.new(e)
241
- end
242
- end
243
-
244
- def retryer
245
- PerfectRetry.new do |config|
246
- config.limit = @config[:retry_limit]
247
- config.logger = Embulk.logger
248
- config.log_level = nil
249
- config.dont_rescues = [Embulk::DataError, Embulk::ConfigError]
250
- config.sleep = lambda{|n| @config[:retry_initial_wait_sec]* (2 ** (n-1)) }
251
- config.raise_original_error = true
252
- end
253
- end
254
-
255
- def set_auth(httpclient)
256
- validate_credentials
257
-
258
- # https://developer.zendesk.com/rest_api/docs/core/introduction#security-and-authentication
259
- case config[:auth_method]
260
- when "basic"
261
- httpclient.set_auth(config[:login_url], config[:username], config[:password])
262
- when "token"
263
- httpclient.set_auth(config[:login_url], "#{config[:username]}/token", config[:token])
264
- when "oauth"
265
- httpclient.default_header = {
266
- "Authorization" => "Bearer #{config[:access_token]}"
267
- }
268
- end
269
- httpclient
270
- end
271
-
272
- def request(path, partial = false, query = {})
273
- u = URI.parse(config[:login_url])
274
- u.path = path
275
-
276
- # https://help.zendesk.com/hc/en-us/articles/115010249348-Announcing-Updated-Apps-Marketplace-API-Header-Requirementsmerg
277
- extheader = {}
278
-
279
- if config[:app_marketplace_integration_name] && config[:app_marketplace_org_id] && config[:app_marketplace_app_id]
280
- extheader = {"X-Zendesk-Marketplace-Name" => config[:app_marketplace_integration_name],
281
- "X-Zendesk-Marketplace-Organization-Id" => config[:app_marketplace_org_id],
282
- "X-Zendesk-Marketplace-App-Id" => config[:app_marketplace_app_id]}
283
- end
284
-
285
- retryer.with_retry do
286
- Embulk.logger.debug "Fetching #{u.to_s}"
287
- response = httpclient.get(u.to_s, query, extheader)
288
- handle_response(response.status, response.headers, response.body, partial)
289
- response
290
- end
291
- end
292
-
293
- def request_partial(path, query = {})
294
- # NOTE: This is a dirty hack for quick response using incremental_export API.
295
- # Disconnect socket when received PARTIAL_RECORDS_BYTE_SIZE bytes,
296
- # And extract valid JSONs from received bytes (extract_valid_json_from_chunk method)
297
- u = URI.parse(config[:login_url])
298
- u.path = path
299
-
300
- # https://help.zendesk.com/hc/en-us/articles/115010249348-Announcing-Updated-Apps-Marketplace-API-Header-Requirementsmerg
301
- extheader = {}
302
-
303
- if config[:app_marketplace_integration_name] && config[:app_marketplace_org_id] && config[:app_marketplace_app_id]
304
- extheader = {"X-Zendesk-Marketplace-Name" => config[:app_marketplace_integration_name],
305
- "X-Zendesk-Marketplace-Organization-Id" => config[:app_marketplace_org_id],
306
- "X-Zendesk-Marketplace-App-Id" => config[:app_marketplace_app_id]}
307
- end
308
-
309
- retryer.with_retry do
310
- Embulk.logger.debug "Fetching #{u.to_s}"
311
- buf = ""
312
- auth_retry = 0
313
- httpclient.get(u.to_s, query, extheader) do |message, chunk|
314
- if message.status == 401
315
- # First request will fail by 401 because not included credentials.
316
- # HTTPClient will retry request with credentials.
317
- if auth_retry.zero?
318
- auth_retry += 1
319
- next
320
- end
321
- end
322
- handle_response(message.status, message.headers, chunk, true)
323
- buf << chunk
324
- break if buf.bytesize > PARTIAL_RECORDS_BYTE_SIZE
325
- end
326
- extract_valid_json_from_chunk(buf).map do |json|
327
- JSON.parse(json)
328
- end
329
- end
330
- end
331
-
332
- def extract_valid_json_from_chunk(chunk)
333
- # Drip JSON objects from incomplete string
334
- #
335
- # e.g.:
336
- # chunk = '{"ticket_events":[{"foo":1},{"foo":2},{"fo'
337
- # extract_valid_json_from_chunk(chunk) #=> ['{"foo":1}', '{"foo":2}']
338
- result = []
339
-
340
- # omit '{"tickets":[' prefix. See test/fixtures/tickets.json for actual response.
341
- s = StringScanner.new(chunk.scrub.gsub(%r!^{".*?":\[!,""))
342
- while !s.eos?
343
- opener = s.scan(/{/)
344
- break unless opener
345
- buf = opener # Initialize `buf` as "{"
346
- while content = s.scan(/.*?}/) # grab data from start to next "}"
347
- buf << content
348
- if (JSON.parse(buf) rescue false) # if JSON.parse success, `buf` is valid JSON. we'll take it.
349
- result << buf.dup
350
- break
351
- end
352
- end
353
- s.scan(/[^{]*/) # skip until next "{". `chunk` has comma-separated objects like '},{'. skip that comma.
354
- end
355
- result
356
- end
357
-
358
- def wait_rate_limit(retry_after, partial = false)
359
- # Won't retry for preview/guess mode
360
- if partial
361
- raise Embulk::DataError.new("Rate Limited. Waiting #{retry_after} seconds to re-run")
362
- else
363
- Embulk.logger.warn "Rate Limited. Waiting #{retry_after} seconds to retry"
364
- sleep retry_after
365
- throw :retry
366
- end
367
- end
368
-
369
- def handle_response(status_code, headers, body, partial = false)
370
- # https://developer.zendesk.com/rest_api/docs/core/introduction#response-format
371
- case status_code
372
- when 200, 404
373
- # 404 would be returned e.g. ticket comments are empty (on fetch_subresource method)
374
- when 409
375
- raise "[#{status_code}] temporally failure."
376
- when 422
377
- begin
378
- payload = JSON.parse(body)
379
- if payload["description"].start_with?("Too recent start_time.")
380
- # That means "No records from start_time". We can recognize it same as 200.
381
- return
382
- end
383
- rescue
384
- # Failed to parse response.body as JSON
385
- raise Embulk::ConfigError.new("[#{status_code}] #{body}")
386
- end
387
-
388
- # 422 and it isn't "Too recent start_time"
389
- raise Embulk::ConfigError.new("[#{status_code}] #{body}")
390
- when 429
391
- # rate limit
392
- retry_after = headers["Retry-After"]
393
- wait_rate_limit(retry_after.to_i, partial)
394
- when 400..500
395
- # Won't retry for 4xx range errors except the ones above. Almost all of them should be ConfigError, e.g. 403 Forbidden
396
- raise Embulk::ConfigError.new("[#{status_code}] #{body}")
397
- when 500, 503
398
- # 503 is possible rate limit
399
- retry_after = headers["Retry-After"]
400
- if retry_after
401
- wait_rate_limit(retry_after.to_i, partial)
402
- else
403
- raise "[#{status_code}] temporally failure."
404
- end
405
- else
406
- raise "Server returns unknown status code (#{status_code}) #{body}"
407
- end
408
- end
409
-
410
- def execute_thread_pool(&block)
411
- pool = create_pool
412
- pr = PerfectRetry.new do |config|
413
- config.limit = @config[:retry_limit]
414
- config.logger = Embulk.logger
415
- config.log_level = nil
416
- config.rescues = [TempError]
417
- config.sleep = lambda{|n| @config[:retry_initial_wait_sec]* (2 ** (n-1)) }
418
- end
419
- pr.with_retry { block.call(pool) }
420
- rescue => e
421
- raise Embulk::DataError.new(e)
422
- ensure
423
- Embulk.logger.info 'ThreadPool shutting down...'
424
- pool.shutdown
425
- pool.wait_for_termination
426
- Embulk.logger.info "ThreadPool shutdown? #{pool.shutdown?}"
427
- end
428
- end
429
-
430
- class TempError < StandardError
431
- end
432
- end
433
- end
434
- end