embulk-input-zendesk 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ed2bcc09ae824662952c09511b06045ebef3641
4
- data.tar.gz: 95281d1ac59e524c36356c9c670de71c067fb326
3
+ metadata.gz: dd6da90d8ce5a50d324d5d0d4e3016b151ebcdde
4
+ data.tar.gz: adff4881ad79e42236e68f6245cf0ab15dd16463
5
5
  SHA512:
6
- metadata.gz: 4994efad0da2b8872ceda6c29a0cd5eda72136194a33e71c6e6a418dbf412f95c289463d243c7e812b30c23a56f79dee19c344461667b722e10231c44cdcdfb5
7
- data.tar.gz: b9b63e1fde1f724cc418e39a529c9bf0f3ebbf831bfdd5f268ef3027b71b25fa7987f3370db058ce51c12e9e704f01a0bac55d40f26305104a3debf92fd60ab1
6
+ metadata.gz: fbf47f6f6f96ab00bf2642c313b3829a3a85356e339bd65cb32e26bfa847a5e57200dafc31fa1b47b76b8828bd34b94a8d9baafca0f40c285b3a31d0504d63cd
7
+ data.tar.gz: eb56a55ecf3d7ddb5dc2785a63e3daa7909267a0bac1c7ed52f2fce80e4103d8a74bac9d3dcc631c49f8436252e80f7dd82fed64fa4f9bb6049577594868ebe5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.4 - 2016-04-08
2
+
3
+ * [enhancement] Correct preview data [#9](https://github.com/treasure-data/embulk-input-zendesk/pull/9)
4
+
1
5
  ## 0.1.3 - 2016-03-15
2
6
 
3
7
  * [enhancement] Support more targets [#8](https://github.com/treasure-data/embulk-input-zendesk/pull/8)
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.1.3"
4
+ spec.version = "0.1.4"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -1,3 +1,4 @@
1
+ require "strscan"
1
2
  require "httpclient"
2
3
 
3
4
  module Embulk
@@ -7,6 +8,7 @@ module Embulk
7
8
  attr_reader :config
8
9
 
9
10
  PARTIAL_RECORDS_SIZE = 50
11
+ PARTIAL_RECORDS_BYTE_SIZE = 50000
10
12
  AVAILABLE_INCREMENTAL_EXPORT = %w(tickets users organizations ticket_events).freeze
11
13
  UNAVAILABLE_INCREMENTAL_EXPORT = %w(ticket_fields ticket_forms ticket_metrics).freeze
12
14
  AVAILABLE_TARGETS = AVAILABLE_INCREMENTAL_EXPORT + UNAVAILABLE_INCREMENTAL_EXPORT
@@ -52,11 +54,8 @@ module Embulk
52
54
  # they have both Incremental API and non-incremental API
53
55
  %w(tickets users organizations).each do |target|
54
56
  define_method(target) do |partial = true, start_time = 0, &block|
55
- if partial
56
- export("/api/v2/#{target}.json", target, PARTIAL_RECORDS_SIZE, &block) # Ignore start_time
57
- else
58
- incremental_export("/api/v2/incremental/#{target}.json", target, start_time, [], &block)
59
- end
57
+ # Always use incremental_export. There is some difference between incremental_export and export.
58
+ incremental_export("/api/v2/incremental/#{target}.json", target, start_time, [], partial, &block)
60
59
  end
61
60
  end
62
61
 
@@ -64,7 +63,7 @@ module Embulk
64
63
  %w(ticket_events).each do |target|
65
64
  define_method(target) do |partial = true, start_time = 0, &block|
66
65
  path = "/api/v2/incremental/#{target}"
67
- incremental_export(path, target, start_time, [], &block)
66
+ incremental_export(path, target, start_time, [], partial, &block)
68
67
  end
69
68
  end
70
69
 
@@ -109,18 +108,21 @@ module Embulk
109
108
  nil # this is necessary different with incremental_export
110
109
  end
111
110
 
112
- def incremental_export(path, key, start_time = 0, known_ids = [], &block)
113
- # for `embulk run` to fetch all records.
114
- response = request(path, start_time: start_time)
115
-
116
- begin
117
- data = JSON.parse(response.body)
118
- rescue => e
119
- raise Embulk::DataError.new(e)
111
+ def incremental_export(path, key, start_time = 0, known_ids = [], partial = true, &block)
112
+ if partial
113
+ records = request_partial(path, {start_time: start_time}).first(5)
114
+ else
115
+ response = request(path, {start_time: start_time})
116
+ begin
117
+ data = JSON.parse(response.body)
118
+ rescue => e
119
+ raise Embulk::DataError.new(e)
120
+ end
121
+ Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
122
+ records = data[key]
120
123
  end
121
124
 
122
- Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
123
- data[key].each do |record|
125
+ records.each do |record|
124
126
  # de-duplicated records.
125
127
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
126
128
  # https://github.com/zendesk/zendesk_api_client_rb/issues/251
@@ -129,12 +131,13 @@ module Embulk
129
131
  known_ids << record["id"]
130
132
  block.call record
131
133
  end
134
+ return if partial
132
135
 
133
136
  # NOTE: If count is less than 1000, then stop paginating.
134
137
  # Otherwise, use the next_page URL to get the next page of results.
135
138
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#pagination
136
139
  if data["count"] == 1000
137
- incremental_export(path, key, data["end_time"], known_ids, &block)
140
+ incremental_export(path, key, data["end_time"], known_ids, partial, &block)
138
141
  else
139
142
  data
140
143
  end
@@ -175,32 +178,66 @@ module Embulk
175
178
  Embulk.logger.debug "Fetching #{u.to_s}"
176
179
  response = httpclient.get(u.to_s, query, follow_redirect: true)
177
180
 
178
- # https://developer.zendesk.com/rest_api/docs/core/introduction#response-format
179
- status_code = response.status
180
- case status_code
181
- when 200, 404
182
- # 404 would be returned e.g. ticket comments are empty (on fetch_subresource method)
183
- response
184
- when 400, 401
185
- raise Embulk::ConfigError.new("[#{status_code}] #{response.body}")
186
- when 409
187
- raise "[#{status_code}] temporally failure."
188
- when 429
189
- # rate limit
190
- retry_after = response.headers["Retry-After"]
191
- wait_rate_limit(retry_after.to_i)
192
- when 500, 503
193
- # 503 is possible rate limit
194
- retry_after = response.headers["Retry-After"]
195
- if retry_after
196
- wait_rate_limit(retry_after.to_i)
197
- else
198
- raise "[#{status_code}] temporally failure."
181
+ handle_response(response.status, response.headers, response.body)
182
+ response
183
+ end
184
+ end
185
+
186
+ def request_partial(path, query = {})
187
+ # NOTE: This is a dirty hack for quick response using incremental_export API.
188
+ # Disconnect the socket once more than PARTIAL_RECORDS_BYTE_SIZE bytes have been received,
189
+ # And extract valid JSONs from received bytes (extract_valid_json_from_chunk method)
190
+ u = URI.parse(config[:login_url])
191
+ u.path = path
192
+
193
+ retryer.with_retry do
194
+ Embulk.logger.debug "Fetching #{u.to_s}"
195
+ buf = ""
196
+ auth_retry = 0
197
+ httpclient.get(u.to_s, query, follow_redirect: true) do |message, chunk|
198
+ if message.status == 401
199
+ # The first request fails with 401 because it does not include credentials.
200
+ # HTTPClient will retry request with credentials.
201
+ if auth_retry.zero?
202
+ auth_retry += 1
203
+ next
204
+ end
205
+ end
206
+ handle_response(message.status, message.headers, chunk)
207
+
208
+ buf << chunk
209
+ break if buf.bytesize > PARTIAL_RECORDS_BYTE_SIZE
210
+ end
211
+ extract_valid_json_from_chunk(buf).map do |json|
212
+ JSON.parse(json)
213
+ end
214
+ end
215
+ end
216
+
217
+ def extract_valid_json_from_chunk(chunk)
218
+ # Extract complete JSON objects from a possibly incomplete string
219
+ #
220
+ # e.g.:
221
+ # chunk = '{"ticket_events":[{"foo":1},{"foo":2},{"fo'
222
+ # extract_valid_json_from_chunk(chunk) #=> ['{"foo":1}', '{"foo":2}']
223
+ result = []
224
+
225
+ # omit '{"tickets":[' prefix. See test/fixtures/tickets.json for actual response.
226
+ s = StringScanner.new(chunk.scrub.gsub(%r!^{".*?":\[!,""))
227
+ while !s.eos?
228
+ opener = s.scan(/{/)
229
+ break unless opener
230
+ buf = opener # Initialize `buf` as "{"
231
+ while content = s.scan(/.*?}/) # grab data from start to next "}"
232
+ buf << content
233
+ if (JSON.parse(buf) rescue false) # if JSON.parse succeeds, `buf` is valid JSON, so we take it.
234
+ result << buf.dup
235
+ break
199
236
  end
200
- else
201
- raise "Server returns unknown status code (#{status_code})"
202
237
  end
238
+ s.scan(/[^{]*/) # skip until next "{". `chunk` has comma-separated objects like '},{'. Skip that comma.
203
239
  end
240
+ result
204
241
  end
205
242
 
206
243
  def wait_rate_limit(retry_after)
@@ -209,6 +246,31 @@ module Embulk
209
246
  throw :retry
210
247
  end
211
248
 
249
+ def handle_response(status_code, headers, body)
250
+ # https://developer.zendesk.com/rest_api/docs/core/introduction#response-format
251
+ case status_code
252
+ when 200, 404
253
+ # 404 would be returned e.g. ticket comments are empty (on fetch_subresource method)
254
+ when 400, 401
255
+ raise Embulk::ConfigError.new("[#{status_code}] #{body}")
256
+ when 409
257
+ raise "[#{status_code}] temporally failure."
258
+ when 429
259
+ # rate limit
260
+ retry_after = headers["Retry-After"]
261
+ wait_rate_limit(retry_after.to_i)
262
+ when 500, 503
263
+ # 503 is possible rate limit
264
+ retry_after = headers["Retry-After"]
265
+ if retry_after
266
+ wait_rate_limit(retry_after.to_i)
267
+ else
268
+ raise "[#{status_code}] temporally failure."
269
+ end
270
+ else
271
+ raise "Server returns unknown status code (#{status_code})"
272
+ end
273
+ end
212
274
  end
213
275
  end
214
276
  end
@@ -96,7 +96,7 @@ module Embulk
96
96
  def run
97
97
  method = task[:target]
98
98
  args = [preview?]
99
- if !preview? && @start_time
99
+ if @start_time
100
100
  args << @start_time.to_i
101
101
  end
102
102
 
@@ -104,6 +104,7 @@ module Embulk
104
104
  record = fetch_related_object(record)
105
105
  values = extract_values(record)
106
106
  page_builder.add(values)
107
+ break if preview? # NOTE: preview handles only 1 record because fetching subresources is slow.
107
108
  end
108
109
  page_builder.finish
109
110
 
@@ -123,12 +124,7 @@ module Embulk
123
124
 
124
125
  def fetch_related_object(record)
125
126
  (task[:includes] || []).each do |ent|
126
- if preview?
127
- # Fetching subresource consume ~2 sec for each record. it is too long to preview. so the dummy value used.
128
- record[ent] = [{dummy: "(#{ent}) dummy value for preview"}]
129
- else
130
- record[ent] = client.fetch_subresource(record["id"], task[:target], ent)
131
- end
127
+ record[ent] = client.fetch_subresource(record["id"], task[:target], ent)
132
128
  end
133
129
  record
134
130
  end
@@ -47,19 +47,6 @@ module Embulk
47
47
  end
48
48
  client.tickets(&handler)
49
49
  end
50
-
51
- test "raise DataError when invalid JSON response" do
52
- @httpclient.test_loopback_http_response << [
53
- "HTTP/1.1 200",
54
- "Content-Type: application/json",
55
- "",
56
- "[[[" # invalid json
57
- ].join("\r\n")
58
-
59
- assert_raise(DataError) do
60
- client.tickets
61
- end
62
- end
63
50
  end
64
51
 
65
52
  sub_test_case "all" do
@@ -175,12 +162,12 @@ module Embulk
175
162
 
176
163
  sub_test_case "ticket_events" do
177
164
  test "invoke incremental_export when partial=true" do
178
- mock(client).incremental_export(anything, "ticket_events", anything, [])
165
+ mock(client).incremental_export(anything, "ticket_events", anything, [], true)
179
166
  client.ticket_events(true)
180
167
  end
181
168
 
182
169
  test "invoke incremental_export when partial=false" do
183
- mock(client).incremental_export(anything, "ticket_events", anything, [])
170
+ mock(client).incremental_export(anything, "ticket_events", anything, [], false)
184
171
  client.ticket_events(false)
185
172
  end
186
173
  end
@@ -364,13 +351,6 @@ module Embulk
364
351
  end
365
352
  end
366
353
 
367
- test "401" do
368
- stub_response(401)
369
- assert_raise(ConfigError) do
370
- client.tickets(&proc{})
371
- end
372
- end
373
-
374
354
  test "409" do
375
355
  stub_response(409)
376
356
  assert_raise(StandardError) do
@@ -444,6 +424,27 @@ module Embulk
444
424
  end
445
425
  end
446
426
 
427
+ sub_test_case ".extract_valid_json_from_chunk" do
428
+ setup do
429
+ @client = Client.new({target: "tickets"})
430
+ end
431
+
432
+ test "complete json" do
433
+ actual = @client.send(:extract_valid_json_from_chunk, '{"tickets":[{"foo":1},{"foo":2}]}')
434
+ assert_equal ['{"foo":1}', '{"foo":2}'], actual
435
+ end
436
+
437
+ test "broken json" do
438
+ json = '{"ticket_events":[{"foo":1},{"foo":2},{"fo'
439
+ actual = @client.send(:extract_valid_json_from_chunk, json)
440
+ expected = [
441
+ '{"foo":1}',
442
+ '{"foo":2}',
443
+ ]
444
+ assert_equal expected, actual
445
+ end
446
+ end
447
+
447
448
  def login_url
448
449
  "http://example.com"
449
450
  end
@@ -106,11 +106,7 @@ module Embulk
106
106
  "HTTP/1.1 200",
107
107
  "Content-Type: application/json",
108
108
  "",
109
- {
110
- tickets: [
111
- JSON.parse(fixture_load("tickets.json"))
112
- ]
113
- }.to_json
109
+ JSON.parse(fixture_load("tickets.json")).to_json
114
110
  ].join("\r\n")
115
111
  mock(@client).validate_config
116
112
  Plugin.guess(config)["columns"]
@@ -121,11 +117,7 @@ module Embulk
121
117
  "HTTP/1.1 200",
122
118
  "Content-Type: application/json",
123
119
  "",
124
- {
125
- tickets: [
126
- JSON.parse(fixture_load("tickets.json"))
127
- ]
128
- }.to_json
120
+ JSON.parse(fixture_load("tickets.json")).to_json
129
121
  ].join("\r\n")
130
122
  actual = Plugin.guess(config)["columns"]
131
123
  assert actual.include?(name: "url", type: :string)
@@ -241,7 +233,7 @@ module Embulk
241
233
 
242
234
  test "call fetch_subresource" do
243
235
  includes.each do |ent|
244
- mock(@client).fetch_subresource(anything, anything, ent).never
236
+ mock(@client).fetch_subresource(anything, anything, ent)
245
237
  end
246
238
  @plugin.run
247
239
  end
@@ -326,8 +318,8 @@ module Embulk
326
318
  end
327
319
 
328
320
  test "call tickets method instead of ticket_all" do
329
- mock(@client).export(anything, "tickets", anything) { [] }
330
- mock(@client).incremental_export.never
321
+ mock(@client).export.never
322
+ mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
331
323
  mock(page_builder).finish
332
324
 
333
325
  @plugin.run
@@ -348,9 +340,10 @@ module Embulk
348
340
  }.to_json
349
341
  ].join("\r\n")
350
342
 
351
- tickets.each do |ticket|
352
- mock(page_builder).add([ticket["id"], ticket["tags"]])
353
- end
343
+ first_ticket = tickets[0]
344
+ second_ticket = tickets[1]
345
+ mock(page_builder).add([first_ticket["id"], first_ticket["tags"]])
346
+ mock(page_builder).add([second_ticket["id"], second_ticket["tags"]]).never
354
347
  mock(page_builder).finish
355
348
 
356
349
  @plugin.run
@@ -364,7 +357,7 @@ module Embulk
364
357
 
365
358
  test "call ticket_all method instead of tickets" do
366
359
  mock(@client).export.never
367
- mock(@client).incremental_export(anything, "tickets", 0, []) { [] }
360
+ mock(@client).incremental_export(anything, "tickets", 0, [], false) { [] }
368
361
  mock(page_builder).finish
369
362
 
370
363
  @plugin.run
@@ -1,4 +1,4 @@
1
- {
1
+ {"tickets":[{
2
2
  "id": 35436,
3
3
  "url": "https://company.zendesk.com/api/v2/tickets/35436.json",
4
4
  "external_id": "ahg35h3jh",
@@ -6,7 +6,7 @@
6
6
  "updated_at": "2011-05-05T10:38:52Z",
7
7
  "type": "incident",
8
8
  "subject": "Help, my printer is on fire!",
9
- "raw_subject": "{{dc.printer_on_fire}}",
9
+ "raw_subject": "{{dc.printer_on_fire}} }{",
10
10
  "description": "The fire is very colorful.",
11
11
  "priority": "high",
12
12
  "status": "open",
@@ -41,4 +41,4 @@
41
41
  "comment": "Great support!"
42
42
  },
43
43
  "sharing_agreement_ids": [84432]
44
- }
44
+ }]}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-03-15 00:00:00.000000000 Z
13
+ date: 2016-04-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement