embulk-input-zendesk 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ed2bcc09ae824662952c09511b06045ebef3641
4
- data.tar.gz: 95281d1ac59e524c36356c9c670de71c067fb326
3
+ metadata.gz: dd6da90d8ce5a50d324d5d0d4e3016b151ebcdde
4
+ data.tar.gz: adff4881ad79e42236e68f6245cf0ab15dd16463
5
5
  SHA512:
6
- metadata.gz: 4994efad0da2b8872ceda6c29a0cd5eda72136194a33e71c6e6a418dbf412f95c289463d243c7e812b30c23a56f79dee19c344461667b722e10231c44cdcdfb5
7
- data.tar.gz: b9b63e1fde1f724cc418e39a529c9bf0f3ebbf831bfdd5f268ef3027b71b25fa7987f3370db058ce51c12e9e704f01a0bac55d40f26305104a3debf92fd60ab1
6
+ metadata.gz: fbf47f6f6f96ab00bf2642c313b3829a3a85356e339bd65cb32e26bfa847a5e57200dafc31fa1b47b76b8828bd34b94a8d9baafca0f40c285b3a31d0504d63cd
7
+ data.tar.gz: eb56a55ecf3d7ddb5dc2785a63e3daa7909267a0bac1c7ed52f2fce80e4103d8a74bac9d3dcc631c49f8436252e80f7dd82fed64fa4f9bb6049577594868ebe5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.4 - 2016-04-08
2
+
3
+ * [enhancement] Correct preview data [#9](https://github.com/treasure-data/embulk-input-zendesk/pull/9)
4
+
1
5
  ## 0.1.3 - 2016-03-15
2
6
 
3
7
  * [enhancement] Support more targets [#8](https://github.com/treasure-data/embulk-input-zendesk/pull/8)
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.1.3"
4
+ spec.version = "0.1.4"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -1,3 +1,4 @@
1
+ require "strscan"
1
2
  require "httpclient"
2
3
 
3
4
  module Embulk
@@ -7,6 +8,7 @@ module Embulk
7
8
  attr_reader :config
8
9
 
9
10
  PARTIAL_RECORDS_SIZE = 50
11
+ PARTIAL_RECORDS_BYTE_SIZE = 50000
10
12
  AVAILABLE_INCREMENTAL_EXPORT = %w(tickets users organizations ticket_events).freeze
11
13
  UNAVAILABLE_INCREMENTAL_EXPORT = %w(ticket_fields ticket_forms ticket_metrics).freeze
12
14
  AVAILABLE_TARGETS = AVAILABLE_INCREMENTAL_EXPORT + UNAVAILABLE_INCREMENTAL_EXPORT
@@ -52,11 +54,8 @@ module Embulk
52
54
  # they have both Incremental API and non-incremental API
53
55
  %w(tickets users organizations).each do |target|
54
56
  define_method(target) do |partial = true, start_time = 0, &block|
55
- if partial
56
- export("/api/v2/#{target}.json", target, PARTIAL_RECORDS_SIZE, &block) # Ignore start_time
57
- else
58
- incremental_export("/api/v2/incremental/#{target}.json", target, start_time, [], &block)
59
- end
57
+ # Always use incremental_export. There is some difference between incremental_export and export.
58
+ incremental_export("/api/v2/incremental/#{target}.json", target, start_time, [], partial, &block)
60
59
  end
61
60
  end
62
61
 
@@ -64,7 +63,7 @@ module Embulk
64
63
  %w(ticket_events).each do |target|
65
64
  define_method(target) do |partial = true, start_time = 0, &block|
66
65
  path = "/api/v2/incremental/#{target}"
67
- incremental_export(path, target, start_time, [], &block)
66
+ incremental_export(path, target, start_time, [], partial, &block)
68
67
  end
69
68
  end
70
69
 
@@ -109,18 +108,21 @@ module Embulk
109
108
  nil # this is necessary (unlike incremental_export)
110
109
  end
111
110
 
112
- def incremental_export(path, key, start_time = 0, known_ids = [], &block)
113
- # for `embulk run` to fetch all records.
114
- response = request(path, start_time: start_time)
115
-
116
- begin
117
- data = JSON.parse(response.body)
118
- rescue => e
119
- raise Embulk::DataError.new(e)
111
+ def incremental_export(path, key, start_time = 0, known_ids = [], partial = true, &block)
112
+ if partial
113
+ records = request_partial(path, {start_time: start_time}).first(5)
114
+ else
115
+ response = request(path, {start_time: start_time})
116
+ begin
117
+ data = JSON.parse(response.body)
118
+ rescue => e
119
+ raise Embulk::DataError.new(e)
120
+ end
121
+ Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
122
+ records = data[key]
120
123
  end
121
124
 
122
- Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
123
- data[key].each do |record|
125
+ records.each do |record|
124
126
  # de-duplicated records.
125
127
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#usage-notes
126
128
  # https://github.com/zendesk/zendesk_api_client_rb/issues/251
@@ -129,12 +131,13 @@ module Embulk
129
131
  known_ids << record["id"]
130
132
  block.call record
131
133
  end
134
+ return if partial
132
135
 
133
136
  # NOTE: If count is less than 1000, then stop paginating.
134
137
  # Otherwise, use the next_page URL to get the next page of results.
135
138
  # https://developer.zendesk.com/rest_api/docs/core/incremental_export#pagination
136
139
  if data["count"] == 1000
137
- incremental_export(path, key, data["end_time"], known_ids, &block)
140
+ incremental_export(path, key, data["end_time"], known_ids, partial, &block)
138
141
  else
139
142
  data
140
143
  end
@@ -175,32 +178,66 @@ module Embulk
175
178
  Embulk.logger.debug "Fetching #{u.to_s}"
176
179
  response = httpclient.get(u.to_s, query, follow_redirect: true)
177
180
 
178
- # https://developer.zendesk.com/rest_api/docs/core/introduction#response-format
179
- status_code = response.status
180
- case status_code
181
- when 200, 404
182
- # 404 would be returned e.g. ticket comments are empty (on fetch_subresource method)
183
- response
184
- when 400, 401
185
- raise Embulk::ConfigError.new("[#{status_code}] #{response.body}")
186
- when 409
187
- raise "[#{status_code}] temporally failure."
188
- when 429
189
- # rate limit
190
- retry_after = response.headers["Retry-After"]
191
- wait_rate_limit(retry_after.to_i)
192
- when 500, 503
193
- # 503 is possible rate limit
194
- retry_after = response.headers["Retry-After"]
195
- if retry_after
196
- wait_rate_limit(retry_after.to_i)
197
- else
198
- raise "[#{status_code}] temporally failure."
181
+ handle_response(response.status, response.headers, response.body)
182
+ response
183
+ end
184
+ end
185
+
186
+ def request_partial(path, query = {})
187
+ # NOTE: This is a dirty hack to get a quick response from the incremental_export API.
188
+ # Disconnect the socket once more than PARTIAL_RECORDS_BYTE_SIZE bytes have been received,
189
+ # then extract the valid JSON objects from the received bytes (see extract_valid_json_from_chunk).
190
+ u = URI.parse(config[:login_url])
191
+ u.path = path
192
+
193
+ retryer.with_retry do
194
+ Embulk.logger.debug "Fetching #{u.to_s}"
195
+ buf = ""
196
+ auth_retry = 0
197
+ httpclient.get(u.to_s, query, follow_redirect: true) do |message, chunk|
198
+ if message.status == 401
199
+ # The first request will fail with 401 because it does not include credentials.
200
+ # HTTPClient will then retry the request with credentials.
201
+ if auth_retry.zero?
202
+ auth_retry += 1
203
+ next
204
+ end
205
+ end
206
+ handle_response(message.status, message.headers, chunk)
207
+
208
+ buf << chunk
209
+ break if buf.bytesize > PARTIAL_RECORDS_BYTE_SIZE
210
+ end
211
+ extract_valid_json_from_chunk(buf).map do |json|
212
+ JSON.parse(json)
213
+ end
214
+ end
215
+ end
216
+
217
+ def extract_valid_json_from_chunk(chunk)
218
+ # Extract complete JSON objects from an incomplete string
219
+ #
220
+ # e.g.:
221
+ # chunk = '{"ticket_events":[{"foo":1},{"foo":2},{"fo'
222
+ # extract_valid_json_from_chunk(chunk) #=> ['{"foo":1}', '{"foo":2}']
223
+ result = []
224
+
225
+ # omit '{"tickets":[' prefix. See test/fixtures/tickets.json for actual response.
226
+ s = StringScanner.new(chunk.scrub.gsub(%r!^{".*?":\[!,""))
227
+ while !s.eos?
228
+ opener = s.scan(/{/)
229
+ break unless opener
230
+ buf = opener # Initialize `buf` as "{"
231
+ while content = s.scan(/.*?}/) # grab data from start to next "}"
232
+ buf << content
233
+ if (JSON.parse(buf) rescue false) # if JSON.parse success, `buf` is valid JSON. we'll take it.
234
+ result << buf.dup
235
+ break
199
236
  end
200
- else
201
- raise "Server returns unknown status code (#{status_code})"
202
237
  end
238
+ s.scan(/[^{]*/) # skip until the next "{". `chunk` has comma-separated objects like '},{'; skip that comma.
203
239
  end
240
+ result
204
241
  end
205
242
 
206
243
  def wait_rate_limit(retry_after)
@@ -209,6 +246,31 @@ module Embulk
209
246
  throw :retry
210
247
  end
211
248
 
249
+ def handle_response(status_code, headers, body)
250
+ # https://developer.zendesk.com/rest_api/docs/core/introduction#response-format
251
+ case status_code
252
+ when 200, 404
253
+ # 404 would be returned e.g. ticket comments are empty (on fetch_subresource method)
254
+ when 400, 401
255
+ raise Embulk::ConfigError.new("[#{status_code}] #{body}")
256
+ when 409
257
+ raise "[#{status_code}] temporally failure."
258
+ when 429
259
+ # rate limit
260
+ retry_after = headers["Retry-After"]
261
+ wait_rate_limit(retry_after.to_i)
262
+ when 500, 503
263
+ # 503 is possible rate limit
264
+ retry_after = headers["Retry-After"]
265
+ if retry_after
266
+ wait_rate_limit(retry_after.to_i)
267
+ else
268
+ raise "[#{status_code}] temporally failure."
269
+ end
270
+ else
271
+ raise "Server returns unknown status code (#{status_code})"
272
+ end
273
+ end
212
274
  end
213
275
  end
214
276
  end
@@ -96,7 +96,7 @@ module Embulk
96
96
  def run
97
97
  method = task[:target]
98
98
  args = [preview?]
99
- if !preview? && @start_time
99
+ if @start_time
100
100
  args << @start_time.to_i
101
101
  end
102
102
 
@@ -104,6 +104,7 @@ module Embulk
104
104
  record = fetch_related_object(record)
105
105
  values = extract_values(record)
106
106
  page_builder.add(values)
107
+ break if preview? # NOTE: preview handles only 1 record because fetching subresources is slow.
107
108
  end
108
109
  page_builder.finish
109
110
 
@@ -123,12 +124,7 @@ module Embulk
123
124
 
124
125
  def fetch_related_object(record)
125
126
  (task[:includes] || []).each do |ent|
126
- if preview?
127
- # Fetching subresource consume ~2 sec for each record. it is too long to preview. so the dummy value used.
128
- record[ent] = [{dummy: "(#{ent}) dummy value for preview"}]
129
- else
130
- record[ent] = client.fetch_subresource(record["id"], task[:target], ent)
131
- end
127
+ record[ent] = client.fetch_subresource(record["id"], task[:target], ent)
132
128
  end
133
129
  record
134
130
  end
@@ -47,19 +47,6 @@ module Embulk
47
47
  end
48
48
  client.tickets(&handler)
49
49
  end
50
-
51
- test "raise DataError when invalid JSON response" do
52
- @httpclient.test_loopback_http_response << [
53
- "HTTP/1.1 200",
54
- "Content-Type: application/json",
55
- "",
56
- "[[[" # invalid json
57
- ].join("\r\n")
58
-
59
- assert_raise(DataError) do
60
- client.tickets
61
- end
62
- end
63
50
  end
64
51
 
65
52
  sub_test_case "all" do
@@ -175,12 +162,12 @@ module Embulk
175
162
 
176
163
  sub_test_case "ticket_events" do
177
164
  test "invoke incremental_export when partial=true" do
178
- mock(client).incremental_export(anything, "ticket_events", anything, [])
165
+ mock(client).incremental_export(anything, "ticket_events", anything, [], true)
179
166
  client.ticket_events(true)
180
167
  end
181
168
 
182
169
  test "invoke incremental_export when partial=false" do
183
- mock(client).incremental_export(anything, "ticket_events", anything, [])
170
+ mock(client).incremental_export(anything, "ticket_events", anything, [], false)
184
171
  client.ticket_events(false)
185
172
  end
186
173
  end
@@ -364,13 +351,6 @@ module Embulk
364
351
  end
365
352
  end
366
353
 
367
- test "401" do
368
- stub_response(401)
369
- assert_raise(ConfigError) do
370
- client.tickets(&proc{})
371
- end
372
- end
373
-
374
354
  test "409" do
375
355
  stub_response(409)
376
356
  assert_raise(StandardError) do
@@ -444,6 +424,27 @@ module Embulk
444
424
  end
445
425
  end
446
426
 
427
+ sub_test_case ".extract_valid_json_from_chunk" do
428
+ setup do
429
+ @client = Client.new({target: "tickets"})
430
+ end
431
+
432
+ test "complete json" do
433
+ actual = @client.send(:extract_valid_json_from_chunk, '{"tickets":[{"foo":1},{"foo":2}]}')
434
+ assert_equal ['{"foo":1}', '{"foo":2}'], actual
435
+ end
436
+
437
+ test "broken json" do
438
+ json = '{"ticket_events":[{"foo":1},{"foo":2},{"fo'
439
+ actual = @client.send(:extract_valid_json_from_chunk, json)
440
+ expected = [
441
+ '{"foo":1}',
442
+ '{"foo":2}',
443
+ ]
444
+ assert_equal expected, actual
445
+ end
446
+ end
447
+
447
448
  def login_url
448
449
  "http://example.com"
449
450
  end
@@ -106,11 +106,7 @@ module Embulk
106
106
  "HTTP/1.1 200",
107
107
  "Content-Type: application/json",
108
108
  "",
109
- {
110
- tickets: [
111
- JSON.parse(fixture_load("tickets.json"))
112
- ]
113
- }.to_json
109
+ JSON.parse(fixture_load("tickets.json")).to_json
114
110
  ].join("\r\n")
115
111
  mock(@client).validate_config
116
112
  Plugin.guess(config)["columns"]
@@ -121,11 +117,7 @@ module Embulk
121
117
  "HTTP/1.1 200",
122
118
  "Content-Type: application/json",
123
119
  "",
124
- {
125
- tickets: [
126
- JSON.parse(fixture_load("tickets.json"))
127
- ]
128
- }.to_json
120
+ JSON.parse(fixture_load("tickets.json")).to_json
129
121
  ].join("\r\n")
130
122
  actual = Plugin.guess(config)["columns"]
131
123
  assert actual.include?(name: "url", type: :string)
@@ -241,7 +233,7 @@ module Embulk
241
233
 
242
234
  test "call fetch_subresource" do
243
235
  includes.each do |ent|
244
- mock(@client).fetch_subresource(anything, anything, ent).never
236
+ mock(@client).fetch_subresource(anything, anything, ent)
245
237
  end
246
238
  @plugin.run
247
239
  end
@@ -326,8 +318,8 @@ module Embulk
326
318
  end
327
319
 
328
320
  test "call tickets method instead of ticket_all" do
329
- mock(@client).export(anything, "tickets", anything) { [] }
330
- mock(@client).incremental_export.never
321
+ mock(@client).export.never
322
+ mock(@client).incremental_export(anything, "tickets", anything, anything, anything) { [] }
331
323
  mock(page_builder).finish
332
324
 
333
325
  @plugin.run
@@ -348,9 +340,10 @@ module Embulk
348
340
  }.to_json
349
341
  ].join("\r\n")
350
342
 
351
- tickets.each do |ticket|
352
- mock(page_builder).add([ticket["id"], ticket["tags"]])
353
- end
343
+ first_ticket = tickets[0]
344
+ second_ticket = tickets[1]
345
+ mock(page_builder).add([first_ticket["id"], first_ticket["tags"]])
346
+ mock(page_builder).add([second_ticket["id"], second_ticket["tags"]]).never
354
347
  mock(page_builder).finish
355
348
 
356
349
  @plugin.run
@@ -364,7 +357,7 @@ module Embulk
364
357
 
365
358
  test "call ticket_all method instead of tickets" do
366
359
  mock(@client).export.never
367
- mock(@client).incremental_export(anything, "tickets", 0, []) { [] }
360
+ mock(@client).incremental_export(anything, "tickets", 0, [], false) { [] }
368
361
  mock(page_builder).finish
369
362
 
370
363
  @plugin.run
@@ -1,4 +1,4 @@
1
- {
1
+ {"tickets":[{
2
2
  "id": 35436,
3
3
  "url": "https://company.zendesk.com/api/v2/tickets/35436.json",
4
4
  "external_id": "ahg35h3jh",
@@ -6,7 +6,7 @@
6
6
  "updated_at": "2011-05-05T10:38:52Z",
7
7
  "type": "incident",
8
8
  "subject": "Help, my printer is on fire!",
9
- "raw_subject": "{{dc.printer_on_fire}}",
9
+ "raw_subject": "{{dc.printer_on_fire}} }{",
10
10
  "description": "The fire is very colorful.",
11
11
  "priority": "high",
12
12
  "status": "open",
@@ -41,4 +41,4 @@
41
41
  "comment": "Great support!"
42
42
  },
43
43
  "sharing_agreement_ids": [84432]
44
- }
44
+ }]}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-03-15 00:00:00.000000000 Z
13
+ date: 2016-04-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement