embulk-input-zendesk 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c154709e56d9966d5bfb53da83c905151afb506
4
- data.tar.gz: 744915c536f9fe8c6fa1fd20fd28a11bc57dee69
3
+ metadata.gz: 73115292227dc5474093e998c1025f4a0fa55c22
4
+ data.tar.gz: 23cd509f73bfa2c8ccff24478b258295c32c8806
5
5
  SHA512:
6
- metadata.gz: fcdf6b6607ef344697eb8ea12e1669752821c853dfaf2f5cb675777664ad78e521223a68be813ac202bd884a398f9d194e9fbc1eaec9a9c0b01cb38e2948336d
7
- data.tar.gz: b1bd6a19d263d48efdf9eb4047479dbce78e204e9d0304628d1fcc7e5670a01b3dbcdea20c9e5a8774c7068626c1b441185333a5c51b5986375bad188933f88c
6
+ metadata.gz: 6364750da3c79b7dc8a8e9507887884d3cda61445ad8f775b7bffbfb764f07a49938621380060c323c43a1a806b4ffc20f1d69a3e8e045ec7c12aa0e8d1c84a3
7
+ data.tar.gz: 93ae137dacd61c9b56f323916f6993a6f7b077231646227e85003c85d1966ee8b66c1ab99cf87c922ccc55f021ff5edeb8f136aad5039448a6d99fc5af98961d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.1.7 - 2016-06-04
2
+ * [enhancement] Improvements for non-incremental export [#12](https://github.com/treasure-data/embulk-input-zendesk/pull/12)
3
+
1
4
  ## 0.1.6 - 2016-05-09
2
5
  * [fixed] Fix non-incremental export to fetch all records [#11](https://github.com/treasure-data/embulk-input-zendesk/pull/11)
3
6
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.1.6"
4
+ spec.version = "0.1.7"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+ require "thread"
2
3
  require "httpclient"
3
4
 
4
5
  module Embulk
@@ -71,11 +72,16 @@ module Embulk
71
72
  UNAVAILABLE_INCREMENTAL_EXPORT.each do |target|
72
73
  define_method(target) do |partial = true, start_time = 0, &block|
73
74
  path = "/api/v2/#{target}.json"
74
- export(path, target, partial, &block)
75
+ if partial
76
+ export(path, target, &block)
77
+ else
78
+ export_parallel(path, target, &block)
79
+ end
75
80
  end
76
81
  end
77
82
 
78
83
  def fetch_subresource(record_id, base, target)
84
+ Embulk.logger.info "Fetching subresource #{target} of #{base}:#{record_id}"
79
85
  response = request("/api/v2/#{base}/#{record_id}/#{target}.json")
80
86
  return [] if response.status == 404
81
87
 
@@ -89,9 +95,56 @@ module Embulk
89
95
 
90
96
  private
91
97
 
92
- def export(path, key, partial, page = 1, known_ids = [], &block)
93
- per_page = partial ? PARTIAL_RECORDS_SIZE : 100 # 100 is maximum https://developer.zendesk.com/rest_api/docs/core/introduction#pagination
94
- Embulk.logger.debug("#{path} with page=#{page}" + (partial ? " (partial)" : ""))
98
+ def export_parallel(path, key, workers = 5, &block)
99
+ per_page = 100 # 100 is maximum https://developer.zendesk.com/rest_api/docs/core/introduction#pagination
100
+ first_response = request(path, per_page: per_page, page: 1)
101
+ first_fetched = JSON.parse(first_response.body)
102
+ total_count = first_fetched["count"]
103
+ last_page_num = (total_count / per_page.to_f).ceil
104
+ Embulk.logger.info "#{key} records=#{total_count} last_page=#{last_page_num}"
105
+
106
+ queue = Queue.new
107
+ (2..last_page_num).each do |page|
108
+ queue << page
109
+ end
110
+ records = first_fetched[key]
111
+
112
+ mutex = Mutex.new
113
+ threads = workers.times.map do |n|
114
+ Thread.start do
115
+ loop do
116
+ break if queue.empty?
117
+ current_page = nil
118
+
119
+ begin
120
+ Timeout.timeout(0.1) do
121
+ # Somehow queue.pop(true) blocks... the timeout is a workaround for that
122
+ current_page = queue.pop(true)
123
+ end
124
+ rescue Timeout::Error, ThreadError => e
125
+ break #=> ThreadError: queue empty
126
+ end
127
+
128
+ response = request(path, per_page: per_page, page: current_page)
129
+ fetched_records = extract_records_from_response(response, key)
130
+ mutex.synchronize do
131
+ Embulk.logger.info "Fetched #{key} on page=#{current_page}"
132
+ records.concat fetched_records
133
+ end
134
+ end
135
+ end
136
+ end
137
+ threads.each(&:join)
138
+
139
+ records.uniq {|r| r["id"]}.each do |record|
140
+ block.call record
141
+ end
142
+ nil # this is necessary, unlike incremental_export
143
+ end
144
+
145
+ def export(path, key, page = 1, &block)
146
+ per_page = PARTIAL_RECORDS_SIZE
147
+ Embulk.logger.info("Fetching #{path} with page=#{page} (partial)")
95
148
 
96
149
  response = request(path, per_page: per_page, page: page)
97
150
 
@@ -102,18 +155,8 @@ module Embulk
102
155
  end
103
156
 
104
157
  data[key].each do |record|
105
- next if known_ids.include?(record["id"])
106
- known_ids << record["id"]
107
-
108
158
  block.call record
109
159
  end
110
- return if partial
111
-
112
- if data["next_page"]
113
- return export(path, key, partial, page + 1, &block)
114
- end
115
-
116
- nil # this is necessary different with incremental_export
117
160
  end
118
161
 
119
162
  def incremental_export(path, key, start_time = 0, known_ids = [], partial = true, &block)
@@ -126,7 +169,7 @@ module Embulk
126
169
  rescue => e
127
170
  raise Embulk::DataError.new(e)
128
171
  end
129
- Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
172
+ Embulk.logger.info "Fetched records from #{start_time} (#{Time.at(start_time)})"
130
173
  records = data[key]
131
174
  end
132
175
 
@@ -151,6 +194,15 @@ module Embulk
151
194
  end
152
195
  end
153
196
 
197
+ def extract_records_from_response(response, key)
198
+ begin
199
+ data = JSON.parse(response.body)
200
+ data[key]
201
+ rescue => e
202
+ raise Embulk::DataError.new(e)
203
+ end
204
+ end
205
+
154
206
  def retryer
155
207
  PerfectRetry.new do |config|
156
208
  config.limit = @config[:retry_limit]
@@ -82,7 +82,7 @@ module Embulk
82
82
  access_token: config.param("access_token", :string, default: nil),
83
83
  start_time: config.param("start_time", :string, default: nil),
84
84
  retry_limit: config.param("retry_limit", :integer, default: 5),
85
- retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 1),
85
+ retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
86
86
  incremental: config.param("incremental", :bool, default: true),
87
87
  schema: config.param(:columns, :array, default: []),
88
88
  includes: config.param(:includes, :array, default: []),
@@ -97,12 +97,9 @@ module Embulk
97
97
  end
98
98
 
99
99
  test "fetch ticket_metrics all page" do
100
- records = [
101
- {"id" => 1},
102
- {"id" => 2},
103
- ]
100
+ records = 100.times.map{|n| {"id"=> n}}
104
101
  second_results = [
105
- {"id" => 3}
102
+ {"id" => 101}
106
103
  ]
107
104
  @httpclient.test_loopback_http_response << [
108
105
  "HTTP/1.1 200",
@@ -110,6 +107,7 @@ module Embulk
110
107
  "",
111
108
  {
112
109
  ticket_metrics: records,
110
+ count: records.length + second_results.length,
113
111
  next_page: "https://treasuredata.zendesk.com/api/v2/ticket_metrics.json?page=2",
114
112
  }.to_json
115
113
  ].join("\r\n")
@@ -120,6 +118,7 @@ module Embulk
120
118
  "",
121
119
  {
122
120
  ticket_metrics: second_results,
121
+ count: records.length + second_results.length,
123
122
  next_page: nil,
124
123
  }.to_json
125
124
  ].join("\r\n")
@@ -146,7 +145,8 @@ module Embulk
146
145
  "Content-Type: application/json",
147
146
  "",
148
147
  {
149
- ticket_metrics: records
148
+ ticket_metrics: records,
149
+ count: records.length,
150
150
  }.to_json
151
151
  ].join("\r\n")
152
152
 
@@ -163,7 +163,8 @@ module Embulk
163
163
  "Content-Type: application/json",
164
164
  "",
165
165
  {
166
- ticket_metrics: [{"id" => 1}],
166
+ ticket_metrics: 100.times.map{|n| {"id" => n}},
167
+ count: 101,
167
168
  next_page: "https://treasuredata.zendesk.com/api/v2/ticket_metrics.json?page=2",
168
169
  }.to_json
169
170
  ].join("\r\n")
@@ -173,8 +174,8 @@ module Embulk
173
174
  "Content-Type: application/json",
174
175
  "",
175
176
  {
176
- ticket_metrics: [{"id" => 2}],
177
- count: 2,
177
+ ticket_metrics: [{"id" => 101}],
178
+ count: 101,
178
179
  }.to_json
179
180
  ].join("\r\n")
180
181
 
@@ -182,7 +183,7 @@ module Embulk
182
183
  @httpclient.test_loopback_http_response << response_2
183
184
 
184
185
  handler = proc { }
185
- mock(handler).call(anything).twice
186
+ mock(handler).call(anything).times(101)
186
187
  client.ticket_metrics(false, &handler)
187
188
  end
188
189
 
@@ -226,24 +227,24 @@ module Embulk
226
227
 
227
228
  sub_test_case "ticket_fields" do
228
229
  test "invoke export when partial=true" do
229
- mock(client).export(anything, "ticket_fields", anything)
230
+ mock(client).export(anything, "ticket_fields")
230
231
  client.ticket_fields(true)
231
232
  end
232
233
 
233
234
  test "invoke export when partial=false" do
234
- mock(client).export(anything, "ticket_fields", anything)
235
+ mock(client).export_parallel(anything, "ticket_fields")
235
236
  client.ticket_fields(false)
236
237
  end
237
238
  end
238
239
 
239
240
  sub_test_case "ticket_forms" do
240
241
  test "invoke export when partial=true" do
241
- mock(client).export(anything, "ticket_forms", anything)
242
+ mock(client).export(anything, "ticket_forms")
242
243
  client.ticket_forms(true)
243
244
  end
244
245
 
245
246
  test "invoke export when partial=false" do
246
- mock(client).export(anything, "ticket_forms", anything)
247
+ mock(client).export_parallel(anything, "ticket_forms")
247
248
  client.ticket_forms(false)
248
249
  end
249
250
  end
@@ -353,6 +353,7 @@ module Embulk
353
353
  sub_test_case "run" do
354
354
  setup do
355
355
  stub(@plugin).preview? { false }
356
+ stub(Embulk).logger { Logger.new(File::NULL) }
356
357
  end
357
358
 
358
359
  test "call ticket_all method instead of tickets" do
@@ -416,6 +417,7 @@ module Embulk
416
417
  ].join("\r\n")
417
418
  stub(page_builder).add(anything)
418
419
  stub(page_builder).finish
420
+ stub(Embulk).logger { Logger.new(File::NULL) }
419
421
  end
420
422
 
421
423
  sub_test_case "incremental: true" do
@@ -443,6 +445,7 @@ module Embulk
443
445
 
444
446
  sub_test_case "casting value" do
445
447
  setup do
448
+ stub(Embulk).logger { Logger.new(File::NULL) }
446
449
  stub(@plugin).preview? { false }
447
450
  @httpclient.test_loopback_http_response << [
448
451
  "HTTP/1.1 200",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-05-09 00:00:00.000000000 Z
13
+ date: 2016-06-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement