embulk-input-zendesk 0.1.6 → 0.1.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c154709e56d9966d5bfb53da83c905151afb506
4
- data.tar.gz: 744915c536f9fe8c6fa1fd20fd28a11bc57dee69
3
+ metadata.gz: 73115292227dc5474093e998c1025f4a0fa55c22
4
+ data.tar.gz: 23cd509f73bfa2c8ccff24478b258295c32c8806
5
5
  SHA512:
6
- metadata.gz: fcdf6b6607ef344697eb8ea12e1669752821c853dfaf2f5cb675777664ad78e521223a68be813ac202bd884a398f9d194e9fbc1eaec9a9c0b01cb38e2948336d
7
- data.tar.gz: b1bd6a19d263d48efdf9eb4047479dbce78e204e9d0304628d1fcc7e5670a01b3dbcdea20c9e5a8774c7068626c1b441185333a5c51b5986375bad188933f88c
6
+ metadata.gz: 6364750da3c79b7dc8a8e9507887884d3cda61445ad8f775b7bffbfb764f07a49938621380060c323c43a1a806b4ffc20f1d69a3e8e045ec7c12aa0e8d1c84a3
7
+ data.tar.gz: 93ae137dacd61c9b56f323916f6993a6f7b077231646227e85003c85d1966ee8b66c1ab99cf87c922ccc55f021ff5edeb8f136aad5039448a6d99fc5af98961d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.1.7 - 2016-06-04
2
+ * [enhancement] Improvements for non incremental export [#12](https://github.com/treasure-data/embulk-input-zendesk/pull/12)
3
+
1
4
  ## 0.1.6 - 2016-05-09
2
5
  * [fixed] Fix non-incremental export to fetch all records [#11](https://github.com/treasure-data/embulk-input-zendesk/pull/11)
3
6
 
@@ -1,7 +1,7 @@
1
1
 
2
2
  Gem::Specification.new do |spec|
3
3
  spec.name = "embulk-input-zendesk"
4
- spec.version = "0.1.6"
4
+ spec.version = "0.1.7"
5
5
  spec.authors = ["uu59", "muga", "sakama"]
6
6
  spec.summary = "Zendesk input plugin for Embulk"
7
7
  spec.description = "Loads records from Zendesk."
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+ require "thread"
2
3
  require "httpclient"
3
4
 
4
5
  module Embulk
@@ -71,11 +72,16 @@ module Embulk
71
72
  UNAVAILABLE_INCREMENTAL_EXPORT.each do |target|
72
73
  define_method(target) do |partial = true, start_time = 0, &block|
73
74
  path = "/api/v2/#{target}.json"
74
- export(path, target, partial, &block)
75
+ if partial
76
+ export(path, target, &block)
77
+ else
78
+ export_parallel(path, target, &block)
79
+ end
75
80
  end
76
81
  end
77
82
 
78
83
  def fetch_subresource(record_id, base, target)
84
+ Embulk.logger.info "Fetching subresource #{target} of #{base}:#{record_id}"
79
85
  response = request("/api/v2/#{base}/#{record_id}/#{target}.json")
80
86
  return [] if response.status == 404
81
87
 
@@ -89,9 +95,56 @@ module Embulk
89
95
 
90
96
  private
91
97
 
92
- def export(path, key, partial, page = 1, known_ids = [], &block)
93
- per_page = partial ? PARTIAL_RECORDS_SIZE : 100 # 100 is maximum https://developer.zendesk.com/rest_api/docs/core/introduction#pagination
94
- Embulk.logger.debug("#{path} with page=#{page}" + (partial ? " (partial)" : ""))
98
+ def export_parallel(path, key, workers = 5, &block)
99
+ per_page = 100 # 100 is maximum https://developer.zendesk.com/rest_api/docs/core/introduction#pagination
100
+ first_response = request(path, per_page: per_page, page: 1)
101
+ first_fetched = JSON.parse(first_response.body)
102
+ total_count = first_fetched["count"]
103
+ last_page_num = (total_count / per_page.to_f).ceil
104
+ Embulk.logger.info "#{key} records=#{total_count} last_page=#{last_page_num}"
105
+
106
+ queue = Queue.new
107
+ (2..last_page_num).each do |page|
108
+ queue << page
109
+ end
110
+ records = first_fetched[key]
111
+
112
+ mutex = Mutex.new
113
+ threads = workers.times.map do |n|
114
+ Thread.start do
115
+ loop do
116
+ break if queue.empty?
117
+ current_page = nil
118
+
119
+ begin
120
+ Timeout.timeout(0.1) do
121
+ # Somehow queue.pop(true) blocks... timeout is workaround for that
122
+ current_page = queue.pop(true)
123
+ end
124
+ rescue Timeout::Error, ThreadError => e
125
+ break #=> ThreadError: queue empty
126
+ end
127
+
128
+ response = request(path, per_page: per_page, page: current_page)
129
+ fetched_records = extract_records_from_response(response, key)
130
+ mutex.synchronize do
131
+ Embulk.logger.info "Fetched #{key} on page=#{current_page}"
132
+ records.concat fetched_records
133
+ end
134
+ end
135
+ end
136
+ end
137
+ threads.each(&:join)
138
+
139
+ records.uniq {|r| r["id"]}.each do |record|
140
+ block.call record
141
+ end
142
+ nil # this is necessary different with incremental_export
143
+ end
144
+
145
+ def export(path, key, page = 1, &block)
146
+ per_page = PARTIAL_RECORDS_SIZE
147
+ Embulk.logger.info("Fetching #{path} with page=#{page} (partial)")
95
148
 
96
149
  response = request(path, per_page: per_page, page: page)
97
150
 
@@ -102,18 +155,8 @@ module Embulk
102
155
  end
103
156
 
104
157
  data[key].each do |record|
105
- next if known_ids.include?(record["id"])
106
- known_ids << record["id"]
107
-
108
158
  block.call record
109
159
  end
110
- return if partial
111
-
112
- if data["next_page"]
113
- return export(path, key, partial, page + 1, &block)
114
- end
115
-
116
- nil # this is necessary different with incremental_export
117
160
  end
118
161
 
119
162
  def incremental_export(path, key, start_time = 0, known_ids = [], partial = true, &block)
@@ -126,7 +169,7 @@ module Embulk
126
169
  rescue => e
127
170
  raise Embulk::DataError.new(e)
128
171
  end
129
- Embulk.logger.debug "start_time:#{start_time} (#{Time.at(start_time)}) count:#{data["count"]} next_page:#{data["next_page"]} end_time:#{data["end_time"]} "
172
+ Embulk.logger.info "Fetched records from #{start_time} (#{Time.at(start_time)})"
130
173
  records = data[key]
131
174
  end
132
175
 
@@ -151,6 +194,15 @@ module Embulk
151
194
  end
152
195
  end
153
196
 
197
+ def extract_records_from_response(response, key)
198
+ begin
199
+ data = JSON.parse(response.body)
200
+ data[key]
201
+ rescue => e
202
+ raise Embulk::DataError.new(e)
203
+ end
204
+ end
205
+
154
206
  def retryer
155
207
  PerfectRetry.new do |config|
156
208
  config.limit = @config[:retry_limit]
@@ -82,7 +82,7 @@ module Embulk
82
82
  access_token: config.param("access_token", :string, default: nil),
83
83
  start_time: config.param("start_time", :string, default: nil),
84
84
  retry_limit: config.param("retry_limit", :integer, default: 5),
85
- retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 1),
85
+ retry_initial_wait_sec: config.param("retry_initial_wait_sec", :integer, default: 4),
86
86
  incremental: config.param("incremental", :bool, default: true),
87
87
  schema: config.param(:columns, :array, default: []),
88
88
  includes: config.param(:includes, :array, default: []),
@@ -97,12 +97,9 @@ module Embulk
97
97
  end
98
98
 
99
99
  test "fetch ticket_metrics all page" do
100
- records = [
101
- {"id" => 1},
102
- {"id" => 2},
103
- ]
100
+ records = 100.times.map{|n| {"id"=> n}}
104
101
  second_results = [
105
- {"id" => 3}
102
+ {"id" => 101}
106
103
  ]
107
104
  @httpclient.test_loopback_http_response << [
108
105
  "HTTP/1.1 200",
@@ -110,6 +107,7 @@ module Embulk
110
107
  "",
111
108
  {
112
109
  ticket_metrics: records,
110
+ count: records.length + second_results.length,
113
111
  next_page: "https://treasuredata.zendesk.com/api/v2/ticket_metrics.json?page=2",
114
112
  }.to_json
115
113
  ].join("\r\n")
@@ -120,6 +118,7 @@ module Embulk
120
118
  "",
121
119
  {
122
120
  ticket_metrics: second_results,
121
+ count: records.length + second_results.length,
123
122
  next_page: nil,
124
123
  }.to_json
125
124
  ].join("\r\n")
@@ -146,7 +145,8 @@ module Embulk
146
145
  "Content-Type: application/json",
147
146
  "",
148
147
  {
149
- ticket_metrics: records
148
+ ticket_metrics: records,
149
+ count: records.length,
150
150
  }.to_json
151
151
  ].join("\r\n")
152
152
 
@@ -163,7 +163,8 @@ module Embulk
163
163
  "Content-Type: application/json",
164
164
  "",
165
165
  {
166
- ticket_metrics: [{"id" => 1}],
166
+ ticket_metrics: 100.times.map{|n| {"id" => n}},
167
+ count: 101,
167
168
  next_page: "https://treasuredata.zendesk.com/api/v2/ticket_metrics.json?page=2",
168
169
  }.to_json
169
170
  ].join("\r\n")
@@ -173,8 +174,8 @@ module Embulk
173
174
  "Content-Type: application/json",
174
175
  "",
175
176
  {
176
- ticket_metrics: [{"id" => 2}],
177
- count: 2,
177
+ ticket_metrics: [{"id" => 101}],
178
+ count: 101,
178
179
  }.to_json
179
180
  ].join("\r\n")
180
181
 
@@ -182,7 +183,7 @@ module Embulk
182
183
  @httpclient.test_loopback_http_response << response_2
183
184
 
184
185
  handler = proc { }
185
- mock(handler).call(anything).twice
186
+ mock(handler).call(anything).times(101)
186
187
  client.ticket_metrics(false, &handler)
187
188
  end
188
189
 
@@ -226,24 +227,24 @@ module Embulk
226
227
 
227
228
  sub_test_case "ticket_fields" do
228
229
  test "invoke export when partial=true" do
229
- mock(client).export(anything, "ticket_fields", anything)
230
+ mock(client).export(anything, "ticket_fields")
230
231
  client.ticket_fields(true)
231
232
  end
232
233
 
233
234
  test "invoke export when partial=false" do
234
- mock(client).export(anything, "ticket_fields", anything)
235
+ mock(client).export_parallel(anything, "ticket_fields")
235
236
  client.ticket_fields(false)
236
237
  end
237
238
  end
238
239
 
239
240
  sub_test_case "ticket_forms" do
240
241
  test "invoke export when partial=true" do
241
- mock(client).export(anything, "ticket_forms", anything)
242
+ mock(client).export(anything, "ticket_forms")
242
243
  client.ticket_forms(true)
243
244
  end
244
245
 
245
246
  test "invoke export when partial=false" do
246
- mock(client).export(anything, "ticket_forms", anything)
247
+ mock(client).export_parallel(anything, "ticket_forms")
247
248
  client.ticket_forms(false)
248
249
  end
249
250
  end
@@ -353,6 +353,7 @@ module Embulk
353
353
  sub_test_case "run" do
354
354
  setup do
355
355
  stub(@plugin).preview? { false }
356
+ stub(Embulk).logger { Logger.new(File::NULL) }
356
357
  end
357
358
 
358
359
  test "call ticket_all method instead of tickets" do
@@ -416,6 +417,7 @@ module Embulk
416
417
  ].join("\r\n")
417
418
  stub(page_builder).add(anything)
418
419
  stub(page_builder).finish
420
+ stub(Embulk).logger { Logger.new(File::NULL) }
419
421
  end
420
422
 
421
423
  sub_test_case "incremental: true" do
@@ -443,6 +445,7 @@ module Embulk
443
445
 
444
446
  sub_test_case "casting value" do
445
447
  setup do
448
+ stub(Embulk).logger { Logger.new(File::NULL) }
446
449
  stub(@plugin).preview? { false }
447
450
  @httpclient.test_loopback_http_response << [
448
451
  "HTTP/1.1 200",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-zendesk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - uu59
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-05-09 00:00:00.000000000 Z
13
+ date: 2016-06-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  requirement: !ruby/object:Gem::Requirement