td-client 1.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +7 -0
  2. data/data/ca-bundle.crt +3448 -0
  3. data/lib/td-client.rb +1 -0
  4. data/lib/td/client.rb +606 -0
  5. data/lib/td/client/api.rb +707 -0
  6. data/lib/td/client/api/access_control.rb +74 -0
  7. data/lib/td/client/api/account.rb +45 -0
  8. data/lib/td/client/api/bulk_import.rb +184 -0
  9. data/lib/td/client/api/bulk_load.rb +172 -0
  10. data/lib/td/client/api/database.rb +50 -0
  11. data/lib/td/client/api/export.rb +38 -0
  12. data/lib/td/client/api/import.rb +38 -0
  13. data/lib/td/client/api/job.rb +390 -0
  14. data/lib/td/client/api/partial_delete.rb +27 -0
  15. data/lib/td/client/api/result.rb +46 -0
  16. data/lib/td/client/api/schedule.rb +120 -0
  17. data/lib/td/client/api/server_status.rb +21 -0
  18. data/lib/td/client/api/table.rb +132 -0
  19. data/lib/td/client/api/user.rb +134 -0
  20. data/lib/td/client/api_error.rb +37 -0
  21. data/lib/td/client/compat_gzip_reader.rb +22 -0
  22. data/lib/td/client/model.rb +816 -0
  23. data/lib/td/client/version.rb +5 -0
  24. data/lib/td/core_ext/openssl/ssl/sslcontext/set_params.rb +18 -0
  25. data/spec/spec_helper.rb +63 -0
  26. data/spec/td/client/access_control_api_spec.rb +37 -0
  27. data/spec/td/client/account_api_spec.rb +34 -0
  28. data/spec/td/client/api_error_spec.rb +77 -0
  29. data/spec/td/client/api_spec.rb +269 -0
  30. data/spec/td/client/api_ssl_connection_spec.rb +109 -0
  31. data/spec/td/client/bulk_import_spec.rb +199 -0
  32. data/spec/td/client/bulk_load_spec.rb +401 -0
  33. data/spec/td/client/db_api_spec.rb +123 -0
  34. data/spec/td/client/export_api_spec.rb +51 -0
  35. data/spec/td/client/import_api_spec.rb +148 -0
  36. data/spec/td/client/job_api_spec.rb +833 -0
  37. data/spec/td/client/model_job_spec.rb +136 -0
  38. data/spec/td/client/model_schedule_spec.rb +26 -0
  39. data/spec/td/client/model_schema_spec.rb +134 -0
  40. data/spec/td/client/partial_delete_api_spec.rb +58 -0
  41. data/spec/td/client/result_api_spec.rb +77 -0
  42. data/spec/td/client/sched_api_spec.rb +109 -0
  43. data/spec/td/client/server_status_api_spec.rb +25 -0
  44. data/spec/td/client/spec_resources.rb +99 -0
  45. data/spec/td/client/table_api_spec.rb +226 -0
  46. data/spec/td/client/user_api_spec.rb +118 -0
  47. data/spec/td/client_sched_spec.rb +79 -0
  48. data/spec/td/client_spec.rb +46 -0
  49. metadata +271 -0
data/lib/td/client/api/database.rb
@@ -0,0 +1,50 @@
+class TreasureData::API
+module Database
+
+  ####
+  ## Database API
+  ##
+
+  # @return [Array<String>] names as array
+  def list_databases
+    code, body, res = get("/v3/database/list")
+    if code != "200"
+      raise_error("List databases failed", res)
+    end
+    js = checked_json(body, %w[databases])
+    result = {}
+    js["databases"].each {|m|
+      name = m['name']
+      count = m['count']
+      created_at = m['created_at']
+      updated_at = m['updated_at']
+      permission = m['permission']
+      result[name] = [count, created_at, updated_at, nil, permission] # set nil to org for API compatibiilty
+    }
+    return result
+  end
+
+  # @param [String] db
+  # @return [true]
+  def delete_database(db)
+    code, body, res = post("/v3/database/delete/#{e db}")
+    if code != "200"
+      raise_error("Delete database failed", res)
+    end
+    return true
+  end
+
+  # @param [String] db
+  # @param [Hash] opts
+  # @return [true]
+  def create_database(db, opts={})
+    params = opts.dup
+    code, body, res = post("/v3/database/create/#{e db}", params)
+    if code != "200"
+      raise_error("Create database failed", res)
+    end
+    return true
+  end
+
+end
+end
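The Database mixin above returns plain Ruby values: list_databases maps each database name to a [count, created_at, updated_at, org, permission] array (org stays nil for API compatibility), while create_database and delete_database return true on HTTP 200 and raise otherwise. A minimal usage sketch follows; it assumes the mixin is reached through TreasureData::API.new(apikey) as defined in data/lib/td/client/api.rb, which is not part of this hunk.

```ruby
require 'td-client'

# Hypothetical usage of the Database API methods shown above.
api = TreasureData::API.new(ENV['TD_API_KEY'])

api.create_database('sandbox_db')   # => true, or raises on a non-200 response

# list_databases returns a Hash of name => [count, created_at, updated_at, org, permission]
api.list_databases.each do |name, (count, _created_at, _updated_at, _org, permission)|
  puts "#{name}: #{count} records, permission=#{permission}"
end

api.delete_database('sandbox_db')   # => true
```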
data/lib/td/client/api/export.rb
@@ -0,0 +1,38 @@
+class TreasureData::API
+module Export
+
+  ####
+  ## Export API
+  ##
+
+  # => jobId:String
+  # @param [String] db
+  # @param [String] table
+  # @param [String] storage_type
+  # @param [Hash] opts
+  # @return [String] job_id
+  def export(db, table, storage_type, opts={})
+    params = opts.dup
+    params['storage_type'] = storage_type
+    code, body, res = post("/v3/export/run/#{e db}/#{e table}", params)
+    if code != "200"
+      raise_error("Export failed", res)
+    end
+    js = checked_json(body, %w[job_id])
+    return js['job_id'].to_s
+  end
+
+  # => jobId:String
+  # @param [String] target_job_id
+  # @param [Hash] opts
+  # @return [String] job_id
+  def result_export(target_job_id, opts={})
+    code, body, res = post("/v3/job/result_export/#{target_job_id}", opts)
+    if code != "200"
+      raise_error("Result Export failed", res)
+    end
+    js = checked_json(body, %w[job_id])
+    return js['job_id'].to_s
+  end
+end
+end
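Both export endpoints above return the id of the job that performs the work: export posts to /v3/export/run/:db/:table with storage_type merged into the options, and result_export re-exports the result of an existing job. A hedged sketch; the keys passed inside opts are illustrative placeholders, not taken from this diff.

```ruby
api = TreasureData::API.new(ENV['TD_API_KEY'])

# Kick off a table export; 's3' and the 'bucket' option are placeholder values.
export_job_id = api.export('my_db', 'events', 's3', 'bucket' => 'my-backup-bucket')

# Later, re-export the result of the already submitted job.
reexport_job_id = api.result_export(export_job_id)

puts "export job #{export_job_id}, result export job #{reexport_job_id}"
```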
data/lib/td/client/api/import.rb
@@ -0,0 +1,38 @@
+class TreasureData::API
+module Import
+
+  ####
+  ## Import API
+  ##
+
+  # @param [String] db
+  # @param [String] table
+  # @param [String] format
+  # @param [String, StringIO] stream
+  # @param [Fixnum] size
+  # @param [String] unique_id
+  # @return [Float] elapsed time
+  def import(db, table, format, stream, size, unique_id=nil)
+    if unique_id
+      path = "/v3/table/import_with_id/#{e db}/#{e table}/#{unique_id}/#{format}"
+    else
+      path = "/v3/table/import/#{e db}/#{e table}/#{format}"
+    end
+    opts = {}
+    if @host == DEFAULT_ENDPOINT
+      opts[:host] = DEFAULT_IMPORT_ENDPOINT
+    elsif @host == TreasureData::API::OLD_ENDPOINT # backward compatibility
+      opts[:host] = 'api-import.treasure-data.com'
+      opts[:ssl] = false
+    end
+    code, body, res = put(path, stream, size, opts)
+    if code[0] != ?2
+      raise_error("Import failed", res)
+    end
+    js = checked_json(body, %w[])
+    time = js['elapsed_time'].to_f
+    return time
+  end
+
+end
+end
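The import method streams the request body to /v3/table/import (or import_with_id when a unique_id is supplied), switches to the dedicated import host when the client points at the default or legacy endpoint, and returns the elapsed time reported by the server. A sketch of uploading a pre-packed msgpack.gz file; the 'msgpack.gz' format string and the use of a File object as the stream are assumptions consistent with the signature above, not values taken from this diff.

```ruby
api = TreasureData::API.new(ENV['TD_API_KEY'])

File.open('events.msgpack.gz', 'rb') do |f|
  # unique_id is optional; supplying one lets the server deduplicate retried uploads.
  elapsed = api.import('my_db', 'events', 'msgpack.gz', f, f.size)
  puts "server-side import took #{elapsed}s"
end
```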
data/lib/td/client/api/job.rb
@@ -0,0 +1,390 @@
+class TreasureData::API
+module Job
+
+  ####
+  ## Job API
+  ##
+
+  # @param [Fixnum] from
+  # @param [Fixnum] to
+  # @param [String] status
+  # @param [Hash] conditions
+  # @return [Array]
+  def list_jobs(from=0, to=nil, status=nil, conditions=nil)
+    params = {}
+    params['from'] = from.to_s if from
+    params['to'] = to.to_s if to
+    params['status'] = status.to_s if status
+    params.merge!(conditions) if conditions
+    code, body, res = get("/v3/job/list", params)
+    if code != "200"
+      raise_error("List jobs failed", res)
+    end
+    js = checked_json(body, %w[jobs])
+    result = []
+    js['jobs'].each {|m|
+      job_id = m['job_id']
+      type = (m['type'] || '?').to_sym
+      database = m['database']
+      status = m['status']
+      query = m['query']
+      start_at = m['start_at']
+      end_at = m['end_at']
+      cpu_time = m['cpu_time']
+      result_size = m['result_size'] # compressed result size in msgpack.gz format
+      result_url = m['result']
+      priority = m['priority']
+      retry_limit = m['retry_limit']
+      duration = m['duration']
+      num_records = m['num_records']
+      result << [job_id, type, status, query, start_at, end_at, cpu_time,
+                 result_size, result_url, priority, retry_limit, nil, database,
+                 duration, num_records]
+    }
+    return result
+  end
+
+  # @param [String] job_id
+  # @return [Array]
+  def show_job(job_id)
+    # use v3/job/status instead of v3/job/show to poll finish of a job
+    code, body, res = get("/v3/job/show/#{e job_id}")
+    if code != "200"
+      raise_error("Show job failed", res)
+    end
+    js = checked_json(body, %w[status])
+    # TODO debug
+    type = (js['type'] || '?').to_sym # TODO
+    database = js['database']
+    query = js['query']
+    status = js['status']
+    debug = js['debug']
+    url = js['url']
+    start_at = js['start_at']
+    end_at = js['end_at']
+    cpu_time = js['cpu_time']
+    result_size = js['result_size'] # compressed result size in msgpack.gz format
+    num_records = js['num_records']
+    duration = js['duration']
+    result = js['result'] # result target URL
+    hive_result_schema = (js['hive_result_schema'] || '')
+    if hive_result_schema.empty?
+      hive_result_schema = nil
+    else
+      begin
+        hive_result_schema = JSON.parse(hive_result_schema)
+      rescue JSON::ParserError => e
+        # this is a workaround for a Known Limitation in the Pig Engine which does not set a default, auto-generated
+        # column name for anonymous columns (such as the ones that are generated from UDF like COUNT or SUM).
+        # The schema will contain 'nil' for the name of those columns and that breaks the JSON parser since it violates
+        # the JSON syntax standard.
+        if type == :pig and hive_result_schema !~ /[\{\}]/
+          begin
+            # NOTE: this works because a JSON 2 dimensional array is the same as a Ruby one.
+            # Any change in the format for the hive_result_schema output may cause a syntax error, in which case
+            # this lame attempt at fixing the problem will fail and we will be raising the original JSON exception
+            hive_result_schema = eval(hive_result_schema)
+          rescue SyntaxError => ignored_e
+            raise e
+          end
+          hive_result_schema.each_with_index {|col_schema, idx|
+            if col_schema[0].nil?
+              col_schema[0] = "_col#{idx}"
+            end
+          }
+        else
+          raise e
+        end
+      end
+    end
+    priority = js['priority']
+    retry_limit = js['retry_limit']
+    return [type, query, status, url, debug, start_at, end_at, cpu_time,
+            result_size, result, hive_result_schema, priority, retry_limit, nil, database, duration, num_records]
+  end
+
+  # @param [String] job_id
+  # @return [String] HTTP status
+  def job_status(job_id)
+    code, body, res = get("/v3/job/status/#{e job_id}")
+    if code != "200"
+      raise_error("Get job status failed", res)
+    end
+
+    js = checked_json(body, %w[status])
+    return js['status']
+  end
+
+  # @param [String] job_id
+  # @return [Array]
+  def job_result(job_id)
+    result = []
+    unpacker = MessagePack::Unpacker.new
+    job_result_download(job_id) do |chunk|
+      unpacker.feed_each(chunk) do |row|
+        result << row
+      end
+    end
+    return result
+  end
+
+  # block is optional and must accept 1 parameter
+  #
+  # @param [String] job_id
+  # @param [String] format
+  # @param [IO] io
+  # @param [Proc] block
+  # @return [nil, String]
+  def job_result_format(job_id, format, io=nil)
+    if io
+      job_result_download(job_id, format) do |chunk, total|
+        io.write chunk
+        yield total if block_given?
+      end
+      nil
+    else
+      body = String.new
+      job_result_download(job_id, format) do |chunk|
+        body << chunk
+      end
+      body
+    end
+  end
+
+  # block is optional and must accept 1 argument
+  #
+  # @param [String] job_id
+  # @param [Proc] block
+  # @return [nil]
+  def job_result_each(job_id, &block)
+    upkr = MessagePack::Unpacker.new
+    # default to decompressing the response since format is fixed to 'msgpack'
+    job_result_download(job_id) do |chunk|
+      upkr.feed_each(chunk, &block)
+    end
+    nil
+  end
+
+  # block is optional and must accept 1 argument
+  #
+  # @param [String] job_id
+  # @param [Proc] block
+  # @return [nil]
+  def job_result_each_with_compr_size(job_id)
+    upkr = MessagePack::Unpacker.new
+    # default to decompressing the response since format is fixed to 'msgpack'
+    job_result_download(job_id) do |chunk, total|
+      upkr.feed_each(chunk) {|unpacked|
+        yield unpacked, total if block_given?
+      }
+    end
+    nil
+  end
+
+  # @param [String] job_id
+  # @param [String] format
+  # @return [String]
+  def job_result_raw(job_id, format, io = nil)
+    body = io ? nil : String.new
+    job_result_download(job_id, format, false) do |chunk, total|
+      if io
+        io.write(chunk)
+        yield total if block_given?
+      else
+        body << chunk
+      end
+    end
+    body
+  end
+
+  # @param [String] job_id
+  # @return [String]
+  def kill(job_id)
+    code, body, res = post("/v3/job/kill/#{e job_id}")
+    if code != "200"
+      raise_error("Kill job failed", res)
+    end
+    js = checked_json(body, %w[])
+    former_status = js['former_status']
+    return former_status
+  end
+
+  # @param [String] q
+  # @param [String] db
+  # @param [String] result_url
+  # @param [Fixnum] priority
+  # @param [Hash] opts
+  # @return [String] job_id
+  def hive_query(q, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
+    query(q, :hive, db, result_url, priority, retry_limit, opts)
+  end
+
+  # @param [String] q
+  # @param [String] db
+  # @param [String] result_url
+  # @param [Fixnum] priority
+  # @param [Hash] opts
+  # @return [String] job_id
+  def pig_query(q, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
+    query(q, :pig, db, result_url, priority, retry_limit, opts)
+  end
+
+  # @param [String] q
+  # @param [Symbol] type
+  # @param [String] db
+  # @param [String] result_url
+  # @param [Fixnum] priority
+  # @param [Hash] opts
+  # @return [String] job_id
+  def query(q, type=:hive, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
+    params = {'query' => q}.merge(opts)
+    params['result'] = result_url if result_url
+    params['priority'] = priority if priority
+    params['retry_limit'] = retry_limit if retry_limit
+    code, body, res = post("/v3/job/issue/#{type}/#{e db}", params)
+    if code != "200"
+      raise_error("Query failed", res)
+    end
+    js = checked_json(body, %w[job_id])
+    return js['job_id'].to_s
+  end
+
+  private
+
+  def validate_content_length_with_range(response, current_total_chunk_size)
+    if expected_size = response.header['Content-Range'][0]
+      expected_size = expected_size[/\d+$/].to_i
+    elsif expected_size = response.header['Content-Length'][0]
+      expected_size = expected_size.to_i
+    end
+
+    if expected_size.nil?
+    elsif current_total_chunk_size < expected_size
+      # too small
+      # NOTE:
+      # ext/openssl raises EOFError in case where underlying connection
+      # causes an error, but httpclient ignores it.
+      # https://github.com/nahi/httpclient/blob/v3.2.8/lib/httpclient/session.rb#L1003
+      raise EOFError, 'httpclient IncompleteError'
+    elsif current_total_chunk_size > expected_size
+      # too large
+      raise_error("Get job result failed", response)
+    end
+  end
+
+  def job_result_download(job_id, format='msgpack', autodecode=true)
+    client, header = new_client
+    client.send_timeout = @send_timeout
+    client.receive_timeout = @read_timeout
+    header['Accept-Encoding'] = 'deflate, gzip'
+
+    url = build_endpoint("/v3/job/result/#{e job_id}", @host)
+    params = {'format' => format}
+
+    unless ENV['TD_CLIENT_DEBUG'].nil?
+      puts "DEBUG: REST GET call:"
+      puts "DEBUG: header: " + header.to_s
+      puts "DEBUG: url: " + url.to_s
+      puts "DEBUG: params: " + params.to_s
+    end
+
+    # up to 7 retries with exponential (base 2) back-off starting at 'retry_delay'
+    retry_delay = @retry_delay
+    cumul_retry_delay = 0
+    current_total_chunk_size = 0
+    infl = nil
+    begin # LOOP of Network/Server errors
+      response = nil
+      client.get(url, params, header) do |res, chunk|
+        unless response
+          case res.status
+          when 200
+            if current_total_chunk_size != 0
+              # try to resume but the server returns 200
+              raise_error("Get job result failed", res)
+            end
+          when 206 # resuming
+          else
+            if res.status/100 == 5 && cumul_retry_delay < @max_cumul_retry_delay
+              $stderr.puts "Error #{res.status}: #{get_error(res)}. Retrying after #{retry_delay} seconds..."
+              sleep retry_delay
+              cumul_retry_delay += retry_delay
+              retry_delay *= 2
+              redo
+            end
+            raise_error("Get job result failed", res)
+          end
+          if infl.nil? && autodecode
+            case res.header['Content-Encoding'][0].to_s.downcase
+            when 'gzip'
+              infl = Zlib::Inflate.new(Zlib::MAX_WBITS + 16)
+            when 'deflate'
+              infl = Zlib::Inflate.new
+            end
+          end
+        end
+        response = res
+        current_total_chunk_size += chunk.bytesize
+        chunk = infl.inflate(chunk) if infl
+        yield chunk, current_total_chunk_size
+      end
+
+      # completed?
+      validate_content_length_with_range(response, current_total_chunk_size)
+    rescue Errno::ECONNREFUSED, Errno::ECONNRESET, Timeout::Error, EOFError, OpenSSL::SSL::SSLError, SocketError => e
+      if response # at least a chunk is downloaded
+        if etag = response.header['ETag'][0]
+          header['If-Range'] = etag
+          header['Range'] = "bytes=#{current_total_chunk_size}-"
+        end
+      end
+
+      $stderr.print "#{e.class}: #{e.message}. "
+      if cumul_retry_delay < @max_cumul_retry_delay
+        $stderr.puts "Retrying after #{retry_delay} seconds..."
+        sleep retry_delay
+        cumul_retry_delay += retry_delay
+        retry_delay *= 2
+        retry
+      end
+      raise
+    end
+
+    unless ENV['TD_CLIENT_DEBUG'].nil?
+      puts "DEBUG: REST GET response:"
+      puts "DEBUG: header: " + response.header.to_s
+      puts "DEBUG: status: " + response.code.to_s
+      puts "DEBUG: body: " + response.body.to_s
+    end
+
+    nil
+  ensure
+    infl.close if infl
+  end
+
+  class NullInflate
+    def inflate(chunk)
+      chunk
+    end
+
+    def close
+    end
+  end
+
+  def create_inflalte_or_null_inflate(response)
+    if response.header['Content-Encoding'].empty?
+      NullInflate.new
+    else
+      create_inflate(response)
+    end
+  end
+
+  def create_inflate(response)
+    if response.header['Content-Encoding'].include?('gzip')
+      Zlib::Inflate.new(Zlib::MAX_WBITS + 16)
+    else
+      Zlib::Inflate.new
+    end
+  end
+end
+end
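Putting the Job API together: query (or the hive_query/pig_query shortcuts) issues a job and returns its id, job_status polls it, and job_result_each streams msgpack-encoded rows through job_result_download, which transparently decompresses the response and resumes interrupted downloads with Range/If-Range headers. A usage sketch; the terminal status strings are an assumption, since this hunk only exposes job_status as a raw string.

```ruby
api = TreasureData::API.new(ENV['TD_API_KEY'])

# Issue a Hive query against a database and capture the job id.
job_id = api.query('SELECT COUNT(1) FROM events', :hive, 'my_db')

# Poll until the job reaches a terminal state (status names assumed).
sleep 2 until %w[success error killed].include?(api.job_status(job_id))

# Stream the result rows; each row arrives already unpacked by MessagePack::Unpacker#feed_each.
api.job_result_each(job_id) do |row|
  p row
end
```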