td-client 1.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/data/ca-bundle.crt +3448 -0
  3. data/lib/td-client.rb +1 -0
  4. data/lib/td/client.rb +606 -0
  5. data/lib/td/client/api.rb +707 -0
  6. data/lib/td/client/api/access_control.rb +74 -0
  7. data/lib/td/client/api/account.rb +45 -0
  8. data/lib/td/client/api/bulk_import.rb +184 -0
  9. data/lib/td/client/api/bulk_load.rb +172 -0
  10. data/lib/td/client/api/database.rb +50 -0
  11. data/lib/td/client/api/export.rb +38 -0
  12. data/lib/td/client/api/import.rb +38 -0
  13. data/lib/td/client/api/job.rb +390 -0
  14. data/lib/td/client/api/partial_delete.rb +27 -0
  15. data/lib/td/client/api/result.rb +46 -0
  16. data/lib/td/client/api/schedule.rb +120 -0
  17. data/lib/td/client/api/server_status.rb +21 -0
  18. data/lib/td/client/api/table.rb +132 -0
  19. data/lib/td/client/api/user.rb +134 -0
  20. data/lib/td/client/api_error.rb +37 -0
  21. data/lib/td/client/compat_gzip_reader.rb +22 -0
  22. data/lib/td/client/model.rb +816 -0
  23. data/lib/td/client/version.rb +5 -0
  24. data/lib/td/core_ext/openssl/ssl/sslcontext/set_params.rb +18 -0
  25. data/spec/spec_helper.rb +63 -0
  26. data/spec/td/client/access_control_api_spec.rb +37 -0
  27. data/spec/td/client/account_api_spec.rb +34 -0
  28. data/spec/td/client/api_error_spec.rb +77 -0
  29. data/spec/td/client/api_spec.rb +269 -0
  30. data/spec/td/client/api_ssl_connection_spec.rb +109 -0
  31. data/spec/td/client/bulk_import_spec.rb +199 -0
  32. data/spec/td/client/bulk_load_spec.rb +401 -0
  33. data/spec/td/client/db_api_spec.rb +123 -0
  34. data/spec/td/client/export_api_spec.rb +51 -0
  35. data/spec/td/client/import_api_spec.rb +148 -0
  36. data/spec/td/client/job_api_spec.rb +833 -0
  37. data/spec/td/client/model_job_spec.rb +136 -0
  38. data/spec/td/client/model_schedule_spec.rb +26 -0
  39. data/spec/td/client/model_schema_spec.rb +134 -0
  40. data/spec/td/client/partial_delete_api_spec.rb +58 -0
  41. data/spec/td/client/result_api_spec.rb +77 -0
  42. data/spec/td/client/sched_api_spec.rb +109 -0
  43. data/spec/td/client/server_status_api_spec.rb +25 -0
  44. data/spec/td/client/spec_resources.rb +99 -0
  45. data/spec/td/client/table_api_spec.rb +226 -0
  46. data/spec/td/client/user_api_spec.rb +118 -0
  47. data/spec/td/client_sched_spec.rb +79 -0
  48. data/spec/td/client_spec.rb +46 -0
  49. metadata +271 -0
@@ -0,0 +1,50 @@
1
+ class TreasureData::API
2
+ module Database
3
+
4
+ ####
5
+ ## Database API
6
+ ##
7
+
8
# Fetches "/v3/database/list" and indexes the returned databases by name.
#
# @return [Hash{String => Array}] database name mapped to
#   [count, created_at, updated_at, nil, permission]
def list_databases
  code, body, res = get("/v3/database/list")
  raise_error("List databases failed", res) unless code == "200"

  databases = checked_json(body, %w[databases])["databases"]
  databases.each_with_object({}) do |entry, listing|
    details = entry.values_at('count', 'created_at', 'updated_at')
    # the 4th slot (org) is always nil; it is kept for API compatibility
    listing[entry['name']] = details.push(nil, entry['permission'])
  end
end
26
+
27
# Posts to "/v3/database/delete/<db>" and fails unless the reply code is "200".
#
# @param [String] db
# @return [true]
def delete_database(db)
  code, _body, res = post("/v3/database/delete/#{e db}")
  raise_error("Delete database failed", res) unless code == "200"
  true
end
36
+
37
# Posts to "/v3/database/create/<db>" with a copy of the given options.
#
# @param [String] db
# @param [Hash] opts extra request parameters (sent as a copy)
# @return [true]
def create_database(db, opts={})
  request_params = opts.dup
  code, _body, res = post("/v3/database/create/#{e db}", request_params)
  raise_error("Create database failed", res) unless code == "200"
  true
end
48
+
49
+ end
50
+ end
@@ -0,0 +1,38 @@
1
+ class TreasureData::API
2
+ module Export
3
+
4
+ ####
5
+ ## Export API
6
+ ##
7
+
8
# Posts an export request for a table and returns the id of the issued job.
#
# @param [String] db
# @param [String] table
# @param [String] storage_type sent as the 'storage_type' parameter
# @param [Hash] opts extra request parameters (left unmodified)
# @return [String] job_id
def export(db, table, storage_type, opts={})
  request_params = opts.dup
  request_params.store('storage_type', storage_type)
  code, body, res = post("/v3/export/run/#{e db}/#{e table}", request_params)
  raise_error("Export failed", res) unless code == "200"
  checked_json(body, %w[job_id])['job_id'].to_s
end
24
+
25
# Posts a result-export request for an existing job and returns the id of
# the newly issued job.
#
# @param [String] target_job_id id of the job whose result is exported
# @param [Hash] opts request parameters, passed through as given
# @return [String] job_id
def result_export(target_job_id, opts={})
  # escape the id like every other job-id path segment so that characters
  # such as '/' cannot alter the request path
  code, body, res = post("/v3/job/result_export/#{e target_job_id}", opts)
  if code != "200"
    raise_error("Result Export failed", res)
  end
  js = checked_json(body, %w[job_id])
  return js['job_id'].to_s
end
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ class TreasureData::API
2
+ module Import
3
+
4
+ ####
5
+ ## Import API
6
+ ##
7
+
8
# Uploads a data stream into a table with a PUT request and returns the
# 'elapsed_time' value reported in the response.
#
# @param [String] db
# @param [String] table
# @param [String] format appended to the request path unchanged
# @param [String, StringIO] stream
# @param [Fixnum] size
# @param [String] unique_id when given, the "import_with_id" path is used
# @return [Float] elapsed time
def import(db, table, format, stream, size, unique_id=nil)
  if unique_id
    # unique_id is escaped like db and table so that it cannot inject
    # additional path segments
    path = "/v3/table/import_with_id/#{e db}/#{e table}/#{e unique_id}/#{format}"
  else
    path = "/v3/table/import/#{e db}/#{e table}/#{format}"
  end
  opts = {}
  if @host == DEFAULT_ENDPOINT
    # the default API endpoint has a dedicated import endpoint
    opts[:host] = DEFAULT_IMPORT_ENDPOINT
  elsif @host == TreasureData::API::OLD_ENDPOINT # backward compatibility
    opts[:host] = 'api-import.treasure-data.com'
    opts[:ssl] = false
  end
  code, body, res = put(path, stream, size, opts)
  # any 2xx status code counts as success
  if code[0] != ?2
    raise_error("Import failed", res)
  end
  js = checked_json(body, %w[])
  return js['elapsed_time'].to_f
end
36
+
37
+ end
38
+ end
@@ -0,0 +1,390 @@
1
+ class TreasureData::API
2
+ module Job
3
+
4
+ ####
5
+ ## Job API
6
+ ##
7
+
8
# Fetches "/v3/job/list" and returns one flat array per job.
#
# @param [Fixnum] from sent as 'from' unless nil/false
# @param [Fixnum] to sent as 'to' unless nil/false
# @param [String] status sent as 'status' unless nil/false
# @param [Hash] conditions extra request parameters merged in last
# @return [Array<Array>] per job: [job_id, type, status, query, start_at,
#   end_at, cpu_time, result_size, result_url, priority, retry_limit, nil,
#   database, duration, num_records]
def list_jobs(from=0, to=nil, status=nil, conditions=nil)
  params = {}
  [['from', from], ['to', to], ['status', status]].each do |key, value|
    params[key] = value.to_s if value
  end
  params.merge!(conditions) if conditions

  code, body, res = get("/v3/job/list", params)
  raise_error("List jobs failed", res) unless code == "200"

  checked_json(body, %w[jobs])['jobs'].map do |job|
    [
      job['job_id'],
      (job['type'] || '?').to_sym,
      job['status'],
      job['query'],
      job['start_at'],
      job['end_at'],
      job['cpu_time'],
      job['result_size'], # compressed result size in msgpack.gz format
      job['result'],
      job['priority'],
      job['retry_limit'],
      nil,
      job['database'],
      job['duration'],
      job['num_records']
    ]
  end
end
46
+
47
# Fetches "/v3/job/show/<job_id>" and returns the job details as a flat array.
#
# @param [String] job_id
# @return [Array] [type, query, status, url, debug, start_at, end_at,
#   cpu_time, result_size, result, hive_result_schema, priority,
#   retry_limit, nil, database, duration, num_records]
# @raise [JSON::ParserError] when 'hive_result_schema' cannot be parsed
def show_job(job_id)
  # use v3/job/status instead of v3/job/show to poll finish of a job
  code, body, res = get("/v3/job/show/#{e job_id}")
  if code != "200"
    raise_error("Show job failed", res)
  end
  js = checked_json(body, %w[status])
  type = (js['type'] || '?').to_sym
  hive_result_schema = (js['hive_result_schema'] || '')
  if hive_result_schema.empty?
    hive_result_schema = nil
  else
    begin
      hive_result_schema = JSON.parse(hive_result_schema)
    rescue JSON::ParserError => parse_error
      # this is a workaround for a Known Limitation in the Pig Engine which does not set a default, auto-generated
      # column name for anonymous columns (such as the ones that are generated from UDF like COUNT or SUM).
      # The schema will contain 'nil' for the name of those columns and that breaks the JSON parser since it violates
      # the JSON syntax standard.
      raise parse_error unless type == :pig && hive_result_schema !~ /[\{\}]/
      begin
        # NOTE: this works because a JSON 2 dimensional array is the same as a Ruby one.
        # SECURITY(review): this evaluates text received from the server as Ruby code;
        # it is kept for compatibility but should be replaced by a real parser.
        hive_result_schema = eval(hive_result_schema)
      rescue ScriptError, StandardError
        # whatever goes wrong in the workaround, report the original JSON problem
        raise parse_error
      end
      # the workaround is only meaningful for an array of column arrays
      unless hive_result_schema.is_a?(Array) && hive_result_schema.all? {|col_schema| col_schema.is_a?(Array) }
        raise parse_error
      end
      hive_result_schema.each_with_index {|col_schema, idx|
        # give anonymous columns a generated name
        col_schema[0] = "_col#{idx}" if col_schema[0].nil?
      }
    end
  end
  return [type, js['query'], js['status'], js['url'], js['debug'], js['start_at'], js['end_at'], js['cpu_time'],
    js['result_size'], # compressed result size in msgpack.gz format
    js['result'], # result target URL
    hive_result_schema, js['priority'], js['retry_limit'], nil, js['database'], js['duration'], js['num_records']]
end
105
+
106
# Fetches "/v3/job/status/<job_id>" and returns the 'status' field of the
# response.
#
# @param [String] job_id
# @return [String] job status
def job_status(job_id)
  code, body, res = get("/v3/job/status/#{e job_id}")
  raise_error("Get job status failed", res) unless code == "200"

  checked_json(body, %w[status])['status']
end
117
+
118
# Downloads a job result and collects every unpacked MessagePack object.
#
# @param [String] job_id
# @return [Array] all unpacked rows, in download order
def job_result(job_id)
  rows = []
  unpacker = MessagePack::Unpacker.new
  job_result_download(job_id) do |chunk|
    unpacker.feed_each(chunk) { |row| rows.push(row) }
  end
  rows
end
130
+
131
# Downloads a job result in the given format.
#
# With an io, every chunk is written to it and the optional block receives
# the running byte total after each write; without an io the chunks are
# concatenated and returned.
#
# @param [String] job_id
# @param [String] format
# @param [IO] io
# @yield [total] only when io is given
# @return [nil, String] nil when io is given, otherwise the whole body
def job_result_format(job_id, format, io=nil)
  unless io
    buffer = String.new
    job_result_download(job_id, format) { |chunk| buffer << chunk }
    return buffer
  end

  job_result_download(job_id, format) do |chunk, total|
    io.write chunk
    yield total if block_given?
  end
  nil
end
153
+
154
# Downloads a job result and hands every unpacked MessagePack object to the
# given block.
#
# @param [String] job_id
# @param [Proc] block receives one unpacked object at a time
# @return [nil]
def job_result_each(job_id, &block)
  unpacker = MessagePack::Unpacker.new
  # default to decompressing the response since format is fixed to 'msgpack'
  job_result_download(job_id) { |chunk| unpacker.feed_each(chunk, &block) }
  nil
end
167
+
168
# Downloads a job result and yields every unpacked MessagePack object
# together with the running byte total reported by the download.
#
# @param [String] job_id
# @yield [unpacked, total] skipped when no block is given
# @return [nil]
def job_result_each_with_compr_size(job_id)
  unpacker = MessagePack::Unpacker.new
  # default to decompressing the response since format is fixed to 'msgpack'
  job_result_download(job_id) do |chunk, downloaded|
    unpacker.feed_each(chunk) do |record|
      yield record, downloaded if block_given?
    end
  end
  nil
end
183
+
184
# Downloads a job result with automatic decoding switched off.
#
# With an io, every chunk is written to it and the optional block receives
# the running byte total; without an io the chunks are concatenated.
#
# @param [String] job_id
# @param [String] format
# @param [IO] io
# @yield [total] only when io is given
# @return [String, nil] the collected body, or nil when io is given
def job_result_raw(job_id, format, io = nil)
  buffer = String.new unless io # stays nil when an io is supplied
  job_result_download(job_id, format, false) do |chunk, total|
    if buffer
      buffer << chunk
    else
      io.write(chunk)
      yield total if block_given?
    end
  end
  buffer
end
199
+
200
# Posts to "/v3/job/kill/<job_id>" and returns the 'former_status' field of
# the response.
#
# @param [String] job_id
# @return [String] former status
def kill(job_id)
  code, body, res = post("/v3/job/kill/#{e job_id}")
  raise_error("Kill job failed", res) unless code == "200"

  checked_json(body, %w[])['former_status']
end
211
+
212
# Issues a query with the job type fixed to :hive; see #query.
#
# @param [String] q
# @param [String] db
# @param [String] result_url
# @param [Fixnum] priority
# @param [Fixnum] retry_limit
# @param [Hash] opts
# @return [String] job_id
def hive_query(q, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
  engine = :hive
  query(q, engine, db, result_url, priority, retry_limit, opts)
end
221
+
222
# Issues a query with the job type fixed to :pig; see #query.
#
# @param [String] q
# @param [String] db
# @param [String] result_url
# @param [Fixnum] priority
# @param [Fixnum] retry_limit
# @param [Hash] opts
# @return [String] job_id
def pig_query(q, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
  engine = :pig
  query(q, engine, db, result_url, priority, retry_limit, opts)
end
231
+
232
# Posts a query to "/v3/job/issue/<type>/<db>" and returns the id of the
# issued job.
#
# @param [String] q
# @param [Symbol] type
# @param [String] db
# @param [String] result_url sent as 'result' unless nil/false
# @param [Fixnum] priority sent as 'priority' unless nil/false
# @param [Fixnum] retry_limit sent as 'retry_limit' unless nil/false
# @param [Hash] opts extra request parameters
# @return [String] job_id
def query(q, type=:hive, db=nil, result_url=nil, priority=nil, retry_limit=nil, opts={})
  params = {'query' => q}.merge(opts)
  [['result', result_url], ['priority', priority], ['retry_limit', retry_limit]].each do |key, value|
    params[key] = value if value
  end

  code, body, res = post("/v3/job/issue/#{type}/#{e db}", params)
  raise_error("Query failed", res) unless code == "200"

  checked_json(body, %w[job_id])['job_id'].to_s
end
251
+
252
+ private
253
+
254
# Compares the number of bytes downloaded so far with the size announced by
# the response headers.
#
# The expected size is the trailing number of Content-Range when that header
# is present, otherwise Content-Length. Nothing is checked when neither
# header yields a number (e.g. a Content-Range whose total is "*").
#
# @param [Object] response object whose header['Name'] returns an array of values
# @param [Fixnum] current_total_chunk_size bytes received so far
# @return [nil]
# @raise [EOFError] when fewer bytes than expected were received
def validate_content_length_with_range(response, current_total_chunk_size)
  expected_size = nil
  if (content_range = response.header['Content-Range'][0])
    # an unknown total must not be read as 0, which would reject every
    # download as "too large"
    total = content_range[/\d+(?=\s*\z)/]
    expected_size = total.to_i if total
  elsif (content_length = response.header['Content-Length'][0])
    expected_size = content_length.to_i
  end
  return nil if expected_size.nil?

  if current_total_chunk_size < expected_size
    # too small
    # NOTE:
    # ext/openssl raises EOFError in case where underlying connection
    # causes an error, but httpclient ignores it.
    # https://github.com/nahi/httpclient/blob/v3.2.8/lib/httpclient/session.rb#L1003
    raise EOFError, 'httpclient IncompleteError'
  elsif current_total_chunk_size > expected_size
    # too large
    raise_error("Get job result failed", response)
  end
end
274
+
275
# Streams the result of a job from "/v3/job/result/<job_id>", yielding each
# chunk together with the number of raw bytes received so far.
#
# Network errors and 5xx responses are retried with a delay that doubles
# after every attempt (starting at @retry_delay) for as long as the
# accumulated delay is below @max_cumul_retry_delay. After a partial
# download the request is resumed with If-Range/Range headers when the
# response carried an ETag.
#
# @param [String] job_id
# @param [String] format sent as the 'format' request parameter
# @param [Boolean] autodecode inflate gzip/deflate encoded chunks before yielding
# @yield [chunk, current_total_chunk_size]
# @return [nil]
def job_result_download(job_id, format='msgpack', autodecode=true)
  client, header = new_client
  client.send_timeout = @send_timeout
  client.receive_timeout = @read_timeout
  header['Accept-Encoding'] = 'deflate, gzip'

  url = build_endpoint("/v3/job/result/#{e job_id}", @host)
  params = {'format' => format}

  unless ENV['TD_CLIENT_DEBUG'].nil?
    puts "DEBUG: REST GET call:"
    puts "DEBUG: header: " + header.to_s
    puts "DEBUG: url: " + url.to_s
    puts "DEBUG: params: " + params.to_s
  end

  # exponential (base 2) back-off starting at 'retry_delay'
  retry_delay = @retry_delay
  cumul_retry_delay = 0
  current_total_chunk_size = 0
  infl = nil
  response = nil
  completed = false
  until completed # LOOP of Server errors
    failed_res = nil
    begin # LOOP of Network errors
      response = nil
      # A 5xx answer aborts the running request with `throw` so that a new
      # request is issued afterwards; `redo` would only re-run the block with
      # the very same failed response.
      failed_res = catch(:td_server_error) do
        client.get(url, params, header) do |res, chunk|
          unless response # first chunk of this attempt
            case res.status
            when 200
              if current_total_chunk_size != 0
                # try to resume but the server returns 200
                raise_error("Get job result failed", res)
              end
            when 206 # resuming
            else
              if res.status/100 == 5 && cumul_retry_delay < @max_cumul_retry_delay
                throw :td_server_error, res
              end
              raise_error("Get job result failed", res)
            end
            if infl.nil? && autodecode
              case res.header['Content-Encoding'][0].to_s.downcase
              when 'gzip'
                infl = Zlib::Inflate.new(Zlib::MAX_WBITS + 16)
              when 'deflate'
                infl = Zlib::Inflate.new
              end
            end
          end
          response = res
          current_total_chunk_size += chunk.bytesize
          chunk = infl.inflate(chunk) if infl
          yield chunk, current_total_chunk_size
        end
        nil
      end

      # completed?
      validate_content_length_with_range(response, current_total_chunk_size) unless failed_res
    rescue Errno::ECONNREFUSED, Errno::ECONNRESET, Timeout::Error, EOFError, OpenSSL::SSL::SSLError, SocketError => error
      if response # at least a chunk is downloaded
        if etag = response.header['ETag'][0]
          header['If-Range'] = etag
          header['Range'] = "bytes=#{current_total_chunk_size}-"
        end
      end

      $stderr.print "#{error.class}: #{error.message}. "
      if cumul_retry_delay < @max_cumul_retry_delay
        $stderr.puts "Retrying after #{retry_delay} seconds..."
        sleep retry_delay
        cumul_retry_delay += retry_delay
        retry_delay *= 2
        retry
      end
      raise
    end

    if failed_res
      $stderr.puts "Error #{failed_res.status}: #{get_error(failed_res)}. Retrying after #{retry_delay} seconds..."
      sleep retry_delay
      cumul_retry_delay += retry_delay
      retry_delay *= 2
    else
      completed = true
    end
  end

  unless ENV['TD_CLIENT_DEBUG'].nil?
    puts "DEBUG: REST GET response:"
    puts "DEBUG: header: " + response.header.to_s
    puts "DEBUG: status: " + response.code.to_s
    puts "DEBUG: body: " + response.body.to_s
  end

  nil
ensure
  infl.close if infl
end
364
+
365
# Pass-through object exposing the #inflate and #close calls used on
# Zlib::Inflate, for responses that need no decompression.
class NullInflate
  # @param [String] chunk
  # @return [String] the given chunk itself
  def inflate(chunk)
    return chunk
  end

  # Nothing to release.
  #
  # @return [nil]
  def close
    nil
  end
end
373
+
374
# Picks the inflater for a response: a NullInflate when the response has no
# Content-Encoding header values, otherwise whatever #create_inflate builds.
#
# @param [Object] response object whose header['Content-Encoding'] returns a collection
# @return [NullInflate, Zlib::Inflate]
def create_inflalte_or_null_inflate(response)
  return NullInflate.new if response.header['Content-Encoding'].empty?

  create_inflate(response)
end
381
+
382
# Builds a Zlib::Inflate for a response: the window bits are raised by 16
# when the Content-Encoding header values include 'gzip'; otherwise the
# default window bits are used.
#
# @param [Object] response object whose header['Content-Encoding'] returns a collection
# @return [Zlib::Inflate]
def create_inflate(response)
  gzip = response.header['Content-Encoding'].include?('gzip')
  window_bits = gzip ? Zlib::MAX_WBITS + 16 : Zlib::MAX_WBITS
  Zlib::Inflate.new(window_bits)
end
389
+ end
390
+ end