td 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,9 +122,9 @@ module Command
122
122
  end
123
123
 
124
124
  if render_opts[:header]
125
- unless ['tsv', 'csv'].include?(format)
125
+ unless ['json', 'tsv', 'csv'].include?(format)
126
126
  raise ParameterConfigurationError,
127
- "Option -c / --column-header is only supported with tsv and csv formats"
127
+ "Option -c / --column-header is only supported with json, tsv and csv formats"
128
128
  end
129
129
  end
130
130
 
@@ -306,27 +306,15 @@ private
306
306
 
307
307
  case format
308
308
  when 'json'
309
- require 'yajl'
310
- open_file(tempfile || output, "w") {|f|
311
- f.write "["
312
- n_rows = 0
313
- unless output.nil?
314
- indicator = Command::SizeBasedDownloadProgressIndicator.new(
315
- "NOTE: the job result is being written to #{output} in json format",
316
- job.result_size, 0.1, 1)
317
- end
318
- job.result_each_with_compr_size {|row, compr_size|
319
- indicator.update(compr_size) unless output.nil?
320
- f.write ",\n" if n_rows > 0
321
- f.write Yajl.dump(row)
322
- n_rows += 1
323
- break if output.nil? and !limit.nil? and n_rows == limit
324
- }
325
- f.write "]"
326
- indicator.finish unless output.nil?
327
- }
328
- $stdout.puts if output.nil?
309
+ if render_opts[:header] && job.hive_result_schema
310
+ headers = job.hive_result_schema.map {|name, _| name }
329
311
 
312
+ write_result_for_json(job, output, tempfile, limit, render_opts) {|row|
313
+ Hash[headers.zip(row)]
314
+ }
315
+ else
316
+ write_result_for_json(job, output, tempfile, limit, render_opts) {|row| row }
317
+ end
330
318
  when 'csv'
331
319
  require 'yajl'
332
320
  require 'csv'
@@ -444,6 +432,29 @@ private
444
432
  end
445
433
  end
446
434
 
435
# Writes the rows of +job+'s result as a JSON array, rows separated by ",\n".
#
# The destination is +tempfile+ when given, otherwise +output+ (both are
# handed to open_file). Each row is passed to the caller's block and the
# block's return value is what gets serialized with Yajl.
#
# When +output+ is nil: no progress indicator is created, writing stops after
# +limit+ rows (if +limit+ is set), and a newline is printed to $stdout at the
# end. +render_opts+ is accepted but not read by this method.
def write_result_for_json(job, output, tempfile, limit, render_opts)
  require 'yajl'
  no_output = output.nil?

  open_file(tempfile || output, "w") do |io|
    io.write "["

    progress = nil
    unless no_output
      progress = Command::SizeBasedDownloadProgressIndicator.new(
        "NOTE: the job result is being written to #{output} in json format",
        job.result_size, 0.1, 1)
    end

    emitted = 0
    job.result_each_with_compr_size do |row, compr_size|
      progress.update(compr_size) unless no_output
      # Separator goes before every row except the first one.
      io.write ",\n" unless emitted.zero?
      io.write Yajl.dump(yield(row))
      emitted += 1
      break if no_output && !limit.nil? && emitted == limit
    end

    io.write "]"
    progress.finish unless no_output
  end

  $stdout.puts if no_output
end
457
+
447
458
  def render_result(job, limit, format=nil, render_opts={})
448
459
  require 'yajl'
449
460
 
@@ -515,4 +526,3 @@ private
515
526
 
516
527
  end # module Command
517
528
  end # module TreasureData
518
-
@@ -236,6 +236,7 @@ module List
236
236
  add_list 'table:import', %w[db table files_], 'Parse and import files to a table', ['table:import example_db table1 --apache access.log', 'table:import example_db table1 --json -t time - < test.json']
237
237
  add_list 'table:export', %w[db table], 'Dump logs in a table to the specified storage', ['table:export example_db table1 --s3-bucket mybucket -k KEY_ID -s SECRET_KEY']
238
238
  add_list 'table:swap', %w[db table1 table2], 'Swap names of two tables', ['table:swap example_db table1 table2']
239
+ add_list 'table:rename', %w[db from_table dest_table], 'rename exist table', ['table:rename example_db table1 table2']
239
240
  add_list 'table:tail', %w[db table], 'Get recently imported logs', ['table:tail example_db table1', 'table:tail example_db table1 -n 30']
240
241
  add_list 'table:partial_delete', %w[db table], 'Delete logs from the table within the specified time range', ['table:partial_delete example_db table1 --from 1341000000 --to 1341003600']
241
242
  add_list 'table:expire', %w[db table expire_days], 'Expire data in table after specified number of days. Set to 0 to disable the expiration.', ['table:expire example_db table1 30']
@@ -269,6 +270,7 @@ module List
269
270
  add_list 'import:delete', %w[name], 'Delete a bulk import session', ['import:delete logs_201201']
270
271
  add_list 'import:freeze', %w[name], 'Reject succeeding uploadings to a bulk import session', ['import:freeze logs_201201']
271
272
  add_list 'import:unfreeze', %w[name], 'Unfreeze a frozen bulk import session', ['import:unfreeze logs_201201']
273
+ add_list 'import:config', %w[files_], 'create guess config from arguments', ['import:config "s3://<s3_access_key>:<s3_secret_key>@/my_bucket/path/to/*.csv" -o seed.yml']
272
274
 
273
275
  add_list 'result:list', %w[], 'Show list of result URLs', ['result:list', 'results']
274
276
  add_list 'result:show', %w[name], 'Describe information of a result URL', ['result name']
@@ -353,7 +355,7 @@ module List
353
355
  add_list 'connector:update', %w[name config], 'Modify connector session', ['connector:update connector1 td-bulkload.yml']
354
356
  add_list 'connector:delete', %w[name], 'Delete connector session', ['connector:delete connector1']
355
357
  add_list 'connector:history', %w[name], 'Show job history of connector session', ['connector:history connector1']
356
- add_list 'connector:run', %w[name time], 'Run connector session for the specified time', ['connector:run connector1 "2016-01-01 00:00:00"']
358
+ add_list 'connector:run', %w[name time?], 'Run connector with session for the specified time option', ['connector:run connector1 "2016-01-01 00:00:00"']
357
359
 
358
360
  # aliases
359
361
  add_alias 'db', 'db:show'
@@ -441,4 +443,3 @@ module List
441
443
  end # module List
442
444
  end # module Command
443
445
  end # module TreasureData
444
-
@@ -76,7 +76,7 @@ module Command
76
76
  end
77
77
  limit = s.to_i
78
78
  }
79
- op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
79
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to json, tsv and csv formats)', TrueClass) {|b|
80
80
  render_opts[:header] = b
81
81
  }
82
82
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
@@ -115,9 +115,9 @@ module Command
115
115
  end
116
116
 
117
117
  if render_opts[:header]
118
- unless ['tsv', 'csv'].include?(format)
118
+ unless ['tsv', 'csv', 'json'].include?(format)
119
119
  raise ParameterConfigurationError,
120
- "Option -c / --column-header is only supported with tsv and csv formats"
120
+ "Option -c / --column-header is only supported with json, tsv and csv formats"
121
121
  end
122
122
  end
123
123
 
@@ -155,5 +155,3 @@ module Command
155
155
  require 'td/command/job' # wait_job, job_priority_id_of
156
156
  end
157
157
  end
158
-
159
-
@@ -190,7 +190,7 @@ EOF
190
190
  show_backtrace "Error #{$!.class}: backtrace:", $!.backtrace
191
191
  end
192
192
 
193
- if $!.respond_to? :api_backtrace
193
+ if $!.respond_to?(:api_backtrace) && $!.api_backtrace
194
194
  show_backtrace "Error backtrace from server:", $!.api_backtrace.split("\n")
195
195
  end
196
196
 
@@ -227,4 +227,3 @@ end # class Runner
227
227
 
228
228
  end # module Command
229
229
  end # module TreasureData
230
-
@@ -15,8 +15,6 @@ module Command
15
15
 
16
16
  def table_create(op)
17
17
  type = nil
18
- primary_key = nil
19
- primary_key_type = nil
20
18
 
21
19
  op.on('-T', '--type TYPE', 'set table type (log)') {|s|
22
20
  unless s == 'log'
@@ -24,21 +22,6 @@ module Command
24
22
  end
25
23
  type = s.to_sym
26
24
  }
27
- op.on('--primary-key PRIMARY_KEY_AND_TYPE', '[primary key]:[primary key type(int or string)]') {|s|
28
- unless /\A[\w]+:(string|int)\z/ =~ s
29
- $stderr.puts "--primary-key PRIMARY_KEY_AND_TYPE is required, and should be in the format [primary key]:[primary key type]"
30
- exit 1
31
- end
32
-
33
- args = s.split(':')
34
- if args.length != 2
35
- # this really shouldn't happen with the above regex
36
- exit 1
37
- end
38
- primary_key = args[0]
39
- primary_key_type = args[1]
40
- }
41
-
42
25
  db_name, table_name = op.cmd_parse
43
26
 
44
27
  API.validate_table_name(table_name)
@@ -204,6 +187,35 @@ module Command
204
187
  $stderr.puts "'#{db_name}.#{table_name1}' and '#{db_name}.#{table_name2}' are swapped."
205
188
  end
206
189
 
190
# Renames a table inside a database.
#
# Positional arguments (read through op.cmd_parse): database name, source
# table name, destination table name. The --overwrite option allows the
# destination to be an already existing table.
#
# The rename is carried out as: create the destination as a log table when it
# does not exist, swap the two tables, then delete the source name.
#
# Any ParameterConfigurationError raised during validation is printed to
# $stderr and the process exits with status 1.
def table_rename(op)
  overwrite = false
  op.on('--overwrite', 'replace existing dest table') { overwrite = true }
  database_name, from_table_name, dest_table_name = op.cmd_parse

  # With identical names the steps below would swap the table with itself and
  # then delete it, so refuse before anything is touched.
  if from_table_name == dest_table_name
    raise ParameterConfigurationError, "From table and dest table must be different: `#{from_table_name}`."
  end

  client = get_client
  database = get_database(client, database_name)

  unless table_exist?(database, from_table_name)
    raise ParameterConfigurationError, "From table `#{from_table_name}` isn't exist."
  end

  if table_exist?(database, dest_table_name)
    unless overwrite
      raise ParameterConfigurationError, "Dest table `#{dest_table_name}` is exist. If you want to overwrite dest table, you should set `overwrite` option."
    end
  else
    # swap_table is called with both names, so make sure the destination exists first.
    client.create_log_table(database_name, dest_table_name)
  end

  client.swap_table(database_name, from_table_name, dest_table_name)
  client.delete_table(database_name, from_table_name)

  $stderr.puts "renamed from '#{database_name}.#{from_table_name}' to '#{database_name}.#{dest_table_name}'."
rescue ParameterConfigurationError => e
  $stderr.puts e.message
  exit 1
end
218
+
207
219
  def table_show(op)
208
220
  db_name, table_name = op.cmd_parse
209
221
 
@@ -673,4 +685,3 @@ module Command
673
685
  require 'td/command/job' # wait_job
674
686
  end
675
687
  end
676
-
@@ -0,0 +1,41 @@
1
+ require 'psych'
2
+
3
module TreasureData
  # YAML dumper built on Psych that emits a plain Hash in flow style
  # (`{key: value}`) when all of its values are numbers, strings or symbols.
  # Every other object is emitted the way Psych's own YAMLTree does it.
  module CompactFormatYamler
    module Visitors
      class YAMLTree < Psych::Visitors::YAMLTree
        # Fall back to .new when this Psych has no YAMLTree.create factory
        # (presumably the Psych shipped with Ruby 2.0 and older).
        singleton_class.send(:alias_method, :create, :new) unless respond_to?(:create)

        # Emits +o+ as a flow mapping when it is exactly a ::Hash holding only
        # scalar values; otherwise defers to the parent implementation.
        def visit_Hash(o)
          return super unless o.class == ::Hash && o.values.all? { |value| flow_value?(value) }

          mapping = @emitter.start_mapping(nil, nil, true, Psych::Nodes::Mapping::FLOW)
          register(o, mapping)

          o.each_pair do |key, value|
            accept key
            accept value
          end
          @emitter.end_mapping
        end

        private

        # True for the value kinds that may appear inside a flow mapping.
        def flow_value?(value)
          case value
          when Numeric, String, Symbol then true
          else false
          end
        end
      end
    end

    # Serializes +o+ to YAML. Call as dump(obj), dump(obj, io),
    # dump(obj, options) or dump(obj, io, options); a Hash given in the
    # second position is treated as the options.
    def self.dump(o, io = nil, options = {})
      io, options = nil, io if io.is_a?(Hash)

      tree_builder = ::TreasureData::CompactFormatYamler::Visitors::YAMLTree.create(options)
      tree_builder << o
      tree_builder.tree.yaml(io, options)
    end
  end
end
@@ -0,0 +1,32 @@
1
module TreasureData
  # Converts a connector configuration hash into a hash with exactly the
  # keys 'in', 'out', 'exec' and 'filters'.
  #
  # Three input shapes are handled:
  # * a hash with an 'in' key: 'in' is kept and the other three sections
  #   default to empty when absent;
  # * a hash whose only key is 'config': the wrapped hash is normalized instead;
  # * anything else: the whole hash becomes the 'in' section.
  class ConnectorConfigNormalizer
    def initialize(config)
      @config = config
    end

    # Returns the normalized hash. Raises RuntimeError when 'config' is
    # combined with other top-level keys.
    def normalized_config
      if @config['in']
        return sections(@config['in'], @config['out'], @config['exec'], @config['filters'])
      end
      return sections(@config) unless @config['config']

      unless @config.size == 1
        raise "Setting #{(@config.keys - ['config']).inspect} keys in a configuration file is not supported. Please set options to the command line argument."
      end

      self.class.new(@config['config']).normalized_config
    end

    private

    # Builds the four-section hash, substituting empty defaults for missing parts.
    def sections(input, output = nil, exec = nil, filters = nil)
      {
        'in'      => input,
        'out'     => output || {},
        'exec'    => exec || {},
        'filters' => filters || []
      }
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module TreasureData
2
- TOOLBELT_VERSION = '0.12.0'
2
+ TOOLBELT_VERSION = '0.13.0'
3
3
  end
@@ -49,3 +49,21 @@ def execute_td(command_line)
49
49
  end
50
50
 
51
51
  class CallSystemExitError < RuntimeError; end
52
+
53
# Shared RSpec context that silences an example's console output.
#
# For the duration of each example $stdout and $stderr are replaced by the
# StringIO objects exposed as +stdout_io+ and +stderr_io+; both globals are
# put back afterwards, also when the example raises.
shared_context 'quiet_out' do
  let(:stdout_io) { StringIO.new }
  let(:stderr_io) { StringIO.new }

  around do |example|
    # Keep a reference to each stream separately so $stderr is restored to the
    # real stderr (not to stdout) and no extra IO object is created per example.
    original_stdout = $stdout
    original_stderr = $stderr
    begin
      $stdout = stdout_io
      $stderr = stderr_io
      example.call
    ensure
      $stdout = original_stdout
      $stderr = original_stderr
    end
  end
end
@@ -5,6 +5,49 @@ require 'td/command/connector'
5
5
 
6
6
  module TreasureData::Command
7
7
  describe 'connector commands' do
8
# Checks that the plugins given with --guess end up in the 'exec' section of
# the config handed to client.bulk_load_guess.
describe '#connector_guess' do
  let(:command) { Class.new { include TreasureData::Command }.new }

  describe 'guess plugins' do
    include_context 'quiet_out'

    let(:guess_plugins) { %w[json query_string] }
    let(:client) { double(:client) }

    # Only the paths are needed, so the temp files are closed right away.
    let(:in_file) { Tempfile.new('in.yml').tap(&:close) }
    let(:out_file) { Tempfile.new('out.yml').tap(&:close) }

    let(:option) do
      argv = [in_file.path, '-o', out_file.path, '--guess', guess_plugins.join(',')]
      List::CommandParser.new("connector:guess", ["config"], [], nil, argv, true)
    end

    let(:config) do
      { 'in' => {'type' => 's3'} }
    end
    let(:expect_config) do
      config.merge('out' => {}, 'exec' => {'guess_plugins' => guess_plugins}, 'filters' => [])
    end

    before do
      command.stub(:get_client).and_return(client)
      command.stub(:prepare_bulkload_job_config).and_return(config)
    end

    it 'guess_plugins passed td-client' do
      client.should_receive(:bulk_load_guess).with({config: expect_config}).and_return({})

      command.connector_guess(option)
    end
  end
end
50
+
8
51
  describe '#connector_preview' do
9
52
  let :command do
10
53
  Class.new { include TreasureData::Command }.new
@@ -129,5 +172,115 @@ module TreasureData::Command
129
172
  end
130
173
  end
131
174
  end
175
+
176
# Checks which unix time connector_run passes to client.bulk_load_run, with
# and without the optional time argument on the command line.
describe '#connector_run' do
  include_context 'quiet_out'

  let(:command) { Class.new { include TreasureData::Command }.new }
  let(:client) { double(:client) }
  let(:job_name) { 'job_1' }

  before do
    command.stub(:get_client).and_return(client)
    client.stub(:database)
  end

  context 'with scheduled_time' do
    let(:scheduled_time) { Time.now + 60 }
    let(:option) do
      argv = [job_name, scheduled_time.strftime("%Y-%m-%d %H:%M:%S")]
      List::CommandParser.new('connector:run', ['name'], ['time'], nil, argv, true)
    end

    it 'client call with unix time' do
      client.should_receive(:bulk_load_run).with(job_name, scheduled_time.to_i).and_return(123)

      command.connector_run(option)
    end
  end

  context 'without scheduled_time' do
    let(:current_time) { Time.now }
    let(:option) do
      List::CommandParser.new('connector:run', ['name'], ['time'], nil, [job_name], true)
    end

    it 'client call with unix time' do
      # current_time is stubbed on the command so the expected value is fixed.
      command.stub(:current_time).and_return(current_time.to_i)
      client.should_receive(:bulk_load_run).with(job_name, current_time.to_i).and_return(123)

      command.connector_run(option)
    end
  end
end
217
+
218
# Smoke tests for connector_history: the command must not raise for an empty
# history nor for entries with missing start/end timestamps.
describe 'connector history' do
  include_context 'quiet_out'

  let :command do
    Class.new { include TreasureData::Command }.new
  end
  let(:name) { 'connector_test' }

  subject do
    op = List::CommandParser.new("connector:history", ["name"], [], nil, [name], true)
    command.connector_history(op)
  end

  before do
    # `history` is supplied by each context below.
    client = double(:client)
    client.stub(:bulk_load_history).with(name).and_return(history)
    command.stub(:get_client).and_return(client)
  end

  context 'history is empty' do
    let(:history) { [] }

    it { expect { subject }.not_to raise_error }
  end

  context 'history is not empty' do
    let(:history) { [column] }
    let(:column) {
      # TODO set real value
      {
        'job_id' => '',
        'status' => '',
        'records' => '',
        'database' => {'name' => ''},
        'table' => {'name' => ''},
        'priority' => ''
      }
    }

    context 'job is queueing' do
      before do
        column['start_at'] = nil
        column['end_at'] = nil
      end

      it { expect { subject }.not_to raise_error }
    end

    context 'job is running' do
      before do
        column['start_at'] = Time.now.to_i
        column['end_at'] = nil
      end

      it { expect { subject }.not_to raise_error }
    end

    context 'job is finished' do
      before do
        column['start_at'] = Time.now.to_i
        column['end_at'] = (Time.now + 60).to_i
      end

      it { expect { subject }.not_to raise_error }
    end
  end
end
132
285
  end
133
286
  end