td 0.12.0 → 0.13.0
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/ChangeLog +15 -0
- data/lib/td/command/common.rb +96 -26
- data/lib/td/command/connector.rb +58 -36
- data/lib/td/command/import.rb +162 -1
- data/lib/td/command/job.rb +33 -23
- data/lib/td/command/list.rb +3 -2
- data/lib/td/command/query.rb +3 -5
- data/lib/td/command/runner.rb +1 -2
- data/lib/td/command/table.rb +29 -18
- data/lib/td/compact_format_yamler.rb +41 -0
- data/lib/td/connector_config_normalizer.rb +32 -0
- data/lib/td/version.rb +1 -1
- data/spec/spec_helper.rb +18 -0
- data/spec/td/command/connector_spec.rb +153 -0
- data/spec/td/command/import_spec.rb +179 -0
- data/spec/td/command/job_spec.rb +1 -1
- data/spec/td/command/table_spec.rb +113 -0
- data/spec/td/common_spec.rb +23 -1
- data/spec/td/compact_format_yamler_spec.rb +38 -0
- data/spec/td/connector_config_normalizer_spec.rb +62 -0
- data/td.gemspec +2 -1
- metadata +25 -5
data/lib/td/command/job.rb
CHANGED
@@ -122,9 +122,9 @@ module Command
     end

     if render_opts[:header]
-      unless ['tsv', 'csv'].include?(format)
+      unless ['json', 'tsv', 'csv'].include?(format)
         raise ParameterConfigurationError,
-              "Option -c / --column-header is only supported with tsv and csv formats"
+              "Option -c / --column-header is only supported with json, tsv and csv formats"
       end
     end

@@ -306,27 +306,15 @@ private

     case format
     when 'json'
-      require 'yajl'
-      open_file(tempfile || output, "w") {|f|
-        f.write "["
-        n_rows = 0
-        unless output.nil?
-          indicator = Command::SizeBasedDownloadProgressIndicator.new(
-            "NOTE: the job result is being written to #{output} in json format",
-            job.result_size, 0.1, 1)
-        end
-        job.result_each_with_compr_size {|row, compr_size|
-          indicator.update(compr_size) unless output.nil?
-          f.write ",\n" if n_rows > 0
-          f.write Yajl.dump(row)
-          n_rows += 1
-          break if output.nil? and !limit.nil? and n_rows == limit
-        }
-        f.write "]"
-        indicator.finish unless output.nil?
-      }
-      $stdout.puts if output.nil?
+      if render_opts[:header] && job.hive_result_schema
+        headers = job.hive_result_schema.map {|name, _| name }

+        write_result_for_json(job, output, tempfile, limit, render_opts) {|row|
+          Hash[headers.zip(row)]
+        }
+      else
+        write_result_for_json(job, output, tempfile, limit, render_opts) {|row| row }
+      end
     when 'csv'
       require 'yajl'
       require 'csv'
@@ -444,6 +432,29 @@ private
     end
   end

+  def write_result_for_json(job, output, tempfile, limit, render_opts)
+    require 'yajl'
+    open_file(tempfile || output, "w") {|f|
+      f.write "["
+      n_rows = 0
+      unless output.nil?
+        indicator = Command::SizeBasedDownloadProgressIndicator.new(
+          "NOTE: the job result is being written to #{output} in json format",
+          job.result_size, 0.1, 1)
+      end
+      job.result_each_with_compr_size {|row, compr_size|
+        indicator.update(compr_size) unless output.nil?
+        f.write ",\n" if n_rows > 0
+        f.write Yajl.dump(yield(row))
+        n_rows += 1
+        break if output.nil? and !limit.nil? and n_rows == limit
+      }
+      f.write "]"
+      indicator.finish unless output.nil?
+    }
+    $stdout.puts if output.nil?
+  end
+
   def render_result(job, limit, format=nil, render_opts={})
     require 'yajl'

@@ -515,4 +526,3 @@ private

 end # module Command
 end # module TrasureData
-
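Note on the job.rb change above: when -c / --column-header is given and the job has a result schema, each JSON row is now written as an object keyed by column name (via Hash[headers.zip(row)]) instead of a bare array. A minimal sketch of that mapping, using made-up column names and values and the stdlib JSON module in place of the gem's Yajl:

require 'json'

# Hypothetical schema in the shape returned by job.hive_result_schema: [name, type] pairs.
hive_result_schema = [['user_id', 'long'], ['path', 'string']]
headers = hive_result_schema.map {|name, _| name }   # => ["user_id", "path"]

row = [42, '/index.html']                            # one illustration-only result row
puts JSON.generate(Hash[headers.zip(row)])           # => {"user_id":42,"path":"/index.html"}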
data/lib/td/command/list.rb
CHANGED
@@ -236,6 +236,7 @@ module List
       add_list 'table:import', %w[db table files_], 'Parse and import files to a table', ['table:import example_db table1 --apache access.log', 'table:import example_db table1 --json -t time - < test.json']
       add_list 'table:export', %w[db table], 'Dump logs in a table to the specified storage', ['table:export example_db table1 --s3-bucket mybucket -k KEY_ID -s SECRET_KEY']
       add_list 'table:swap', %w[db table1 table2], 'Swap names of two tables', ['table:swap example_db table1 table2']
+      add_list 'table:rename', %w[db from_table dest_table], 'rename exist table', ['table:rename example_db table1 table2']
       add_list 'table:tail', %w[db table], 'Get recently imported logs', ['table:tail example_db table1', 'table:tail example_db table1 -n 30']
       add_list 'table:partial_delete', %w[db table], 'Delete logs from the table within the specified time range', ['table:partial_delete example_db table1 --from 1341000000 --to 1341003600']
       add_list 'table:expire', %w[db table expire_days], 'Expire data in table after specified number of days. Set to 0 to disable the expiration.', ['table:expire example_db table1 30']
@@ -269,6 +270,7 @@ module List
       add_list 'import:delete', %w[name], 'Delete a bulk import session', ['import:delete logs_201201']
       add_list 'import:freeze', %w[name], 'Reject succeeding uploadings to a bulk import session', ['import:freeze logs_201201']
       add_list 'import:unfreeze', %w[name], 'Unfreeze a frozen bulk import session', ['import:unfreeze logs_201201']
+      add_list 'import:config', %w[files_], 'create guess config from arguments', ['import:config "s3://<s3_access_key>:<s3_secret_key>@/my_bucket/path/to/*.csv" -o seed.yml']

       add_list 'result:list', %w[], 'Show list of result URLs', ['result:list', 'results']
       add_list 'result:show', %w[name], 'Describe information of a result URL', ['result name']
@@ -353,7 +355,7 @@ module List
       add_list 'connector:update', %w[name config], 'Modify connector session', ['connector:update connector1 td-bulkload.yml']
       add_list 'connector:delete', %w[name], 'Delete connector session', ['connector:delete connector1']
       add_list 'connector:history', %w[name], 'Show job history of connector session', ['connector:history connector1']
-      add_list 'connector:run', %w[name time], 'Run connector session for the specified time', ['connector:run connector1 "2016-01-01 00:00:00"']
+      add_list 'connector:run', %w[name time?], 'Run connector with session for the specified time option', ['connector:run connector1 "2016-01-01 00:00:00"']

       # aliases
       add_alias 'db', 'db:show'
@@ -441,4 +443,3 @@ module List
 end # module List
 end # module Command
 end # module TreasureData
-
data/lib/td/command/query.rb
CHANGED
@@ -76,7 +76,7 @@ module Command
       end
       limit = s.to_i
     }
-    op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
+    op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to json, tsv and csv formats)', TrueClass) {|b|
       render_opts[:header] = b
     }
     op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
@@ -115,9 +115,9 @@ module Command
     end

     if render_opts[:header]
-      unless ['tsv', 'csv'].include?(format)
+      unless ['tsv', 'csv', 'json'].include?(format)
         raise ParameterConfigurationError,
-              "Option -c / --column-header is only supported with tsv and csv formats"
+              "Option -c / --column-header is only supported with json, tsv and csv formats"
       end
     end

@@ -155,5 +155,3 @@ module Command
   require 'td/command/job' # wait_job, job_priority_id_of
 end
 end
-
-
data/lib/td/command/runner.rb
CHANGED
@@ -190,7 +190,7 @@ EOF
       show_backtrace "Error #{$!.class}: backtrace:", $!.backtrace
     end

-    if $!.respond_to?
+    if $!.respond_to?(:api_backtrace) && $!.api_backtrace
       show_backtrace "Error backtrace from server:", $!.api_backtrace.split("\n")
     end

@@ -227,4 +227,3 @@ end # class Runner

 end # module Command
 end # module TreasureData
-
data/lib/td/command/table.rb
CHANGED
@@ -15,8 +15,6 @@ module Command

   def table_create(op)
     type = nil
-    primary_key = nil
-    primary_key_type = nil

     op.on('-T', '--type TYPE', 'set table type (log)') {|s|
       unless s == 'log'
@@ -24,21 +22,6 @@ module Command
       end
       type = s.to_sym
     }
-    op.on('--primary-key PRIMARY_KEY_AND_TYPE', '[primary key]:[primary key type(int or string)]') {|s|
-      unless /\A[\w]+:(string|int)\z/ =~ s
-        $stderr.puts "--primary-key PRIMARY_KEY_AND_TYPE is required, and should be in the format [primary key]:[primary key type]"
-        exit 1
-      end
-
-      args = s.split(':')
-      if args.length != 2
-        # this really shouldn't happen with the above regex
-        exit 1
-      end
-      primary_key = args[0]
-      primary_key_type = args[1]
-    }
-
     db_name, table_name = op.cmd_parse

     API.validate_table_name(table_name)
@@ -204,6 +187,35 @@ module Command
     $stderr.puts "'#{db_name}.#{table_name1}' and '#{db_name}.#{table_name2}' are swapped."
   end

+  def table_rename(op)
+    overwrite = false
+    op.on('--overwrite', 'replace existing dest table') { overwrite = true }
+    database_name, from_table_name, dest_table_name = op.cmd_parse
+
+    client = get_client
+    database = get_database(client, database_name)
+
+    unless table_exist?(database, from_table_name)
+      raise ParameterConfigurationError, "From table `#{from_table_name}` isn't exist."
+    end
+
+    if table_exist?(database, dest_table_name)
+      unless overwrite
+        raise ParameterConfigurationError, "Dest table `#{dest_table_name}` is exist. If you want to overwrite dest table, you should set `overwrite` option."
+      end
+    else
+      client.create_log_table(database_name, dest_table_name)
+    end
+
+    client.swap_table(database_name, from_table_name, dest_table_name)
+    client.delete_table(database_name, from_table_name)
+
+    $stderr.puts "'renamed from '#{database_name}.#{from_table_name}' to '#{database_name}.#{dest_table_name}'."
+  rescue ParameterConfigurationError => e
+    $stderr.puts e.message
+    exit 1
+  end
+
   def table_show(op)
     db_name, table_name = op.cmd_parse

@@ -673,4 +685,3 @@ module Command
   require 'td/command/job' # wait_job
 end
 end
-
data/lib/td/compact_format_yamler.rb
ADDED
@@ -0,0 +1,41 @@
+require 'psych'
+
+module TreasureData
+  module CompactFormatYamler
+    module Visitors
+      class YAMLTree < Psych::Visitors::YAMLTree
+        # NOTE support 2.0 following
+        unless self.respond_to? :create
+          class << self
+            alias :create :new
+          end
+        end
+
+        def visit_Hash o
+          if o.class == ::Hash && o.values.all? {|v| v.kind_of?(Numeric) || v.kind_of?(String) || v.kind_of?(Symbol) }
+            register(o, @emitter.start_mapping(nil, nil, true, Psych::Nodes::Mapping::FLOW))
+
+            o.each do |k,v|
+              accept k
+              accept v
+            end
+            @emitter.end_mapping
+          else
+            super
+          end
+        end
+      end
+    end
+
+    def self.dump(o, io = nil, options = {})
+      if Hash === io
+        options = io
+        io = nil
+      end
+
+      visitor = ::TreasureData::CompactFormatYamler::Visitors::YAMLTree.create options
+      visitor << o
+      visitor.tree.yaml io, options
+    end
+  end
+end
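The new CompactFormatYamler above emits flat hashes (all values numeric, string, or symbol) in YAML flow style and defers to Psych's default block style for everything else. A rough usage sketch, assuming the file is on the load path; the exact string Psych emits can vary slightly by Ruby version:

require 'td/compact_format_yamler'

flat = {'type' => 's3', 'bucket' => 'my_bucket'}
puts TreasureData::CompactFormatYamler.dump(flat)
# roughly: --- {type: s3, bucket: my_bucket}

nested = {'in' => {'type' => 's3'}}
puts TreasureData::CompactFormatYamler.dump(nested)
# the outer hash keeps block style; only the flat inner hash is flow-formatted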
data/lib/td/connector_config_normalizer.rb
ADDED
@@ -0,0 +1,32 @@
+module TreasureData
+  class ConnectorConfigNormalizer
+    def initialize(config)
+      @config = config
+    end
+
+    def normalized_config
+      case
+      when @config['in']
+        {
+          'in' => @config['in'],
+          'out' => @config['out'] || {},
+          'exec' => @config['exec'] || {},
+          'filters' => @config['filters'] || []
+        }
+      when @config['config']
+        if @config.size != 1
+          raise "Setting #{(@config.keys - ['config']).inspect} keys in a configuration file is not supported. Please set options to the command line argument."
+        end
+
+        self.class.new(@config['config']).normalized_config
+      else
+        {
+          'in' => @config,
+          'out' => {},
+          'exec' => {},
+          'filters' => []
+        }
+      end
+    end
+  end
+end
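ConnectorConfigNormalizer above brings the three accepted config shapes (a bare 'in' section, a hash with an 'in' key, or a legacy hash wrapped in 'config') to one canonical form. A minimal sketch with illustration-only values:

require 'td/connector_config_normalizer'

bare = {'type' => 's3', 'bucket' => 'my_bucket'}
TreasureData::ConnectorConfigNormalizer.new(bare).normalized_config
# => {"in"=>{"type"=>"s3", "bucket"=>"my_bucket"}, "out"=>{}, "exec"=>{}, "filters"=>[]}

wrapped = {'config' => {'in' => {'type' => 's3'}}}
TreasureData::ConnectorConfigNormalizer.new(wrapped).normalized_config
# => {"in"=>{"type"=>"s3"}, "out"=>{}, "exec"=>{}, "filters"=>[]}

# Combining 'config' with any other top-level key raises, as enforced in the class above.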
data/lib/td/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -49,3 +49,21 @@ def execute_td(command_line)
 end

 class CallSystemExitError < RuntimeError; end
+
+shared_context 'quiet_out' do
+  let(:stdout_io) { StringIO.new }
+  let(:stderr_io) { StringIO.new }
+
+  around do |example|
+    out = $stdout.dup
+    err= $stdout.dup
+    begin
+      $stdout = stdout_io
+      $stderr = stderr_io
+      example.call
+    ensure
+      $stdout = out
+      $stderr = err
+    end
+  end
+end
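The 'quiet_out' shared context above swaps $stdout and $stderr for StringIO objects around each example. A minimal sketch of how a spec might consume it (the example group below is hypothetical, not part of the gem):

describe 'some td subcommand' do
  include_context 'quiet_out'

  it 'captures whatever the command prints' do
    $stdout.puts 'hello from the command'
    expect(stdout_io.string).to include('hello from the command')
  end
end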
data/spec/td/command/connector_spec.rb
CHANGED
@@ -5,6 +5,49 @@ require 'td/command/connector'

 module TreasureData::Command
   describe 'connector commands' do
+    describe '#connector_guess' do
+      let :command do
+        Class.new { include TreasureData::Command }.new
+      end
+
+      describe 'guess plugins' do
+        let(:guess_plugins) { %w(json query_string) }
+
+        let(:in_file) { Tempfile.new('in.yml').tap{|f| f.close } }
+        let(:out_file) { Tempfile.new('out.yml').tap{|f| f.close } }
+
+        let(:option) {
+          List::CommandParser.new("connector:guess", ["config"], [], nil, [in_file.path, '-o', out_file.path, '--guess', guess_plugins.join(',')], true)
+        }
+        let(:client) { double(:client) }
+
+        before do
+          command.stub(:get_client).and_return(client)
+        end
+
+        let(:config) {
+          {
+            'in' => {'type' => 's3'}
+          }
+        }
+        let(:expect_config) {
+          config.merge('out' => {}, 'exec' => {'guess_plugins' => guess_plugins}, 'filters' => [])
+        }
+
+        include_context 'quiet_out'
+
+        before do
+          command.stub(:prepare_bulkload_job_config).and_return(config)
+        end
+
+        it 'guess_plugins passed td-client' do
+          client.should_receive(:bulk_load_guess).with({config: expect_config}).and_return({})
+
+          command.connector_guess(option)
+        end
+      end
+    end
+
     describe '#connector_preview' do
       let :command do
         Class.new { include TreasureData::Command }.new
@@ -129,5 +172,115 @@ module TreasureData::Command
         end
       end
     end
+
+    describe '#connector_run' do
+      include_context 'quiet_out'
+
+      let :command do
+        Class.new { include TreasureData::Command }.new
+      end
+      let(:client) { double(:client) }
+      let(:job_name) { 'job_1' }
+
+      before do
+        command.stub(:get_client).and_return(client)
+        client.stub(:database)
+      end
+
+      context 'with scheduled_time' do
+        let(:scheduled_time) { Time.now + 60 }
+        let(:option) {
+          List::CommandParser.new('connector:run', ['name'], ['time'], nil, [job_name, scheduled_time.strftime("%Y-%m-%d %H:%M:%S")], true)
+        }
+
+        it 'client call with unix time' do
+          client.should_receive(:bulk_load_run).with(job_name, scheduled_time.to_i).and_return(123)
+
+          command.connector_run(option)
+        end
+      end
+
+      context 'without scheduled_time' do
+        let(:option) {
+          List::CommandParser.new('connector:run', ['name'], ['time'], nil, [job_name], true)
+        }
+        let(:current_time) { Time.now }
+
+        it 'client call with unix time' do
+          client.should_receive(:bulk_load_run).with(job_name, current_time.to_i).and_return(123)
+          command.stub(:current_time).and_return(current_time.to_i)
+
+          command.connector_run(option)
+        end
+      end
+    end
+
+    describe 'connector history' do
+      include_context 'quiet_out'
+
+      let :command do
+        Class.new { include TreasureData::Command }.new
+      end
+      let(:name) { 'connector_test' }
+
+      subject do
+        op = List::CommandParser.new("connector:history", ["name"], [], nil, [name], true)
+        command.connector_history(op)
+      end
+
+      before do
+        client = double(:client)
+        client.stub(:bulk_load_history).with(name).and_return(history)
+        command.stub(:get_client).and_return(client)
+      end
+
+      context 'history is empty' do
+        let(:history) { [] }
+
+        it { expect { subject }.not_to raise_error }
+      end
+
+      context 'history in not empty' do
+        let(:history) { [column] }
+        let(:column) {
+          # TODO set real value
+          {
+            'job_id' => '',
+            'status' => '',
+            'records' => '',
+            'database' => {'name' => ''},
+            'table' => {'name' => ''},
+            'priority' => ''
+          }
+        }
+
+        context 'job is queueing' do
+          before do
+            column['start_at'] = nil
+            column['end_at'] = nil
+          end
+
+          it { expect { subject }.not_to raise_error }
+        end
+
+        context 'job is running' do
+          before do
+            column['start_at'] = Time.now.to_i
+            column['end_at'] = nil
+          end
+
+          it { expect { subject }.not_to raise_error }
+        end
+
+        context 'jobi is finished' do
+          before do
+            column['start_at'] = Time.now.to_i
+            column['end_at'] = (Time.now + 60).to_i
+          end
+
+          it { expect { subject }.not_to raise_error }
+        end
+      end
+    end
   end
 end