td 0.10.98 → 0.10.99

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,8 +1,15 @@
1
+ == 2014-02-26 version 0.10.99
2
+
3
+ * job:show, query: limit the number of records output when printing to stdout using the -l / --limit option
4
+ * job:show, query: optionally output/store csv and tsv files with headers using the -c / --column-header option
5
+ * job:show, query: support result output from Presto
6
+
1
7
 
2
8
  == 2014-02-21 version 0.10.98
3
9
 
4
10
  * import:unfreeze: fixed NoMethodError error
5
11
  * import:show uses more efficient REST API
12
+ * Update td-client-ruby to 0.8.57
6
13
 
7
14
 
8
15
  == 2013-12-18 version 0.10.97
data/README.rdoc CHANGED
@@ -37,6 +37,7 @@ Disable RVM or rbenv and use ruby.pkg's ruby (/usr/local/td/ruby/bin/ruby).
37
37
  And then run the following commands:
38
38
 
39
39
  $ /usr/local/td/ruby/bin/gem install bundler rubyzip
40
+ $ /usr/local/td/ruby/bin/bundle install
40
41
  $ /usr/local/td/ruby/bin/rake pkg:build
41
42
 
42
43
  == Windows
@@ -59,5 +60,5 @@ Then run following commands on MinGW Shell:
59
60
 
60
61
  = Copyright
61
62
 
62
- Copyright:: Copyright (c) 2011 Treasure Data Inc.
63
+ Copyright:: Copyright (c) 2014 Treasure Data Inc.
63
64
  License:: Apache License, Version 2.0
@@ -47,7 +47,7 @@ module Command
47
47
  op.on('-E', '--error', 'show only failed jobs', TrueClass) {|b|
48
48
  status = 'error'
49
49
  }
50
- op.on('--slow [SECONDS]', 'show slow queries (default threshold: 3600 seconds)', Integer) { |i|
50
+ op.on('--slow [SECONDS]', 'show slow queries (default threshold: 3600 seconds)', Integer) {|i|
51
51
  slower_than = i || 3600
52
52
  }
53
53
  set_render_format_option(op)
@@ -85,7 +85,8 @@ module Command
85
85
  wait = false
86
86
  output = nil
87
87
  format = nil
88
- render_opts = {}
88
+ render_opts = {:header => false}
89
+ limit = nil
89
90
  exclude = false
90
91
 
91
92
  op.on('-v', '--verbose', 'show logs', TrueClass) {|b|
@@ -107,18 +108,40 @@ module Command
107
108
  end
108
109
  format = s
109
110
  }
111
+ op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
112
+ unless s.to_i > 0
113
+ raise "Invalid limit number. Must be a positive integer"
114
+ end
115
+ limit = s.to_i
116
+ }
117
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
118
+ render_opts[:header] = b;
119
+ }
110
120
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
111
121
  exclude = b
112
122
  }
113
123
 
114
124
  job_id = op.cmd_parse
115
125
 
126
+ # validate combinations of options that cannot be used together
127
+
116
128
  if output.nil? && format
117
129
  unless ['tsv', 'csv', 'json'].include?(format)
118
- raise "Supported formats are only tsv, csv and json without --output option"
130
+ raise "Supported formats are only tsv, csv and json without -o / --output option"
119
131
  end
120
132
  end
121
133
 
134
+ if render_opts[:header]
135
+ unless ['tsv', 'csv'].include?(format)
136
+ raise "Option -c / --column-header is only supported with tsv and csv formats"
137
+ end
138
+ end
139
+
140
+ if !output.nil? && !limit.nil?
141
+ raise "Option -l / --limit is only valid when not outputting to file " +
142
+ "(no -o / --output option provided)"
143
+ end
144
+
122
145
  client = get_client
123
146
 
124
147
  job = client.job(job_id)
@@ -127,43 +150,51 @@ module Command
127
150
  #puts "URL : #{job.url}"
128
151
  puts "Status : #{job.status}"
129
152
  puts "Type : #{job.type}"
130
- puts "Priority : #{job_priority_name_of(job.priority)}"
131
- puts "Retry limit : #{job.retry_limit}"
132
- puts "Result : #{job.result_url}"
133
153
  puts "Database : #{job.db_name}"
134
- puts "Query : #{job.query}"
154
+ # exclude some fields from bulk_import_perform type jobs
155
+ if [:hive, :pig, :impala, :presto].include?(job.type)
156
+ puts "Priority : #{job_priority_name_of(job.priority)}"
157
+ puts "Retry limit : #{job.retry_limit}"
158
+ puts "Output : #{job.result_url}"
159
+ puts "Query : #{job.query}"
160
+ end
135
161
 
136
162
  if wait && !job.finished?
137
163
  wait_job(job)
138
- if [:hive, :pig, :impala].include?(job.type) && !exclude
164
+ if [:hive, :pig, :impala, :presto].include?(job.type) && !exclude
139
165
  puts "Result :"
140
166
  begin
141
- show_result(job, output, format, render_opts)
167
+ show_result(job, output, limit, format, render_opts)
142
168
  rescue TreasureData::NotFoundError => e
143
169
  # Got 404 because result not found.
144
170
  end
145
171
  end
146
172
 
147
173
  else
148
- if [:hive, :pig, :impala].include?(job.type) && !exclude
174
+ if [:hive, :pig, :impala, :presto].include?(job.type) && !exclude
149
175
  puts "Result :"
150
176
  begin
151
- show_result(job, output, format, render_opts)
177
+ show_result(job, output, limit, format, render_opts)
152
178
  rescue TreasureData::NotFoundError => e
179
+ # Got 404 because result not found.
153
180
  end
154
181
  end
155
182
 
156
183
  if verbose
157
- puts ""
158
- puts "cmdout:"
159
- job.debug['cmdout'].to_s.split("\n").each {|line|
160
- puts " "+line
161
- }
162
- puts ""
163
- puts "stderr:"
164
- job.debug['stderr'].to_s.split("\n").each {|line|
165
- puts " "+line
166
- }
184
+ if !job.debug['cmdout'].nil?
185
+ puts ""
186
+ puts "cmdout:"
187
+ job.debug['cmdout'].to_s.split("\n").each {|line|
188
+ puts " " + line
189
+ }
190
+ end
191
+ if !job.debug['stderr'].nil?
192
+ puts ""
193
+ puts "stderr:"
194
+ job.debug['stderr'].to_s.split("\n").each {|line|
195
+ puts " " + line
196
+ }
197
+ end
167
198
  end
168
199
  end
169
200
 
@@ -226,72 +257,97 @@ module Command
226
257
  end
227
258
  end
228
259
 
229
- def show_result(job, output, format, render_opts={})
260
+ def show_result(job, output, limit, format, render_opts={})
230
261
  if output
231
- write_result(job, output, format)
262
+ write_result(job, output, limit, format, render_opts)
232
263
  puts "written to #{output} in #{format} format"
233
264
  else
234
- render_result(job, render_opts, format)
265
+ render_result(job, limit, format, render_opts)
235
266
  end
236
267
  end
237
268
 
238
- def write_result(job, output, format)
269
+ def write_result(job, output, limit, format, render_opts={})
270
+
271
+ # the next 3 formats allow writing to both a file and stdout
272
+
239
273
  case format
240
274
  when 'json'
241
275
  require 'yajl'
242
- first = true
243
- open_file(output, "w") { |f|
276
+ open_file(output, "w") {|f|
244
277
  f.write "["
278
+ n_rows = 0
245
279
  job.result_each {|row|
246
- if first
247
- first = false
248
- else
249
- f.write ","
250
- end
280
+ f.write ",\n" if n_rows > 0
251
281
  f.write Yajl.dump(row)
282
+ n_rows += 1
283
+ break if output.nil? and !limit.nil? and n_rows == limit
252
284
  }
253
285
  f.write "]"
254
286
  }
255
287
  puts if output.nil?
256
288
 
257
- when 'msgpack'
258
- open_file(output, "wb") { |f|
259
- job.result_format('msgpack', f)
260
- }
261
-
262
- when 'msgpack.gz'
263
- open_file(output, "wb") { |f|
264
- job.result_format('msgpack.gz', f)
265
- }
266
-
267
289
  when 'csv'
268
290
  require 'yajl'
269
291
  require 'csv'
270
292
 
271
- open_file(output, "w") { |f|
293
+ open_file(output, "w") {|f|
272
294
  writer = CSV.new(f)
295
+ n_rows = 0
296
+ # output headers
297
+ if render_opts[:header] && job.hive_result_schema
298
+ writer << job.hive_result_schema.map {|name,type|
299
+ name
300
+ }
301
+ end
302
+ # output data
273
303
  job.result_each {|row|
274
- writer << row.map {|col| dump_column(col) }
304
+ # TODO limit the # of columns
305
+ writer << row.map {|col|
306
+ dump_column(col)
307
+ }
308
+ n_rows += 1
309
+ break if output.nil? and !limit.nil? and n_rows == limit
275
310
  }
276
311
  }
277
312
 
278
313
  when 'tsv'
279
314
  require 'yajl'
280
- open_file(output, "w") { |f|
315
+ open_file(output, "w") {|f|
316
+ # output headers
317
+ if render_opts[:header] && job.hive_result_schema
318
+ job.hive_result_schema.each {|name,type|
319
+ f.write name + "\t"
320
+ }
321
+ f.write "\n"
322
+ end
323
+ # output data
324
+ n_rows = 0
281
325
  job.result_each {|row|
282
- first = true
326
+ n_cols = 0
283
327
  row.each {|col|
284
- if first
285
- first = false
286
- else
287
- f.write "\t"
288
- end
328
+ f.write "\t" if n_cols > 0
329
+ # TODO limit the # of columns
289
330
  f.write dump_column(col)
331
+ n_cols += 1
290
332
  }
291
333
  f.write "\n"
334
+ n_rows += 1
335
+ break if output.nil? and !limit.nil? and n_rows == limit
292
336
  }
293
337
  }
294
338
 
339
+ # these last 2 formats are only valid if writing the result to file through the -o/--output option.
340
+
341
+ when 'msgpack'
342
+ open_file(output, "wb") {|f|
343
+ job.result_format('msgpack', f)
344
+ }
345
+
346
+ when 'msgpack.gz'
347
+ open_file(output, "wb") {|f|
348
+ job.result_format('msgpack.gz', f)
349
+ }
350
+
295
351
  else
296
352
  raise "Unknown format #{format.inspect}"
297
353
  end
@@ -311,26 +367,32 @@ module Command
311
367
  end
312
368
  end
313
369
 
314
- def render_result(job, opts, format = nil)
370
+ def render_result(job, limit, format=nil, render_opts={})
315
371
  require 'yajl'
316
372
 
317
373
  if format.nil?
374
+ # display result in tabular format
318
375
  rows = []
376
+ n_rows = 0
319
377
  job.result_each {|row|
320
378
  # TODO limit number of rows to show
321
379
  rows << row.map {|v|
322
380
  dump_column(v)
323
381
  }
382
+ n_rows += 1
383
+ break if !limit.nil? and n_rows == limit
324
384
  }
325
385
 
326
- opts[:max_width] = 10000
386
+ render_opts[:max_width] = 10000
327
387
  if job.hive_result_schema
328
- opts[:change_fields] = job.hive_result_schema.map {|name,type| name }
388
+ render_opts[:change_fields] = job.hive_result_schema.map { |name,type| name }
329
389
  end
330
390
 
331
- puts cmd_render_table(rows, opts)
391
+ puts cmd_render_table(rows, render_opts)
332
392
  else
333
- write_result(job, nil, format)
393
+ # display result in any of: json, csv, tsv.
394
+ # msgpack and msgpack.gz are not supported for stdout output
395
+ write_result(job, nil, limit, format, render_opts)
334
396
  end
335
397
  end
336
398
 
@@ -7,7 +7,7 @@ module Command
7
7
  wait = false
8
8
  output = nil
9
9
  format = nil
10
- render_opts = {}
10
+ render_opts = {:header => false}
11
11
  result_url = nil
12
12
  result_user = nil
13
13
  result_ask_password = false
@@ -16,6 +16,7 @@ module Command
16
16
  query = nil
17
17
  sampling_all = nil
18
18
  type = nil
19
+ limit = nil
19
20
  exclude = false
20
21
 
21
22
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
@@ -58,24 +59,41 @@ module Command
58
59
  op.on('-q', '--query PATH', 'use file instead of inline query') {|s|
59
60
  query = File.open(s) { |f| f.read.strip }
60
61
  }
61
- op.on('-T', '--type TYPE', 'set query type (hive or pig)') {|s|
62
+ op.on('-T', '--type TYPE', 'set query type (hive, pig, impala, presto)') {|s|
62
63
  type = s.to_sym
63
64
  }
64
65
  op.on('--sampling DENOMINATOR', 'enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
65
66
  sampling_all = i
66
67
  }
68
+ op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
69
+ unless s.to_i > 0
70
+ raise "Invalid limit number. Must be a positive integer"
71
+ end
72
+ limit = s.to_i
73
+ }
74
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
75
+ render_opts[:header] = b;
76
+ }
67
77
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
68
78
  exclude = b
69
79
  }
70
80
 
71
81
  sql = op.cmd_parse
72
82
 
83
+ # validate combinations of options that cannot be used together
84
+
73
85
  if output.nil? && format
74
86
  unless ['tsv', 'csv', 'json'].include?(format)
75
87
  raise "Supported formats are only tsv, csv and json without --output option"
76
88
  end
77
89
  end
78
90
 
91
+ if render_opts[:header]
92
+ unless ['tsv', 'csv'].include?(format)
93
+ raise "Option -c / --column-header is only supported with tsv and csv formats"
94
+ end
95
+ end
96
+
79
97
  unless db_name
80
98
  $stderr.puts "-d, --database DB_NAME option is required."
81
99
  exit 1
@@ -117,7 +135,7 @@ module Command
117
135
  if job.success? && !exclude
118
136
  puts "Result :"
119
137
  begin
120
- show_result(job, output, format, render_opts)
138
+ show_result(job, output, limit, format, render_opts)
121
139
  rescue TreasureData::NotFoundError => e
122
140
  end
123
141
  end
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.98'
3
+ VERSION = '0.10.99'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.98
4
+ version: 0.10.99
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-22 00:00:00.000000000 Z
12
+ date: 2014-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -288,12 +288,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
288
  - - ! '>='
289
289
  - !ruby/object:Gem::Version
290
290
  version: '0'
291
+ segments:
292
+ - 0
293
+ hash: 687370063067596647
291
294
  required_rubygems_version: !ruby/object:Gem::Requirement
292
295
  none: false
293
296
  requirements:
294
297
  - - ! '>='
295
298
  - !ruby/object:Gem::Version
296
299
  version: '0'
300
+ segments:
301
+ - 0
302
+ hash: 687370063067596647
297
303
  requirements: []
298
304
  rubyforge_project:
299
305
  rubygems_version: 1.8.23
@@ -310,4 +316,3 @@ test_files:
310
316
  - spec/spec_helper.rb
311
317
  - spec/td/helpers_spec.rb
312
318
  - spec/td/version_spec.rb
313
- has_rdoc: false