td 0.10.98 → 0.10.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -1,8 +1,15 @@
1
+ == 2014-02-26 version 0.10.99
2
+
3
+ * job:show, query: limit the number of records output when printing to stdout using the -l / --limit option
4
+ * job:show, query: optionally output/store csv and tsv files with headers using the -c / --column-header option
5
+ * job:show, query: support result output from Presto
6
+
1
7
 
2
8
  == 2014-02-21 version 0.10.98
3
9
 
4
10
  * import:unfreeze: fixed NoMethodError error
5
11
  * import:show uses more efficient REST API
12
+ * Update td-client-ruby to 0.8.57
6
13
 
7
14
 
8
15
  == 2013-12-18 version 0.10.97
data/README.rdoc CHANGED
@@ -37,6 +37,7 @@ Disable RVM or rbenv and use ruby.pkg's ruby (/usr/local/td/ruby/bin/ruby).
37
37
  And then run the following commands:
38
38
 
39
39
  $ /usr/local/td/ruby/bin/gem install bundler rubyzip
40
+ $ /usr/local/td/ruby/bin/bundle install
40
41
  $ /usr/local/td/ruby/bin/rake pkg:build
41
42
 
42
43
  == Windows
@@ -59,5 +60,5 @@ Then run following commands on MinGW Shell:
59
60
 
60
61
  = Copyright
61
62
 
62
- Copyright:: Copyright (c) 2011 Treasure Data Inc.
63
+ Copyright:: Copyright (c) 2014 Treasure Data Inc.
63
64
  License:: Apache License, Version 2.0
@@ -47,7 +47,7 @@ module Command
47
47
  op.on('-E', '--error', 'show only failed jobs', TrueClass) {|b|
48
48
  status = 'error'
49
49
  }
50
- op.on('--slow [SECONDS]', 'show slow queries (default threshold: 3600 seconds)', Integer) { |i|
50
+ op.on('--slow [SECONDS]', 'show slow queries (default threshold: 3600 seconds)', Integer) {|i|
51
51
  slower_than = i || 3600
52
52
  }
53
53
  set_render_format_option(op)
@@ -85,7 +85,8 @@ module Command
85
85
  wait = false
86
86
  output = nil
87
87
  format = nil
88
- render_opts = {}
88
+ render_opts = {:header => false}
89
+ limit = nil
89
90
  exclude = false
90
91
 
91
92
  op.on('-v', '--verbose', 'show logs', TrueClass) {|b|
@@ -107,18 +108,40 @@ module Command
107
108
  end
108
109
  format = s
109
110
  }
111
+ op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
112
+ unless s.to_i > 0
113
+ raise "Invalid limit number. Must be a positive integer"
114
+ end
115
+ limit = s.to_i
116
+ }
117
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
118
+ render_opts[:header] = b;
119
+ }
110
120
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
111
121
  exclude = b
112
122
  }
113
123
 
114
124
  job_id = op.cmd_parse
115
125
 
126
+ # validate that the combination of options provided is consistent
127
+
116
128
  if output.nil? && format
117
129
  unless ['tsv', 'csv', 'json'].include?(format)
118
- raise "Supported formats are only tsv, csv and json without --output option"
130
+ raise "Supported formats are only tsv, csv and json without -o / --output option"
119
131
  end
120
132
  end
121
133
 
134
+ if render_opts[:header]
135
+ unless ['tsv', 'csv'].include?(format)
136
+ raise "Option -c / --column-header is only supported with tsv and csv formats"
137
+ end
138
+ end
139
+
140
+ if !output.nil? && !limit.nil?
141
+ raise "Option -l / --limit is only valid when not outputting to file " +
142
+ "(no -o / --output option provided)"
143
+ end
144
+
122
145
  client = get_client
123
146
 
124
147
  job = client.job(job_id)
@@ -127,43 +150,51 @@ module Command
127
150
  #puts "URL : #{job.url}"
128
151
  puts "Status : #{job.status}"
129
152
  puts "Type : #{job.type}"
130
- puts "Priority : #{job_priority_name_of(job.priority)}"
131
- puts "Retry limit : #{job.retry_limit}"
132
- puts "Result : #{job.result_url}"
133
153
  puts "Database : #{job.db_name}"
134
- puts "Query : #{job.query}"
154
+ # exclude some fields from bulk_import_perform type jobs
155
+ if [:hive, :pig, :impala, :presto].include?(job.type)
156
+ puts "Priority : #{job_priority_name_of(job.priority)}"
157
+ puts "Retry limit : #{job.retry_limit}"
158
+ puts "Output : #{job.result_url}"
159
+ puts "Query : #{job.query}"
160
+ end
135
161
 
136
162
  if wait && !job.finished?
137
163
  wait_job(job)
138
- if [:hive, :pig, :impala].include?(job.type) && !exclude
164
+ if [:hive, :pig, :impala, :presto].include?(job.type) && !exclude
139
165
  puts "Result :"
140
166
  begin
141
- show_result(job, output, format, render_opts)
167
+ show_result(job, output, limit, format, render_opts)
142
168
  rescue TreasureData::NotFoundError => e
143
169
  # Got 404 because result not found.
144
170
  end
145
171
  end
146
172
 
147
173
  else
148
- if [:hive, :pig, :impala].include?(job.type) && !exclude
174
+ if [:hive, :pig, :impala, :presto].include?(job.type) && !exclude
149
175
  puts "Result :"
150
176
  begin
151
- show_result(job, output, format, render_opts)
177
+ show_result(job, output, limit, format, render_opts)
152
178
  rescue TreasureData::NotFoundError => e
179
+ # Got 404 because result not found.
153
180
  end
154
181
  end
155
182
 
156
183
  if verbose
157
- puts ""
158
- puts "cmdout:"
159
- job.debug['cmdout'].to_s.split("\n").each {|line|
160
- puts " "+line
161
- }
162
- puts ""
163
- puts "stderr:"
164
- job.debug['stderr'].to_s.split("\n").each {|line|
165
- puts " "+line
166
- }
184
+ if !job.debug['cmdout'].nil?
185
+ puts ""
186
+ puts "cmdout:"
187
+ job.debug['cmdout'].to_s.split("\n").each {|line|
188
+ puts " " + line
189
+ }
190
+ end
191
+ if !job.debug['stderr'].nil?
192
+ puts ""
193
+ puts "stderr:"
194
+ job.debug['stderr'].to_s.split("\n").each {|line|
195
+ puts " " + line
196
+ }
197
+ end
167
198
  end
168
199
  end
169
200
 
@@ -226,72 +257,97 @@ module Command
226
257
  end
227
258
  end
228
259
 
229
- def show_result(job, output, format, render_opts={})
260
+ def show_result(job, output, limit, format, render_opts={})
230
261
  if output
231
- write_result(job, output, format)
262
+ write_result(job, output, limit, format, render_opts)
232
263
  puts "written to #{output} in #{format} format"
233
264
  else
234
- render_result(job, render_opts, format)
265
+ render_result(job, limit, format, render_opts)
235
266
  end
236
267
  end
237
268
 
238
- def write_result(job, output, format)
269
+ def write_result(job, output, limit, format, render_opts={})
270
+
271
+ # the next 3 formats allow writing to both a file and stdout
272
+
239
273
  case format
240
274
  when 'json'
241
275
  require 'yajl'
242
- first = true
243
- open_file(output, "w") { |f|
276
+ open_file(output, "w") {|f|
244
277
  f.write "["
278
+ n_rows = 0
245
279
  job.result_each {|row|
246
- if first
247
- first = false
248
- else
249
- f.write ","
250
- end
280
+ f.write ",\n" if n_rows > 0
251
281
  f.write Yajl.dump(row)
282
+ n_rows += 1
283
+ break if output.nil? and !limit.nil? and n_rows == limit
252
284
  }
253
285
  f.write "]"
254
286
  }
255
287
  puts if output.nil?
256
288
 
257
- when 'msgpack'
258
- open_file(output, "wb") { |f|
259
- job.result_format('msgpack', f)
260
- }
261
-
262
- when 'msgpack.gz'
263
- open_file(output, "wb") { |f|
264
- job.result_format('msgpack.gz', f)
265
- }
266
-
267
289
  when 'csv'
268
290
  require 'yajl'
269
291
  require 'csv'
270
292
 
271
- open_file(output, "w") { |f|
293
+ open_file(output, "w") {|f|
272
294
  writer = CSV.new(f)
295
+ n_rows = 0
296
+ # output headers
297
+ if render_opts[:header] && job.hive_result_schema
298
+ writer << job.hive_result_schema.map {|name,type|
299
+ name
300
+ }
301
+ end
302
+ # output data
273
303
  job.result_each {|row|
274
- writer << row.map {|col| dump_column(col) }
304
+ # TODO limit the # of columns
305
+ writer << row.map {|col|
306
+ dump_column(col)
307
+ }
308
+ n_rows += 1
309
+ break if output.nil? and !limit.nil? and n_rows == limit
275
310
  }
276
311
  }
277
312
 
278
313
  when 'tsv'
279
314
  require 'yajl'
280
- open_file(output, "w") { |f|
315
+ open_file(output, "w") {|f|
316
+ # output headers
317
+ if render_opts[:header] && job.hive_result_schema
318
+ job.hive_result_schema.each {|name,type|
319
+ f.write name + "\t"
320
+ }
321
+ f.write "\n"
322
+ end
323
+ # output data
324
+ n_rows = 0
281
325
  job.result_each {|row|
282
- first = true
326
+ n_cols = 0
283
327
  row.each {|col|
284
- if first
285
- first = false
286
- else
287
- f.write "\t"
288
- end
328
+ f.write "\t" if n_cols > 0
329
+ # TODO limit the # of columns
289
330
  f.write dump_column(col)
331
+ n_cols += 1
290
332
  }
291
333
  f.write "\n"
334
+ n_rows += 1
335
+ break if output.nil? and !limit.nil? and n_rows == limit
292
336
  }
293
337
  }
294
338
 
339
+ # these last 2 formats are only valid if writing the result to file through the -o/--output option.
340
+
341
+ when 'msgpack'
342
+ open_file(output, "wb") {|f|
343
+ job.result_format('msgpack', f)
344
+ }
345
+
346
+ when 'msgpack.gz'
347
+ open_file(output, "wb") {|f|
348
+ job.result_format('msgpack.gz', f)
349
+ }
350
+
295
351
  else
296
352
  raise "Unknown format #{format.inspect}"
297
353
  end
@@ -311,26 +367,32 @@ module Command
311
367
  end
312
368
  end
313
369
 
314
- def render_result(job, opts, format = nil)
370
+ def render_result(job, limit, format=nil, render_opts={})
315
371
  require 'yajl'
316
372
 
317
373
  if format.nil?
374
+ # display result in tabular format
318
375
  rows = []
376
+ n_rows = 0
319
377
  job.result_each {|row|
320
378
  # TODO limit number of rows to show
321
379
  rows << row.map {|v|
322
380
  dump_column(v)
323
381
  }
382
+ n_rows += 1
383
+ break if !limit.nil? and n_rows == limit
324
384
  }
325
385
 
326
- opts[:max_width] = 10000
386
+ render_opts[:max_width] = 10000
327
387
  if job.hive_result_schema
328
- opts[:change_fields] = job.hive_result_schema.map {|name,type| name }
388
+ render_opts[:change_fields] = job.hive_result_schema.map { |name,type| name }
329
389
  end
330
390
 
331
- puts cmd_render_table(rows, opts)
391
+ puts cmd_render_table(rows, render_opts)
332
392
  else
333
- write_result(job, nil, format)
393
+ # display result in any of: json, csv, tsv.
394
+ # msgpack and msgpack.gz are not supported for stdout output
395
+ write_result(job, nil, limit, format, render_opts)
334
396
  end
335
397
  end
336
398
 
@@ -7,7 +7,7 @@ module Command
7
7
  wait = false
8
8
  output = nil
9
9
  format = nil
10
- render_opts = {}
10
+ render_opts = {:header => false}
11
11
  result_url = nil
12
12
  result_user = nil
13
13
  result_ask_password = false
@@ -16,6 +16,7 @@ module Command
16
16
  query = nil
17
17
  sampling_all = nil
18
18
  type = nil
19
+ limit = nil
19
20
  exclude = false
20
21
 
21
22
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
@@ -58,24 +59,41 @@ module Command
58
59
  op.on('-q', '--query PATH', 'use file instead of inline query') {|s|
59
60
  query = File.open(s) { |f| f.read.strip }
60
61
  }
61
- op.on('-T', '--type TYPE', 'set query type (hive or pig)') {|s|
62
+ op.on('-T', '--type TYPE', 'set query type (hive, pig, impala, presto)') {|s|
62
63
  type = s.to_sym
63
64
  }
64
65
  op.on('--sampling DENOMINATOR', 'enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
65
66
  sampling_all = i
66
67
  }
68
+ op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
69
+ unless s.to_i > 0
70
+ raise "Invalid limit number. Must be a positive integer"
71
+ end
72
+ limit = s.to_i
73
+ }
74
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
75
+ render_opts[:header] = b;
76
+ }
67
77
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
68
78
  exclude = b
69
79
  }
70
80
 
71
81
  sql = op.cmd_parse
72
82
 
83
+ # validate that the combination of options provided is consistent
84
+
73
85
  if output.nil? && format
74
86
  unless ['tsv', 'csv', 'json'].include?(format)
75
87
  raise "Supported formats are only tsv, csv and json without --output option"
76
88
  end
77
89
  end
78
90
 
91
+ if render_opts[:header]
92
+ unless ['tsv', 'csv'].include?(format)
93
+ raise "Option -c / --column-header is only supported with tsv and csv formats"
94
+ end
95
+ end
96
+
79
97
  unless db_name
80
98
  $stderr.puts "-d, --database DB_NAME option is required."
81
99
  exit 1
@@ -117,7 +135,7 @@ module Command
117
135
  if job.success? && !exclude
118
136
  puts "Result :"
119
137
  begin
120
- show_result(job, output, format, render_opts)
138
+ show_result(job, output, limit, format, render_opts)
121
139
  rescue TreasureData::NotFoundError => e
122
140
  end
123
141
  end
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.98'
3
+ VERSION = '0.10.99'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.98
4
+ version: 0.10.99
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-22 00:00:00.000000000 Z
12
+ date: 2014-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -288,12 +288,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
288
  - - ! '>='
289
289
  - !ruby/object:Gem::Version
290
290
  version: '0'
291
+ segments:
292
+ - 0
293
+ hash: 687370063067596647
291
294
  required_rubygems_version: !ruby/object:Gem::Requirement
292
295
  none: false
293
296
  requirements:
294
297
  - - ! '>='
295
298
  - !ruby/object:Gem::Version
296
299
  version: '0'
300
+ segments:
301
+ - 0
302
+ hash: 687370063067596647
297
303
  requirements: []
298
304
  rubyforge_project:
299
305
  rubygems_version: 1.8.23
@@ -310,4 +316,3 @@ test_files:
310
316
  - spec/spec_helper.rb
311
317
  - spec/td/helpers_spec.rb
312
318
  - spec/td/version_spec.rb
313
- has_rdoc: false