td 0.10.99 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -248,8 +248,8 @@ module List
248
248
  add_list 'import:list', %w[], 'List bulk import sessions', 'import:list'
249
249
  add_list 'import:show', %w[name], 'Show list of uploaded parts', 'import:show'
250
250
  add_list 'import:create', %w[name db table], 'Create a new bulk import session to the the table', 'import:create logs_201201 example_db event_logs'
251
- add_list 'import:java_version', %w[], 'Show version', 'import:java_version'
252
- add_list 'import:jar_update', %w[], 'Update import jar', 'import:jar_update'
251
+ add_list 'import:jar_version', %w[], 'Show import jar version', 'import:jar_version'
252
+ add_list 'import:jar_update', %w[], 'Update import jar to the latest version', 'import:jar_update'
253
253
  add_list 'import:prepare', %w[files_], 'Convert files into part file format', 'import:prepare logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
254
254
  add_list 'import:upload', %w[name files_], 'Upload or re-upload files into a bulk import session', 'import:upload parts/* --parallel 4'
255
255
  add_list 'import:auto', %w[name files_], 'Upload files and automatically perform and commit the data', 'import:auto parts/* --parallel 4'
@@ -261,9 +261,9 @@ module List
261
261
  add_list 'import:unfreeze', %w[name], 'Unfreeze a frozen bulk import session', 'import:unfreeze logs_201201'
262
262
 
263
263
  add_list 'result:list', %w[], 'Show list of result URLs', 'result:list', 'results'
264
- add_list 'result:show', %w[name], 'Describe information of a result URL', 'result mydb'
265
- add_list 'result:create', %w[name URL], 'Create a result URL', 'result:create mydb mysql://my-server/mydb'
266
- add_list 'result:delete', %w[name], 'Delete a result URL', 'result:delete mydb'
264
+ add_list 'result:show', %w[name], 'Describe information of a result URL', 'result name'
265
+ add_list 'result:create', %w[name URL], 'Create a result URL', 'result:create name mysql://my-server/mydb'
266
+ add_list 'result:delete', %w[name], 'Delete a result URL', 'result:delete name'
267
267
 
268
268
  add_list 'status', %w[], 'Show schedules, jobs, tables and results', 'status', 's'
269
269
 
@@ -346,8 +346,10 @@ module List
346
346
  add_alias 'scheds', 'sched:list'
347
347
  add_alias 'schedules', 'sched:list'
348
348
 
349
- add_alias 'import', 'import:show'
350
- add_alias 'imports', 'import:list'
349
+ add_alias 'import', 'import:show'
350
+ add_alias 'imports', 'import:list'
351
+ add_alias 'import:java_version', 'import:jar_version'
352
+
351
353
 
352
354
  add_alias 'bulk_import', 'bulk_import:show'
353
355
  add_alias 'bulk_imports', 'bulk_import:list'
@@ -14,7 +14,6 @@ module Command
14
14
  priority = nil
15
15
  retry_limit = nil
16
16
  query = nil
17
- sampling_all = nil
18
17
  type = nil
19
18
  limit = nil
20
19
  exclude = false
@@ -38,7 +37,9 @@ module Command
38
37
  end
39
38
  format = s
40
39
  }
41
- op.on('-r', '--result RESULT_URL', 'write result to the URL (see also result:create subcommand)') {|s|
40
+ op.on('-r', '--result RESULT_URL', 'write result to the URL (see also result:create subcommand)',
41
+ ' It is suggested for this option to be used with the -x / --exclude option to suppress printing',
42
+ ' of the query result to stdout or -o / --output to dump the query result into a file.') {|s|
42
43
  result_url = s
43
44
  }
44
45
  op.on('-u', '--user NAME', 'set user name for the result URL') {|s|
@@ -62,8 +63,10 @@ module Command
62
63
  op.on('-T', '--type TYPE', 'set query type (hive, pig, impala, presto)') {|s|
63
64
  type = s.to_sym
64
65
  }
65
- op.on('--sampling DENOMINATOR', 'enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
66
- sampling_all = i
66
+ op.on('--sampling DENOMINATOR', 'OBSOLETE - enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
67
+ puts "WARNING: the random sampling feature enabled through the '--sampling' option was removed and does no longer"
68
+ puts " have any effect. It is left for backwards compatibility with older scripts using 'td'."
69
+ puts
67
70
  }
68
71
  op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
69
72
  unless s.to_i > 0
@@ -80,23 +83,11 @@ module Command
80
83
 
81
84
  sql = op.cmd_parse
82
85
 
83
- # parameter concurrency validation
84
-
85
- if output.nil? && format
86
- unless ['tsv', 'csv', 'json'].include?(format)
87
- raise "Supported formats are only tsv, csv and json without --output option"
88
- end
89
- end
90
-
91
- if render_opts[:header]
92
- unless ['tsv', 'csv'].include?(format)
93
- raise "Option -c / --column-header is only supported with tsv and csv formats"
94
- end
95
- end
86
+ # required parameters
96
87
 
97
88
  unless db_name
98
- $stderr.puts "-d, --database DB_NAME option is required."
99
- exit 1
89
+ raise ParameterConfigurationError,
90
+ "-d / --database DB_NAME option is required."
100
91
  end
101
92
 
102
93
  if sql == '-'
@@ -104,30 +95,47 @@ module Command
104
95
  elsif sql.nil?
105
96
  sql = query
106
97
  end
107
-
108
98
  unless sql
109
- $stderr.puts "<sql> argument or -q,--query PATH option is required."
110
- exit 1
99
+ raise ParameterConfigurationError,
100
+ "<sql> argument or -q / --query PATH option is required."
101
+ end
102
+
103
+ # validation of parameter combinations
104
+
105
+ if output.nil? && format
106
+ unless ['tsv', 'csv', 'json'].include?(format)
107
+ raise ParameterConfigurationError,
108
+ "Supported formats are only tsv, csv and json without --output option"
109
+ end
110
+ end
111
+
112
+ if render_opts[:header]
113
+ unless ['tsv', 'csv'].include?(format)
114
+ raise ParameterConfigurationError,
115
+ "Option -c / --column-header is only supported with tsv and csv formats"
116
+ end
111
117
  end
112
118
 
113
119
  if result_url
114
120
  require 'td/command/result'
115
121
  result_url = build_result_url(result_url, result_user, result_ask_password)
122
+ if result_url =~ /^td:/
123
+ validate_td_result_url(result_url)
124
+ end
116
125
  end
117
126
 
118
127
  client = get_client
119
128
 
120
- # local existance check
129
+ # local existence check
121
130
  get_database(client, db_name)
122
131
 
123
132
  opts = {}
124
- opts['sampling_all'] = sampling_all if sampling_all
125
133
  opts['type'] = type if type
126
134
  job = client.query(db_name, sql, result_url, priority, retry_limit, opts)
127
135
 
128
- $stderr.puts "Job #{job.job_id} is queued."
129
- $stderr.puts "Use '#{$prog} " + Config.cl_apikey_string + "job:show #{job.job_id}' to show the status."
130
- #$stderr.puts "See #{job.url} to see the progress."
136
+ puts "Job #{job.job_id} is queued."
137
+ puts "Use '#{$prog} " + Config.cl_apikey_string + "job:show #{job.job_id}' to show the status."
138
+ #puts "See #{job.url} to see the progress."
131
139
 
132
140
  if wait
133
141
  wait_job(job, true)
@@ -56,8 +56,7 @@ module Command
56
56
  }
57
57
 
58
58
  name, url = op.cmd_parse
59
-
60
- API.validate_database_name(name)
59
+ API.validate_result_set_name(name)
61
60
 
62
61
  client = get_client
63
62
 
@@ -123,6 +122,23 @@ module Command
123
122
 
124
123
  url
125
124
  end
125
+
126
+ private
127
+ def validate_td_result_url(url)
128
+ re = /td:\/\/[^@]*@\/(.*)\/(.*)?/
129
+ match = re.match(url)
130
+ if match.nil?
131
+ raise ParameterConfigurationError, "Treasure Data result output invalid URL format"
132
+ end
133
+ dbs = match[1]
134
+ tbl = match[2]
135
+ begin
136
+ API.validate_name("Treasure Data result output destination database", 3, 256, dbs)
137
+ API.validate_name("Treasure Data result output destination table", 3, 256, tbl)
138
+ rescue ParameterValidationError => e
139
+ raise ParameterConfigurationError, e
140
+ end
141
+ end
126
142
  end
127
143
  end
128
144
 
@@ -2,7 +2,6 @@
2
2
  module TreasureData
3
3
  module Command
4
4
 
5
-
6
5
  class Runner
7
6
  def initialize
8
7
  @config_path = nil
@@ -21,7 +20,7 @@ class Runner
21
20
  $prog = @prog_name || File.basename($0)
22
21
 
23
22
  op = OptionParser.new
24
- op.version = VERSION
23
+ op.version = TOOLBELT_VERSION
25
24
  op.banner = <<EOF
26
25
  usage: #{$prog} [options] COMMAND [args]
27
26
 
@@ -45,11 +44,11 @@ Basic commands:
45
44
  import # manage bulk import sessions (Java based fast processing)
46
45
  bulk_import # manage bulk import sessions (Old Ruby-based implementation)
47
46
  result # create/delete/list result URLs
47
+ sched # create/delete/list schedules that run a query periodically
48
+ schema # create/delete/modify schemas of tables
48
49
 
49
50
  Additional commands:
50
51
 
51
- sched # create/delete/list schedules that run a query periodically
52
- schema # create/delete/modify schemas of tables
53
52
  status # show scheds, jobs, tables and results
54
53
  apikey # show/set API key
55
54
  server # show status of the Treasure Data server
@@ -141,12 +140,19 @@ EOF
141
140
  $stderr.puts "TreasureData account is not configured yet."
142
141
  $stderr.puts "Run '#{$prog} account' first."
143
142
  rescue => e
144
- $stderr.puts "error #{$!.class}: backtrace:"
145
- $!.backtrace.each {|b|
146
- $stderr.puts " #{b}"
147
- }
148
- puts ""
149
- puts $!
143
+ # work in progress (look-ahead development): new exceptions are rendered as simple
144
+ # error messages, without a backtrace, unless the TD_TOOLBELT_DEBUG environment variable is set.
145
+ # List of new exceptions:
146
+ # => ParameterConfigurationError
147
+ # => BulkImportExecutionError
148
+ unless [ParameterConfigurationError, BulkImportExecutionError].include?(e.class) && ENV['TD_TOOLBELT_DEBUG'].nil?
149
+ $stderr.puts "error #{$!.class}: backtrace:"
150
+ $!.backtrace.each {|b|
151
+ $stderr.puts " #{b}"
152
+ }
153
+ puts ""
154
+ end
155
+ puts "Error: " + $!.to_s
150
156
 
151
157
  require 'socket'
152
158
  if e.is_a?(::SocketError)
@@ -157,7 +163,9 @@ If you want to use td command through a proxy,
157
163
  please set HTTP_PROXY environment variable (e.g. export HTTP_PROXY="host:port")
158
164
  EOS
159
165
  end
166
+ return 1
160
167
  end
168
+ return 0
161
169
  end
162
170
  end
163
171
 
@@ -41,7 +41,13 @@ module Command
41
41
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
42
42
  db_name = s
43
43
  }
44
- op.on('-t', '--timezone TZ', 'name of the timezone (like Asia/Tokyo)') {|s|
44
+ op.on('-t', '--timezone TZ', "name of the timezone.",
45
+ " Only extended timezones like 'Asia/Tokyo', 'America/Los_Angeles' are supported,",
46
+ " (no 'PST', 'PDT', etc...).",
47
+ " When a timezone is specified, the cron schedule is referred to that timezone.",
48
+ " Otherwise, the cron schedule is referred to the UTC timezone.",
49
+ " E.g. cron schedule '0 12 * * *' will execute daily at 5 AM without timezone option",
50
+ " and at 12PM with the -t / --timezone 'America/Los_Angeles' timezone option") {|s|
45
51
  timezone = s
46
52
  }
47
53
  op.on('-D', '--delay SECONDS', 'delay time of the schedule', Integer) {|i|
@@ -97,7 +103,7 @@ module Command
97
103
 
98
104
  client = get_client
99
105
 
100
- # local existance check
106
+ # local existence check
101
107
  get_database(client, db_name)
102
108
 
103
109
  begin
@@ -153,7 +159,13 @@ module Command
153
159
  op.on('-r', '--result RESULT_TABLE', 'change the result table') {|s|
154
160
  result = s
155
161
  }
156
- op.on('-t', '--timezone TZ', 'change the name of the timezone (like Asia/Tokyo)') {|s|
162
+ op.on('-t', '--timezone TZ', "name of the timezone.",
163
+ " Only extended timezones like 'Asia/Tokyo', 'America/Los_Angeles' are supported,",
164
+ " (no 'PST', 'PDT', etc...).",
165
+ " When a timezone is specified, the cron schedule is referred to that timezone.",
166
+ " Otherwise, the cron schedule is referred to the UTC timezone.",
167
+ " E.g. cron schedule '0 12 * * *' will execute daily at 5 AM without timezone option",
168
+ " and at 12PM with the -t / --timezone 'America/Los_Angeles' timezone option") {|s|
157
169
  timezone = s
158
170
  }
159
171
  op.on('-D', '--delay SECONDS', 'change the delay time of the schedule', Integer) {|i|
@@ -74,7 +74,11 @@ module Command
74
74
  name = name.to_s
75
75
  type = type.to_s
76
76
 
77
- API.validate_column_name(name)
77
+ begin
78
+ API.validate_column_name(name)
79
+ rescue ParameterValidationError => e
80
+ raise ParameterConfigurationError, e
81
+ end
78
82
  #type = API.normalize_type_name(type)
79
83
 
80
84
  if schema.fields.find {|f| f.name == name }
@@ -34,7 +34,7 @@ module Command
34
34
  j = client.jobs(0, 4)
35
35
  j.each {|job|
36
36
  start = job.start_at
37
- elapsed = cmd_format_elapsed(start, job.end_at)
37
+ elapsed = humanize_elapsed_time(start, job.end_at)
38
38
  jobs << {:JobID => job.job_id, :Status => job.status, :Query => job.query.to_s, :Start => (start ? start.localtime : ''), :Elapsed => elapsed, :Result => job.result_url}
39
39
  }
40
40
  x2, y2 = status_render(0, 0, "[Jobs]", jobs, :fields => [:JobID, :Status, :Start, :Elapsed, :Result, :Query])
@@ -144,20 +144,27 @@ module Command
144
144
  databases = client.databases
145
145
  end
146
146
 
147
+ has_item = databases.select {|db| db.tables.select {|table| table.type == :item}.length > 0 }.length > 0
148
+
147
149
  rows = []
148
150
  ::Parallel.each(databases, :in_threads => num_threads) {|db|
149
151
  begin
152
+ db.tables.each {}
150
153
  db.tables.each {|table|
151
154
  pschema = table.schema.fields.map {|f|
152
155
  "#{f.name}:#{f.type}"
153
156
  }.join(', ')
154
- rows << {
157
+ new_row = {
155
158
  :Database => db.name, :Table => table.name, :Type => table.type.to_s, :Count => TreasureData::Helpers.format_with_delimiter(table.count),
156
159
  :Size => show_size_in_bytes ? TreasureData::Helpers.format_with_delimiter(table.estimated_storage_size) : table.estimated_storage_size_string,
157
160
  'Last import' => table.last_import ? table.last_import.localtime : nil,
158
161
  'Last log timestamp' => table.last_log_timestamp ? table.last_log_timestamp.localtime : nil,
159
162
  :Schema => pschema
160
163
  }
164
+ if has_item and table.type == :item
165
+ new_row['Primary key'] = "#{table.primary_key}:#{table.primary_key_type}"
166
+ end
167
+ rows << new_row
161
168
  }
162
169
  rescue APIError => e
163
170
  # ignores permission error because db:list shows all databases
@@ -171,7 +178,13 @@ module Command
171
178
  [map[:Database], map[:Type].size, map[:Table]]
172
179
  }
173
180
 
174
- puts cmd_render_table(rows, :fields => [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', :Schema], :max_width=>500, :render_format => op.render_format)
181
+ fields = []
182
+ if has_item
183
+ fields = [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', 'Primary key', :Schema]
184
+ else
185
+ fields = [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', :Schema]
186
+ end
187
+ puts cmd_render_table(rows, :fields => fields, :max_width => 500, :render_format => op.render_format)
175
188
 
176
189
  if rows.empty?
177
190
  if db_name
@@ -207,10 +220,12 @@ module Command
207
220
 
208
221
  table = get_table(client, db_name, table_name)
209
222
 
210
- puts "Name : #{table.db_name}.#{table.name}"
211
- puts "Type : #{table.type}"
212
- puts "Count : #{table.count}"
213
- puts "Schema : ("
223
+ puts "Name : #{table.db_name}.#{table.name}"
224
+ puts "Type : #{table.type}"
225
+ puts "Count : #{table.count}"
226
+ # p table.methods.each {|m| puts m}
227
+ puts "Primary key : #{table.primary_key}:#{table.primary_key_type}" if table.type == :item
228
+ puts "Schema : ("
214
229
  table.schema.fields.each {|f|
215
230
  puts " #{f.name}:#{f.type}"
216
231
  }
@@ -334,7 +349,7 @@ module Command
334
349
  to = nil
335
350
  wait = false
336
351
 
337
- op.on('-t', '--to TIME', 'end time of logs to delete') {|s|
352
+ op.on('-t', '--to TIME', 'end time of logs to delete in Unix time multiple of 3600 (1 hour)') {|s|
338
353
  if s.to_i.to_s == s
339
354
  # UNIX time
340
355
  to = s.to_i
@@ -343,7 +358,7 @@ module Command
343
358
  to = Time.parse(s).to_i
344
359
  end
345
360
  }
346
- op.on('-f', '--from TIME', 'start time of logs to delete') {|s|
361
+ op.on('-f', '--from TIME', 'start time of logs to delete in Unix time multiple of 3600 (1 hour)') {|s|
347
362
  if s.to_i.to_s == s
348
363
  from = s.to_i
349
364
  else
@@ -351,7 +366,7 @@ module Command
351
366
  from = Time.parse(s).to_i
352
367
  end
353
368
  }
354
- op.on('-w', '--wait', 'wait for finishing the job', TrueClass) {|b|
369
+ op.on('-w', '--wait', 'wait for the job to finish', TrueClass) {|b|
355
370
  wait = b
356
371
  }
357
372
 
@@ -368,7 +383,7 @@ module Command
368
383
  end
369
384
 
370
385
  if from % 3600 != 0 || to % 3600 != 0
371
- $stderr.puts "time must be a multiple of 3600 (1 hour)"
386
+ $stderr.puts "Time for the -f / --from and -t / --to options must be a multiple of 3600 (1 hour)"
372
387
  exit 1
373
388
  end
374
389
 
@@ -402,7 +417,7 @@ module Command
402
417
 
403
418
  $stderr.puts "Table set to expire data older than #{expire_days} days."
404
419
  end
405
-
420
+
406
421
 
407
422
  IMPORT_TEMPLATES = {
408
423
  'apache' => [
@@ -4,21 +4,20 @@ module TreasureData
4
4
  module Command
5
5
 
6
6
  def update(op)
7
- if TreasureData::Updater.disable?
8
- $stderr.puts TreasureData::Updater.disable_message
7
+ # for gem installation, this command is disallowed -
8
+ # it only works for the toolbelt.
9
+ if Updater.disable?
10
+ $stderr.puts Updater.disable_message
9
11
  exit
10
12
  end
11
13
 
12
- $stderr.puts <<EOS
13
- Updating started at #{Time.now}
14
- from #{TreasureData::VERSION}
15
- EOS
16
- if new_version = TreasureData::Updater.update
17
- $stderr.puts "updated to #{new_version}"
14
+ start_time = Time.now
15
+ puts "Updating 'td' from #{TOOLBELT_VERSION}..."
16
+ if new_version = Updater.update
17
+ puts "Successfully updated to #{new_version} in #{humanize_time((Time.now - start_time).to_i)}."
18
18
  else
19
- $stderr.puts "nothing to update"
19
+ puts "Nothing to update."
20
20
  end
21
- $stderr.puts "ended at #{Time.now}"
22
21
  end
23
22
 
24
23
  end