td 0.10.99 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -248,8 +248,8 @@ module List
248
248
  add_list 'import:list', %w[], 'List bulk import sessions', 'import:list'
249
249
  add_list 'import:show', %w[name], 'Show list of uploaded parts', 'import:show'
250
250
  add_list 'import:create', %w[name db table], 'Create a new bulk import session to the the table', 'import:create logs_201201 example_db event_logs'
251
- add_list 'import:java_version', %w[], 'Show version', 'import:java_version'
252
- add_list 'import:jar_update', %w[], 'Update import jar', 'import:jar_update'
251
+ add_list 'import:jar_version', %w[], 'Show import jar version', 'import:jar_version'
252
+ add_list 'import:jar_update', %w[], 'Update import jar to the latest version', 'import:jar_update'
253
253
  add_list 'import:prepare', %w[files_], 'Convert files into part file format', 'import:prepare logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
254
254
  add_list 'import:upload', %w[name files_], 'Upload or re-upload files into a bulk import session', 'import:upload parts/* --parallel 4'
255
255
  add_list 'import:auto', %w[name files_], 'Upload files and automatically perform and commit the data', 'import:auto parts/* --parallel 4'
@@ -261,9 +261,9 @@ module List
261
261
  add_list 'import:unfreeze', %w[name], 'Unfreeze a frozen bulk import session', 'import:unfreeze logs_201201'
262
262
 
263
263
  add_list 'result:list', %w[], 'Show list of result URLs', 'result:list', 'results'
264
- add_list 'result:show', %w[name], 'Describe information of a result URL', 'result mydb'
265
- add_list 'result:create', %w[name URL], 'Create a result URL', 'result:create mydb mysql://my-server/mydb'
266
- add_list 'result:delete', %w[name], 'Delete a result URL', 'result:delete mydb'
264
+ add_list 'result:show', %w[name], 'Describe information of a result URL', 'result name'
265
+ add_list 'result:create', %w[name URL], 'Create a result URL', 'result:create name mysql://my-server/mydb'
266
+ add_list 'result:delete', %w[name], 'Delete a result URL', 'result:delete name'
267
267
 
268
268
  add_list 'status', %w[], 'Show schedules, jobs, tables and results', 'status', 's'
269
269
 
@@ -346,8 +346,10 @@ module List
346
346
  add_alias 'scheds', 'sched:list'
347
347
  add_alias 'schedules', 'sched:list'
348
348
 
349
- add_alias 'import', 'import:show'
350
- add_alias 'imports', 'import:list'
349
+ add_alias 'import', 'import:show'
350
+ add_alias 'imports', 'import:list'
351
+ add_alias 'import:java_version', 'import:jar_version'
352
+
351
353
 
352
354
  add_alias 'bulk_import', 'bulk_import:show'
353
355
  add_alias 'bulk_imports', 'bulk_import:list'
@@ -14,7 +14,6 @@ module Command
14
14
  priority = nil
15
15
  retry_limit = nil
16
16
  query = nil
17
- sampling_all = nil
18
17
  type = nil
19
18
  limit = nil
20
19
  exclude = false
@@ -38,7 +37,9 @@ module Command
38
37
  end
39
38
  format = s
40
39
  }
41
- op.on('-r', '--result RESULT_URL', 'write result to the URL (see also result:create subcommand)') {|s|
40
+ op.on('-r', '--result RESULT_URL', 'write result to the URL (see also result:create subcommand)',
41
+ ' It is suggested for this option to be used with the -x / --exclude option to suppress printing',
42
+ ' of the query result to stdout or -o / --output to dump the query result into a file.') {|s|
42
43
  result_url = s
43
44
  }
44
45
  op.on('-u', '--user NAME', 'set user name for the result URL') {|s|
@@ -62,8 +63,10 @@ module Command
62
63
  op.on('-T', '--type TYPE', 'set query type (hive, pig, impala, presto)') {|s|
63
64
  type = s.to_sym
64
65
  }
65
- op.on('--sampling DENOMINATOR', 'enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
66
- sampling_all = i
66
+ op.on('--sampling DENOMINATOR', 'OBSOLETE - enable random sampling to reduce records 1/DENOMINATOR', Integer) {|i|
67
+ puts "WARNING: the random sampling feature enabled through the '--sampling' option was removed and does no longer"
68
+ puts " have any effect. It is left for backwards compatibility with older scripts using 'td'."
69
+ puts
67
70
  }
68
71
  op.on('-l', '--limit ROWS', 'limit the number of result rows shown when not outputting to file') {|s|
69
72
  unless s.to_i > 0
@@ -80,23 +83,11 @@ module Command
80
83
 
81
84
  sql = op.cmd_parse
82
85
 
83
- # parameter concurrency validation
84
-
85
- if output.nil? && format
86
- unless ['tsv', 'csv', 'json'].include?(format)
87
- raise "Supported formats are only tsv, csv and json without --output option"
88
- end
89
- end
90
-
91
- if render_opts[:header]
92
- unless ['tsv', 'csv'].include?(format)
93
- raise "Option -c / --column-header is only supported with tsv and csv formats"
94
- end
95
- end
86
+ # required parameters
96
87
 
97
88
  unless db_name
98
- $stderr.puts "-d, --database DB_NAME option is required."
99
- exit 1
89
+ raise ParameterConfigurationError,
90
+ "-d / --database DB_NAME option is required."
100
91
  end
101
92
 
102
93
  if sql == '-'
@@ -104,30 +95,47 @@ module Command
104
95
  elsif sql.nil?
105
96
  sql = query
106
97
  end
107
-
108
98
  unless sql
109
- $stderr.puts "<sql> argument or -q,--query PATH option is required."
110
- exit 1
99
+ raise ParameterConfigurationError,
100
+ "<sql> argument or -q / --query PATH option is required."
101
+ end
102
+
103
+ # parameter concurrency validation
104
+
105
+ if output.nil? && format
106
+ unless ['tsv', 'csv', 'json'].include?(format)
107
+ raise ParameterConfigurationError,
108
+ "Supported formats are only tsv, csv and json without --output option"
109
+ end
110
+ end
111
+
112
+ if render_opts[:header]
113
+ unless ['tsv', 'csv'].include?(format)
114
+ raise ParameterConfigurationError,
115
+ "Option -c / --column-header is only supported with tsv and csv formats"
116
+ end
111
117
  end
112
118
 
113
119
  if result_url
114
120
  require 'td/command/result'
115
121
  result_url = build_result_url(result_url, result_user, result_ask_password)
122
+ if result_url =~ /^td:/
123
+ validate_td_result_url(result_url)
124
+ end
116
125
  end
117
126
 
118
127
  client = get_client
119
128
 
120
- # local existance check
129
+ # local existence check
121
130
  get_database(client, db_name)
122
131
 
123
132
  opts = {}
124
- opts['sampling_all'] = sampling_all if sampling_all
125
133
  opts['type'] = type if type
126
134
  job = client.query(db_name, sql, result_url, priority, retry_limit, opts)
127
135
 
128
- $stderr.puts "Job #{job.job_id} is queued."
129
- $stderr.puts "Use '#{$prog} " + Config.cl_apikey_string + "job:show #{job.job_id}' to show the status."
130
- #$stderr.puts "See #{job.url} to see the progress."
136
+ puts "Job #{job.job_id} is queued."
137
+ puts "Use '#{$prog} " + Config.cl_apikey_string + "job:show #{job.job_id}' to show the status."
138
+ #puts "See #{job.url} to see the progress."
131
139
 
132
140
  if wait
133
141
  wait_job(job, true)
@@ -56,8 +56,7 @@ module Command
56
56
  }
57
57
 
58
58
  name, url = op.cmd_parse
59
-
60
- API.validate_database_name(name)
59
+ API.validate_result_set_name(name)
61
60
 
62
61
  client = get_client
63
62
 
@@ -123,6 +122,23 @@ module Command
123
122
 
124
123
  url
125
124
  end
125
+
126
+ private
127
+ def validate_td_result_url(url)
128
+ re = /td:\/\/[^@]*@\/(.*)\/(.*)?/
129
+ match = re.match(url)
130
+ if match.nil?
131
+ raise ParameterConfigurationError, "Treasure Data result output invalid URL format"
132
+ end
133
+ dbs = match[1]
134
+ tbl = match[2]
135
+ begin
136
+ API.validate_name("Treasure Data result output destination database", 3, 256, dbs)
137
+ API.validate_name("Treasure Data result output destination table", 3, 256, tbl)
138
+ rescue ParameterValidationError => e
139
+ raise ParameterConfigurationError, e
140
+ end
141
+ end
126
142
  end
127
143
  end
128
144
 
@@ -2,7 +2,6 @@
2
2
  module TreasureData
3
3
  module Command
4
4
 
5
-
6
5
  class Runner
7
6
  def initialize
8
7
  @config_path = nil
@@ -21,7 +20,7 @@ class Runner
21
20
  $prog = @prog_name || File.basename($0)
22
21
 
23
22
  op = OptionParser.new
24
- op.version = VERSION
23
+ op.version = TOOLBELT_VERSION
25
24
  op.banner = <<EOF
26
25
  usage: #{$prog} [options] COMMAND [args]
27
26
 
@@ -45,11 +44,11 @@ Basic commands:
45
44
  import # manage bulk import sessions (Java based fast processing)
46
45
  bulk_import # manage bulk import sessions (Old Ruby-based implementation)
47
46
  result # create/delete/list result URLs
47
+ sched # create/delete/list schedules that run a query periodically
48
+ schema # create/delete/modify schemas of tables
48
49
 
49
50
  Additional commands:
50
51
 
51
- sched # create/delete/list schedules that run a query periodically
52
- schema # create/delete/modify schemas of tables
53
52
  status # show scheds, jobs, tables and results
54
53
  apikey # show/set API key
55
54
  server # show status of the Treasure Data server
@@ -141,12 +140,19 @@ EOF
141
140
  $stderr.puts "TreasureData account is not configured yet."
142
141
  $stderr.puts "Run '#{$prog} account' first."
143
142
  rescue => e
144
- $stderr.puts "error #{$!.class}: backtrace:"
145
- $!.backtrace.each {|b|
146
- $stderr.puts " #{b}"
147
- }
148
- puts ""
149
- puts $!
143
+ # work in progress look ahead development: new exceptions are rendered as simple
144
+ # error messages unless the TD_TOOLBELT_DEBUG variable is not empty.
145
+ # List of new exceptions:
146
+ # => ParameterConfigurationError
147
+ # => BulkImportExecutionError
148
+ unless [ParameterConfigurationError, BulkImportExecutionError].include?(e.class) && ENV['TD_TOOLBELT_DEBUG'].nil?
149
+ $stderr.puts "error #{$!.class}: backtrace:"
150
+ $!.backtrace.each {|b|
151
+ $stderr.puts " #{b}"
152
+ }
153
+ puts ""
154
+ end
155
+ puts "Error: " + $!.to_s
150
156
 
151
157
  require 'socket'
152
158
  if e.is_a?(::SocketError)
@@ -157,7 +163,9 @@ If you want to use td command through a proxy,
157
163
  please set HTTP_PROXY environment variable (e.g. export HTTP_PROXY="host:port")
158
164
  EOS
159
165
  end
166
+ return 1
160
167
  end
168
+ return 0
161
169
  end
162
170
  end
163
171
 
@@ -41,7 +41,13 @@ module Command
41
41
  op.on('-d', '--database DB_NAME', 'use the database (required)') {|s|
42
42
  db_name = s
43
43
  }
44
- op.on('-t', '--timezone TZ', 'name of the timezone (like Asia/Tokyo)') {|s|
44
+ op.on('-t', '--timezone TZ', "name of the timezone.",
45
+ " Only extended timezones like 'Asia/Tokyo', 'America/Los_Angeles' are supported,",
46
+ " (no 'PST', 'PDT', etc...).",
47
+ " When a timezone is specified, the cron schedule is referred to that timezone.",
48
+ " Otherwise, the cron schedule is referred to the UTC timezone.",
49
+ " E.g. cron schedule '0 12 * * *' will execute daily at 5 AM without timezone option",
50
+ " and at 12PM with the -t / --timezone 'America/Los_Angeles' timezone option") {|s|
45
51
  timezone = s
46
52
  }
47
53
  op.on('-D', '--delay SECONDS', 'delay time of the schedule', Integer) {|i|
@@ -97,7 +103,7 @@ module Command
97
103
 
98
104
  client = get_client
99
105
 
100
- # local existance check
106
+ # local existence check
101
107
  get_database(client, db_name)
102
108
 
103
109
  begin
@@ -153,7 +159,13 @@ module Command
153
159
  op.on('-r', '--result RESULT_TABLE', 'change the result table') {|s|
154
160
  result = s
155
161
  }
156
- op.on('-t', '--timezone TZ', 'change the name of the timezone (like Asia/Tokyo)') {|s|
162
+ op.on('-t', '--timezone TZ', "name of the timezone.",
163
+ " Only extended timezones like 'Asia/Tokyo', 'America/Los_Angeles' are supported,",
164
+ " (no 'PST', 'PDT', etc...).",
165
+ " When a timezone is specified, the cron schedule is referred to that timezone.",
166
+ " Otherwise, the cron schedule is referred to the UTC timezone.",
167
+ " E.g. cron schedule '0 12 * * *' will execute daily at 5 AM without timezone option",
168
+ " and at 12PM with the -t / --timezone 'America/Los_Angeles' timezone option") {|s|
157
169
  timezone = s
158
170
  }
159
171
  op.on('-D', '--delay SECONDS', 'change the delay time of the schedule', Integer) {|i|
@@ -74,7 +74,11 @@ module Command
74
74
  name = name.to_s
75
75
  type = type.to_s
76
76
 
77
- API.validate_column_name(name)
77
+ begin
78
+ API.validate_column_name(name)
79
+ rescue ParameterValidationError => e
80
+ raise ParameterConfigurationError, e
81
+ end
78
82
  #type = API.normalize_type_name(type)
79
83
 
80
84
  if schema.fields.find {|f| f.name == name }
@@ -34,7 +34,7 @@ module Command
34
34
  j = client.jobs(0, 4)
35
35
  j.each {|job|
36
36
  start = job.start_at
37
- elapsed = cmd_format_elapsed(start, job.end_at)
37
+ elapsed = humanize_elapsed_time(start, job.end_at)
38
38
  jobs << {:JobID => job.job_id, :Status => job.status, :Query => job.query.to_s, :Start => (start ? start.localtime : ''), :Elapsed => elapsed, :Result => job.result_url}
39
39
  }
40
40
  x2, y2 = status_render(0, 0, "[Jobs]", jobs, :fields => [:JobID, :Status, :Start, :Elapsed, :Result, :Query])
@@ -144,20 +144,27 @@ module Command
144
144
  databases = client.databases
145
145
  end
146
146
 
147
+ has_item = databases.select {|db| db.tables.select {|table| table.type == :item}.length > 0 }.length > 0
148
+
147
149
  rows = []
148
150
  ::Parallel.each(databases, :in_threads => num_threads) {|db|
149
151
  begin
152
+ db.tables.each {}
150
153
  db.tables.each {|table|
151
154
  pschema = table.schema.fields.map {|f|
152
155
  "#{f.name}:#{f.type}"
153
156
  }.join(', ')
154
- rows << {
157
+ new_row = {
155
158
  :Database => db.name, :Table => table.name, :Type => table.type.to_s, :Count => TreasureData::Helpers.format_with_delimiter(table.count),
156
159
  :Size => show_size_in_bytes ? TreasureData::Helpers.format_with_delimiter(table.estimated_storage_size) : table.estimated_storage_size_string,
157
160
  'Last import' => table.last_import ? table.last_import.localtime : nil,
158
161
  'Last log timestamp' => table.last_log_timestamp ? table.last_log_timestamp.localtime : nil,
159
162
  :Schema => pschema
160
163
  }
164
+ if has_item and table.type == :item
165
+ new_row['Primary key'] = "#{table.primary_key}:#{table.primary_key_type}"
166
+ end
167
+ rows << new_row
161
168
  }
162
169
  rescue APIError => e
163
170
  # ignores permission error because db:list shows all databases
@@ -171,7 +178,13 @@ module Command
171
178
  [map[:Database], map[:Type].size, map[:Table]]
172
179
  }
173
180
 
174
- puts cmd_render_table(rows, :fields => [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', :Schema], :max_width=>500, :render_format => op.render_format)
181
+ fields = []
182
+ if has_item
183
+ fields = [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', 'Primary key', :Schema]
184
+ else
185
+ fields = [:Database, :Table, :Type, :Count, :Size, 'Last import', 'Last log timestamp', :Schema]
186
+ end
187
+ puts cmd_render_table(rows, :fields => fields, :max_width => 500, :render_format => op.render_format)
175
188
 
176
189
  if rows.empty?
177
190
  if db_name
@@ -207,10 +220,12 @@ module Command
207
220
 
208
221
  table = get_table(client, db_name, table_name)
209
222
 
210
- puts "Name : #{table.db_name}.#{table.name}"
211
- puts "Type : #{table.type}"
212
- puts "Count : #{table.count}"
213
- puts "Schema : ("
223
+ puts "Name : #{table.db_name}.#{table.name}"
224
+ puts "Type : #{table.type}"
225
+ puts "Count : #{table.count}"
226
+ # p table.methods.each {|m| puts m}
227
+ puts "Primary key : #{table.primary_key}:#{table.primary_key_type}" if table.type == :item
228
+ puts "Schema : ("
214
229
  table.schema.fields.each {|f|
215
230
  puts " #{f.name}:#{f.type}"
216
231
  }
@@ -334,7 +349,7 @@ module Command
334
349
  to = nil
335
350
  wait = false
336
351
 
337
- op.on('-t', '--to TIME', 'end time of logs to delete') {|s|
352
+ op.on('-t', '--to TIME', 'end time of logs to delete in Unix time multiple of 3600 (1 hour)') {|s|
338
353
  if s.to_i.to_s == s
339
354
  # UNIX time
340
355
  to = s.to_i
@@ -343,7 +358,7 @@ module Command
343
358
  to = Time.parse(s).to_i
344
359
  end
345
360
  }
346
- op.on('-f', '--from TIME', 'start time of logs to delete') {|s|
361
+ op.on('-f', '--from TIME', 'start time of logs to delete in Unix time multiple of 3600 (1 hour)') {|s|
347
362
  if s.to_i.to_s == s
348
363
  from = s.to_i
349
364
  else
@@ -351,7 +366,7 @@ module Command
351
366
  from = Time.parse(s).to_i
352
367
  end
353
368
  }
354
- op.on('-w', '--wait', 'wait for finishing the job', TrueClass) {|b|
369
+ op.on('-w', '--wait', 'wait for the job to finish', TrueClass) {|b|
355
370
  wait = b
356
371
  }
357
372
 
@@ -368,7 +383,7 @@ module Command
368
383
  end
369
384
 
370
385
  if from % 3600 != 0 || to % 3600 != 0
371
- $stderr.puts "time must be a multiple of 3600 (1 hour)"
386
+ $stderr.puts "Time for the -f / --from and -t / --to options must be a multiple of 3600 (1 hour)"
372
387
  exit 1
373
388
  end
374
389
 
@@ -402,7 +417,7 @@ module Command
402
417
 
403
418
  $stderr.puts "Table set to expire data older than #{expire_days} days."
404
419
  end
405
-
420
+
406
421
 
407
422
  IMPORT_TEMPLATES = {
408
423
  'apache' => [
@@ -4,21 +4,20 @@ module TreasureData
4
4
  module Command
5
5
 
6
6
  def update(op)
7
- if TreasureData::Updater.disable?
8
- $stderr.puts TreasureData::Updater.disable_message
7
+ # for gem installation, this command is disallowed -
8
+ # it only works for the toolbelt.
9
+ if Updater.disable?
10
+ $stderr.puts Updater.disable_message
9
11
  exit
10
12
  end
11
13
 
12
- $stderr.puts <<EOS
13
- Updating started at #{Time.now}
14
- from #{TreasureData::VERSION}
15
- EOS
16
- if new_version = TreasureData::Updater.update
17
- $stderr.puts "updated to #{new_version}"
14
+ start_time = Time.now
15
+ puts "Updating 'td' from #{TOOLBELT_VERSION}..."
16
+ if new_version = Updater.update
17
+ puts "Successfully updated to #{new_version} in #{humanize_time((Time.now - start_time).to_i)}."
18
18
  else
19
- $stderr.puts "nothing to update"
19
+ puts "Nothing to update."
20
20
  end
21
- $stderr.puts "ended at #{Time.now}"
22
21
  end
23
22
 
24
23
  end