td 0.10.99 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjhhYWI5NWE1ZmI3ZTkwZjY0OTNhMGE3N2VjNjUzNzk2ZWY2OGJkYQ==
5
+ data.tar.gz: !binary |-
6
+ NDZmMDJlMDQyZmViMjY5NTNmMTE0ODg5ZWE2MGYxZjJhYTg4Y2VhZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NGZmMTk4ZTk5ZWFhYzc4YmEyZTE0ZmI0MjA2ODYzOWQxNzQ0ZTc5OGRjZDhh
10
+ OTI0NjJiZjA3MDhiZmFjNjg3MThjNjI1ZjcyZjNiOTNiOWU5MjQ2M2FhOWRh
11
+ ZTI4OGI2ZTk1YTEwYmVkZGJmNGU4Y2YzYTY1ZDc5MmNkYTI5ZTQ=
12
+ data.tar.gz: !binary |-
13
+ NjNiNTI4MGI4MDY0MGYzMjZiMTBmNzBhYzJmNjNmZjcxZDgxZWVjNTgzYzUx
14
+ ZGZiMzViYmQ0ZjQwNWIwZmI5MWQ5ZmExYTA3MzZkZDlkY2RmNTEzNDMxYzNj
15
+ ZGJhZTFlZDExYjc4YzdkYzUwZmJjNjc3NTlkZGQ5OTI2MDQ4NjY=
data/.gitignore CHANGED
@@ -1,5 +1,8 @@
1
1
  .bundle
2
+ .DS_Store
2
3
  build/td-import-java
3
4
  Gemfile.lock
4
5
  vendor/*
6
+ pkg/
7
+ build/*
5
8
  *~
data/ChangeLog CHANGED
@@ -1,10 +1,39 @@
1
+ == 2014-04-29 version 0.11.1
2
+
3
+ * Fix Treasure Data query result output database and table validation
4
+ regular expression
5
+ * Fixed auto updater which threw a warning due to a constant being
6
+ reinitialized
7
+ * Interpret the CPU time as number of milliseconds as opposed to
8
+ number of seconds in the job:show output
9
+ * When outputting the query result to file, flush the data every
10
+ 100 records
11
+
12
+ == 2014-04-23 version 0.11.0
13
+
14
+ * Show cumulative CPU time in the job:list and job:show outputs
15
+ * The error message when the specified schema has columns containing upper case
16
+ alphanumeric characters is improved to be more representative of the problem
17
+ * 'td query' commands with result output to Treasure Data (--result td://xxxx)
18
+ validate the database and table naming convention before running the query
19
+ * The Java bulk import JAR file is now auto-updated. Checking for an updated
20
+ version is performed hourly
21
+ * 'td query' commands with result output specification invite the user to use
22
+ the '-x / --exclude' option to avoid outputting the query result to stdout as
23
+ well
24
+ * The 'Destination' field in the summary for the Bulk import perform output
25
+ from the jobs:show command contains the destination table name in the form of
26
+ a LOAD DATA SESSION query
27
+ * Returning the correct non-zero error codes in the occurrence of an exception
28
+ * Declare the 'td query' option '--sampling' obsolete. A warning indicating the
29
+ option is obsolete and has no effect will be printed to warn the user
30
+
1
31
  == 2014-02-26 version 0.10.99
2
32
 
3
33
  * job:show, query: limit the number of records outputted when printing on stdout using the -l / --limit option
4
34
  * job:show, query: optionally output/store csv and tsv files with headers using the -c / --column-names option
5
35
  * job:show, query: support result output from Presto
6
36
 
7
-
8
37
  == 2014-02-21 version 0.10.98
9
38
 
10
39
  * import:unfreeze: fixed NoMethodError error
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ require "erb"
11
11
 
12
12
  def version
13
13
  require project_root_path('lib/td/version')
14
- TreasureData::VERSION
14
+ TreasureData::TOOLBELT_VERSION
15
15
  end
16
16
 
17
17
  task "jar" do
data/bin/td CHANGED
@@ -1,17 +1,34 @@
1
1
  #!/usr/bin/env ruby
2
2
  # -*- coding: utf-8 -*-
3
+
3
4
  require 'rubygems' unless defined?(gem)
4
5
  gem 'td-client'
5
6
  here = File.dirname(__FILE__)
6
7
  $LOAD_PATH << File.expand_path(File.join(here, '..', 'lib'))
7
8
 
9
+ # disable the updater for the td gem
8
10
  require 'td/updater'
9
11
  TreasureData::Updater.disable(<<EOS
10
- `td update` is only available from Treasure Data Toolbelt.
11
- Download and install from http://toolbelt.treasure-data.com
12
+
13
+ `td update` is only available from the Treasure Data Toolbelt.
14
+ You can download and install it from http://toolbelt.treasure-data.com.
15
+
16
+ It appers you are running the `td` gem. To update the gem to the latest
17
+ version, please run `gem update td`.
18
+
19
+ Please note that if you install `td` with `bundler` in a Gemfile/Gemspec
20
+ federated environment, you will need to upgrade the reference version for
21
+ `td` in the Gemfile/Gemspec for the updated `td` version to be used after
22
+ updating it.
23
+
12
24
  EOS
13
25
  )
14
26
 
27
+ # start up the CLI
15
28
  require 'td/command/runner'
16
- TreasureData::Command::Runner.new.run ARGV
17
-
29
+ ev = TreasureData::Command::Runner.new.run ARGV
30
+ unless ev.nil?
31
+ exit ev
32
+ else
33
+ puts "No exit status"
34
+ end
@@ -27,4 +27,4 @@ TreasureData::Updater.inject_libpath
27
27
 
28
28
  # start up the CLI
29
29
  require 'td/command/runner'
30
- TreasureData::Command::Runner.new.run ARGV
30
+ exit TreasureData::Command::Runner.new.run ARGV
@@ -26,5 +26,5 @@ require 'td/updater'
26
26
  TreasureData::Updater.inject_libpath
27
27
 
28
28
  # start up the CLI
29
- require "td/command/runner"
30
- TreasureData::Command::Runner.new.run ARGV
29
+ require 'td/command/runner'
30
+ exit TreasureData::Command::Runner.new.run ARGV
@@ -35,6 +35,7 @@ handlers= java.util.logging.FileHandler
35
35
 
36
36
  java.util.logging.FileHandler.level = INFO
37
37
  java.util.logging.FileHandler.pattern=td-bulk-import.log
38
+ java.util.logging.FileHandler.append=true
38
39
  java.util.logging.FileHandler.limit = 50000
39
40
  java.util.logging.FileHandler.count = 1
40
41
  java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
@@ -11,6 +11,12 @@ autoload :Job, 'td/client'
11
11
 
12
12
  module Command
13
13
 
14
+ class ParameterConfigurationError < ArgumentError
15
+ end
16
+
17
+ class BulkImportExecutionError < ArgumentError
18
+ end
19
+
14
20
  private
15
21
  def initialize
16
22
  @render_indent = ''
@@ -24,7 +30,7 @@ module Command
24
30
  unless apikey
25
31
  raise ConfigError, "Account is not configured."
26
32
  end
27
- opts[:user_agent] = "TD: #{TreasureData::VERSION}"
33
+ opts[:user_agent] = "TD: #{TOOLBELT_VERSION}"
28
34
  if h = ENV['TD_API_HEADERS']
29
35
  pairs = h.split("\n")
30
36
  opts[:headers] = Hash[pairs.map {|pair| pair.split('=', 2) }]
@@ -105,30 +111,53 @@ EOS
105
111
  end
106
112
  end
107
113
 
108
- def cmd_format_elapsed(start, finish)
114
+ def humanize_time(time, is_ms = false)
115
+ if time.nil?
116
+ return ''
117
+ end
118
+
119
+ time = time.to_i
120
+ millisecs = nil
121
+ elapsed = ''
122
+
123
+ if is_ms
124
+ # store the first 3 decimals
125
+ millisecs = time % 1000
126
+ time /= 1000
127
+ end
128
+
129
+ if time >= 3600
130
+ elapsed << "#{time / 3600}h "
131
+ time %= 3600
132
+ elapsed << "%dm " % (time / 60)
133
+ time %= 60
134
+ elapsed << "%ds" % time
135
+ elsif time >= 60
136
+ elapsed << "%dm " % (time / 60)
137
+ time %= 60
138
+ elapsed << "%ds" % time
139
+ elsif time > 0
140
+ elapsed << "%ds" % time
141
+ end
142
+
143
+ if is_ms and millisecs > 0
144
+ elapsed << " %03dms" % millisecs
145
+ end
146
+
147
+ elapsed
148
+ end
149
+
150
+ # assumed to
151
+ def humanize_elapsed_time(start, finish)
109
152
  if start
110
153
  if !finish
111
154
  finish = Time.now.utc
112
155
  end
113
- e = finish.to_i - start.to_i
114
- elapsed = ''
115
- if e >= 3600
116
- elapsed << "#{e/3600}h "
117
- e %= 3600
118
- elapsed << "%2dm " % (e/60)
119
- e %= 60
120
- elapsed << "%2dsec" % e
121
- elsif e >= 60
122
- elapsed << "%2dm " % (e/60)
123
- e %= 60
124
- elapsed << "%2dsec" % e
125
- else
126
- elapsed << "%2dsec" % e
127
- end
156
+ elapsed = humanize_time(finish.to_i - start.to_i, false)
128
157
  else
129
158
  elapsed = ''
130
159
  end
131
- elapsed = "% 13s" % elapsed # right aligned
160
+ elapsed
132
161
  end
133
162
 
134
163
  def get_database(client, db_name)
@@ -9,7 +9,7 @@ module Command
9
9
  s3_bucket = nil
10
10
  aws_access_key_id = nil
11
11
  aws_secret_access_key = nil
12
- file_format = nil
12
+ file_format = "json.gz" # default
13
13
 
14
14
  op.on('-f', '--from TIME', 'export data which is newer than or same with the TIME') {|s|
15
15
  from = export_parse_time(s)
@@ -26,7 +26,7 @@ module Command
26
26
  op.on('-s', '--aws-secret-key SECRET_KEY', 'AWS secret access key to export data (required)') {|s|
27
27
  aws_secret_access_key = s
28
28
  }
29
- op.on('-F', '--file-format FILE_FORMAT', 'file format for exported data, either json.gz (default) or line-json.gz') { |s|
29
+ op.on('-F', '--file-format FILE_FORMAT', 'file format for exported data, either json.gz (default) or line-json.gz') { |s|
30
30
  raise ArgumentError, "#{s} is not a supported file format" unless SUPPORTED_FORMATS.include?(s)
31
31
  file_format = s
32
32
  }
@@ -1,10 +1,9 @@
1
1
  require 'td/updater'
2
+ require 'time'
2
3
 
3
4
  module TreasureData
4
5
  module Command
5
-
6
- BASE_PATH = File.expand_path('../../..', File.dirname(__FILE__))
7
- UPDATED_PATH = File.join(Updater.home_directory, ".td", "java")
6
+ include TreasureData::Updater
8
7
 
9
8
  JAVA_COMMAND = "java"
10
9
  JAVA_MAIN_CLASS = "com.treasure_data.td_import.BulkImportCommand"
@@ -25,37 +24,13 @@ module Command
25
24
  bulk_import_create(op)
26
25
  end
27
26
 
28
- def import_java_version(op)
29
- vfile = find_version_file[0]
30
- puts "td-import-java #{File.open(vfile, 'r').read}"
27
+ def import_jar_version(op)
28
+ version = find_version_file
29
+ puts "td-import-java #{File.open(version, 'r').read}"
31
30
  end
32
31
 
33
32
  def import_jar_update(op)
34
- last_updated = existence_jar_updated_time
35
-
36
- require 'rexml/document'
37
- require 'open-uri'
38
- require 'fileutils'
39
-
40
- doc = REXML::Document.new(open('http://maven.treasure-data.com/com/treasure_data/td-import/maven-metadata.xml') { |f| f.read })
41
- updated = Time.strptime(REXML::XPath.match(doc, '/metadata/versioning/lastUpdated').first.text, "%Y%m%d%H%M%S")
42
- version = REXML::XPath.match(doc, '/metadata/versioning/release').first.text
43
-
44
- # Convert into UTF to compare time correctly
45
- updated = (updated + updated.gmt_offset).utc unless updated.gmt?
46
- last_updated = last_updated.utc unless last_updated.gmt?
47
-
48
- if updated > last_updated
49
- FileUtils.mkdir_p(UPDATED_PATH) unless File.exist?(UPDATED_PATH)
50
- File.open(File.join(UPDATED_PATH, 'VERSION'), 'w') { |f| f.print "#{version} via import:jar_update" }
51
- File.open(File.join(UPDATED_PATH, 'td-import-java.version'), 'w') { |f| f.print "#{version} #{updated}" }
52
- File.open(File.join(UPDATED_PATH, 'td-import.jar'), 'wb') { |f|
53
- f.print Updater.fetch("http://maven.treasure-data.com/com/treasure_data/td-import/#{version}/td-import-#{version}-jar-with-dependencies.jar")
54
- }
55
- puts "Installed td-import.jar #{version} into #{UPDATED_PATH}"
56
- else
57
- puts 'Installed td-import.jar is latest version'
58
- end
33
+ check_n_update_jar(false)
59
34
  end
60
35
 
61
36
  def import_prepare(op)
@@ -100,8 +75,14 @@ module Command
100
75
  bulk_import_unfreeze(op)
101
76
  end
102
77
 
78
+ #
79
+ # Module private methods - don't map to import:* commands
80
+ #
81
+
103
82
  private
104
83
  def import_by_java(subcmd)
84
+ check_n_update_jar(true)
85
+
105
86
  # check java runtime exists or not
106
87
  check_java
107
88
 
@@ -122,6 +103,10 @@ module Command
122
103
 
123
104
  cmd = [JAVA_COMMAND] + JVM_OPTS + java_args
124
105
  system(*cmd)
106
+ if $?.exitstatus != 0
107
+ raise BulkImportExecutionError,
108
+ "Bulk Import returned error #{$?.exitstatus}. Please check the 'td-bulk-import.log' logfile for details."
109
+ end
125
110
  end
126
111
 
127
112
  private
@@ -136,20 +121,10 @@ module Command
136
121
 
137
122
  unless $?.success?
138
123
  $stderr.puts "Java is not installed. 'td import' command requires Java (version 1.6 or later)."
139
- $stderr.puts "Alternatively, you can use 'bulk_import' commands instead which is much slower."
140
- exit 1
141
- end
142
- end
143
-
144
- private
145
- def find_td_import_jar
146
- libjars = find_files('*.jar')
147
- found = libjars.find { |path| File.basename(path) =~ /^td-import/ }
148
- if found.nil?
149
- $stderr.puts "td-import.jar is not found."
124
+ $stderr.puts "Alternatively, you can use the 'bulk_import' commands."
125
+ $stderr.puts "Since they are implemented in Ruby, they perform significantly slower."
150
126
  exit 1
151
127
  end
152
- found
153
128
  end
154
129
 
155
130
  private
@@ -158,13 +133,12 @@ module Command
158
133
 
159
134
  # set apiserver
160
135
  set_sysprops_endpoint(sysprops)
161
-
162
136
  # set http_proxy
163
137
  set_sysprops_http_proxy(sysprops)
164
138
 
165
139
  # set configuration file for logging
166
- conf_file = try_find_logging_conf_file
167
- if conf_file
140
+ conf_file = find_logging_property
141
+ unless conf_file.empty?
168
142
  sysprops << "-Djava.util.logging.config.file=#{conf_file}"
169
143
  end
170
144
 
@@ -223,30 +197,5 @@ module Command
223
197
  end
224
198
  end
225
199
 
226
- private
227
- def try_find_logging_conf_file
228
- libjars = Dir.glob("#{BASE_PATH}/java/**/*.properties")
229
- libjars.find { |path| File.basename(path) =~ /^logging.properties/ }
230
- end
231
-
232
- private
233
- def find_version_file
234
- vfile = find_files('VERSION')
235
- vfile
236
- end
237
-
238
- def existence_jar_updated_time
239
- require 'time'
240
-
241
- content = File.open(find_files("td-import-java.version").first).read
242
- index = content.index(' ')
243
- Time.parse(content[index + 1..-1].strip)
244
- end
245
-
246
- def find_files(target)
247
- files = Dir.glob("#{UPDATED_PATH}/**/#{target}")
248
- return files unless files.empty?
249
- Dir.glob("#{BASE_PATH}/java/**/#{target}")
250
- end
251
200
  end
252
201
  end
@@ -67,17 +67,33 @@ module Command
67
67
  conditions = {:slower_than => slower_than}
68
68
  end
69
69
 
70
- jobs = client.jobs(skip, skip+max-1, status, conditions)
70
+ jobs = client.jobs(skip, skip + max - 1, status, conditions)
71
71
 
72
72
  rows = []
73
73
  jobs.each {|job|
74
74
  start = job.start_at
75
- elapsed = cmd_format_elapsed(start, job.end_at)
75
+ elapsed = humanize_elapsed_time(start, job.end_at)
76
+ cpu_time = humanize_time(job.cpu_time, true)
76
77
  priority = job_priority_name_of(job.priority)
77
- rows << {:JobID => job.job_id, :Database => job.db_name, :Status => job.status, :Type => job.type, :Query => job.query.to_s, :Start => (start ? start.localtime : ''), :Elapsed => elapsed, :Priority => priority, :Result => job.result_url}
78
+ rows << {
79
+ :JobID => job.job_id,
80
+ :Database => job.db_name,
81
+ :Status => job.status,
82
+ :Type => job.type,
83
+ :Query => job.query.to_s[0,50] + " ...",
84
+ :Start => (start ? start.localtime : ''),
85
+ :Elapsed => elapsed.rjust(11),
86
+ :CPUTime => cpu_time.rjust(17),
87
+ :Priority => priority,
88
+ :Result => job.result_url
89
+ }
78
90
  }
79
91
 
80
- puts cmd_render_table(rows, :fields => [:JobID, :Status, :Start, :Elapsed, :Priority, :Result, :Type, :Database, :Query], :max_width => 140, :render_format => op.render_format)
92
+ puts cmd_render_table(rows,
93
+ :fields => [:JobID, :Status, :Start, :Elapsed, :CPUTime, :Priority, :Result, :Type, :Database, :Query],
94
+ :max_width => 1000,
95
+ :render_format => op.render_format
96
+ )
81
97
  end
82
98
 
83
99
  def job_show(op)
@@ -102,9 +118,9 @@ module Command
102
118
  output = s
103
119
  format = 'tsv' if format.nil?
104
120
  }
105
- op.on('-f', '--format FORMAT', 'format of the result to write to the file (tsv, csv, json or msgpack)') {|s|
121
+ op.on('-f', '--format FORMAT', 'format of the result to write to the file (tsv, csv, json, msgpack, and msgpack.gz)') {|s|
106
122
  unless ['tsv', 'csv', 'json', 'msgpack', 'msgpack.gz'].include?(s)
107
- raise "Unknown format #{s.dump}. Supported format: tsv, csv, json, msgpack, msgpack.gz"
123
+ raise "Unknown format #{s.dump}. Supported formats are: tsv, csv, json, msgpack, and msgpack.gz"
108
124
  end
109
125
  format = s
110
126
  }
@@ -114,7 +130,8 @@ module Command
114
130
  end
115
131
  limit = s.to_i
116
132
  }
117
- op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
133
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available',
134
+ ' for the table (only applies to tsv and csv formats)', TrueClass) {|b|
118
135
  render_opts[:header] = b;
119
136
  }
120
137
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
@@ -127,19 +144,21 @@ module Command
127
144
 
128
145
  if output.nil? && format
129
146
  unless ['tsv', 'csv', 'json'].include?(format)
130
- raise "Supported formats are only tsv, csv and json without -o / --output option"
147
+ raise ParameterConfigurationError,
148
+ "Supported formats are only tsv, csv and json without -o / --output option"
131
149
  end
132
150
  end
133
151
 
134
152
  if render_opts[:header]
135
153
  unless ['tsv', 'csv'].include?(format)
136
- raise "Option -c / --column-header is only supported with tsv and csv formats"
154
+ raise ParameterConfigurationError,
155
+ "Option -c / --column-header is only supported with tsv and csv formats"
137
156
  end
138
157
  end
139
158
 
140
159
  if !output.nil? && !limit.nil?
141
- raise "Option -l / --limit is only valid when not outputting to file " +
142
- "(no -o / --output option provided)"
160
+ raise ParameterConfigurationError,
161
+ "Option -l / --limit is only valid when not outputting to file (no -o / --output option provided)"
143
162
  end
144
163
 
145
164
  client = get_client
@@ -157,6 +176,12 @@ module Command
157
176
  puts "Retry limit : #{job.retry_limit}"
158
177
  puts "Output : #{job.result_url}"
159
178
  puts "Query : #{job.query}"
179
+ elsif job.type == :bulk_import_perform
180
+ puts "Destination : #{job.query}"
181
+ end
182
+ # if the job is done and is of type hive, show the Map-Reduce cumulated CPU time
183
+ if job.finished? && [:hive].include?(job.type)
184
+ puts "CPU time : #{humanize_time(job.cpu_time, true)}"
160
185
  end
161
186
 
162
187
  if wait && !job.finished?
@@ -183,14 +208,14 @@ module Command
183
208
  if verbose
184
209
  if !job.debug['cmdout'].nil?
185
210
  puts ""
186
- puts "cmdout:"
211
+ puts "Output:"
187
212
  job.debug['cmdout'].to_s.split("\n").each {|line|
188
213
  puts " " + line
189
214
  }
190
215
  end
191
216
  if !job.debug['stderr'].nil?
192
217
  puts ""
193
- puts "stderr:"
218
+ puts "Details:"
194
219
  job.debug['stderr'].to_s.split("\n").each {|line|
195
220
  puts " " + line
196
221
  }
@@ -198,7 +223,7 @@ module Command
198
223
  end
199
224
  end
200
225
 
201
- $stderr.puts "Use '-v' option to show detailed messages." unless verbose
226
+ puts "Use '-v' option to show detailed messages." unless verbose
202
227
  end
203
228
 
204
229
  def job_status(op)
@@ -262,12 +287,33 @@ module Command
262
287
  write_result(job, output, limit, format, render_opts)
263
288
  puts "written to #{output} in #{format} format"
264
289
  else
290
+ # every format that is allowed on stdout
265
291
  render_result(job, limit, format, render_opts)
266
292
  end
267
293
  end
268
294
 
269
295
  def write_result(job, output, limit, format, render_opts={})
270
296
 
297
+ # start progress indicator
298
+ line_len = 0
299
+ start_time = last_time = Time.now.to_i
300
+ base_msg = "WARNING: the query result is being written"
301
+ if !output.nil?
302
+ msg = base_msg + "\r"
303
+ line_len += msg.length
304
+ print msg
305
+ end
306
+
307
+ # this lambda is passed on as a block when outputting the results to file
308
+ progress = lambda {
309
+ if (time = Time.now.to_i) - last_time > 5
310
+ msg = base_msg + ": #{humanize_time(time - start_time)} elapsed" + " " * 10 + "\r"
311
+ line_len = msg.length
312
+ print msg
313
+ last_time = time
314
+ end
315
+ }
316
+
271
317
  # the next 3 formats allow writing to both a file and stdout
272
318
 
273
319
  case format
@@ -281,6 +327,9 @@ module Command
281
327
  f.write Yajl.dump(row)
282
328
  n_rows += 1
283
329
  break if output.nil? and !limit.nil? and n_rows == limit
330
+
331
+ # update progress indicator
332
+ progress.call if !output.nil?
284
333
  }
285
334
  f.write "]"
286
335
  }
@@ -306,12 +355,17 @@ module Command
306
355
  dump_column(col)
307
356
  }
308
357
  n_rows += 1
358
+ writer.flush if n_rows % 100 == 0 # flush every 100 records
309
359
  break if output.nil? and !limit.nil? and n_rows == limit
360
+
361
+ # update progress indicator
362
+ progress.call if !output.nil?
310
363
  }
311
364
  }
312
365
 
313
366
  when 'tsv'
314
367
  require 'yajl'
368
+
315
369
  open_file(output, "w") {|f|
316
370
  # output headers
317
371
  if render_opts[:header] && job.hive_result_schema
@@ -332,25 +386,43 @@ module Command
332
386
  }
333
387
  f.write "\n"
334
388
  n_rows += 1
389
+ f.flush if n_rows % 100 == 0 # flush every 100 records
335
390
  break if output.nil? and !limit.nil? and n_rows == limit
391
+
392
+ # update progress indicator
393
+ progress.call if !output.nil?
336
394
  }
337
395
  }
338
396
 
339
397
  # these last 2 formats are only valid if writing the result to file through the -o/--output option.
340
398
 
341
399
  when 'msgpack'
400
+ if output.nil?
401
+ raise ParameterConfigurationError,
402
+ "Format 'msgpack' does not support writing to stdout"
403
+ end
342
404
  open_file(output, "wb") {|f|
343
- job.result_format('msgpack', f)
405
+ job.result_format('msgpack', f, &progress)
344
406
  }
345
407
 
346
408
  when 'msgpack.gz'
409
+ if output.nil?
410
+ raise ParameterConfigurationError,
411
+ "Format 'msgpack' does not support writing to stdout"
412
+ end
347
413
  open_file(output, "wb") {|f|
348
- job.result_format('msgpack.gz', f)
414
+ job.result_format('msgpack.gz', f, &progress)
349
415
  }
350
416
 
351
417
  else
352
418
  raise "Unknown format #{format.inspect}"
353
419
  end
420
+
421
+ # clear the progress indicator
422
+ if !output.nil?
423
+ print "\r"
424
+ print " " * (line_len + 5) + "\r"
425
+ end
354
426
  end
355
427
 
356
428
  def open_file(output, mode)
@@ -371,6 +443,14 @@ module Command
371
443
  require 'yajl'
372
444
 
373
445
  if format.nil?
446
+
447
+ # start progress indicator
448
+ start_time = last_time = Time.now.to_i
449
+ base_msg = "WARNING: the query result is being downloaded"
450
+ msg = base_msg + "\r"
451
+ line_len = msg.length
452
+ print msg
453
+
374
454
  # display result in tabular format
375
455
  rows = []
376
456
  n_rows = 0
@@ -381,8 +461,21 @@ module Command
381
461
  }
382
462
  n_rows += 1
383
463
  break if !limit.nil? and n_rows == limit
464
+
465
+ # progress indication
466
+ time = Time.now.to_i
467
+ if time - last_time > 5
468
+ msg = base_msg + ": #{humanize_time(time - start_time)} elapsed" + " " * 10 + "\r"
469
+ line_len = msg.length
470
+ print msg
471
+ last_time = time
472
+ end
384
473
  }
385
474
 
475
+ # clear the progress indicator
476
+ print "\r"
477
+ print " " * (line_len + 5) + "\r"
478
+
386
479
  render_opts[:max_width] = 10000
387
480
  if job.hive_result_schema
388
481
  render_opts[:change_fields] = job.hive_result_schema.map { |name,type| name }