td 0.10.99 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjhhYWI5NWE1ZmI3ZTkwZjY0OTNhMGE3N2VjNjUzNzk2ZWY2OGJkYQ==
5
+ data.tar.gz: !binary |-
6
+ NDZmMDJlMDQyZmViMjY5NTNmMTE0ODg5ZWE2MGYxZjJhYTg4Y2VhZQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NGZmMTk4ZTk5ZWFhYzc4YmEyZTE0ZmI0MjA2ODYzOWQxNzQ0ZTc5OGRjZDhh
10
+ OTI0NjJiZjA3MDhiZmFjNjg3MThjNjI1ZjcyZjNiOTNiOWU5MjQ2M2FhOWRh
11
+ ZTI4OGI2ZTk1YTEwYmVkZGJmNGU4Y2YzYTY1ZDc5MmNkYTI5ZTQ=
12
+ data.tar.gz: !binary |-
13
+ NjNiNTI4MGI4MDY0MGYzMjZiMTBmNzBhYzJmNjNmZjcxZDgxZWVjNTgzYzUx
14
+ ZGZiMzViYmQ0ZjQwNWIwZmI5MWQ5ZmExYTA3MzZkZDlkY2RmNTEzNDMxYzNj
15
+ ZGJhZTFlZDExYjc4YzdkYzUwZmJjNjc3NTlkZGQ5OTI2MDQ4NjY=
data/.gitignore CHANGED
@@ -1,5 +1,8 @@
1
1
  .bundle
2
+ .DS_Store
2
3
  build/td-import-java
3
4
  Gemfile.lock
4
5
  vendor/*
6
+ pkg/
7
+ build/*
5
8
  *~
data/ChangeLog CHANGED
@@ -1,10 +1,39 @@
1
+ == 2014-04-29 version 0.11.1
2
+
3
+ * Fix Treasure Data query result output database and table validation
4
+ regular expression
5
+ * Fixed auto updater which threw a warning due to a constant being
6
+ reinitialized
7
+ * Interpret the CPU time as number of milliseconds as opposed to
8
+ number of seconds in the job:show output
9
+ * When outputting the query result to file, flush the data every
10
+ 100 records
11
+
12
+ == 2014-04-23 version 0.11.0
13
+
14
+ * Show cumulative CPU time in the job:list and job:show outputs
15
+ * The error message when the specified schema has columns containing upper case
16
+ alphanumeric characters is improved to be more representative of the problem
17
+ * 'td query' commands with result output to Treasure Data (--result td://xxxx)
18
+ validate the database and table naming convention before running the query
19
+ * The Java bulk import JAR file is now auto-updated. Checking for an updated
20
+ version is performed hourly
21
+ * 'td query' commands with result output specification invite the user to use
22
+ the '-x / --exclude' option to avoid outputting the query result to stdout as
23
+ well
24
+ * The 'Destination' field in the summary for the Bulk import perform output
25
+ from the jobs:show command contains the destination table name in the form of
26
+ a LOAD DATA SESSION query
27
+ * Returning the correct non-zero error codes in the occurrence of an exception
28
+ * Declare the 'td query' option '--sampling' obsolete. A warning indicating the
29
+ option is obsolete and has no effect will be printed to warn the user
30
+
1
31
  == 2014-02-26 version 0.10.99
2
32
 
3
33
  * job:show, query: limit the number of records outputted when printing on stdout using the -l / --limit option
4
34
  * job:show, query: optionally output/store csv and tsv files with headers using the -c / --column-names option
5
35
  * job:show, query: support result output from Presto
6
36
 
7
-
8
37
  == 2014-02-21 version 0.10.98
9
38
 
10
39
  * import:unfreeze: fixed NoMethodError error
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ require "erb"
11
11
 
12
12
  def version
13
13
  require project_root_path('lib/td/version')
14
- TreasureData::VERSION
14
+ TreasureData::TOOLBELT_VERSION
15
15
  end
16
16
 
17
17
  task "jar" do
data/bin/td CHANGED
@@ -1,17 +1,34 @@
1
1
  #!/usr/bin/env ruby
2
2
  # -*- coding: utf-8 -*-
3
+
3
4
  require 'rubygems' unless defined?(gem)
4
5
  gem 'td-client'
5
6
  here = File.dirname(__FILE__)
6
7
  $LOAD_PATH << File.expand_path(File.join(here, '..', 'lib'))
7
8
 
9
+ # disable the updater for the td gem
8
10
  require 'td/updater'
9
11
  TreasureData::Updater.disable(<<EOS
10
- `td update` is only available from Treasure Data Toolbelt.
11
- Download and install from http://toolbelt.treasure-data.com
12
+
13
+ `td update` is only available from the Treasure Data Toolbelt.
14
+ You can download and install it from http://toolbelt.treasure-data.com.
15
+
16
+ It appers you are running the `td` gem. To update the gem to the latest
17
+ version, please run `gem update td`.
18
+
19
+ Please note that if you install `td` with `bundler` in a Gemfile/Gemspec
20
+ federated environment, you will need to upgrade the reference version for
21
+ `td` in the Gemfile/Gemspec for the updated `td` version to be used after
22
+ updating it.
23
+
12
24
  EOS
13
25
  )
14
26
 
27
+ # start up the CLI
15
28
  require 'td/command/runner'
16
- TreasureData::Command::Runner.new.run ARGV
17
-
29
+ ev = TreasureData::Command::Runner.new.run ARGV
30
+ unless ev.nil?
31
+ exit ev
32
+ else
33
+ puts "No exit status"
34
+ end
@@ -27,4 +27,4 @@ TreasureData::Updater.inject_libpath
27
27
 
28
28
  # start up the CLI
29
29
  require 'td/command/runner'
30
- TreasureData::Command::Runner.new.run ARGV
30
+ exit TreasureData::Command::Runner.new.run ARGV
@@ -26,5 +26,5 @@ require 'td/updater'
26
26
  TreasureData::Updater.inject_libpath
27
27
 
28
28
  # start up the CLI
29
- require "td/command/runner"
30
- TreasureData::Command::Runner.new.run ARGV
29
+ require 'td/command/runner'
30
+ exit TreasureData::Command::Runner.new.run ARGV
@@ -35,6 +35,7 @@ handlers= java.util.logging.FileHandler
35
35
 
36
36
  java.util.logging.FileHandler.level = INFO
37
37
  java.util.logging.FileHandler.pattern=td-bulk-import.log
38
+ java.util.logging.FileHandler.append=true
38
39
  java.util.logging.FileHandler.limit = 50000
39
40
  java.util.logging.FileHandler.count = 1
40
41
  java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
@@ -11,6 +11,12 @@ autoload :Job, 'td/client'
11
11
 
12
12
  module Command
13
13
 
14
+ class ParameterConfigurationError < ArgumentError
15
+ end
16
+
17
+ class BulkImportExecutionError < ArgumentError
18
+ end
19
+
14
20
  private
15
21
  def initialize
16
22
  @render_indent = ''
@@ -24,7 +30,7 @@ module Command
24
30
  unless apikey
25
31
  raise ConfigError, "Account is not configured."
26
32
  end
27
- opts[:user_agent] = "TD: #{TreasureData::VERSION}"
33
+ opts[:user_agent] = "TD: #{TOOLBELT_VERSION}"
28
34
  if h = ENV['TD_API_HEADERS']
29
35
  pairs = h.split("\n")
30
36
  opts[:headers] = Hash[pairs.map {|pair| pair.split('=', 2) }]
@@ -105,30 +111,53 @@ EOS
105
111
  end
106
112
  end
107
113
 
108
- def cmd_format_elapsed(start, finish)
114
+ def humanize_time(time, is_ms = false)
115
+ if time.nil?
116
+ return ''
117
+ end
118
+
119
+ time = time.to_i
120
+ millisecs = nil
121
+ elapsed = ''
122
+
123
+ if is_ms
124
+ # store the first 3 decimals
125
+ millisecs = time % 1000
126
+ time /= 1000
127
+ end
128
+
129
+ if time >= 3600
130
+ elapsed << "#{time / 3600}h "
131
+ time %= 3600
132
+ elapsed << "%dm " % (time / 60)
133
+ time %= 60
134
+ elapsed << "%ds" % time
135
+ elsif time >= 60
136
+ elapsed << "%dm " % (time / 60)
137
+ time %= 60
138
+ elapsed << "%ds" % time
139
+ elsif time > 0
140
+ elapsed << "%ds" % time
141
+ end
142
+
143
+ if is_ms and millisecs > 0
144
+ elapsed << " %03dms" % millisecs
145
+ end
146
+
147
+ elapsed
148
+ end
149
+
150
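+ # start and finish are assumed to be Time-like values (differenced via to_i, in whole seconds); a missing finish defaults to the current UTC time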
151
+ def humanize_elapsed_time(start, finish)
109
152
  if start
110
153
  if !finish
111
154
  finish = Time.now.utc
112
155
  end
113
- e = finish.to_i - start.to_i
114
- elapsed = ''
115
- if e >= 3600
116
- elapsed << "#{e/3600}h "
117
- e %= 3600
118
- elapsed << "%2dm " % (e/60)
119
- e %= 60
120
- elapsed << "%2dsec" % e
121
- elsif e >= 60
122
- elapsed << "%2dm " % (e/60)
123
- e %= 60
124
- elapsed << "%2dsec" % e
125
- else
126
- elapsed << "%2dsec" % e
127
- end
156
+ elapsed = humanize_time(finish.to_i - start.to_i, false)
128
157
  else
129
158
  elapsed = ''
130
159
  end
131
- elapsed = "% 13s" % elapsed # right aligned
160
+ elapsed
132
161
  end
133
162
 
134
163
  def get_database(client, db_name)
@@ -9,7 +9,7 @@ module Command
9
9
  s3_bucket = nil
10
10
  aws_access_key_id = nil
11
11
  aws_secret_access_key = nil
12
- file_format = nil
12
+ file_format = "json.gz" # default
13
13
 
14
14
  op.on('-f', '--from TIME', 'export data which is newer than or same with the TIME') {|s|
15
15
  from = export_parse_time(s)
@@ -26,7 +26,7 @@ module Command
26
26
  op.on('-s', '--aws-secret-key SECRET_KEY', 'AWS secret access key to export data (required)') {|s|
27
27
  aws_secret_access_key = s
28
28
  }
29
- op.on('-F', '--file-format FILE_FORMAT', 'file format for exported data, either json.gz (default) or line-json.gz') { |s|
29
+ op.on('-F', '--file-format FILE_FORMAT', 'file format for exported data, either json.gz (default) or line-json.gz') { |s|
30
30
  raise ArgumentError, "#{s} is not a supported file format" unless SUPPORTED_FORMATS.include?(s)
31
31
  file_format = s
32
32
  }
@@ -1,10 +1,9 @@
1
1
  require 'td/updater'
2
+ require 'time'
2
3
 
3
4
  module TreasureData
4
5
  module Command
5
-
6
- BASE_PATH = File.expand_path('../../..', File.dirname(__FILE__))
7
- UPDATED_PATH = File.join(Updater.home_directory, ".td", "java")
6
+ include TreasureData::Updater
8
7
 
9
8
  JAVA_COMMAND = "java"
10
9
  JAVA_MAIN_CLASS = "com.treasure_data.td_import.BulkImportCommand"
@@ -25,37 +24,13 @@ module Command
25
24
  bulk_import_create(op)
26
25
  end
27
26
 
28
- def import_java_version(op)
29
- vfile = find_version_file[0]
30
- puts "td-import-java #{File.open(vfile, 'r').read}"
27
+ def import_jar_version(op)
28
+ version = find_version_file
29
+ puts "td-import-java #{File.open(version, 'r').read}"
31
30
  end
32
31
 
33
32
  def import_jar_update(op)
34
- last_updated = existence_jar_updated_time
35
-
36
- require 'rexml/document'
37
- require 'open-uri'
38
- require 'fileutils'
39
-
40
- doc = REXML::Document.new(open('http://maven.treasure-data.com/com/treasure_data/td-import/maven-metadata.xml') { |f| f.read })
41
- updated = Time.strptime(REXML::XPath.match(doc, '/metadata/versioning/lastUpdated').first.text, "%Y%m%d%H%M%S")
42
- version = REXML::XPath.match(doc, '/metadata/versioning/release').first.text
43
-
44
- # Convert into UTF to compare time correctly
45
- updated = (updated + updated.gmt_offset).utc unless updated.gmt?
46
- last_updated = last_updated.utc unless last_updated.gmt?
47
-
48
- if updated > last_updated
49
- FileUtils.mkdir_p(UPDATED_PATH) unless File.exist?(UPDATED_PATH)
50
- File.open(File.join(UPDATED_PATH, 'VERSION'), 'w') { |f| f.print "#{version} via import:jar_update" }
51
- File.open(File.join(UPDATED_PATH, 'td-import-java.version'), 'w') { |f| f.print "#{version} #{updated}" }
52
- File.open(File.join(UPDATED_PATH, 'td-import.jar'), 'wb') { |f|
53
- f.print Updater.fetch("http://maven.treasure-data.com/com/treasure_data/td-import/#{version}/td-import-#{version}-jar-with-dependencies.jar")
54
- }
55
- puts "Installed td-import.jar #{version} into #{UPDATED_PATH}"
56
- else
57
- puts 'Installed td-import.jar is latest version'
58
- end
33
+ check_n_update_jar(false)
59
34
  end
60
35
 
61
36
  def import_prepare(op)
@@ -100,8 +75,14 @@ module Command
100
75
  bulk_import_unfreeze(op)
101
76
  end
102
77
 
78
+ #
79
+ # Module private methods - don't map to import:* commands
80
+ #
81
+
103
82
  private
104
83
  def import_by_java(subcmd)
84
+ check_n_update_jar(true)
85
+
105
86
  # check java runtime exists or not
106
87
  check_java
107
88
 
@@ -122,6 +103,10 @@ module Command
122
103
 
123
104
  cmd = [JAVA_COMMAND] + JVM_OPTS + java_args
124
105
  system(*cmd)
106
+ if $?.exitstatus != 0
107
+ raise BulkImportExecutionError,
108
+ "Bulk Import returned error #{$?.exitstatus}. Please check the 'td-bulk-import.log' logfile for details."
109
+ end
125
110
  end
126
111
 
127
112
  private
@@ -136,20 +121,10 @@ module Command
136
121
 
137
122
  unless $?.success?
138
123
  $stderr.puts "Java is not installed. 'td import' command requires Java (version 1.6 or later)."
139
- $stderr.puts "Alternatively, you can use 'bulk_import' commands instead which is much slower."
140
- exit 1
141
- end
142
- end
143
-
144
- private
145
- def find_td_import_jar
146
- libjars = find_files('*.jar')
147
- found = libjars.find { |path| File.basename(path) =~ /^td-import/ }
148
- if found.nil?
149
- $stderr.puts "td-import.jar is not found."
124
+ $stderr.puts "Alternatively, you can use the 'bulk_import' commands."
125
+ $stderr.puts "Since they are implemented in Ruby, they perform significantly slower."
150
126
  exit 1
151
127
  end
152
- found
153
128
  end
154
129
 
155
130
  private
@@ -158,13 +133,12 @@ module Command
158
133
 
159
134
  # set apiserver
160
135
  set_sysprops_endpoint(sysprops)
161
-
162
136
  # set http_proxy
163
137
  set_sysprops_http_proxy(sysprops)
164
138
 
165
139
  # set configuration file for logging
166
- conf_file = try_find_logging_conf_file
167
- if conf_file
140
+ conf_file = find_logging_property
141
+ unless conf_file.empty?
168
142
  sysprops << "-Djava.util.logging.config.file=#{conf_file}"
169
143
  end
170
144
 
@@ -223,30 +197,5 @@ module Command
223
197
  end
224
198
  end
225
199
 
226
- private
227
- def try_find_logging_conf_file
228
- libjars = Dir.glob("#{BASE_PATH}/java/**/*.properties")
229
- libjars.find { |path| File.basename(path) =~ /^logging.properties/ }
230
- end
231
-
232
- private
233
- def find_version_file
234
- vfile = find_files('VERSION')
235
- vfile
236
- end
237
-
238
- def existence_jar_updated_time
239
- require 'time'
240
-
241
- content = File.open(find_files("td-import-java.version").first).read
242
- index = content.index(' ')
243
- Time.parse(content[index + 1..-1].strip)
244
- end
245
-
246
- def find_files(target)
247
- files = Dir.glob("#{UPDATED_PATH}/**/#{target}")
248
- return files unless files.empty?
249
- Dir.glob("#{BASE_PATH}/java/**/#{target}")
250
- end
251
200
  end
252
201
  end
@@ -67,17 +67,33 @@ module Command
67
67
  conditions = {:slower_than => slower_than}
68
68
  end
69
69
 
70
- jobs = client.jobs(skip, skip+max-1, status, conditions)
70
+ jobs = client.jobs(skip, skip + max - 1, status, conditions)
71
71
 
72
72
  rows = []
73
73
  jobs.each {|job|
74
74
  start = job.start_at
75
- elapsed = cmd_format_elapsed(start, job.end_at)
75
+ elapsed = humanize_elapsed_time(start, job.end_at)
76
+ cpu_time = humanize_time(job.cpu_time, true)
76
77
  priority = job_priority_name_of(job.priority)
77
- rows << {:JobID => job.job_id, :Database => job.db_name, :Status => job.status, :Type => job.type, :Query => job.query.to_s, :Start => (start ? start.localtime : ''), :Elapsed => elapsed, :Priority => priority, :Result => job.result_url}
78
+ rows << {
79
+ :JobID => job.job_id,
80
+ :Database => job.db_name,
81
+ :Status => job.status,
82
+ :Type => job.type,
83
+ :Query => job.query.to_s[0,50] + " ...",
84
+ :Start => (start ? start.localtime : ''),
85
+ :Elapsed => elapsed.rjust(11),
86
+ :CPUTime => cpu_time.rjust(17),
87
+ :Priority => priority,
88
+ :Result => job.result_url
89
+ }
78
90
  }
79
91
 
80
- puts cmd_render_table(rows, :fields => [:JobID, :Status, :Start, :Elapsed, :Priority, :Result, :Type, :Database, :Query], :max_width => 140, :render_format => op.render_format)
92
+ puts cmd_render_table(rows,
93
+ :fields => [:JobID, :Status, :Start, :Elapsed, :CPUTime, :Priority, :Result, :Type, :Database, :Query],
94
+ :max_width => 1000,
95
+ :render_format => op.render_format
96
+ )
81
97
  end
82
98
 
83
99
  def job_show(op)
@@ -102,9 +118,9 @@ module Command
102
118
  output = s
103
119
  format = 'tsv' if format.nil?
104
120
  }
105
- op.on('-f', '--format FORMAT', 'format of the result to write to the file (tsv, csv, json or msgpack)') {|s|
121
+ op.on('-f', '--format FORMAT', 'format of the result to write to the file (tsv, csv, json, msgpack, and msgpack.gz)') {|s|
106
122
  unless ['tsv', 'csv', 'json', 'msgpack', 'msgpack.gz'].include?(s)
107
- raise "Unknown format #{s.dump}. Supported format: tsv, csv, json, msgpack, msgpack.gz"
123
+ raise "Unknown format #{s.dump}. Supported formats are: tsv, csv, json, msgpack, and msgpack.gz"
108
124
  end
109
125
  format = s
110
126
  }
@@ -114,7 +130,8 @@ module Command
114
130
  end
115
131
  limit = s.to_i
116
132
  }
117
- op.on('-c', '--column-header', 'output of the columns\' header when the schema is available for the table (only applies to tsv and csv formats)', TrueClass) {|b|
133
+ op.on('-c', '--column-header', 'output of the columns\' header when the schema is available',
134
+ ' for the table (only applies to tsv and csv formats)', TrueClass) {|b|
118
135
  render_opts[:header] = b;
119
136
  }
120
137
  op.on('-x', '--exclude', 'do not automatically retrieve the job result', TrueClass) {|b|
@@ -127,19 +144,21 @@ module Command
127
144
 
128
145
  if output.nil? && format
129
146
  unless ['tsv', 'csv', 'json'].include?(format)
130
- raise "Supported formats are only tsv, csv and json without -o / --output option"
147
+ raise ParameterConfigurationError,
148
+ "Supported formats are only tsv, csv and json without -o / --output option"
131
149
  end
132
150
  end
133
151
 
134
152
  if render_opts[:header]
135
153
  unless ['tsv', 'csv'].include?(format)
136
- raise "Option -c / --column-header is only supported with tsv and csv formats"
154
+ raise ParameterConfigurationError,
155
+ "Option -c / --column-header is only supported with tsv and csv formats"
137
156
  end
138
157
  end
139
158
 
140
159
  if !output.nil? && !limit.nil?
141
- raise "Option -l / --limit is only valid when not outputting to file " +
142
- "(no -o / --output option provided)"
160
+ raise ParameterConfigurationError,
161
+ "Option -l / --limit is only valid when not outputting to file (no -o / --output option provided)"
143
162
  end
144
163
 
145
164
  client = get_client
@@ -157,6 +176,12 @@ module Command
157
176
  puts "Retry limit : #{job.retry_limit}"
158
177
  puts "Output : #{job.result_url}"
159
178
  puts "Query : #{job.query}"
179
+ elsif job.type == :bulk_import_perform
180
+ puts "Destination : #{job.query}"
181
+ end
182
+ # if the job is done and is of type hive, show the Map-Reduce cumulative CPU time
183
+ if job.finished? && [:hive].include?(job.type)
184
+ puts "CPU time : #{humanize_time(job.cpu_time, true)}"
160
185
  end
161
186
 
162
187
  if wait && !job.finished?
@@ -183,14 +208,14 @@ module Command
183
208
  if verbose
184
209
  if !job.debug['cmdout'].nil?
185
210
  puts ""
186
- puts "cmdout:"
211
+ puts "Output:"
187
212
  job.debug['cmdout'].to_s.split("\n").each {|line|
188
213
  puts " " + line
189
214
  }
190
215
  end
191
216
  if !job.debug['stderr'].nil?
192
217
  puts ""
193
- puts "stderr:"
218
+ puts "Details:"
194
219
  job.debug['stderr'].to_s.split("\n").each {|line|
195
220
  puts " " + line
196
221
  }
@@ -198,7 +223,7 @@ module Command
198
223
  end
199
224
  end
200
225
 
201
- $stderr.puts "Use '-v' option to show detailed messages." unless verbose
226
+ puts "Use '-v' option to show detailed messages." unless verbose
202
227
  end
203
228
 
204
229
  def job_status(op)
@@ -262,12 +287,33 @@ module Command
262
287
  write_result(job, output, limit, format, render_opts)
263
288
  puts "written to #{output} in #{format} format"
264
289
  else
290
+ # every format that is allowed on stdout
265
291
  render_result(job, limit, format, render_opts)
266
292
  end
267
293
  end
268
294
 
269
295
  def write_result(job, output, limit, format, render_opts={})
270
296
 
297
+ # start progress indicator
298
+ line_len = 0
299
+ start_time = last_time = Time.now.to_i
300
+ base_msg = "WARNING: the query result is being written"
301
+ if !output.nil?
302
+ msg = base_msg + "\r"
303
+ line_len += msg.length
304
+ print msg
305
+ end
306
+
307
+ # this lambda is passed on as a block when outputting the results to file
308
+ progress = lambda {
309
+ if (time = Time.now.to_i) - last_time > 5
310
+ msg = base_msg + ": #{humanize_time(time - start_time)} elapsed" + " " * 10 + "\r"
311
+ line_len = msg.length
312
+ print msg
313
+ last_time = time
314
+ end
315
+ }
316
+
271
317
  # the next 3 formats allow writing to both a file and stdout
272
318
 
273
319
  case format
@@ -281,6 +327,9 @@ module Command
281
327
  f.write Yajl.dump(row)
282
328
  n_rows += 1
283
329
  break if output.nil? and !limit.nil? and n_rows == limit
330
+
331
+ # update progress indicator
332
+ progress.call if !output.nil?
284
333
  }
285
334
  f.write "]"
286
335
  }
@@ -306,12 +355,17 @@ module Command
306
355
  dump_column(col)
307
356
  }
308
357
  n_rows += 1
358
+ writer.flush if n_rows % 100 == 0 # flush every 100 records
309
359
  break if output.nil? and !limit.nil? and n_rows == limit
360
+
361
+ # update progress indicator
362
+ progress.call if !output.nil?
310
363
  }
311
364
  }
312
365
 
313
366
  when 'tsv'
314
367
  require 'yajl'
368
+
315
369
  open_file(output, "w") {|f|
316
370
  # output headers
317
371
  if render_opts[:header] && job.hive_result_schema
@@ -332,25 +386,43 @@ module Command
332
386
  }
333
387
  f.write "\n"
334
388
  n_rows += 1
389
+ f.flush if n_rows % 100 == 0 # flush every 100 records
335
390
  break if output.nil? and !limit.nil? and n_rows == limit
391
+
392
+ # update progress indicator
393
+ progress.call if !output.nil?
336
394
  }
337
395
  }
338
396
 
339
397
  # these last 2 formats are only valid if writing the result to file through the -o/--output option.
340
398
 
341
399
  when 'msgpack'
400
+ if output.nil?
401
+ raise ParameterConfigurationError,
402
+ "Format 'msgpack' does not support writing to stdout"
403
+ end
342
404
  open_file(output, "wb") {|f|
343
- job.result_format('msgpack', f)
405
+ job.result_format('msgpack', f, &progress)
344
406
  }
345
407
 
346
408
  when 'msgpack.gz'
409
+ if output.nil?
410
+ raise ParameterConfigurationError,
411
+ "Format 'msgpack' does not support writing to stdout"
412
+ end
347
413
  open_file(output, "wb") {|f|
348
- job.result_format('msgpack.gz', f)
414
+ job.result_format('msgpack.gz', f, &progress)
349
415
  }
350
416
 
351
417
  else
352
418
  raise "Unknown format #{format.inspect}"
353
419
  end
420
+
421
+ # clear the progress indicator
422
+ if !output.nil?
423
+ print "\r"
424
+ print " " * (line_len + 5) + "\r"
425
+ end
354
426
  end
355
427
 
356
428
  def open_file(output, mode)
@@ -371,6 +443,14 @@ module Command
371
443
  require 'yajl'
372
444
 
373
445
  if format.nil?
446
+
447
+ # start progress indicator
448
+ start_time = last_time = Time.now.to_i
449
+ base_msg = "WARNING: the query result is being downloaded"
450
+ msg = base_msg + "\r"
451
+ line_len = msg.length
452
+ print msg
453
+
374
454
  # display result in tabular format
375
455
  rows = []
376
456
  n_rows = 0
@@ -381,8 +461,21 @@ module Command
381
461
  }
382
462
  n_rows += 1
383
463
  break if !limit.nil? and n_rows == limit
464
+
465
+ # progress indication
466
+ time = Time.now.to_i
467
+ if time - last_time > 5
468
+ msg = base_msg + ": #{humanize_time(time - start_time)} elapsed" + " " * 10 + "\r"
469
+ line_len = msg.length
470
+ print msg
471
+ last_time = time
472
+ end
384
473
  }
385
474
 
475
+ # clear the progress indicator
476
+ print "\r"
477
+ print " " * (line_len + 5) + "\r"
478
+
386
479
  render_opts[:max_width] = 10000
387
480
  if job.hive_result_schema
388
481
  render_opts[:change_fields] = job.hive_result_schema.map { |name,type| name }