td 0.10.51 → 0.10.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -1,4 +1,10 @@
1
1
 
2
+ == 2012-09-26 version 0.10.52
3
+
4
+ * bulk_import:upload_parts subcommand supports --parallel option to upload
5
+ files in parallel (default=2, max=8)
6
+
7
+
2
8
  == 2012-09-21 version 0.10.51
3
9
 
4
10
  * job:list and job:show show database name
@@ -104,6 +104,7 @@ module Command
104
104
  suffix_count = 0
105
105
  part_prefix = ""
106
106
  auto_perform = false
107
+ parallel = 2
107
108
 
108
109
  op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
109
110
  part_prefix = s
@@ -114,20 +115,53 @@ module Command
114
115
  op.on('--auto-perform', 'perform bulk import job automatically', TrueClass) {|b|
115
116
  auto_perform = b
116
117
  }
118
+ op.on('--parallel NUM', 'perform uploading in parallel (default: 2; max 8)', Integer) {|i|
119
+ parallel = i
120
+ }
117
121
 
118
122
  name, *files = op.cmd_parse
119
123
 
120
- files.each {|ifname|
121
- basename = File.basename(ifname)
122
- part_name = part_prefix + basename.split('.')[0..suffix_count].join('.')
124
+ parallel = 1 if parallel <= 1
125
+ parallel = 8 if parallel >= 8
126
+
127
+ threads = (1..parallel).map {|i|
128
+ Thread.new do
129
+ errors = []
130
+ until files.empty?
131
+ ifname = files.shift
132
+ basename = File.basename(ifname)
133
+ begin
134
+ part_name = part_prefix + basename.split('.')[0..suffix_count].join('.')
135
+
136
+ File.open(ifname, "rb") {|io|
137
+ size = io.size
138
+ $stderr.write "Uploading '#{ifname}' -> '#{part_name}'... (#{size} bytes)\n"
139
+
140
+ bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
141
+ }
142
+ rescue
143
+ errors << [ifname, $!]
144
+ end
145
+ end
146
+ errors
147
+ end
148
+ }
123
149
 
124
- File.open(ifname, "rb") {|io|
125
- size = io.size
126
- $stderr.puts "Uploading '#{ifname}' -> '#{part_name}'... (#{size} bytes)"
150
+ errors = []
151
+ threads.each {|t|
152
+ errors.concat t.value
153
+ }
127
154
 
128
- bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
155
+ unless errors.empty?
156
+ $stderr.puts "failed to upload #{errors.size} files."
157
+ errors.each {|(ifname,ex)|
158
+ $stderr.puts " #{ifname}: #{ex}"
159
+ ex.backtrace.each {|bt|
160
+ $stderr.puts " #{ifname}: #{ex}"
161
+ }
129
162
  }
130
- }
163
+ exit 1
164
+ end
131
165
 
132
166
  $stderr.puts "done."
133
167
 
@@ -311,6 +345,7 @@ module Command
311
345
  end
312
346
  }
313
347
 
348
+ # TODO multi process
314
349
  files.each {|ifname|
315
350
  $stderr.puts "Processing #{ifname}..."
316
351
  record_num = 0
@@ -361,7 +396,7 @@ module Command
361
396
  rescue
362
397
  if retry_limit > 0
363
398
  retry_limit -= 1
364
- $stderr.puts "#{$!}; retrying '#{part_name}'..."
399
+ $stderr.write "#{$!}; retrying '#{part_name}'...\n"
365
400
  sleep retry_wait
366
401
  retry
367
402
  end
@@ -233,7 +233,7 @@ module List
233
233
  add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
234
234
  add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
235
235
  add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
236
- add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --prefix logs_'
236
+ add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --parallel 4'
237
237
  add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
238
238
  add_list 'bulk_import:delete_parts', %w[name ids_], 'Delete uploaded files from a bulk import session', 'bulk_import:delete_parts logs_201201 01h 02h 03h'
239
239
  add_list 'bulk_import:perform', %w[name], 'Start to validate and convert uploaded files', 'bulk_import:perform logs_201201'
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.51'
3
+ VERSION = '0.10.52'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.51
4
+ version: 0.10.52
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-22 00:00:00.000000000 Z
12
+ date: 2012-09-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack