td 0.10.51 → 0.10.52

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -1,4 +1,10 @@
1
1
 
2
+ == 2012-09-26 version 0.10.52
3
+
4
+ * bulk_import:upload_parts subcommand supports --parallel option to upload
5
+ files in parallel (default=2, max=8)
6
+
7
+
2
8
  == 2012-09-21 version 0.10.51
3
9
 
4
10
  * job:list and job:show show database name
@@ -104,6 +104,7 @@ module Command
104
104
  suffix_count = 0
105
105
  part_prefix = ""
106
106
  auto_perform = false
107
+ parallel = 2
107
108
 
108
109
  op.on('-P', '--prefix NAME', 'add prefix to parts name') {|s|
109
110
  part_prefix = s
@@ -114,20 +115,53 @@ module Command
114
115
  op.on('--auto-perform', 'perform bulk import job automatically', TrueClass) {|b|
115
116
  auto_perform = b
116
117
  }
118
+ op.on('--parallel NUM', 'perform uploading in parallel (default: 2; max 8)', Integer) {|i|
119
+ parallel = i
120
+ }
117
121
 
118
122
  name, *files = op.cmd_parse
119
123
 
120
- files.each {|ifname|
121
- basename = File.basename(ifname)
122
- part_name = part_prefix + basename.split('.')[0..suffix_count].join('.')
124
+ parallel = 1 if parallel <= 1
125
+ parallel = 8 if parallel >= 8
126
+
127
+ threads = (1..parallel).map {|i|
128
+ Thread.new do
129
+ errors = []
130
+ until files.empty?
131
+ ifname = files.shift
132
+ basename = File.basename(ifname)
133
+ begin
134
+ part_name = part_prefix + basename.split('.')[0..suffix_count].join('.')
135
+
136
+ File.open(ifname, "rb") {|io|
137
+ size = io.size
138
+ $stderr.write "Uploading '#{ifname}' -> '#{part_name}'... (#{size} bytes)\n"
139
+
140
+ bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
141
+ }
142
+ rescue
143
+ errors << [ifname, $!]
144
+ end
145
+ end
146
+ errors
147
+ end
148
+ }
123
149
 
124
- File.open(ifname, "rb") {|io|
125
- size = io.size
126
- $stderr.puts "Uploading '#{ifname}' -> '#{part_name}'... (#{size} bytes)"
150
+ errors = []
151
+ threads.each {|t|
152
+ errors.concat t.value
153
+ }
127
154
 
128
- bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
155
+ unless errors.empty?
156
+ $stderr.puts "failed to upload #{errors.size} files."
157
+ errors.each {|(ifname,ex)|
158
+ $stderr.puts " #{ifname}: #{ex}"
159
+ ex.backtrace.each {|bt|
160
+ $stderr.puts " #{ifname}: #{bt}" # print the backtrace frame, not the exception message again
161
+ }
129
162
  }
130
- }
163
+ exit 1
164
+ end
131
165
 
132
166
  $stderr.puts "done."
133
167
 
@@ -311,6 +345,7 @@ module Command
311
345
  end
312
346
  }
313
347
 
348
+ # TODO multi process
314
349
  files.each {|ifname|
315
350
  $stderr.puts "Processing #{ifname}..."
316
351
  record_num = 0
@@ -361,7 +396,7 @@ module Command
361
396
  rescue
362
397
  if retry_limit > 0
363
398
  retry_limit -= 1
364
- $stderr.puts "#{$!}; retrying '#{part_name}'..."
399
+ $stderr.write "#{$!}; retrying '#{part_name}'...\n"
365
400
  sleep retry_wait
366
401
  retry
367
402
  end
@@ -233,7 +233,7 @@ module List
233
233
  add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
234
234
  add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
235
235
  add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
236
- add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --prefix logs_'
236
+ add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --parallel 4'
237
237
  add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
238
238
  add_list 'bulk_import:delete_parts', %w[name ids_], 'Delete uploaded files from a bulk import session', 'bulk_import:delete_parts logs_201201 01h 02h 03h'
239
239
  add_list 'bulk_import:perform', %w[name], 'Start to validate and convert uploaded files', 'bulk_import:perform logs_201201'
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.51'
3
+ VERSION = '0.10.52'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.51
4
+ version: 0.10.52
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-22 00:00:00.000000000 Z
12
+ date: 2012-09-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack