td 0.10.66 → 0.10.67

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .bundle
2
+ build/td-java
3
+ Gemfile.lock
4
+ vendor/*
5
+ *~
data/ChangeLog CHANGED
@@ -1,4 +1,9 @@
1
1
 
2
+ == 2013-01-18 version 0.10.67
3
+
4
+ * Added bulk_import:prepare_parts2 subcommand which is faster and more reliable
5
+
6
+
2
7
  == 2013-01-16 version 0.10.66
3
8
 
4
9
  * td-client v0.8.42
data/Rakefile CHANGED
@@ -13,6 +13,10 @@ def version
13
13
  TreasureData::VERSION
14
14
  end
15
15
 
16
# Rake task "jar": runs build/update-td.sh (relative to the current
# directory) to rebuild the bundled td-java jar.
#
# Kernel#system returns false for a non-zero exit status and nil when the
# command cannot be executed, so the result is checked and the rake run is
# aborted instead of reporting success after a failed build.
task "jar" do
  unless system('./build/update-td.sh')
    abort "build/update-td.sh failed"
  end
end
19
+
16
20
  def project_root_path(path)
17
21
  "#{PROJECT_ROOT}/#{path}"
18
22
  end
@@ -0,0 +1,37 @@
1
#!/bin/bash
#
# Rebuilds the td-java jar and commits it together with a version stamp.
#
# Usage: update-td.sh [REVISION]
#   REVISION  optional git revision of td-java to build; when omitted the
#             tip of master is built.
#
# Steps: refresh (or clone) the td-java working copy next to this script,
# build it with Maven, copy the jar into ../java/, record "<sha> <date>" of
# the built revision in java/td_java.version, then commit both files.
# Every step that later steps depend on aborts the script on failure.

# All relative paths below assume we are in this script's directory.
cd "$(dirname "$0")" || exit 1
chrev="$1"

if [ -d td-java/.git ]; then
    # Existing clone: wipe the work tree, restore tracked files, then update.
    rm -rf td-java/*
    cd td-java || exit 1
    git checkout . || exit 1
    git pull || exit 1
else
    rm -rf td-java/
    git clone git@github.com:treasure-data/td-java.git td-java || exit 1
    cd td-java || exit 1
fi
git checkout master || exit 1

if [ -n "$chrev" ]; then
    # Build the requested revision; fail instead of silently building master.
    git checkout "$chrev" || exit 1
fi

# First line of `git show` is "<commit hash> <author date>" of HEAD.
revname="$(git show --pretty=format:'%H %ad' | head -n 1)"

mvn package -Dmaven.test.skip=true || exit 1
# Without this check a failed copy would commit a stale jar with a new stamp.
cp target/td-0.1.1-SNAPSHOT.jar ../../java/td-0.1.1-SNAPSHOT.jar || exit 1

if [ -n "$chrev" ]; then
    # Leave the td-java clone on master for the next run.
    git checkout master || exit 1
fi

cd ../../ || exit 1

echo "$revname" > java/td_java.version

echo ""
echo "git commit ./java -m \"updated td-java $revname\""
git commit ./java/td-0.1.1-SNAPSHOT.jar ./java/td_java.version -m "updated td-java $revname" || exit 1
37
+
Binary file
@@ -0,0 +1 @@
1
+ a0bb26a737329cc3df9987c0f1a6ea7188d3b334 Fri Jan 18 15:03:20 2013 +0900
@@ -392,6 +392,102 @@ module Command
392
392
  }
393
393
  end
394
394
 
395
# Converts source files into bulk-import part files by running the
# BulkImportTool class from the td-java jar found under <base>/java.
#
# op:: command option parser; it is given the option definitions below and
#      op.cmd_parse returns the input file name(s).
#
# --column-types and -o/--output are required; the process exits with
# status 1 when either is missing, when no td-*.jar is found, or when the
# java command cannot be started. On success the current process is
# replaced by the java process (Kernel#exec), so this method does not return.
def bulk_import_prepare_parts2(op)
  format = 'csv'
  columns = nil
  column_types = nil
  has_header = nil
  time_column = 'time'
  time_value = nil
  split_size_kb = PART_SPLIT_SIZE / 1024 # kb
  outdir = nil

  op.on('-f', '--format NAME', 'source file format [csv]') {|s|
    format = s
  }
  op.on('-h', '--columns NAME,NAME,...', 'column names (use --column-header instead if the first line has column names)') {|s|
    columns = s
  }
  op.on('--column-types TYPE,TYPE,...', 'column types [string, int, long]') {|s|
    column_types = s
  }
  op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
    has_header = b
  }
  op.on('-t', '--time-column NAME', 'name of the time column') {|s|
    time_column = s
  }
  op.on('--time-value TIME', 'long value of the time column') {|s|
    # A plain integer is used as-is; anything else is parsed as a time
    # string and converted to epoch seconds.
    if s.to_i.to_s == s
      time_value = s.to_i
    else
      require 'time'
      time_value = Time.parse(s).to_i
    end
  }
  op.on('-s', '--split-size SIZE_IN_KB', "size of each parts (default: #{split_size_kb})", Integer) {|i|
    split_size_kb = i
  }
  op.on('-o', '--output DIR', 'output directory') {|s|
    outdir = s
  }

  files = op.cmd_parse
  files = [files] unless files.is_a?(Array) # TODO ruby 1.9

  # options validation
  unless column_types
    $stderr.puts "--column-types TYPE,TYPE,... option is required."
    exit 1
  end
  unless outdir
    $stderr.puts "-o, --output DIR option is required."
    exit 1
  end

  # java command
  javacmd = 'java'

  # make jvm options
  jvm_opts = []
  jvm_opts << "-Xmx1024m" # TODO

  # find the td-*.jar under java/
  base_path = File.expand_path('../../..', File.dirname(__FILE__)) # TODO
  libjars = Dir.glob("#{base_path}/java/**/*.jar")
  td_command_jar = libjars.find {|path| File.basename(path) =~ /^td-/ }
  unless td_command_jar
    # Without the jar the JVM would only fail later with an opaque
    # class-not-found error, so stop here with a clear message.
    $stderr.puts "td-java jar file is not found under #{base_path}/java."
    exit 1
  end

  # make application options; "-cp" and its value are separate argv
  # elements, and the classpath uses the platform's path separator.
  app_opts = []
  app_opts << "-cp"
  app_opts << [".", td_command_jar].join(File::PATH_SEPARATOR)

  # make system properties (each one is a single argv element, so values
  # containing spaces or shell metacharacters are passed through verbatim)
  sysprops = []
  sysprops << "-Dtd.bulk_import.prepare_parts.format=#{format}"
  sysprops << "-Dtd.bulk_import.prepare_parts.columns=#{columns}" if columns
  sysprops << "-Dtd.bulk_import.prepare_parts.column-types=#{column_types}" if column_types
  sysprops << "-Dtd.bulk_import.prepare_parts.column-header=#{has_header}" if has_header
  sysprops << "-Dtd.bulk_import.prepare_parts.time-column=#{time_column}"
  sysprops << "-Dtd.bulk_import.prepare_parts.time-value=#{time_value.to_s}" if time_value
  sysprops << "-Dtd.bulk_import.prepare_parts.split-size=#{split_size_kb}"
  sysprops << "-Dtd.bulk_import.prepare_parts.output-dir=#{outdir}"

  # make application arguments
  app_args = []
  app_args << 'com.treasure_data.tools.BulkImportTool'
  app_args << 'prepare_parts'
  app_args.concat(files)

  # Argument-list form of exec: no shell is involved, so file names and
  # option values cannot be split or interpreted by a shell.
  command = [javacmd] + jvm_opts + app_opts + sysprops + app_args

  begin
    exec(*command)
  rescue => e
    $stderr.puts "Failed to execute #{javacmd}: #{e.message}"
    exit 1
  end
end
490
+
395
491
  private
396
492
  def bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
397
493
  begin
@@ -233,6 +233,7 @@ module List
233
233
  add_list 'bulk_import:show', %w[name], 'Show list of uploaded parts', 'bulk_import:show'
234
234
  add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
235
235
  add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
236
+ add_list 'bulk_import:prepare_parts2', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts2 logs/*.csv --format csv --columns time,uid,price,count --column-types long,string,long,int --time-column "time" -o parts/'
236
237
  add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
237
238
  add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --parallel 4'
238
239
  add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.66'
3
+ VERSION = '0.10.67'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.66
4
+ version: 0.10.67
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-16 00:00:00.000000000 Z
12
+ date: 2013-01-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -162,11 +162,13 @@ executables:
162
162
  extensions: []
163
163
  extra_rdoc_files: []
164
164
  files:
165
+ - .gitignore
165
166
  - ChangeLog
166
167
  - Gemfile
167
168
  - README.rdoc
168
169
  - Rakefile
169
170
  - bin/td
171
+ - build/update-td.sh
170
172
  - data/sample_apache.json
171
173
  - data/sample_apache_gen.rb
172
174
  - dist/exe.rake
@@ -179,6 +181,8 @@ files:
179
181
  - dist/resources/pkg/PackageInfo.erb
180
182
  - dist/resources/pkg/postinstall
181
183
  - dist/resources/pkg/td
184
+ - java/td-0.1.1-SNAPSHOT.jar
185
+ - java/td_java.version
182
186
  - lib/td.rb
183
187
  - lib/td/command/account.rb
184
188
  - lib/td/command/acl.rb