td 0.10.66 → 0.10.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .bundle
2
+ build/td-java
3
+ Gemfile.lock
4
+ vendor/*
5
+ *~
data/ChangeLog CHANGED
@@ -1,4 +1,9 @@
1
1
 
2
+ == 2013-01-18 version 0.10.67
3
+
4
+ * Added bulk_import:prepare_parts2 subcommand which is faster and more reliable
5
+
6
+
2
7
  == 2013-01-16 version 0.10.66
3
8
 
4
9
  * td-client v0.8.42
data/Rakefile CHANGED
@@ -13,6 +13,10 @@ def version
13
13
  TreasureData::VERSION
14
14
  end
15
15
 
16
# Rebuild the bundled td-java jar by running build/update-td.sh.
# Kernel#system returns false (non-zero exit) or nil (could not be run) on
# failure; abort in that case so the rake task does not report success after
# a failed build.
task "jar" do
  system('./build/update-td.sh') || abort('./build/update-td.sh failed')
end
19
+
16
20
  def project_root_path(path)
17
21
  "#{PROJECT_ROOT}/#{path}"
18
22
  end
@@ -0,0 +1,37 @@
1
#!/bin/bash
# Build the td-java jar from source, copy it into ../java (relative to this
# script's directory), record the built revision in java/td_java.version,
# and commit both files.
#
# Usage: update-td.sh [REVISION]
#   REVISION  optional git revision of td-java to build; when omitted the
#             tip of master is built.
#
# Every step that can fail aborts the script with exit status 1, so a stale
# jar is never committed together with a fresh version stamp.

cd "$(dirname "$0")" || exit 1
chrev="$1"

if [ -d td-java/.git ]; then
    # Existing clone: remove the (non-hidden) work tree entries, restore the
    # tracked files, and update.
    rm -rf td-java/*
    cd td-java || exit 1
    git checkout . || exit 1
    # Switch to master before pulling: an earlier aborted run may have left
    # the clone on a detached revision, where a plain `git pull` fails.
    git checkout master || exit 1
    git pull || exit 1
else
    rm -rf td-java/
    git clone git@github.com:treasure-data/td-java.git td-java || exit 1
    cd td-java || exit 1
fi
git checkout master || exit 1

if [ -n "$chrev" ]; then
    git checkout "$chrev" || exit 1
fi

# "<commit hash> <author date>" of the revision being built.
revname="$(git log -1 --pretty=format:'%H %ad')"
if [ -z "$revname" ]; then
    exit 1
fi

mvn package -Dmaven.test.skip=true || exit 1
cp target/td-0.1.1-SNAPSHOT.jar ../../java/td-0.1.1-SNAPSHOT.jar || exit 1

if [ -n "$chrev" ]; then
    # Leave the clone on master for the next run.
    git checkout master || exit 1
fi

cd ../../ || exit 1

echo "$revname" > java/td_java.version || exit 1

echo ""
echo "git commit ./java -m \"updated td-java $revname\""
git commit ./java/td-0.1.1-SNAPSHOT.jar ./java/td_java.version -m "updated td-java $revname" || exit 1
37
+
Binary file
@@ -0,0 +1 @@
1
+ a0bb26a737329cc3df9987c0f1a6ea7188d3b334 Fri Jan 18 15:03:20 2013 +0900
@@ -392,6 +392,102 @@ module Command
392
392
  }
393
393
  end
394
394
 
395
# Convert source files into bulk-import part files by running the
# BulkImportTool class ("prepare_parts") from the bundled td-*.jar in a JVM.
#
# Parses this subcommand's options from +op+, requires --column-types and
# --output, looks for a td-*.jar below <base>/java, and then replaces the
# current process with the java command via Kernel#exec.
#
# op:: option parser for this subcommand; it is sent #on and #cmd_parse.
#
# Does not return on success (the process image is replaced by java).
# Exits with status 1 when a required option is missing, when no td-*.jar
# is found, or when java cannot be started.
def bulk_import_prepare_parts2(op)
  format = 'csv'
  columns = nil
  column_types = nil
  has_header = nil
  time_column = 'time'
  time_value = nil
  split_size_kb = PART_SPLIT_SIZE / 1024 # kb
  outdir = nil

  op.on('-f', '--format NAME', 'source file format [csv]') {|s|
    format = s
  }
  op.on('-h', '--columns NAME,NAME,...', 'column names (use --column-header instead if the first line has column names)') {|s|
    columns = s
  }
  op.on('--column-types TYPE,TYPE,...', 'column types [string, int, long]') {|s|
    column_types = s
  }
  op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
    has_header = b
  }
  op.on('-t', '--time-column NAME', 'name of the time column') {|s|
    time_column = s
  }
  op.on('--time-value TIME', 'long value of the time column') {|s|
    # A plain integer is used as-is; anything else is parsed as a
    # date/time string and converted to epoch seconds.
    if s.to_i.to_s == s
      time_value = s.to_i
    else
      require 'time'
      time_value = Time.parse(s).to_i
    end
  }
  op.on('-s', '--split-size SIZE_IN_KB', "size of each parts (default: #{split_size_kb})", Integer) {|i|
    split_size_kb = i
  }
  op.on('-o', '--output DIR', 'output directory') {|s|
    outdir = s
  }

  files = op.cmd_parse
  files = [files] unless files.is_a?(Array) # TODO ruby 1.9

  # options validation
  unless column_types
    $stderr.puts "--column-types TYPE,TYPE,... option is required."
    exit 1
  end
  unless outdir
    $stderr.puts "-o, --output DIR option is required."
    exit 1
  end

  # java command
  javacmd = 'java'

  # jvm options
  jvm_opts = ['-Xmx1024m'] # TODO

  # find the td-*.jar below <base>/java
  base_path = File.expand_path('../../..', File.dirname(__FILE__)) # TODO
  libjars = Dir.glob("#{base_path}/java/**/*.jar")
  td_command_jar = libjars.find { |path| File.basename(path) =~ /^td-/ }
  unless td_command_jar
    # Fail here with a clear message instead of letting the JVM fail later
    # with a class-not-found error on an empty classpath entry.
    $stderr.puts "td-*.jar was not found under #{base_path}/java."
    exit 1
  end

  # system properties read by the java tool
  sysprops = []
  sysprops << "-Dtd.bulk_import.prepare_parts.format=#{format}"
  sysprops << "-Dtd.bulk_import.prepare_parts.columns=#{columns}" if columns
  sysprops << "-Dtd.bulk_import.prepare_parts.column-types=#{column_types}" if column_types
  sysprops << "-Dtd.bulk_import.prepare_parts.column-header=#{has_header}" if has_header
  sysprops << "-Dtd.bulk_import.prepare_parts.time-column=#{time_column}"
  sysprops << "-Dtd.bulk_import.prepare_parts.time-value=#{time_value}" if time_value
  sysprops << "-Dtd.bulk_import.prepare_parts.split-size=#{split_size_kb}"
  sysprops << "-Dtd.bulk_import.prepare_parts.output-dir=#{outdir}"

  # Build the command as an argument list rather than one string: the
  # multi-argument form of exec does not go through a shell, so file names
  # and option values containing spaces or shell metacharacters are passed
  # to java unchanged.
  args = []
  args.concat(jvm_opts)
  args << '-cp' << ".:#{td_command_jar}"
  args.concat(sysprops)
  args << 'com.treasure_data.tools.BulkImportTool'
  args << 'prepare_parts'
  args.concat(files)

  begin
    exec(javacmd, *args)
  rescue => e
    # exec only returns control by raising (e.g. java is not on the PATH).
    $stderr.puts "Failed to run #{javacmd}: #{e.message}"
    exit 1
  end
end
490
+
395
491
  private
396
492
  def bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
397
493
  begin
@@ -233,6 +233,7 @@ module List
233
233
  add_list 'bulk_import:show', %w[name], 'Show list of uploaded parts', 'bulk_import:show'
234
234
  add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the the table', 'bulk_import:create logs_201201 example_db event_logs'
235
235
  add_list 'bulk_import:prepare_parts', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
236
+ add_list 'bulk_import:prepare_parts2', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_parts2 logs/*.csv --format csv --columns time,uid,price,count --column-types long,string,long,int --time-column "time" -o parts/'
236
237
  add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
237
238
  add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts parts/* --parallel 4'
238
239
  add_list 'bulk_import:delete_part', %w[name id], 'Delete a uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
data/lib/td/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module TreasureData
2
2
 
3
- VERSION = '0.10.66'
3
+ VERSION = '0.10.67'
4
4
 
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.66
4
+ version: 0.10.67
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-16 00:00:00.000000000 Z
12
+ date: 2013-01-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: msgpack
@@ -162,11 +162,13 @@ executables:
162
162
  extensions: []
163
163
  extra_rdoc_files: []
164
164
  files:
165
+ - .gitignore
165
166
  - ChangeLog
166
167
  - Gemfile
167
168
  - README.rdoc
168
169
  - Rakefile
169
170
  - bin/td
171
+ - build/update-td.sh
170
172
  - data/sample_apache.json
171
173
  - data/sample_apache_gen.rb
172
174
  - dist/exe.rake
@@ -179,6 +181,8 @@ files:
179
181
  - dist/resources/pkg/PackageInfo.erb
180
182
  - dist/resources/pkg/postinstall
181
183
  - dist/resources/pkg/td
184
+ - java/td-0.1.1-SNAPSHOT.jar
185
+ - java/td_java.version
182
186
  - lib/td.rb
183
187
  - lib/td/command/account.rb
184
188
  - lib/td/command/acl.rb