td 0.11.7 → 0.11.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e4b7f5341cbcebd3487c7df38951a4275204507e
4
- data.tar.gz: 9aad2c3285c7beb623a97410b62844ff41f74574
3
+ metadata.gz: 7517598e29ec516a589d42d0b1955ddbe2279493
4
+ data.tar.gz: aa7597906e87e24fe94c9529bd33eede0922a218
5
5
  SHA512:
6
- metadata.gz: 24846c57a7df595675ca6999412b0c542ac7beb1850adda09de6f9832152954d33ff2be5335b293213f92e492e558ab711f132598587433b0249a4f803eedebc
7
- data.tar.gz: 65eed33f8222fe6c3fb57067087d1cc477fb3a9bc64d9d427b0375964281c32726b11f8b65a0258ab77ce914a7530a0aadfaa92adb0d40ae243b5a63369294d9
6
+ metadata.gz: b59f3192dece017a26edd4012f55c02ab54cc135c7e63d054fa8e7eeec259f43faf59f0b20a1207f7b30e48064dc00735a1d2df8bd84e8540fb0d4a3a965e495
7
+ data.tar.gz: af178fd2dc244ba7726e7ad5c62a8be7dc6bec3b5f8a6bf357cfb2ddf432b3cf302f7ba2b91d6e83e4173effce885c4a2be0d409a0b712d61fafe2d651a6c755
data/ChangeLog CHANGED
@@ -1,3 +1,23 @@
1
+ == 2015-02-20 version 0.11.8
2
+
3
+ * Fix: save CSV/TSV file without transcoding.
4
+
5
+ The fix 9d74b44b533878ee5f2a84db3506279f87f85bef in 0.11.7 broke the
6
+ job:show command when the download format is CSV or TSV and the target
7
+ query result contains a non-UTF-8 encoded byte sequence.
8
+
9
+ 0.11.6 and earlier transcode and *sanitize* non-UTF-8 byte
10
+ sequences before saving to file. So the generated file is always in
11
+ UTF-8, but could be corrupted when the query result is not in UTF-8.
12
+
13
+ 0.11.7 simply removed that transcoding logic in
14
+ 9d74b44b533878ee5f2a84db3506279f87f85bef for performance, and crashes on
15
+ non-UTF-8 encoded byte sequences.
16
+
17
+ This release fixes 0.11.7 and saves non-UTF-8 encoded byte sequences
18
+ as-is (through 'BINARY' encoding) so that the generated file is
19
+ properly encoded in UTF-8, Shift_JIS, or whatever the query result uses.
20
+
1
21
  == 2015-02-16 version 0.11.7
2
22
 
3
23
  * Fix server:endpoint command not working
@@ -515,7 +515,10 @@ module Command
515
515
  end
516
516
 
517
517
  def dump_column(v)
518
- v.is_a?(String) ? v.to_s : Yajl.dump(v)
518
+ s = v.is_a?(String) ? v.to_s : Yajl.dump(v)
519
+ # CAUTION: msgpack-ruby populates byte sequences as Encoding.default_internal which should be BINARY
520
+ s = s.force_encoding('BINARY') if s.respond_to?(:encode)
521
+ s
519
522
  end
520
523
 
521
524
  def dump_column_safe_utf8(v)
data/lib/td/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TreasureData
2
- TOOLBELT_VERSION = '0.11.7'
2
+ TOOLBELT_VERSION = '0.11.8'
3
3
  end
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'spec_helper'
2
4
  require 'td/command/common'
3
5
  require 'td/command/job'
@@ -10,17 +12,17 @@ module TreasureData::Command
10
12
  Class.new { include TreasureData::Command }.new
11
13
  end
12
14
 
13
- let :job do
14
- job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
15
- job.instance_eval do
16
- @result = [[["1", 2.0, {key:3}], 1], [["4", 5.0, {key:6}], 2], [["7", 8.0, {key:9}], 3]]
17
- @result_size = 3
18
- @status = 'success'
15
+ describe 'write_result' do
16
+ let :job do
17
+ job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
18
+ job.instance_eval do
19
+ @result = [[["1", 2.0, {key:3}], 1], [["4", 5.0, {key:6}], 2], [["7", 8.0, {key:9}], 3]]
20
+ @result_size = 3
21
+ @status = 'success'
22
+ end
23
+ job
19
24
  end
20
- job
21
- end
22
25
 
23
- describe 'write_result' do
24
26
  it 'supports json output' do
25
27
  file = Tempfile.new("job_spec")
26
28
  command.send(:show_result, job, file, nil, 'json')
@@ -39,5 +41,62 @@ module TreasureData::Command
39
41
  File.read(file.path).should == %Q(1\t2.0\t{"key":3}\n4\t5.0\t{"key":6}\n7\t8.0\t{"key":9}\n)
40
42
  end
41
43
  end
44
+
45
+ describe 'multibyte chars' do
46
+ let :multibyte_string do
47
+ # Originally Windows-31J bytes, but tagged as UTF-8, the way msgpack-ruby populates them
48
+ "\x83\x81\x81[\x83\x8B"
49
+ end
50
+
51
+ let :multibyte_row do
52
+ [multibyte_string, 2.0, {multibyte_string => multibyte_string}]
53
+ end
54
+
55
+ let :job do
56
+ row = multibyte_row
57
+ job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
58
+ job.instance_eval do
59
+ @result = [[row, 1], [row, 2]]
60
+ @result_size = 2
61
+ @status = 'success'
62
+ end
63
+ job
64
+ end
65
+
66
+ it 'assumes test setting is correct' do
67
+ # the String is actually in Windows-31J, but the encoding msgpack-ruby reports is UTF-8
68
+ multibyte_string.encoding.should == Encoding::UTF_8
69
+ multibyte_string.force_encoding('Windows-31J').encode('UTF-8').should == 'メール'
70
+ end
71
+
72
+ it 'supports json output' do
73
+ row = multibyte_row
74
+ file = Tempfile.new("job_spec")
75
+ command.send(:show_result, job, file, nil, 'json')
76
+ File.read(file.path).should == '[' + [row, row].map { |e| Yajl.dump(e) }.join(",\n") + ']'
77
+ end
78
+
79
+ it 'supports csv output' do
80
+ row = multibyte_row.map { |e| dump_column(e) }
81
+ file = Tempfile.new("job_spec")
82
+ command.send(:show_result, job, file, nil, 'csv')
83
+ File.binread(file.path).should == [row, row].map { |e| CSV.generate_line(e) }.join
84
+ File.open(file.path, 'r:Windows-31J').read.encode('UTF-8').split.first.should == 'メール,2.0,"{""メール"":""メール""}"'
85
+ end
86
+
87
+ it 'supports tsv output' do
88
+ row = multibyte_row.map { |e| dump_column(e) }
89
+ file = Tempfile.new("job_spec")
90
+ command.send(:show_result, job, file, nil, 'tsv')
91
+ File.binread(file.path).should == [row, row].map { |e| e.join("\t") + "\n" }.join
92
+ File.open(file.path, 'r:Windows-31J').read.encode('UTF-8').split("\n").first.should == "メール\t2.0\t{\"メール\":\"メール\"}"
93
+ end
94
+ end
95
+
96
+ def dump_column(v)
97
+ s = v.is_a?(String) ? v.to_s : Yajl.dump(v)
98
+ s = s.force_encoding('BINARY')
99
+ s
100
+ end
42
101
  end
43
102
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.7
4
+ version: 0.11.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Treasure Data, Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-17 00:00:00.000000000 Z
11
+ date: 2015-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: msgpack