td 0.11.7 → 0.11.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e4b7f5341cbcebd3487c7df38951a4275204507e
4
- data.tar.gz: 9aad2c3285c7beb623a97410b62844ff41f74574
3
+ metadata.gz: 7517598e29ec516a589d42d0b1955ddbe2279493
4
+ data.tar.gz: aa7597906e87e24fe94c9529bd33eede0922a218
5
5
  SHA512:
6
- metadata.gz: 24846c57a7df595675ca6999412b0c542ac7beb1850adda09de6f9832152954d33ff2be5335b293213f92e492e558ab711f132598587433b0249a4f803eedebc
7
- data.tar.gz: 65eed33f8222fe6c3fb57067087d1cc477fb3a9bc64d9d427b0375964281c32726b11f8b65a0258ab77ce914a7530a0aadfaa92adb0d40ae243b5a63369294d9
6
+ metadata.gz: b59f3192dece017a26edd4012f55c02ab54cc135c7e63d054fa8e7eeec259f43faf59f0b20a1207f7b30e48064dc00735a1d2df8bd84e8540fb0d4a3a965e495
7
+ data.tar.gz: af178fd2dc244ba7726e7ad5c62a8be7dc6bec3b5f8a6bf357cfb2ddf432b3cf302f7ba2b91d6e83e4173effce885c4a2be0d409a0b712d61fafe2d651a6c755
data/ChangeLog CHANGED
@@ -1,3 +1,23 @@
1
+ == 2015-02-20 version 0.11.8
2
+
3
+ * Fix: save CSV/TSV file without transcoding.
4
+
5
+ The change in 9d74b44b533878ee5f2a84db3506279f87f85bef, released in 0.11.7,
6
+ broke the job:show command when the download format is CSV or TSV and the
7
+ query result contains a non-UTF-8 byte sequence.
8
+
9
+ 0.11.6 and earlier transcode and *sanitize* non-UTF-8 byte sequences
10
+ before saving to a file, so the generated file is always in UTF-8 but
11
+ may be corrupted when the query result is not in UTF-8.
12
+
13
+ 0.11.7 simply removed that transcoding logic (in
14
+ 9d74b44b533878ee5f2a84db3506279f87f85bef) for performance, and as a result
15
+ crashes on non-UTF-8 byte sequences.
16
+
17
+ This release fixes the 0.11.7 regression and saves non-UTF-8 byte sequences
18
+ as-is (through 'BINARY' encoding), so that the generated file keeps its
19
+ original encoding, whether UTF-8, Shift_JIS, or anything else.
20
+
1
21
  == 2015-02-16 version 0.11.7
2
22
 
3
23
  * Fix server:endpoint command not working
@@ -515,7 +515,10 @@ module Command
515
515
  end
516
516
 
517
517
  def dump_column(v)
518
- v.is_a?(String) ? v.to_s : Yajl.dump(v)
518
+ s = v.is_a?(String) ? v.to_s : Yajl.dump(v)
519
+ # CAUTION: msgpack-ruby tags byte sequences with Encoding.default_internal, but they should be treated as BINARY
520
+ s = s.force_encoding('BINARY') if s.respond_to?(:encode)
521
+ s
519
522
  end
520
523
 
521
524
  def dump_column_safe_utf8(v)
data/lib/td/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TreasureData
2
- TOOLBELT_VERSION = '0.11.7'
2
+ TOOLBELT_VERSION = '0.11.8'
3
3
  end
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'spec_helper'
2
4
  require 'td/command/common'
3
5
  require 'td/command/job'
@@ -10,17 +12,17 @@ module TreasureData::Command
10
12
  Class.new { include TreasureData::Command }.new
11
13
  end
12
14
 
13
- let :job do
14
- job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
15
- job.instance_eval do
16
- @result = [[["1", 2.0, {key:3}], 1], [["4", 5.0, {key:6}], 2], [["7", 8.0, {key:9}], 3]]
17
- @result_size = 3
18
- @status = 'success'
15
+ describe 'write_result' do
16
+ let :job do
17
+ job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
18
+ job.instance_eval do
19
+ @result = [[["1", 2.0, {key:3}], 1], [["4", 5.0, {key:6}], 2], [["7", 8.0, {key:9}], 3]]
20
+ @result_size = 3
21
+ @status = 'success'
22
+ end
23
+ job
19
24
  end
20
- job
21
- end
22
25
 
23
- describe 'write_result' do
24
26
  it 'supports json output' do
25
27
  file = Tempfile.new("job_spec")
26
28
  command.send(:show_result, job, file, nil, 'json')
@@ -39,5 +41,62 @@ module TreasureData::Command
39
41
  File.read(file.path).should == %Q(1\t2.0\t{"key":3}\n4\t5.0\t{"key":6}\n7\t8.0\t{"key":9}\n)
40
42
  end
41
43
  end
44
+
45
+ describe 'multibyte chars' do
46
+ let :multibyte_string do
47
+ # Windows-31J bytes tagged as UTF-8, the way msgpack-ruby populates strings
48
+ "\x83\x81\x81[\x83\x8B"
49
+ end
50
+
51
+ let :multibyte_row do
52
+ [multibyte_string, 2.0, {multibyte_string => multibyte_string}]
53
+ end
54
+
55
+ let :job do
56
+ row = multibyte_row
57
+ job = TreasureData::Job.new(nil, 12345, 'hive', 'select * from employee')
58
+ job.instance_eval do
59
+ @result = [[row, 1], [row, 2]]
60
+ @result_size = 2
61
+ @status = 'success'
62
+ end
63
+ job
64
+ end
65
+
66
+ it 'assumes test setting is correct' do
67
+ # the String's bytes are actually Windows-31J, but its encoding is reported as UTF-8, as msgpack-ruby does
68
+ multibyte_string.encoding.should == Encoding::UTF_8
69
+ multibyte_string.force_encoding('Windows-31J').encode('UTF-8').should == 'メール'
70
+ end
71
+
72
+ it 'supports json output' do
73
+ row = multibyte_row
74
+ file = Tempfile.new("job_spec")
75
+ command.send(:show_result, job, file, nil, 'json')
76
+ File.read(file.path).should == '[' + [row, row].map { |e| Yajl.dump(e) }.join(",\n") + ']'
77
+ end
78
+
79
+ it 'supports csv output' do
80
+ row = multibyte_row.map { |e| dump_column(e) }
81
+ file = Tempfile.new("job_spec")
82
+ command.send(:show_result, job, file, nil, 'csv')
83
+ File.binread(file.path).should == [row, row].map { |e| CSV.generate_line(e) }.join
84
+ File.open(file.path, 'r:Windows-31J').read.encode('UTF-8').split.first.should == 'メール,2.0,"{""メール"":""メール""}"'
85
+ end
86
+
87
+ it 'supports tsv output' do
88
+ row = multibyte_row.map { |e| dump_column(e) }
89
+ file = Tempfile.new("job_spec")
90
+ command.send(:show_result, job, file, nil, 'tsv')
91
+ File.binread(file.path).should == [row, row].map { |e| e.join("\t") + "\n" }.join
92
+ File.open(file.path, 'r:Windows-31J').read.encode('UTF-8').split("\n").first.should == "メール\t2.0\t{\"メール\":\"メール\"}"
93
+ end
94
+ end
95
+
96
+ def dump_column(v)
97
+ s = v.is_a?(String) ? v.to_s : Yajl.dump(v)
98
+ s = s.force_encoding('BINARY')
99
+ s
100
+ end
42
101
  end
43
102
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.7
4
+ version: 0.11.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Treasure Data, Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-17 00:00:00.000000000 Z
11
+ date: 2015-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: msgpack