td 0.10.65 → 0.10.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe 'FileReader io filters' do
10
+ include_context 'error_proc'
11
+
12
+ describe FileReader::DecompressIOFilter do
13
+ let :lines do
14
+ [
15
+ '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"size":95,"time":1353541928}',
16
+ '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"size":43,"time":1353541927}',
17
+ '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"size":79,"time":1353541925}',
18
+ ]
19
+ end
20
+
21
+ let :io do
22
+ StringIO.new(lines.join("\n"))
23
+ end
24
+
25
+ let :gzipped_io do
26
+ require 'zlib'
27
+
28
+ io = StringIO.new('', 'w+')
29
+ gz = Zlib::GzipWriter.new(io)
30
+ gz.write(lines.join("\n"))
31
+ gz.close
32
+ StringIO.new(io.string)
33
+ end
34
+
35
+     it "can't filter with unknown compression" do  # an unrecognised :compress value must raise
36
+       expect {
37
+         FileReader::DecompressIOFilter.filter(io, error, :compress => 'oreore')  # 'oreore' is an arbitrary name expected to be rejected
38
+       }.to raise_error(Exception, /unknown compression/)
39
+     end
40
+
41
+ describe 'gzip' do
42
+       it "can't be wrapped with un-gzipped io" do  # forcing gzip on plain text must raise
43
+         expect {
44
+           FileReader::DecompressIOFilter.filter(io, error, :compress => 'gzip')  # `io` holds the uncompressed fixture lines
45
+         }.to raise_error(Zlib::GzipFile::Error)
46
+       end
47
+
48
+       it 'returns Zlib::GzipReader with :gzip' do  # explicit :compress => 'gzip' on gzipped input
49
+         wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, :compress => 'gzip')
50
+         wrapped.should be_an_instance_of(Zlib::GzipReader)  # only the wrapper's class is checked here
51
+       end
52
+
53
+       it 'returns Zlib::GzipReader with auto detection' do  # same gzipped input, but no :compress option
54
+         wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, {})  # empty opts: the filter has to decide by itself
55
+         wrapped.should be_an_instance_of(Zlib::GzipReader)
56
+       end
57
+
58
+ context 'after initialization' do
59
+ [{:compress => 'gzip'}, {}].each { |opts|
60
+ let :reader do
61
+ wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, opts)
62
+ FileReader::LineReader.new(wrapped, error, {})
63
+ end
64
+
65
+ it 'forward_row returns one line' do
66
+ reader.forward_row.should == lines[0]
67
+ end
68
+
69
+ it 'feeds all lines' do
70
+ begin
71
+ i = 0
72
+ while line = reader.forward_row
73
+ line.should == lines[i]
74
+ i += 1
75
+ end
76
+ rescue
77
+ gzipped_io.eof?.should be_true
78
+ end
79
+ end
80
+ }
81
+ end
82
+ end
83
+
84
+     describe 'plain' do  # uncompressed input, with and without an explicit :compress option
85
+       it 'returns passed io with :plain' do
86
+         wrapped = FileReader::DecompressIOFilter.filter(io, error, :compress => 'plain')
87
+         wrapped.should be_an_instance_of(StringIO)  # NOTE(review): checks the class only, not that it is the same object as `io`
88
+       end
89
+
90
+       it 'returns passed io with auto detection' do
91
+         wrapped = FileReader::DecompressIOFilter.filter(io, error, {})  # empty opts: no :compress hint given
92
+         wrapped.should be_an_instance_of(StringIO)
93
+       end
94
+     end
95
+ end
96
+ end
@@ -0,0 +1,227 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
+ describe FileReader::LineReader do
10
+ include_context 'error_proc'
11
+
12
+ let :lines do
13
+ [
14
+ '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
15
+ '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"referer":"-","size":43,"agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","time":1353541927}',
16
+ '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
17
+ ]
18
+ end
19
+
20
+ let :io do
21
+ StringIO.new(lines.join("\n"))
22
+ end
23
+
24
+   it 'initialize' do  # building a LineReader with empty opts must leave the io's external encoding alone
25
+     if io.respond_to?(:external_encoding)  # guard: skipped when the io has no external_encoding method
26
+       ee = io.external_encoding  # encoding before the reader is built
27
+     end
28
+     FileReader::LineReader.new(io, error, {})
29
+     if io.respond_to?(:external_encoding)
30
+       io.external_encoding.should == ee
31
+     end
32
+   end
33
+
34
+   it 'initialize with specifid encoding' do  # :encoding => 'utf-8' must become the io's external encoding
35
+     if io.respond_to?(:external_encoding)  # guard: skipped when the io has no encoding support
36
+       io.set_encoding('ascii-8bit')  # start from a known non-UTF-8 encoding; the fixture string may already be UTF-8
37
+     end
38
+     FileReader::LineReader.new(io, error, {:encoding => 'utf-8'})
39
+     if io.respond_to?(:external_encoding)
40
+       io.external_encoding.should_not == Encoding.find('ascii-8bit')  # the starting encoding must have been replaced
41
+       io.external_encoding.should == Encoding.find('utf-8')
42
+     end
43
+   end
44
+
45
+ context 'after initialization' do
46
+ let :reader do
47
+ FileReader::LineReader.new(io, error, {})
48
+ end
49
+
50
+ it 'forward_row returns one line' do
51
+ reader.forward_row.should == lines[0]
52
+ end
53
+
54
+ # TODO: integrate with following shared_examples_for
55
+ it 'feeds all lines' do
56
+ begin
57
+ i = 0
58
+ while line = reader.forward_row
59
+ line.should == lines[i]
60
+ i += 1
61
+ end
62
+ rescue RSpec::Expectations::ExpectationNotMetError => e
63
+ fail
64
+ rescue
65
+ io.eof?.should be_true
66
+ end
67
+ end
68
+
69
+ shared_examples_for 'parser iterates all' do |step|
70
+ step = step || 1
71
+
72
+ it 'feeds all' do
73
+ begin
74
+ i = 0
75
+ while line = parser.forward
76
+ line.should == get_expected.call(i)
77
+ i += step
78
+ end
79
+ rescue RSpec::Expectations::ExpectationNotMetError => e
80
+ fail
81
+ rescue
82
+ io.eof?.should be_true
83
+ end
84
+ end
85
+ end
86
+
87
+ describe FileReader::JSONParser do
88
+ it 'initialize with LineReader' do
89
+ parser = FileReader::JSONParser.new(reader, error, {})
90
+ parser.should_not be_nil
91
+ end
92
+
93
+ context 'after initialization' do
94
+ let :parser do
95
+ FileReader::JSONParser.new(reader, error, {})
96
+ end
97
+
98
+ it 'forward returns one line' do
99
+ parser.forward.should == JSON.parse(lines[0])
100
+ end
101
+
102
+ let :get_expected do
103
+ lambda { |i| JSON.parse(lines[i]) }
104
+ end
105
+
106
+ it_should_behave_like 'parser iterates all'
107
+
108
+ context 'with broken line' do
109
+ let :lines do
110
+ [
111
+ '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
112
+ '{This is invalid as a JSON}',
113
+ '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
114
+ ]
115
+ end
116
+
117
+ let :error_pattern do
118
+ /^invalid json format/
119
+ end
120
+
121
+ it_should_behave_like 'parser iterates all', 2
122
+ end
123
+ end
124
+ end
125
+
126
+     [',', "\t"].each { |pattern|  # the same group is generated once for a comma and once for a tab
127
+       describe FileReader::DelimiterParser do
128
+         let :lines do  # three rows whose fields are joined with the delimiter under test
129
+           [
130
+             ['hoge', '12345', Time.now.to_s].join(pattern),
131
+             ['foo', '34567', Time.now.to_s].join(pattern),
132
+             ['piyo', '56789', Time.now.to_s].join(pattern),
133
+           ]
134
+         end
135
+
136
+         it "initialize with LineReader and #{pattern} delimiter" do
137
+           parser = FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
138
+           parser.should_not be_nil
139
+         end
140
+
141
+         context 'after initialization' do
142
+           let :parser do  # the delimiter is handed over as a Regexp built from the raw string
143
+             FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
144
+           end
145
+
146
+           it 'forward returns one line' do
147
+             parser.forward.should == lines[0].split(pattern)
148
+           end
149
+
150
+           let :get_expected do  # index => expected record, called by the 'parser iterates all' shared examples
151
+             lambda { |i| lines[i].split(pattern) }
152
+           end
153
+
154
+           it_should_behave_like 'parser iterates all'
155
+         end
156
+       end
157
+     }
158
+
159
+ {
160
+ FileReader::ApacheParser => [
161
+ [
162
+ '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
163
+ '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
164
+ '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
165
+ ],
166
+ [
167
+ ["58.83.188.60", "-", "23/Oct/2011:08:15:46 -0700", "HEAD", "/", "200", "277", "-", "-"],
168
+ ["127.0.0.1", "-", "23/Oct/2011:08:20:01 -0700", "GET", "/", "200", "492", "-", "Wget/1.12 (linux-gnu)"],
169
+ ["68.64.37.100", "-", "24/Oct/2011:01:48:54 -0700", "GET", "/phpMyAdmin/scripts/setup.php", "404", "480", "-", "ZmEu"],
170
+ ]
171
+ ],
172
+ FileReader::SyslogParser => [
173
+ [
174
+ 'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
175
+ 'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
176
+ 'Dec 22 10:42:41 localhost kernel: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
177
+ ],
178
+ [
179
+ ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.692840] e1000e: eth2 NIC Link is Down"],
180
+ ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.734466] br0: port 1(eth2) entering disabled state"],
181
+ ["Dec 22 10:42:41", "localhost", "kernel", nil, "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"],
182
+ ]
183
+ ]
184
+ }.each_pair { |parser_class, (input, output)|
185
+       describe parser_class do  # generated once per entry of the parser => [input, output] table
186
+         let :lines do  # raw log lines fed to the reader
187
+           input
188
+         end
189
+
190
+         it "initialize with LineReader" do
191
+           parser = parser_class.new(reader, error, {})
192
+           parser.should_not be_nil
193
+         end
194
+
195
+         context 'after initialization' do
196
+           let :parser do
197
+             parser_class.new(reader, error, {})
198
+           end
199
+
200
+           it 'forward returns one line' do
201
+             parser.forward.should == output[0]
202
+           end
203
+
204
+           let :get_expected do  # index => expected record, called by the 'parser iterates all' shared examples
205
+             lambda { |i| output[i] }
206
+           end
207
+
208
+           it_should_behave_like 'parser iterates all'
209
+
210
+           context 'with broken line' do
211
+             let :lines do
212
+               broken = input.dup  # copy so the shared table entry is not modified
213
+               broken[1] = "Raw text sometimes is broken!"
214
+               broken
215
+             end
216
+
217
+             let :error_pattern do  # matched against the reason passed to the 'error_proc' error proc; reads the parser's @format ivar
218
+               /^invalid #{parser.instance_variable_get(:@format)} format/
219
+             end
220
+
221
+             it_should_behave_like 'parser iterates all', 2  # stride 2: records are compared with output[0], then output[2]
222
+           end
223
+         end
224
+       end
225
+ }
226
+ end
227
+ end
@@ -0,0 +1,120 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+ require 'file_reader/shared_context'
4
+
5
+ require 'stringio'
6
+ require 'msgpack'
7
+ require 'td/file_reader'
8
+
9
+ include TreasureData
10
+
11
+ describe 'FileReader parsing readers' do
12
+ include_context 'error_proc'
13
+
14
+ shared_examples_for 'forward basics' do
15
+ it 'forward returns one data' do
16
+ reader.forward.should == dataset[0]
17
+ end
18
+
19
+ it 'feeds all dataset' do
20
+ begin
21
+ i = 0
22
+ while line = reader.forward
23
+ line.should == dataset[i]
24
+ i += 1
25
+ end
26
+ rescue RSpec::Expectations::ExpectationNotMetError => e
27
+ fail
28
+ rescue => e
29
+ io.eof?.should be_true
30
+ end
31
+ end
32
+ end
33
+
34
+   describe FileReader::MessagePackParsingReader do
35
+     let :dataset do  # three string-keyed hashes; `time` is the current epoch second when the let block runs
36
+       [
37
+         {'name' => 'k', 'num' => 12345, 'time' => Time.now.to_i},
38
+         {'name' => 's', 'num' => 34567, 'time' => Time.now.to_i},
39
+         {'name' => 'n', 'num' => 56789, 'time' => Time.now.to_i},
40
+       ]
41
+     end
42
+
43
+     let :io do  # the msgpack encodings of all records, concatenated with no separator
44
+       StringIO.new(dataset.map(&:to_msgpack).join(""))
45
+     end
46
+
47
+     it 'initialize' do
48
+       reader = FileReader::MessagePackParsingReader.new(io, error, {})
49
+       reader.should_not be_nil
50
+     end
51
+
52
+     context 'after initialization' do
53
+       let :reader do
54
+         FileReader::MessagePackParsingReader.new(io, error, {})
55
+       end
56
+
57
+       it_should_behave_like 'forward basics'  # `reader`, `dataset` and `io` above are what the shared examples use
58
+     end
59
+   end
60
+
61
+ test_time = Time.now.to_i
62
+ {
63
+ 'csv' => [
64
+ {:delimiter_expr => ',', :quote_char => '"', :encoding => 'utf-8'},
65
+ [
66
+ %!k,123,"fo\no",true,#{test_time}!,
67
+ %!s,456,"T,D",false,#{test_time}!,
68
+ %!n,789,"ba""z",false,#{test_time}!,
69
+ ],
70
+ [
71
+ %W(k 123 fo\no true #{test_time}),
72
+ %W(s 456 T,D false #{test_time}),
73
+ %W(n 789 ba\"z false #{test_time}),
74
+ ]
75
+ ],
76
+ 'tsv' => [
77
+ {:delimiter_expr => "\t"},
78
+ [
79
+ %!k\t123\t"fo\no"\ttrue\t#{test_time}!,
80
+ %!s\t456\t"b,ar"\tfalse\t#{test_time}!,
81
+ %!n\t789\t"ba\tz"\tfalse\t#{test_time}!,
82
+ ],
83
+ [
84
+ %W(k 123 fo\no true #{test_time}),
85
+ %W(s 456 b,ar false #{test_time}),
86
+ %W(n 789 ba\tz false #{test_time}),
87
+ ]
88
+ ]
89
+ }.each_pair { |format, (opts, input, output)|
90
+ describe FileReader::SeparatedValueParsingReader do
91
+ let :dataset do
92
+ output
93
+ end
94
+
95
+ let :lines do
96
+ input
97
+ end
98
+
99
+ let :io do
100
+ StringIO.new(lines.join($/))
101
+ end
102
+
103
+ it "initialize #{format}" do
104
+ reader = FileReader::SeparatedValueParsingReader.new(io, error, opts)
105
+ reader.should_not be_nil
106
+ end
107
+
108
+ context "after #{format} initialization" do
109
+ let :reader do
110
+ reader = FileReader::SeparatedValueParsingReader.new(io, error, opts)
111
+ end
112
+
113
+ it_should_behave_like 'forward basics'
114
+
115
+ context "broken encodings" do
116
+ end
117
+ end
118
+ end
119
+ }
120
+ end
@@ -0,0 +1,9 @@
1
+ require 'rspec'
2
+
3
+ shared_context 'error_proc' do  # provides the `error` callback that the specs pass to readers and parsers
4
+   let :error do
5
+     Proc.new { |reason, data|  # `data` is accepted but not inspected
6
+       reason.should match(error_pattern)  # `error_pattern` is not defined here; the including group must supply it
7
+     }
8
+   end
9
+ end
+ end