td 0.10.65 → 0.10.66

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
# Specs for FileReader::DecompressIOFilter: wrapping plain and gzip-compressed
# IO objects, with the compression given explicitly or auto-detected.
describe 'FileReader io filters' do
  include_context 'error_proc'

  describe FileReader::DecompressIOFilter do
    # Three JSON access-log records used as the uncompressed fixture.
    let :lines do
      [
        '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"size":95,"time":1353541928}',
        '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"size":43,"time":1353541927}',
        '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"size":79,"time":1353541925}',
      ]
    end

    # Plain (uncompressed) input.
    let :io do
      StringIO.new(lines.join("\n"))
    end

    # Gzip-compressed copy of the fixture, returned as a fresh StringIO
    # positioned at the start of the compressed bytes.
    let :gzipped_io do
      require 'zlib'

      # Named `buffer` so it does not shadow the `io` helper above.
      buffer = StringIO.new('', 'w+')
      gz = Zlib::GzipWriter.new(buffer)
      gz.write(lines.join("\n"))
      gz.close
      StringIO.new(buffer.string)
    end

    it "can't filter with unknown compression" do
      expect {
        FileReader::DecompressIOFilter.filter(io, error, :compress => 'oreore')
      }.to raise_error(Exception, /unknown compression/)
    end

    describe 'gzip' do
      it "can't be wrapped with un-gzipped io" do
        expect {
          FileReader::DecompressIOFilter.filter(io, error, :compress => 'gzip')
        }.to raise_error(Zlib::GzipFile::Error)
      end

      it 'returns Zlib::GzipReader with :gzip' do
        wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, :compress => 'gzip')
        wrapped.should be_an_instance_of(Zlib::GzipReader)
      end

      it 'returns Zlib::GzipReader with auto detection' do
        wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, {})
        wrapped.should be_an_instance_of(Zlib::GzipReader)
      end

      # One context per option set. Declaring `let :reader` repeatedly inside a
      # single example group makes the last declaration win, so each option set
      # needs its own group to be exercised.
      [{:compress => 'gzip'}, {}].each { |opts|
        context "after initialization with #{opts.inspect}" do
          let :reader do
            wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, opts)
            FileReader::LineReader.new(wrapped, error, {})
          end

          it 'forward_row returns one line' do
            reader.forward_row.should == lines[0]
          end

          it 'feeds all lines' do
            begin
              i = 0
              while line = reader.forward_row
                line.should == lines[i]
                i += 1
              end
            rescue RSpec::Expectations::ExpectationNotMetError
              # A mismatched line is a genuine failure; keep it out of the
              # generic rescue below, as the other FileReader specs do.
              raise
            rescue
              # Any other error is only acceptable once the input is exhausted.
              gzipped_io.eof?.should be_true
            end
          end
        end
      }
    end

    describe 'plain' do
      it 'returns passed io with :plain' do
        wrapped = FileReader::DecompressIOFilter.filter(io, error, :compress => 'plain')
        wrapped.should be_an_instance_of(StringIO)
      end

      it 'returns passed io with auto detection' do
        wrapped = FileReader::DecompressIOFilter.filter(io, error, {})
        wrapped.should be_an_instance_of(StringIO)
      end
    end
  end
end
@@ -0,0 +1,227 @@
1
+ require 'spec_helper'
2
+ require 'file_reader/shared_context'
3
+
4
+ require 'stringio'
5
+ require 'td/file_reader'
6
+
7
+ include TreasureData
8
+
9
# Specs for FileReader::LineReader and the line-oriented parsers built on top
# of it (JSON, delimiter, Apache and syslog).
describe FileReader::LineReader do
  include_context 'error_proc'

  # Three JSON access-log records, one per line.
  let :lines do
    [
      '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
      '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"referer":"-","size":43,"agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","time":1353541927}',
      '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
    ]
  end

  let :io do
    StringIO.new(lines.join("\n"))
  end

  it 'initialize' do
    if io.respond_to?(:external_encoding)
      ee = io.external_encoding
    end
    FileReader::LineReader.new(io, error, {})
    if io.respond_to?(:external_encoding)
      io.external_encoding.should == ee
    end
  end

  it 'initialize with specified encoding' do
    if io.respond_to?(:external_encoding)
      # Start from a known non-UTF-8 encoding. The fixture string's own
      # encoding follows the source encoding, which may already be UTF-8, and
      # that would make the "encoding changed" assertion fail on its own.
      io.set_encoding(Encoding::ASCII_8BIT)
      ee = io.external_encoding
    end
    FileReader::LineReader.new(io, error, {:encoding => 'utf-8'})
    if io.respond_to?(:external_encoding)
      io.external_encoding.should_not == ee
      io.external_encoding.should == Encoding.find('utf-8')
    end
  end

  context 'after initialization' do
    let :reader do
      FileReader::LineReader.new(io, error, {})
    end

    it 'forward_row returns one line' do
      reader.forward_row.should == lines[0]
    end

    # TODO: integrate with following shared_examples_for
    it 'feeds all lines' do
      begin
        i = 0
        while line = reader.forward_row
          line.should == lines[i]
          i += 1
        end
      rescue RSpec::Expectations::ExpectationNotMetError
        # Mismatches must fail the example rather than reach the rescue below.
        raise
      rescue
        # Any other error is only acceptable once the input is exhausted.
        io.eof?.should be_true
      end
    end

    # Shared check that `parser.forward` yields every expected record.
    # `step` is the index stride between expected records; the broken-line
    # contexts pass 2 so the record at index 1 is not expected from `forward`.
    # Including groups must define `parser` and `get_expected`.
    shared_examples_for 'parser iterates all' do |step|
      step ||= 1

      it 'feeds all' do
        begin
          i = 0
          while line = parser.forward
            line.should == get_expected.call(i)
            i += step
          end
        rescue RSpec::Expectations::ExpectationNotMetError
          raise
        rescue
          io.eof?.should be_true
        end
      end
    end

    describe FileReader::JSONParser do
      it 'initialize with LineReader' do
        parser = FileReader::JSONParser.new(reader, error, {})
        parser.should_not be_nil
      end

      context 'after initialization' do
        let :parser do
          FileReader::JSONParser.new(reader, error, {})
        end

        it 'forward returns one line' do
          parser.forward.should == JSON.parse(lines[0])
        end

        let :get_expected do
          lambda { |i| JSON.parse(lines[i]) }
        end

        it_should_behave_like 'parser iterates all'

        context 'with broken line' do
          let :lines do
            [
              '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
              '{This is invalid as a JSON}',
              '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
            ]
          end

          # Pattern the shared `error` proc matches the reported reason against.
          let :error_pattern do
            /^invalid json format/
          end

          it_should_behave_like 'parser iterates all', 2
        end
      end
    end

    # Run the same DelimiterParser examples for comma- and tab-separated input.
    [',', "\t"].each { |pattern|
      describe FileReader::DelimiterParser do
        let :lines do
          [
            ['hoge', '12345', Time.now.to_s].join(pattern),
            ['foo', '34567', Time.now.to_s].join(pattern),
            ['piyo', '56789', Time.now.to_s].join(pattern),
          ]
        end

        it "initialize with LineReader and #{pattern} delimiter" do
          parser = FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
          parser.should_not be_nil
        end

        context 'after initialization' do
          let :parser do
            FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
          end

          it 'forward returns one line' do
            parser.forward.should == lines[0].split(pattern)
          end

          let :get_expected do
            lambda { |i| lines[i].split(pattern) }
          end

          it_should_behave_like 'parser iterates all'
        end
      end
    }

    # Parser class => [raw input lines, expected parsed rows].
    {
      FileReader::ApacheParser => [
        [
          '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
          '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
          '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
        ],
        [
          ["58.83.188.60", "-", "23/Oct/2011:08:15:46 -0700", "HEAD", "/", "200", "277", "-", "-"],
          ["127.0.0.1", "-", "23/Oct/2011:08:20:01 -0700", "GET", "/", "200", "492", "-", "Wget/1.12 (linux-gnu)"],
          ["68.64.37.100", "-", "24/Oct/2011:01:48:54 -0700", "GET", "/phpMyAdmin/scripts/setup.php", "404", "480", "-", "ZmEu"],
        ]
      ],
      FileReader::SyslogParser => [
        [
          'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
          'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
          'Dec 22 10:42:41 localhost kernel: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
        ],
        [
          ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.692840] e1000e: eth2 NIC Link is Down"],
          ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.734466] br0: port 1(eth2) entering disabled state"],
          ["Dec 22 10:42:41", "localhost", "kernel", nil, "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"],
        ]
      ]
    }.each_pair { |parser_class, (input, output)|
      describe parser_class do
        let :lines do
          input
        end

        it "initialize with LineReader" do
          parser = parser_class.new(reader, error, {})
          parser.should_not be_nil
        end

        context 'after initialization' do
          let :parser do
            parser_class.new(reader, error, {})
          end

          it 'forward returns one line' do
            parser.forward.should == output[0]
          end

          let :get_expected do
            lambda { |i| output[i] }
          end

          it_should_behave_like 'parser iterates all'

          context 'with broken line' do
            # Same input with the middle line replaced by unparseable text.
            let :lines do
              broken = input.dup
              broken[1] = "Raw text sometimes is broken!"
              broken
            end

            # The expected reason embeds the parser's @format instance variable.
            let :error_pattern do
              /^invalid #{parser.instance_variable_get(:@format)} format/
            end

            it_should_behave_like 'parser iterates all', 2
          end
        end
      end
    }
  end
end
@@ -0,0 +1,120 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+ require 'file_reader/shared_context'
4
+
5
+ require 'stringio'
6
+ require 'msgpack'
7
+ require 'td/file_reader'
8
+
9
+ include TreasureData
10
+
11
# Specs for the readers that parse records straight from an IO:
# MessagePackParsingReader and SeparatedValueParsingReader (CSV / TSV).
describe 'FileReader parsing readers' do
  include_context 'error_proc'

  # Shared checks for any reader exposing `forward`.
  # Including groups must define `reader`, `dataset` and `io`.
  shared_examples_for 'forward basics' do
    it 'forward returns one data' do
      reader.forward.should == dataset[0]
    end

    it 'feeds all dataset' do
      begin
        i = 0
        while line = reader.forward
          line.should == dataset[i]
          i += 1
        end
      rescue RSpec::Expectations::ExpectationNotMetError
        # Mismatches must fail the example rather than reach the rescue below.
        raise
      rescue
        # Any other error is only acceptable once the input is exhausted.
        io.eof?.should be_true
      end
    end
  end

  describe FileReader::MessagePackParsingReader do
    let :dataset do
      [
        {'name' => 'k', 'num' => 12345, 'time' => Time.now.to_i},
        {'name' => 's', 'num' => 34567, 'time' => Time.now.to_i},
        {'name' => 'n', 'num' => 56789, 'time' => Time.now.to_i},
      ]
    end

    # The records serialized back to back as one MessagePack stream.
    let :io do
      StringIO.new(dataset.map(&:to_msgpack).join(""))
    end

    it 'initialize' do
      reader = FileReader::MessagePackParsingReader.new(io, error, {})
      reader.should_not be_nil
    end

    context 'after initialization' do
      let :reader do
        FileReader::MessagePackParsingReader.new(io, error, {})
      end

      it_should_behave_like 'forward basics'
    end
  end

  # Format name => [reader options, raw input records, expected parsed rows].
  # The fixtures cover quoted fields containing a newline, the delimiter and
  # (for CSV) a doubled quote character.
  test_time = Time.now.to_i
  {
    'csv' => [
      {:delimiter_expr => ',', :quote_char => '"', :encoding => 'utf-8'},
      [
        %!k,123,"fo\no",true,#{test_time}!,
        %!s,456,"T,D",false,#{test_time}!,
        %!n,789,"ba""z",false,#{test_time}!,
      ],
      [
        %W(k 123 fo\no true #{test_time}),
        %W(s 456 T,D false #{test_time}),
        %W(n 789 ba\"z false #{test_time}),
      ]
    ],
    'tsv' => [
      {:delimiter_expr => "\t"},
      [
        %!k\t123\t"fo\no"\ttrue\t#{test_time}!,
        %!s\t456\t"b,ar"\tfalse\t#{test_time}!,
        %!n\t789\t"ba\tz"\tfalse\t#{test_time}!,
      ],
      [
        %W(k 123 fo\no true #{test_time}),
        %W(s 456 b,ar false #{test_time}),
        %W(n 789 ba\tz false #{test_time}),
      ]
    ]
  }.each_pair { |format, (opts, input, output)|
    describe FileReader::SeparatedValueParsingReader do
      let :dataset do
        output
      end

      let :lines do
        input
      end

      # Records are joined with the current input record separator.
      let :io do
        StringIO.new(lines.join($/))
      end

      it "initialize #{format}" do
        reader = FileReader::SeparatedValueParsingReader.new(io, error, opts)
        reader.should_not be_nil
      end

      context "after #{format} initialization" do
        let :reader do
          FileReader::SeparatedValueParsingReader.new(io, error, opts)
        end

        it_should_behave_like 'forward basics'

        # TODO: no examples yet for input with broken encodings.
        context "broken encodings" do
        end
      end
    end
  }
end
@@ -0,0 +1,9 @@
1
+ require 'rspec'
2
+
3
# Shared context providing `error`, the callback handed to readers, filters and
# parsers. When called, it asserts that the reported reason matches
# `error_pattern`, which the including example group must define.
shared_context 'error_proc' do
  let(:error) do
    Proc.new do |reason, _data|
      reason.should match(error_pattern)
    end
  end
end