td 0.10.65 → 0.10.66
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +7 -0
- data/lib/td/command/import.rb +3 -3
- data/lib/td/command/job.rb +1 -0
- data/lib/td/command/query.rb +5 -1
- data/lib/td/command/sched.rb +12 -1
- data/lib/td/file_reader.rb +138 -50
- data/lib/td/version.rb +1 -1
- data/spec/file_reader/filter_spec.rb +236 -0
- data/spec/file_reader/io_filter_spec.rb +96 -0
- data/spec/file_reader/line_reader_spec.rb +227 -0
- data/spec/file_reader/parsing_reader_spec.rb +120 -0
- data/spec/file_reader/shared_context.rb +9 -0
- data/spec/file_reader_spec.rb +401 -0
- data/spec/spec_helper.rb +16 -0
- data/td.gemspec +3 -1
- metadata +52 -5
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'file_reader/shared_context'
|
3
|
+
|
4
|
+
require 'stringio'
|
5
|
+
require 'td/file_reader'
|
6
|
+
|
7
|
+
include TreasureData
|
8
|
+
|
9
|
+
# Specs for FileReader::DecompressIOFilter: rejecting unknown compression
# names, wrapping gzipped input in a Zlib::GzipReader, and passing plain
# input through untouched.
describe 'FileReader io filters' do
  # NOTE(review): presumably provides the `error` helper used below -- confirm
  # against file_reader/shared_context.
  include_context 'error_proc'

  describe FileReader::DecompressIOFilter do
    # Three JSON log records used as the uncompressed payload.
    let :lines do
      [
        '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"size":95,"time":1353541928}',
        '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"size":43,"time":1353541927}',
        '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"size":79,"time":1353541925}',
      ]
    end

    # Plain (uncompressed) input.
    let :io do
      StringIO.new(lines.join("\n"))
    end

    # The same payload, gzip-compressed in memory and re-wrapped in a fresh
    # StringIO so that reading starts from the beginning.
    let :gzipped_io do
      require 'zlib'

      io = StringIO.new('', 'w+')
      gz = Zlib::GzipWriter.new(io)
      gz.write(lines.join("\n"))
      gz.close
      StringIO.new(io.string)
    end

    it "can't filter with unknown compression" do
      expect {
        FileReader::DecompressIOFilter.filter(io, error, :compress => 'oreore')
      }.to raise_error(Exception, /unknown compression/)
    end

    describe 'gzip' do
      it "can't be wrapped with un-gzipped io" do
        expect {
          FileReader::DecompressIOFilter.filter(io, error, :compress => 'gzip')
        }.to raise_error(Zlib::GzipFile::Error)
      end

      it 'returns Zlib::GzipReader with :gzip' do
        wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, :compress => 'gzip')
        wrapped.should be_an_instance_of(Zlib::GzipReader)
      end

      it 'returns Zlib::GzipReader with auto detection' do
        wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, {})
        wrapped.should be_an_instance_of(Zlib::GzipReader)
      end

      context 'after initialization' do
        [{:compress => 'gzip'}, {}].each do |opts|
          # Each option set gets its own example group. Defining `let :reader`
          # repeatedly inside a single group leaves only the last definition
          # in effect, so the explicit-gzip case would never be exercised.
          context "with options #{opts.inspect}" do
            let :reader do
              wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, opts)
              FileReader::LineReader.new(wrapped, error, {})
            end

            it 'forward_row returns one line' do
              reader.forward_row.should == lines[0]
            end

            it 'feeds all lines' do
              begin
                i = 0
                while line = reader.forward_row
                  line.should == lines[i]
                  i += 1
                end
              rescue RSpec::Expectations::ExpectationNotMetError
                # A failed comparison must fail the example instead of being
                # mistaken for the end-of-input error handled below.
                raise
              rescue
                # Any other error is accepted only once the input is exhausted.
                gzipped_io.eof?.should be_true
              end
            end
          end
        end
      end
    end

    describe 'plain' do
      it 'returns passed io with :plain' do
        wrapped = FileReader::DecompressIOFilter.filter(io, error, :compress => 'plain')
        wrapped.should be_an_instance_of(StringIO)
      end

      it 'returns passed io with auto detection' do
        wrapped = FileReader::DecompressIOFilter.filter(io, error, {})
        wrapped.should be_an_instance_of(StringIO)
      end
    end
  end
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'file_reader/shared_context'
|
3
|
+
|
4
|
+
require 'stringio'
|
5
|
+
require 'td/file_reader'
|
6
|
+
|
7
|
+
include TreasureData
|
8
|
+
|
9
|
+
# Specs for FileReader::LineReader and for the line-oriented parsers that are
# constructed on top of it (JSON, delimiter, Apache and syslog).
describe FileReader::LineReader do
  # NOTE(review): presumably provides the `error` helper and consumes the
  # `error_pattern` overrides defined below -- confirm against
  # file_reader/shared_context.
  include_context 'error_proc'

  # Three JSON access-log records, one per input line.
  let :lines do
    [
      '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
      '{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"referer":"-","size":43,"agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","time":1353541927}',
      '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
    ]
  end

  let :io do
    StringIO.new(lines.join("\n"))
  end

  # Without an :encoding option the IO's external encoding must be untouched.
  it 'initialize' do
    if io.respond_to?(:external_encoding)
      ee = io.external_encoding
    end
    FileReader::LineReader.new(io, error, {})
    if io.respond_to?(:external_encoding)
      io.external_encoding.should == ee
    end
  end

  # With an :encoding option the IO's external encoding must be switched.
  it 'initialize with specified encoding' do
    if io.respond_to?(:external_encoding)
      ee = io.external_encoding
    end
    FileReader::LineReader.new(io, error, {:encoding => 'utf-8'})
    if io.respond_to?(:external_encoding)
      io.external_encoding.should_not == ee
      io.external_encoding.should == Encoding.find('utf-8')
    end
  end

  context 'after initialization' do
    let :reader do
      FileReader::LineReader.new(io, error, {})
    end

    it 'forward_row returns one line' do
      reader.forward_row.should == lines[0]
    end

    # TODO: integrate with following shared_examples_for
    it 'feeds all lines' do
      begin
        i = 0
        while line = reader.forward_row
          line.should == lines[i]
          i += 1
        end
      rescue RSpec::Expectations::ExpectationNotMetError
        # A failed comparison must fail the example, not be treated as EOF.
        raise
      rescue
        # Any other error is accepted only once the input is exhausted.
        io.eof?.should be_true
      end
    end

    # Shared check that `parser.forward` yields every expected record.
    # `step` is the stride through the expected data; the broken-line contexts
    # pass 2 so that the invalid middle line is skipped.
    # Including groups must define `parser` and `get_expected`.
    shared_examples_for 'parser iterates all' do |step|
      step ||= 1

      it 'feeds all' do
        begin
          i = 0
          while line = parser.forward
            line.should == get_expected.call(i)
            i += step
          end
        rescue RSpec::Expectations::ExpectationNotMetError
          # A failed comparison must fail the example, not be treated as EOF.
          raise
        rescue
          # Any other error is accepted only once the input is exhausted.
          io.eof?.should be_true
        end
      end
    end

    describe FileReader::JSONParser do
      it 'initialize with LineReader' do
        parser = FileReader::JSONParser.new(reader, error, {})
        parser.should_not be_nil
      end

      context 'after initialization' do
        let :parser do
          FileReader::JSONParser.new(reader, error, {})
        end

        it 'forward returns one line' do
          parser.forward.should == JSON.parse(lines[0])
        end

        let :get_expected do
          lambda { |i| JSON.parse(lines[i]) }
        end

        it_should_behave_like 'parser iterates all'

        context 'with broken line' do
          # Same data as above, with the middle record replaced by invalid JSON.
          let :lines do
            [
              '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
              '{This is invalid as a JSON}',
              '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
            ]
          end

          let :error_pattern do
            /^invalid json format/
          end

          it_should_behave_like 'parser iterates all', 2
        end
      end
    end

    # The same examples are generated once per delimiter.
    [',', "\t"].each { |pattern|
      describe FileReader::DelimiterParser do
        let :lines do
          [
            ['hoge', '12345', Time.now.to_s].join(pattern),
            ['foo', '34567', Time.now.to_s].join(pattern),
            ['piyo', '56789', Time.now.to_s].join(pattern),
          ]
        end

        # `inspect` keeps the tab delimiter readable in the example name.
        it "initialize with LineReader and #{pattern.inspect} delimiter" do
          parser = FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
          parser.should_not be_nil
        end

        context 'after initialization' do
          let :parser do
            FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
          end

          it 'forward returns one line' do
            parser.forward.should == lines[0].split(pattern)
          end

          let :get_expected do
            lambda { |i| lines[i].split(pattern) }
          end

          it_should_behave_like 'parser iterates all'
        end
      end
    }

    # Table of parser class => [raw input lines, expected parsed rows].
    {
      FileReader::ApacheParser => [
        [
          '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
          '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
          '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
        ],
        [
          ["58.83.188.60", "-", "23/Oct/2011:08:15:46 -0700", "HEAD", "/", "200", "277", "-", "-"],
          ["127.0.0.1", "-", "23/Oct/2011:08:20:01 -0700", "GET", "/", "200", "492", "-", "Wget/1.12 (linux-gnu)"],
          ["68.64.37.100", "-", "24/Oct/2011:01:48:54 -0700", "GET", "/phpMyAdmin/scripts/setup.php", "404", "480", "-", "ZmEu"],
        ]
      ],
      FileReader::SyslogParser => [
        [
          'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
          'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
          'Dec 22 10:42:41 localhost kernel: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
        ],
        [
          ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.692840] e1000e: eth2 NIC Link is Down"],
          ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.734466] br0: port 1(eth2) entering disabled state"],
          ["Dec 22 10:42:41", "localhost", "kernel", nil, "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"],
        ]
      ]
    }.each_pair { |parser_class, (input, output)|
      describe parser_class do
        let :lines do
          input
        end

        it "initialize with LineReader" do
          parser = parser_class.new(reader, error, {})
          parser.should_not be_nil
        end

        context 'after initialization' do
          let :parser do
            parser_class.new(reader, error, {})
          end

          it 'forward returns one line' do
            parser.forward.should == output[0]
          end

          let :get_expected do
            lambda { |i| output[i] }
          end

          it_should_behave_like 'parser iterates all'

          context 'with broken line' do
            # Copy of the input with the middle line replaced by free text.
            let :lines do
              broken = input.dup
              broken[1] = "Raw text sometimes is broken!"
              broken
            end

            # The format name is read from the parser's @format instance variable.
            let :error_pattern do
              /^invalid #{parser.instance_variable_get(:@format)} format/
            end

            it_should_behave_like 'parser iterates all', 2
          end
        end
      end
    }
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'file_reader/shared_context'
|
4
|
+
|
5
|
+
require 'stringio'
|
6
|
+
require 'msgpack'
|
7
|
+
require 'td/file_reader'
|
8
|
+
|
9
|
+
include TreasureData
|
10
|
+
|
11
|
+
# Specs for the readers that parse records directly from an IO:
# MessagePackParsingReader and SeparatedValueParsingReader (CSV and TSV).
describe 'FileReader parsing readers' do
  # NOTE(review): presumably provides the `error` helper used below -- confirm
  # against file_reader/shared_context.
  include_context 'error_proc'

  # Shared checks for any reader exposing `forward`.
  # Including groups must define `reader`, `dataset` and `io`.
  shared_examples_for 'forward basics' do
    it 'forward returns one data' do
      reader.forward.should == dataset[0]
    end

    it 'feeds all dataset' do
      begin
        i = 0
        while line = reader.forward
          line.should == dataset[i]
          i += 1
        end
      rescue RSpec::Expectations::ExpectationNotMetError
        # A failed comparison must fail the example, not be treated as EOF.
        raise
      rescue
        # Any other error is accepted only once the input is exhausted.
        io.eof?.should be_true
      end
    end
  end

  describe FileReader::MessagePackParsingReader do
    let :dataset do
      [
        {'name' => 'k', 'num' => 12345, 'time' => Time.now.to_i},
        {'name' => 's', 'num' => 34567, 'time' => Time.now.to_i},
        {'name' => 'n', 'num' => 56789, 'time' => Time.now.to_i},
      ]
    end

    # The records serialized back to back as one MessagePack stream.
    let :io do
      StringIO.new(dataset.map(&:to_msgpack).join(""))
    end

    it 'initialize' do
      reader = FileReader::MessagePackParsingReader.new(io, error, {})
      reader.should_not be_nil
    end

    context 'after initialization' do
      let :reader do
        FileReader::MessagePackParsingReader.new(io, error, {})
      end

      it_should_behave_like 'forward basics'
    end
  end

  # Captured once so that input rows and expected rows share the same value.
  test_time = Time.now.to_i
  # Table of format name => [reader options, raw input lines, expected rows].
  {
    'csv' => [
      {:delimiter_expr => ',', :quote_char => '"', :encoding => 'utf-8'},
      [
        %!k,123,"fo\no",true,#{test_time}!,
        %!s,456,"T,D",false,#{test_time}!,
        %!n,789,"ba""z",false,#{test_time}!,
      ],
      [
        %W(k 123 fo\no true #{test_time}),
        %W(s 456 T,D false #{test_time}),
        %W(n 789 ba\"z false #{test_time}),
      ]
    ],
    'tsv' => [
      {:delimiter_expr => "\t"},
      [
        %!k\t123\t"fo\no"\ttrue\t#{test_time}!,
        %!s\t456\t"b,ar"\tfalse\t#{test_time}!,
        %!n\t789\t"ba\tz"\tfalse\t#{test_time}!,
      ],
      [
        %W(k 123 fo\no true #{test_time}),
        %W(s 456 b,ar false #{test_time}),
        %W(n 789 ba\tz false #{test_time}),
      ]
    ]
  }.each_pair { |format, (opts, input, output)|
    describe FileReader::SeparatedValueParsingReader do
      let :dataset do
        output
      end

      let :lines do
        input
      end

      let :io do
        StringIO.new(lines.join($/))
      end

      it "initialize #{format}" do
        reader = FileReader::SeparatedValueParsingReader.new(io, error, opts)
        reader.should_not be_nil
      end

      context "after #{format} initialization" do
        let :reader do
          FileReader::SeparatedValueParsingReader.new(io, error, opts)
        end

        it_should_behave_like 'forward basics'

        # TODO: placeholder group; no examples for broken encodings yet.
        context "broken encodings" do
        end
      end
    end
  }
end
|