td 0.10.65 → 0.10.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +7 -0
- data/lib/td/command/import.rb +3 -3
- data/lib/td/command/job.rb +1 -0
- data/lib/td/command/query.rb +5 -1
- data/lib/td/command/sched.rb +12 -1
- data/lib/td/file_reader.rb +138 -50
- data/lib/td/version.rb +1 -1
- data/spec/file_reader/filter_spec.rb +236 -0
- data/spec/file_reader/io_filter_spec.rb +96 -0
- data/spec/file_reader/line_reader_spec.rb +227 -0
- data/spec/file_reader/parsing_reader_spec.rb +120 -0
- data/spec/file_reader/shared_context.rb +9 -0
- data/spec/file_reader_spec.rb +401 -0
- data/spec/spec_helper.rb +16 -0
- data/td.gemspec +3 -1
- metadata +52 -5
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'file_reader/shared_context'
|
3
|
+
|
4
|
+
require 'stringio'
|
5
|
+
require 'td/file_reader'
|
6
|
+
|
7
|
+
include TreasureData
|
8
|
+
|
9
|
+
describe 'FileReader io filters' do
|
10
|
+
include_context 'error_proc'
|
11
|
+
|
12
|
+
describe FileReader::DecompressIOFilter do
|
13
|
+
let :lines do
|
14
|
+
[
|
15
|
+
'{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"size":95,"time":1353541928}',
|
16
|
+
'{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"size":43,"time":1353541927}',
|
17
|
+
'{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"size":79,"time":1353541925}',
|
18
|
+
]
|
19
|
+
end
|
20
|
+
|
21
|
+
let :io do
|
22
|
+
StringIO.new(lines.join("\n"))
|
23
|
+
end
|
24
|
+
|
25
|
+
# In-memory IO containing the gzip-compressed form of the fixture lines.
let(:gzipped_io) do
  require 'zlib'

  buffer = StringIO.new('', 'w+')
  # The block form closes the writer when the block returns, which
  # completes the gzip stream before the bytes are read back.
  Zlib::GzipWriter.wrap(buffer) { |writer| writer.write(lines.join("\n")) }
  StringIO.new(buffer.string)
end
|
34
|
+
|
35
|
+
# An unrecognized :compress value must be rejected with an
# "unknown compression" error.
it "can't filter with unknown compression" do
  expect do
    FileReader::DecompressIOFilter.filter(io, error, :compress => 'oreore')
  end.to raise_error(Exception, /unknown compression/)
end
|
40
|
+
|
41
|
+
describe 'gzip' do
|
42
|
+
# Forcing gzip on plain (uncompressed) input must surface zlib's own error.
it "can't be wrapped with un-gzipped io" do
  expect do
    FileReader::DecompressIOFilter.filter(io, error, :compress => 'gzip')
  end.to raise_error(Zlib::GzipFile::Error)
end
|
47
|
+
|
48
|
+
# Explicitly requesting gzip wraps the IO in a Zlib::GzipReader.
it 'returns Zlib::GzipReader with :gzip' do
  options = { :compress => 'gzip' }
  FileReader::DecompressIOFilter.filter(gzipped_io, error, options).should be_an_instance_of(Zlib::GzipReader)
end
|
52
|
+
|
53
|
+
# With no :compress option, gzipped input is still wrapped in a Zlib::GzipReader.
it 'returns Zlib::GzipReader with auto detection' do
  FileReader::DecompressIOFilter.filter(gzipped_io, error, {}).should be_an_instance_of(Zlib::GzipReader)
end
|
57
|
+
|
58
|
+
context 'after initialization' do
|
59
|
+
# Run the same examples once per option set.
#
# Each option set gets its own context: when `let :reader` is declared
# repeatedly inside a single example group, the last declaration replaces
# the earlier ones, so only the `{}` (auto detection) case would actually
# be exercised by every generated example.
[{:compress => 'gzip'}, {}].each { |opts|
  context "with options #{opts.inspect}" do
    # LineReader layered on top of the decompressing IO filter.
    let :reader do
      wrapped = FileReader::DecompressIOFilter.filter(gzipped_io, error, opts)
      FileReader::LineReader.new(wrapped, error, {})
    end

    it 'forward_row returns one line' do
      reader.forward_row.should == lines[0]
    end

    it 'feeds all lines' do
      begin
        i = 0
        while line = reader.forward_row
          line.should == lines[i]
          i += 1
        end
      rescue RSpec::Expectations::ExpectationNotMetError
        # A failed comparison must fail the example instead of being
        # treated like the end-of-input error handled below.
        raise
      rescue
        # Any other error is accepted only once the input is exhausted.
        gzipped_io.eof?.should be_true
      end
    end
  end
}
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# Uncompressed input: the filter is expected to hand back a StringIO,
# whether 'plain' is requested explicitly or no option is given.
describe 'plain' do
  it 'returns passed io with :plain' do
    options = { :compress => 'plain' }
    FileReader::DecompressIOFilter.filter(io, error, options).should be_an_instance_of(StringIO)
  end

  it 'returns passed io with auto detection' do
    FileReader::DecompressIOFilter.filter(io, error, {}).should be_an_instance_of(StringIO)
  end
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'file_reader/shared_context'
|
3
|
+
|
4
|
+
require 'stringio'
|
5
|
+
require 'td/file_reader'
|
6
|
+
|
7
|
+
include TreasureData
|
8
|
+
|
9
|
+
describe FileReader::LineReader do
|
10
|
+
include_context 'error_proc'
|
11
|
+
|
12
|
+
let :lines do
|
13
|
+
[
|
14
|
+
'{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
|
15
|
+
'{"host":"224.225.147.72","user":"-","method":"GET","path":"/category/electronics","code":200,"referer":"-","size":43,"agent":"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)","time":1353541927}',
|
16
|
+
'{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
|
17
|
+
]
|
18
|
+
end
|
19
|
+
|
20
|
+
let :io do
|
21
|
+
StringIO.new(lines.join("\n"))
|
22
|
+
end
|
23
|
+
|
24
|
+
# Building a reader without an :encoding option must leave the IO's
# external encoding as it was. The check is skipped for IO objects that
# do not expose #external_encoding.
it 'initialize' do
  encoding_aware = io.respond_to?(:external_encoding)
  original_encoding = io.external_encoding if encoding_aware

  FileReader::LineReader.new(io, error, {})

  io.external_encoding.should == original_encoding if encoding_aware
end
|
33
|
+
|
34
|
+
# Building a reader with :encoding must switch the IO's external encoding
# to the requested one. The checks are skipped for IO objects that do not
# expose #external_encoding.
it 'initialize with specified encoding' do
  if io.respond_to?(:external_encoding)
    ee = io.external_encoding
  end
  FileReader::LineReader.new(io, error, {:encoding => 'utf-8'})
  if io.respond_to?(:external_encoding)
    # NOTE(review): this first expectation assumes the IO did not already
    # report UTF-8 before the reader was built - confirm for the Ruby
    # versions under test.
    io.external_encoding.should_not == ee
    io.external_encoding.should == Encoding.find('utf-8')
  end
end
|
44
|
+
|
45
|
+
context 'after initialization' do
|
46
|
+
let :reader do
|
47
|
+
FileReader::LineReader.new(io, error, {})
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'forward_row returns one line' do
|
51
|
+
reader.forward_row.should == lines[0]
|
52
|
+
end
|
53
|
+
|
54
|
+
# TODO: integrate with following shared_examples_for
|
55
|
+
# Drains the reader and checks that every row matches the fixture line
# at the same position.
it 'feeds all lines' do
  begin
    i = 0
    while line = reader.forward_row
      line.should == lines[i]
      i += 1
    end
  rescue RSpec::Expectations::ExpectationNotMetError
    # Re-raise the original failure so its message is reported; a bare
    # `fail` would replace it with a message-less RuntimeError.
    raise
  rescue
    # Any other error is accepted only once the input is exhausted.
    io.eof?.should be_true
  end
end
|
68
|
+
|
69
|
+
# Shared examples: drain `parser` and compare every parsed row with the
# value produced by `get_expected` for the current index.
#
# step - how far the expected index advances per parsed row (defaults to 1).
#
# The including group must provide `parser`, `get_expected` and `io`.
shared_examples_for 'parser iterates all' do |step|
  step ||= 1

  it 'feeds all' do
    begin
      i = 0
      while line = parser.forward
        line.should == get_expected.call(i)
        i += step
      end
    rescue RSpec::Expectations::ExpectationNotMetError
      # Re-raise the original failure so its message is reported; a bare
      # `fail` would replace it with a message-less RuntimeError.
      raise
    rescue
      # Any other error is accepted only once the input is exhausted.
      io.eof?.should be_true
    end
  end
end
|
86
|
+
|
87
|
+
# Examples for FileReader::JSONParser fed by the `reader` defined in the
# enclosing context.
describe FileReader::JSONParser do
  it 'initialize with LineReader' do
    FileReader::JSONParser.new(reader, error, {}).should_not be_nil
  end

  context 'after initialization' do
    let(:parser) { FileReader::JSONParser.new(reader, error, {}) }

    # Maps a fixture index to the record expected from the parser.
    let(:get_expected) do
      lambda { |index| JSON.parse(lines[index]) }
    end

    it 'forward returns one line' do
      parser.forward.should == JSON.parse(lines.first)
    end

    it_should_behave_like 'parser iterates all'

    # The middle fixture line is not valid JSON; the shared examples run
    # with a step of 2 so the expected index skips over it.
    context 'with broken line' do
      let(:lines) do
        [
          '{"host":"128.216.140.97","user":"-","method":"GET","path":"/item/sports/2511","code":200,"referer":"http://www.google.com/search?ie=UTF-8&q=google&sclient=psy-ab&q=Sports+Electronics&oq=Sports+Electronics&aq=f&aqi=g-vL1&aql=&pbx=1&bav=on.2,or.r_gc.r_pw.r_qf.,cf.osb&biw=3994&bih=421","size":95,"agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7","time":1353541928}',
          '{This is invalid as a JSON}',
          '{"host":"172.75.186.56","user":"-","method":"GET","path":"/category/jewelry","code":200,"referer":"-","size":79,"agent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","time":1353541925}',
        ]
      end

      let(:error_pattern) { /^invalid json format/ }

      it_should_behave_like 'parser iterates all', 2
    end
  end
end
|
125
|
+
|
126
|
+
# Generate one DelimiterParser example group per delimiter (comma and tab).
[',', "\t"].each do |pattern|
  describe FileReader::DelimiterParser do
    # Three rows of name, number and timestamp joined by the delimiter.
    let(:lines) do
      [%w(hoge 12345), %w(foo 34567), %w(piyo 56789)].map do |name, number|
        [name, number, Time.now.to_s].join(pattern)
      end
    end

    it "initialize with LineReader and #{pattern} delimiter" do
      options = {:delimiter_expr => Regexp.new(pattern)}
      FileReader::DelimiterParser.new(reader, error, options).should_not be_nil
    end

    context 'after initialization' do
      let(:parser) do
        FileReader::DelimiterParser.new(reader, error, {:delimiter_expr => Regexp.new(pattern)})
      end

      # Maps a fixture index to the fields expected from the parser.
      let(:get_expected) do
        lambda { |index| lines[index].split(pattern) }
      end

      it 'forward returns one line' do
        parser.forward.should == lines.first.split(pattern)
      end

      it_should_behave_like 'parser iterates all'
    end
  end
end
|
158
|
+
|
159
|
+
# Fixture table: parser class => [raw input lines, expected parsed rows].
# One example group is generated per parser class.
parser_fixtures = {
  FileReader::ApacheParser => [
    [
      '58.83.188.60 - - [23/Oct/2011:08:15:46 -0700] "HEAD / HTTP/1.0" 200 277 "-" "-"',
      '127.0.0.1 - - [23/Oct/2011:08:20:01 -0700] "GET / HTTP/1.0" 200 492 "-" "Wget/1.12 (linux-gnu)"',
      '68.64.37.100 - - [24/Oct/2011:01:48:54 -0700] "GET /phpMyAdmin/scripts/setup.php HTTP/1.1" 404 480 "-" "ZmEu"'
    ],
    [
      ["58.83.188.60", "-", "23/Oct/2011:08:15:46 -0700", "HEAD", "/", "200", "277", "-", "-"],
      ["127.0.0.1", "-", "23/Oct/2011:08:20:01 -0700", "GET", "/", "200", "492", "-", "Wget/1.12 (linux-gnu)"],
      ["68.64.37.100", "-", "24/Oct/2011:01:48:54 -0700", "GET", "/phpMyAdmin/scripts/setup.php", "404", "480", "-", "ZmEu"],
    ]
  ],
  FileReader::SyslogParser => [
    [
      'Dec 20 12:41:44 localhost kernel: [4843680.692840] e1000e: eth2 NIC Link is Down',
      'Dec 20 12:41:44 localhost kernel: [4843680.734466] br0: port 1(eth2) entering disabled state',
      'Dec 22 10:42:41 localhost kernel: [5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]',
    ],
    [
      ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.692840] e1000e: eth2 NIC Link is Down"],
      ["Dec 20 12:41:44", "localhost", "kernel", nil, "[4843680.734466] br0: port 1(eth2) entering disabled state"],
      ["Dec 22 10:42:41", "localhost", "kernel", nil, "[5009052.220155] zsh[25578]: segfault at 7fe849460260 ip 00007fe8474fd74d sp 00007fffe3bdf0e0 error 4 in libc-2.11.1.so[7fe847486000+17a000]"],
    ]
  ]
}

parser_fixtures.each_pair do |parser_class, (input, output)|
  describe parser_class do
    let(:lines) { input }

    it "initialize with LineReader" do
      parser_class.new(reader, error, {}).should_not be_nil
    end

    context 'after initialization' do
      let(:parser) { parser_class.new(reader, error, {}) }

      # Maps a fixture index to the row expected from the parser.
      let(:get_expected) do
        lambda { |index| output[index] }
      end

      it 'forward returns one line' do
        parser.forward.should == output.first
      end

      it_should_behave_like 'parser iterates all'

      # The second input line is replaced by unparsable text; the shared
      # examples run with a step of 2 so the expected index skips over it.
      context 'with broken line' do
        let(:lines) do
          input.dup.tap { |copy| copy[1] = "Raw text sometimes is broken!" }
        end

        let(:error_pattern) do
          /^invalid #{parser.instance_variable_get(:@format)} format/
        end

        it_should_behave_like 'parser iterates all', 2
      end
    end
  end
end
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'file_reader/shared_context'
|
4
|
+
|
5
|
+
require 'stringio'
|
6
|
+
require 'msgpack'
|
7
|
+
require 'td/file_reader'
|
8
|
+
|
9
|
+
include TreasureData
|
10
|
+
|
11
|
+
describe 'FileReader parsing readers' do
|
12
|
+
include_context 'error_proc'
|
13
|
+
|
14
|
+
# Shared examples for readers that expose #forward.
#
# The including group must provide `reader`, `dataset` (the expected rows,
# in order) and `io` (the underlying input).
shared_examples_for 'forward basics' do
  it 'forward returns one data' do
    reader.forward.should == dataset[0]
  end

  it 'feeds all dataset' do
    begin
      i = 0
      while line = reader.forward
        line.should == dataset[i]
        i += 1
      end
    rescue RSpec::Expectations::ExpectationNotMetError
      # Re-raise the original failure so its message is reported; a bare
      # `fail` would replace it with a message-less RuntimeError.
      raise
    rescue
      # Any other error is accepted only once the input is exhausted.
      io.eof?.should be_true
    end
  end
end
|
33
|
+
|
34
|
+
# Examples for FileReader::MessagePackParsingReader reading a stream of
# concatenated MessagePack-encoded hashes.
describe FileReader::MessagePackParsingReader do
  let(:dataset) do
    [
      {'name' => 'k', 'num' => 12345, 'time' => Time.now.to_i},
      {'name' => 's', 'num' => 34567, 'time' => Time.now.to_i},
      {'name' => 'n', 'num' => 56789, 'time' => Time.now.to_i},
    ]
  end

  # Every record serialized with #to_msgpack, back to back.
  let(:io) do
    packed = dataset.map { |record| record.to_msgpack }
    StringIO.new(packed.join(""))
  end

  it 'initialize' do
    FileReader::MessagePackParsingReader.new(io, error, {}).should_not be_nil
  end

  context 'after initialization' do
    let(:reader) { FileReader::MessagePackParsingReader.new(io, error, {}) }

    it_should_behave_like 'forward basics'
  end
end
|
60
|
+
|
61
|
+
# Timestamp captured once so the input lines and the expected rows agree.
test_time = Time.now.to_i

# Fixture table: format name => [reader options, raw input lines,
# expected parsed rows]. One example group is generated per format.
{
  'csv' => [
    {:delimiter_expr => ',', :quote_char => '"', :encoding => 'utf-8'},
    [
      %!k,123,"fo\no",true,#{test_time}!,
      %!s,456,"T,D",false,#{test_time}!,
      %!n,789,"ba""z",false,#{test_time}!,
    ],
    [
      %W(k 123 fo\no true #{test_time}),
      %W(s 456 T,D false #{test_time}),
      %W(n 789 ba\"z false #{test_time}),
    ]
  ],
  'tsv' => [
    {:delimiter_expr => "\t"},
    [
      %!k\t123\t"fo\no"\ttrue\t#{test_time}!,
      %!s\t456\t"b,ar"\tfalse\t#{test_time}!,
      %!n\t789\t"ba\tz"\tfalse\t#{test_time}!,
    ],
    [
      %W(k 123 fo\no true #{test_time}),
      %W(s 456 b,ar false #{test_time}),
      %W(n 789 ba\tz false #{test_time}),
    ]
  ]
}.each_pair { |format, (opts, input, output)|
  describe FileReader::SeparatedValueParsingReader do
    # Rows the reader is expected to produce, in order.
    let :dataset do
      output
    end

    let :lines do
      input
    end

    # Input lines joined with the current record separator.
    let :io do
      StringIO.new(lines.join($/))
    end

    it "initialize #{format}" do
      reader = FileReader::SeparatedValueParsingReader.new(io, error, opts)
      reader.should_not be_nil
    end

    context "after #{format} initialization" do
      let :reader do
        FileReader::SeparatedValueParsingReader.new(io, error, opts)
      end

      it_should_behave_like 'forward basics'

      # Placeholder group: no examples have been written for it yet.
      context "broken encodings" do
      end
    end
  end
}
|
120
|
+
end
|