universal-access-log-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +98 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/common_parsers.rb +66 -0
- data/lib/universal-access-log-parser.rb +418 -0
- data/spec/common_parsers_spec.rb +312 -0
- data/spec/data/apache_access.log +178 -0
- data/spec/data/bad1.log +3 -0
- data/spec/data/bad2.log +3 -0
- data/spec/data/iis_short.log +10 -0
- data/spec/data/test1.log +3 -0
- data/spec/data/test2.log +5 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/universal-access-log-parser_spec.rb +639 -0
- data/universal-access-log-parser.gemspec +71 -0
- metadata +160 -0
data/spec/data/bad1.log
ADDED
data/spec/data/bad2.log
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#Software: Microsoft Internet Information Services 7.5
|
2
|
+
#Version: 1.0
|
3
|
+
#Date: 2011-06-18 00:00:00
|
4
|
+
#Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
|
5
|
+
2011-06-18 00:00:00 38.111.242.43 GET /blahs/bulgaria/teeth-whitening/map/index.aspx - 80 - 66.249.72.231 Mediapartners-Google 200 0 0 343
|
6
|
+
2011-06-18 00:00:00 38.111.242.43 GET /physiotherapy/uk/lancashire/bryn/peripheral-joint-manipulation/index.aspx - 80 - 66.249.72.16 Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html) 200 0 0 609
|
7
|
+
2011-06-18 00:00:01 38.111.242.43 GET /blahs/canada/ontario/kawartha-lakes/bone-graft/index.aspx - 80 - 66.249.72.231 Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html) 200 0 0 4296
|
8
|
+
2011-06-18 00:00:01 38.111.242.43 GET /traditional-chinese-medicine-practitioners/ireland/county-westmeath/traditional-chinese-medicine-consultation/index.aspx - 80 - 66.249.72.204 Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html) 301 0 0 187
|
9
|
+
2011-06-18 00:00:01 38.111.242.43 GET /blahs/canada/ontario/pickering/sedation-dental/index.aspx - 80 - 66.249.72.16 Mozilla/5.0+(compatible;+Googlebot/2.1;++http://www.google.com/bot.html) 200 0 0 2640
|
10
|
+
2011-06-18 00:00:01 38.111.242.43 GET /blahs/uk/leicestershire/little-bowden/invisalign/index.aspx - 80 - 87.250.252.241 Mozilla/5.0+(compatible;+YandexBot/3.0;++http://yandex.com/bots) 200 0 0 5656
|
data/spec/data/test1.log
ADDED
data/spec/data/test2.log
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
# Requires supporting files with custom matchers and macros, etc,
|
6
|
+
# in ./support/ and its subdirectories.
|
7
|
+
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
|
8
|
+
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
# Spec helper used to detect leaked file descriptors.
#
# Opens a throw-away pipe, notes the descriptor number given to its read end,
# closes both ends again and returns that number minus one. Specs call this
# before and after an operation and compare the two values.
# NOTE(review): presumably relies on the OS handing out the lowest unused
# descriptor number for the new pipe - confirm on the target platform.
def open_files
  pipe_ends = IO.pipe
  read_end_fd = pipe_ends.first.fileno
  pipe_ends.each { |pipe_end| pipe_end.close }
  read_end_fd - 1
end
|
20
|
+
|
@@ -0,0 +1,639 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
require 'universal-access-log-parser'
|
3
|
+
require 'time'
|
4
|
+
|
5
|
+
describe 'UniversalAccessLogParser' do
|
6
|
+
describe UniversalAccessLogParser::ElementGroup do
|
7
|
+
describe 'with nesting' do
|
8
|
+
it '#names should return array of all defined element names' do
|
9
|
+
e = UniversalAccessLogParser::ElementGroup::Root.new(' ') do
|
10
|
+
element :test1, 'test1'
|
11
|
+
element :test2, 'test2'
|
12
|
+
separated_with ',' do
|
13
|
+
element :test3, 'test3'
|
14
|
+
element :test4, 'test4'
|
15
|
+
end
|
16
|
+
element :test5, 'test5'
|
17
|
+
element :test6, 'test6'
|
18
|
+
end
|
19
|
+
e.names.should == [:test1, :test2, :test3, :test4, :test5, :test6, :other]
|
20
|
+
end
|
21
|
+
|
22
|
+
it '#regexp should return element regexp joined by separator' do
|
23
|
+
e = UniversalAccessLogParser::ElementGroup::Root.new(' ') do
|
24
|
+
element :test1, 'test1'
|
25
|
+
element :test2, 'test2'
|
26
|
+
separated_with ',' do
|
27
|
+
element :test3, 'test3'
|
28
|
+
element :test4, 'test4'
|
29
|
+
end
|
30
|
+
element :test5, 'test5'
|
31
|
+
element :test6, 'test6'
|
32
|
+
end
|
33
|
+
e.regexp.should == '(test1) (test2) (test3),(test4) (test5) (test6)(| .*)'
|
34
|
+
end
|
35
|
+
|
36
|
+
# ElementGroup::Root#parsers is expected to hold one entry per defined element,
# including those declared inside a nested separated_with group: six explicit
# elements are declared here and seven parsers are expected.
# NOTE(review): the seventh is presumably the implicit trailing :other element
# listed by the '#names' example - confirm against the library.
it '#parsers should return array of all element parsers' do
  e = UniversalAccessLogParser::ElementGroup::Root.new(' ') do
    element :test1, 'test1'
    element :test2, 'test2'
    separated_with ',' do
      element :test3, 'test3'
      element :test4, 'test4'
    end
    element :test5, 'test5'
    element :test6, 'test6'
  end
  e.parsers.should have(7).elements
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe 'supported type' do
|
53
|
+
describe 'string' do
|
54
|
+
it 'with space separators' do
|
55
|
+
UniversalAccessLogParser.new do
|
56
|
+
string :test1
|
57
|
+
string :test2
|
58
|
+
string :test3
|
59
|
+
end.parse('abc def ghi').test2.should == 'def'
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
describe 'date' do
|
64
|
+
it 'in custom format' do
|
65
|
+
p = UniversalAccessLogParser.new do
|
66
|
+
string :test1
|
67
|
+
date :date, '%d.%b.%Y %H:%M:%S %z'
|
68
|
+
string :test2
|
69
|
+
end.parse('hello 29.Sep.2011 17:38:06 +0100 world')
|
70
|
+
|
71
|
+
p.date.to_i.should == Time.parse('+Thu Sep 29 17:38:06 +0100 2011').to_i
|
72
|
+
p.test1.should == 'hello'
|
73
|
+
p.test2.should == 'world'
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'in NCSA format' do
|
77
|
+
p = UniversalAccessLogParser.new do
|
78
|
+
string :test1
|
79
|
+
date_ncsa :date
|
80
|
+
string :test2
|
81
|
+
end.parse('hello 29/Sep/2011:17:38:06 +0100 world')
|
82
|
+
|
83
|
+
p.date.to_i.should == Time.parse('+Thu Sep 29 17:38:06 +0100 2011').to_i
|
84
|
+
p.test1.should == 'hello'
|
85
|
+
p.test2.should == 'world'
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'in IIS format' do
|
89
|
+
p = UniversalAccessLogParser.new do
|
90
|
+
string :test1
|
91
|
+
date_iis :date
|
92
|
+
string :test2
|
93
|
+
end.parse('hello 2011-06-20 00:00:01 world')
|
94
|
+
|
95
|
+
p.date.to_i.should == Time.parse('Mon Jun 20 00:00:01 +0000 2011').to_i
|
96
|
+
p.test1.should == 'hello'
|
97
|
+
p.test2.should == 'world'
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe 'IP' do
|
102
|
+
it 'in v4 format' do
|
103
|
+
p = UniversalAccessLogParser.new do
|
104
|
+
string :test1
|
105
|
+
ip :ip
|
106
|
+
string :test2
|
107
|
+
end.parse('hello 192.168.1.2 world')
|
108
|
+
|
109
|
+
p.ip.should == IP.new("192.168.1.2")
|
110
|
+
p.test1.should == 'hello'
|
111
|
+
p.test2.should == 'world'
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'in v6 format' do
|
115
|
+
p = UniversalAccessLogParser.new do
|
116
|
+
string :test1
|
117
|
+
ip :ip
|
118
|
+
string :test2
|
119
|
+
end.parse('hello 2001:db8:be00:: world')
|
120
|
+
|
121
|
+
p.ip.should == IP.new("2001:db8:be00::")
|
122
|
+
p.test1.should == 'hello'
|
123
|
+
p.test2.should == 'world'
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
describe 'integer' do
|
128
|
+
it 'unsigned' do
|
129
|
+
p = UniversalAccessLogParser.new do
|
130
|
+
string :test1
|
131
|
+
integer :number
|
132
|
+
string :test2
|
133
|
+
end.parse('hello 1234 world')
|
134
|
+
|
135
|
+
p.number.should == 1234
|
136
|
+
p.test1.should == 'hello'
|
137
|
+
p.test2.should == 'world'
|
138
|
+
end
|
139
|
+
|
140
|
+
it 'signed' do
|
141
|
+
p = UniversalAccessLogParser.new do
|
142
|
+
string :test1
|
143
|
+
integer :number1
|
144
|
+
integer :number2
|
145
|
+
string :test2
|
146
|
+
end.parse('hello -1234 +1235 world')
|
147
|
+
|
148
|
+
p.number1.should == -1234
|
149
|
+
p.number2.should == 1235
|
150
|
+
p.test1.should == 'hello'
|
151
|
+
p.test2.should == 'world'
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
describe 'float' do
|
156
|
+
it 'with dot unsigned' do
|
157
|
+
p = UniversalAccessLogParser.new do
|
158
|
+
string :test1
|
159
|
+
float :number
|
160
|
+
string :test2
|
161
|
+
end.parse('hello 123.4 world')
|
162
|
+
|
163
|
+
p.number.should == 123.4
|
164
|
+
p.test1.should == 'hello'
|
165
|
+
p.test2.should == 'world'
|
166
|
+
end
|
167
|
+
|
168
|
+
# A float element must also accept input that has no fractional part:
# '1234' is expected to be returned as 1234.0, with the neighbouring string
# elements unaffected.
it 'without dot unsigned' do
  parsed = UniversalAccessLogParser.new do
    string :test1
    float :number
    string :test2
  end.parse('hello 1234 world')

  parsed.number.should == 1234.0
  parsed.test1.should == 'hello'
  parsed.test2.should == 'world'
end
|
179
|
+
|
180
|
+
it 'with dot signed' do
|
181
|
+
p = UniversalAccessLogParser.new do
|
182
|
+
string :test1
|
183
|
+
float :number1
|
184
|
+
float :number2
|
185
|
+
string :test2
|
186
|
+
end.parse('hello -123.4 +123.5 world')
|
187
|
+
|
188
|
+
p.number1.should == -123.4
|
189
|
+
p.number2.should == 123.5
|
190
|
+
p.test1.should == 'hello'
|
191
|
+
p.test2.should == 'world'
|
192
|
+
end
|
193
|
+
|
194
|
+
# Signed input without a fractional part: '-1234' and '+1235' are expected to
# be returned by float elements as -1234.0 and 1235.0 respectively.
it 'without dot signed' do
  parsed = UniversalAccessLogParser.new do
    string :test1
    float :number1
    float :number2
    string :test2
  end.parse('hello -1234 +1235 world')

  parsed.number1.should == -1234.0
  parsed.number2.should == 1235.0
  parsed.test1.should == 'hello'
  parsed.test2.should == 'world'
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
describe 'with quoted/surrounded strings' do
|
211
|
+
it 'by []' do
|
212
|
+
p = UniversalAccessLogParser.new do
|
213
|
+
string :test1
|
214
|
+
surrounded_by '\[', '\]' do
|
215
|
+
date :date, '%d.%b.%Y %H:%M:%S %z'
|
216
|
+
end
|
217
|
+
string :test2
|
218
|
+
end.parse('hello [29.Sep.2011 17:38:06 +0100] world')
|
219
|
+
|
220
|
+
p.date.to_i.should == Time.parse('+Thu Sep 29 17:38:06 +0100 2011').to_i
|
221
|
+
p.test1.should == 'hello'
|
222
|
+
p.test2.should == 'world'
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'single quoted' do
|
226
|
+
p = UniversalAccessLogParser.new do
|
227
|
+
string :test1
|
228
|
+
single_quoted do
|
229
|
+
date :date, '%d.%b.%Y %H:%M:%S %z'
|
230
|
+
end
|
231
|
+
string :test2
|
232
|
+
end.parse("hello '29.Sep.2011 17:38:06 +0100' world")
|
233
|
+
|
234
|
+
p.date.to_i.should == Time.parse('+Thu Sep 29 17:38:06 +0100 2011').to_i
|
235
|
+
p.test1.should == 'hello'
|
236
|
+
p.test2.should == 'world'
|
237
|
+
end
|
238
|
+
|
239
|
+
it 'double quoted' do
|
240
|
+
p = UniversalAccessLogParser.new do
|
241
|
+
string :test1
|
242
|
+
double_quoted do
|
243
|
+
date :date, '%d.%b.%Y %H:%M:%S %z'
|
244
|
+
integer :number
|
245
|
+
end
|
246
|
+
string :test2
|
247
|
+
end.parse('hello "29.Sep.2011 17:38:06 +0100 123" world')
|
248
|
+
|
249
|
+
p.date.to_i.should == Time.parse('+Thu Sep 29 17:38:06 +0100 2011').to_i
|
250
|
+
p.number.should == 123
|
251
|
+
p.test1.should == 'hello'
|
252
|
+
p.test2.should == 'world'
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
describe 'optional blocks' do
|
257
|
+
it 'should optionally match set of elements or "" allowing access via name' do
|
258
|
+
parser = UniversalAccessLogParser.new do
|
259
|
+
string :test1
|
260
|
+
optional :first_request_line do
|
261
|
+
string :method, :nil_on => ''
|
262
|
+
string :uri, :nil_on => ''
|
263
|
+
string :protocol, :nil_on => ''
|
264
|
+
end
|
265
|
+
string :test2
|
266
|
+
end
|
267
|
+
|
268
|
+
data = parser.parse('hello GET / HTTP/1.1 world')
|
269
|
+
|
270
|
+
data.first_request_line.should == 'GET / HTTP/1.1'
|
271
|
+
data.method.should == 'GET'
|
272
|
+
data.uri.should == '/'
|
273
|
+
data.protocol.should == 'HTTP/1.1'
|
274
|
+
|
275
|
+
data.test1.should == 'hello'
|
276
|
+
data.test2.should == 'world'
|
277
|
+
|
278
|
+
data = parser.parse('hello GET world')
|
279
|
+
|
280
|
+
data.first_request_line.should == 'GET '
|
281
|
+
data.method.should == 'GET'
|
282
|
+
data.uri.should == nil
|
283
|
+
data.protocol.should == nil
|
284
|
+
|
285
|
+
data.test1.should == 'hello'
|
286
|
+
data.test2.should == 'world'
|
287
|
+
|
288
|
+
data = parser.parse('hello world')
|
289
|
+
|
290
|
+
data.first_request_line.should == ''
|
291
|
+
data.method.should == nil
|
292
|
+
data.uri.should == nil
|
293
|
+
data.protocol.should == nil
|
294
|
+
|
295
|
+
data.test1.should == 'hello'
|
296
|
+
data.test2.should == 'world'
|
297
|
+
end
|
298
|
+
|
299
|
+
it 'should optionally match set of elements or nil allowing access via name if :nil_on option given' do
|
300
|
+
parser = UniversalAccessLogParser.new do
|
301
|
+
string :test1
|
302
|
+
optional :first_request_line, :nil_on => '' do
|
303
|
+
string :method, :nil_on => ''
|
304
|
+
string :uri, :nil_on => ''
|
305
|
+
string :protocol, :nil_on => ''
|
306
|
+
end
|
307
|
+
string :test2
|
308
|
+
end
|
309
|
+
|
310
|
+
data = parser.parse('hello world')
|
311
|
+
|
312
|
+
data.first_request_line.should == nil
|
313
|
+
data.method.should == nil
|
314
|
+
data.uri.should == nil
|
315
|
+
data.protocol.should == nil
|
316
|
+
|
317
|
+
data.test1.should == 'hello'
|
318
|
+
data.test2.should == 'world'
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
it 'can parse log with format described in new block' do
|
323
|
+
parser = UniversalAccessLogParser.new do
|
324
|
+
ip :remote_host
|
325
|
+
string :logname, :nil_on => '-'
|
326
|
+
string :user, :nil_on => '-'
|
327
|
+
surrounded_by '\[', '\]' do
|
328
|
+
date_ncsa :time
|
329
|
+
end
|
330
|
+
double_quoted do
|
331
|
+
string :method, :nil_on => ''
|
332
|
+
string :uri, :nil_on => ''
|
333
|
+
string :protocol, :nil_on => ''
|
334
|
+
end
|
335
|
+
integer :status
|
336
|
+
integer :response_size, :nil_on => '-'
|
337
|
+
double_quoted do
|
338
|
+
string :referer, :nil_on => '-'
|
339
|
+
end
|
340
|
+
double_quoted do
|
341
|
+
string :user_agent, :nil_on => '-'
|
342
|
+
end
|
343
|
+
end
|
344
|
+
data = parser.parse('95.221.65.17 kazuya - [29/Sep/2011:17:38:06 +0100] "GET / HTTP/1.0" 200 1 "http://yandex.ru/yandsearch?text=sigquit.net" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"')
|
345
|
+
|
346
|
+
data.remote_host.should == IP.new('95.221.65.17')
|
347
|
+
data.logname.should == 'kazuya'
|
348
|
+
data.user.should == nil
|
349
|
+
data.time.to_i.should == Time.parse('Thu Sep 29 17:38:06 +0100 2011').to_i
|
350
|
+
data.method.should == 'GET'
|
351
|
+
data.uri.should == '/'
|
352
|
+
data.protocol.should == 'HTTP/1.0'
|
353
|
+
data.status.should == 200
|
354
|
+
data.response_size.should == 1
|
355
|
+
data.referer.should == 'http://yandex.ru/yandsearch?text=sigquit.net'
|
356
|
+
data.user_agent.should == 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'
|
357
|
+
end
|
358
|
+
|
359
|
+
it 'should raise UniversalAccessLogParser::ParsingError on parsing failure' do
|
360
|
+
parser = UniversalAccessLogParser.new do
|
361
|
+
ip :remote_host
|
362
|
+
string :logname, :nil_on => '-'
|
363
|
+
string :user, :nil_on => '-'
|
364
|
+
end
|
365
|
+
|
366
|
+
lambda {
|
367
|
+
parser.parse('123.123.123.213 - -')
|
368
|
+
}.should_not raise_error
|
369
|
+
|
370
|
+
lambda {
|
371
|
+
parser.parse('123.123.123.213 dasf')
|
372
|
+
}.should raise_error UniversalAccessLogParser::ParsingError
|
373
|
+
end
|
374
|
+
|
375
|
+
it 'should parse log lines with more elements than defined that then can be accessed via #other' do
|
376
|
+
parser = UniversalAccessLogParser.new do
|
377
|
+
ip :remote_host
|
378
|
+
string :logname, :nil_on => '-'
|
379
|
+
string :user, :nil_on => '-'
|
380
|
+
end
|
381
|
+
|
382
|
+
data = parser.parse('123.123.123.213 kazuya test a b cdef')
|
383
|
+
data.remote_host.should == IP.new('123.123.123.213')
|
384
|
+
data.logname.should == 'kazuya'
|
385
|
+
data.user.should == 'test'
|
386
|
+
data.other.should == 'a b cdef'
|
387
|
+
end
|
388
|
+
|
389
|
+
it 'should have nil other if there was no additional data in the log line' do
|
390
|
+
parser = UniversalAccessLogParser.new do
|
391
|
+
ip :remote_host
|
392
|
+
string :logname, :nil_on => '-'
|
393
|
+
string :user, :nil_on => '-'
|
394
|
+
end
|
395
|
+
|
396
|
+
data = parser.parse('123.123.123.213 kazuya test')
|
397
|
+
data.remote_host.should == IP.new('123.123.123.213')
|
398
|
+
data.logname.should == 'kazuya'
|
399
|
+
data.user.should == 'test'
|
400
|
+
data.other.should == nil
|
401
|
+
end
|
402
|
+
|
403
|
+
describe 'parsing data sources' do
|
404
|
+
before :all do
|
405
|
+
@parser = UniversalAccessLogParser.new do
|
406
|
+
ip :remote_host
|
407
|
+
string :logname, :nil_on => '-'
|
408
|
+
string :user, :nil_on => '-'
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
it 'IO stream' do
|
413
|
+
File.open(File.dirname(__FILE__) + '/data/test1.log') do |io|
|
414
|
+
entries = []
|
415
|
+
@parser.parse_io(io).each do |entry|
|
416
|
+
entries << entry
|
417
|
+
end
|
418
|
+
|
419
|
+
entries[0].remote_host.should == IP.new('123.123.123.0')
|
420
|
+
entries[1].remote_host.should == IP.new('123.123.123.1')
|
421
|
+
entries[2].remote_host.should == IP.new('123.123.123.2')
|
422
|
+
end
|
423
|
+
end
|
424
|
+
|
425
|
+
it 'should parse file and not leak fd\'s' do
|
426
|
+
entries = []
|
427
|
+
|
428
|
+
fds = open_files
|
429
|
+
@parser.parse_file(File.dirname(__FILE__) + '/data/test1.log').each do |entry|
|
430
|
+
entries << entry
|
431
|
+
end
|
432
|
+
fds.should == open_files
|
433
|
+
|
434
|
+
entries.should have(3).entries
|
435
|
+
entries[0].remote_host.should == IP.new('123.123.123.0')
|
436
|
+
entries[1].remote_host.should == IP.new('123.123.123.1')
|
437
|
+
entries[2].remote_host.should == IP.new('123.123.123.2')
|
438
|
+
end
|
439
|
+
|
440
|
+
it 'should raise IOError if another attempt of each is tried' do
|
441
|
+
iter = @parser.parse_file(File.dirname(__FILE__) + '/data/test1.log')
|
442
|
+
iter.each do |entry|
|
443
|
+
end
|
444
|
+
|
445
|
+
lambda {
|
446
|
+
iter.each do |entry|
|
447
|
+
end
|
448
|
+
}.should raise_error IOError
|
449
|
+
end
|
450
|
+
|
451
|
+
# A parser configured with skip_line '^#' is run over data/test2.log; the spec
# expects exactly three entries to be yielded, with remote hosts
# 123.123.123.0 through 123.123.123.2.
it 'should skip lines matching regexp' do
  parser = UniversalAccessLogParser.new do
    skip_line '^#'
    ip :remote_host
    string :logname, :nil_on => '-'
    string :user, :nil_on => '-'
  end

  entries = []
  iter = parser.parse_file(File.dirname(__FILE__) + '/data/test2.log')
  iter.each do |entry|
    entries << entry
  end

  entries.should have(3).entries
  entries[0].remote_host.should == IP.new('123.123.123.0')
  entries[1].remote_host.should == IP.new('123.123.123.1')
  entries[2].remote_host.should == IP.new('123.123.123.2')
end
|
470
|
+
end
|
471
|
+
|
472
|
+
describe 'bad data handling' do
|
473
|
+
before :each do
|
474
|
+
parser = UniversalAccessLogParser.new do
|
475
|
+
ip :remote_host
|
476
|
+
string :logname, :nil_on => '-'
|
477
|
+
string :user, :nil_on => '-'
|
478
|
+
end
|
479
|
+
@iter = parser.parse_file(File.dirname(__FILE__) + '/data/bad1.log')
|
480
|
+
end
|
481
|
+
|
482
|
+
it 'with each it should not raise exceptions' do
|
483
|
+
entries = []
|
484
|
+
lambda {
|
485
|
+
@iter.each do |entry|
|
486
|
+
entries << entry
|
487
|
+
end
|
488
|
+
}.should_not raise_error
|
489
|
+
|
490
|
+
entries.should have(2).entries
|
491
|
+
entries[0].remote_host.should == IP.new('123.123.123.0')
|
492
|
+
# line skipped
|
493
|
+
entries[1].remote_host.should == IP.new('123.123.123.2')
|
494
|
+
end
|
495
|
+
|
496
|
+
# The value returned by #each is expected to respond to #failures and
# #successes; for data/bad1.log (opened as @iter in the before hook) the spec
# expects one failure and two successes, with the bad line left out of the
# yielded entries.
it 'with each it should provide parse failure statistics' do
  entries = []
  stats = nil
  lambda {
    stats = @iter.each do |entry|
      entries << entry
    end
  }.should_not raise_error

  # Checked outside the lambda so that a mismatch is reported as its own
  # expectation failure rather than as an exception caught by
  # should_not raise_error.
  stats.failures.should == 1
  stats.successes.should == 2

  entries.should have(2).entries
  entries[0].remote_host.should == IP.new('123.123.123.0')
  # line skipped
  entries[1].remote_host.should == IP.new('123.123.123.2')
end
|
512
|
+
|
513
|
+
# Unlike #each, #each! is expected to raise ParsingError on data/bad1.log
# (opened as @iter in the before hook); exactly one entry is expected to have
# been yielded before the error.
it 'with each! it should raise UniversalAccessLogParser::ParsingError' do
  entries = []
  lambda {
    @iter.each! do |entry|
      entries << entry
    end
  }.should raise_error UniversalAccessLogParser::ParsingError

  entries.should have(1).entries
  entries[0].remote_host.should == IP.new('123.123.123.0')
end
|
524
|
+
end
|
525
|
+
|
526
|
+
describe 'delayed entry parsing' do
|
527
|
+
before :each do
|
528
|
+
parser = UniversalAccessLogParser.new do
|
529
|
+
ip :remote_host
|
530
|
+
string :logname, :nil_on => '-'
|
531
|
+
string :user, :nil_on => '-'
|
532
|
+
end
|
533
|
+
@iter = parser.parse_file(File.dirname(__FILE__) + '/data/bad2.log')
|
534
|
+
end
|
535
|
+
|
536
|
+
it 'should report errors regarding element parsing on element access' do
|
537
|
+
entries = []
|
538
|
+
lambda {
|
539
|
+
@iter.each do |entry|
|
540
|
+
entries << entry
|
541
|
+
end
|
542
|
+
}.should_not raise_error
|
543
|
+
|
544
|
+
entries.should have(3).entries
|
545
|
+
entries[0].remote_host.should == IP.new('123.123.123.0')
|
546
|
+
|
547
|
+
lambda {
|
548
|
+
entries[1].remote_host
|
549
|
+
}.should raise_error UniversalAccessLogParser::ElementParsingError
|
550
|
+
|
551
|
+
entries[2].remote_host.should == IP.new('123.123.123.2')
|
552
|
+
end
|
553
|
+
|
554
|
+
it 'entry #parse! should parse and cache all element values' do
|
555
|
+
entries = []
|
556
|
+
lambda {
|
557
|
+
@iter.each do |entry|
|
558
|
+
entries << entry
|
559
|
+
end
|
560
|
+
}.should_not raise_error
|
561
|
+
|
562
|
+
entries.should have(3).entries
|
563
|
+
|
564
|
+
lambda {
|
565
|
+
entries[0].parse!
|
566
|
+
}.should_not raise_error
|
567
|
+
|
568
|
+
lambda {
|
569
|
+
entries[1].parse!
|
570
|
+
}.should raise_error UniversalAccessLogParser::ElementParsingError
|
571
|
+
|
572
|
+
lambda {
|
573
|
+
entries[2].parse!
|
574
|
+
}.should_not raise_error
|
575
|
+
end
|
576
|
+
|
577
|
+
it 'entry #to_hash should return fully parsed hash' do
|
578
|
+
entries = []
|
579
|
+
lambda {
|
580
|
+
@iter.each do |entry|
|
581
|
+
entries << entry
|
582
|
+
end
|
583
|
+
}.should_not raise_error
|
584
|
+
|
585
|
+
entries.should have(3).entries
|
586
|
+
|
587
|
+
h = entries[0].to_hash
|
588
|
+
h[:remote_host].should == IP.new('123.123.123.0')
|
589
|
+
h[:logname].should == 'hello'
|
590
|
+
h[:user].should == 'world'
|
591
|
+
|
592
|
+
lambda {
|
593
|
+
entries[1].to_hash
|
594
|
+
}.should raise_error UniversalAccessLogParser::ElementParsingError
|
595
|
+
|
596
|
+
h = entries[2].to_hash
|
597
|
+
h[:remote_host].should == IP.new('123.123.123.2')
|
598
|
+
h[:logname].should == 'hello'
|
599
|
+
h[:user].should == nil
|
600
|
+
end
|
601
|
+
|
602
|
+
it 'parser #each_parsed! should return fully parsed elements' do
|
603
|
+
entries = []
|
604
|
+
lambda {
|
605
|
+
@iter.each_parsed! do |entry|
|
606
|
+
entries << entry
|
607
|
+
end
|
608
|
+
}.should raise_error UniversalAccessLogParser::ElementParsingError
|
609
|
+
|
610
|
+
entries.should have(1).entries
|
611
|
+
entries[0].remote_host.should == IP.new('123.123.123.0')
|
612
|
+
end
|
613
|
+
end
|
614
|
+
|
615
|
+
it 'should provide nice parsed element inspect output' do
|
616
|
+
parser = UniversalAccessLogParser.new do
|
617
|
+
ip :remote_host
|
618
|
+
string :logname, :nil_on => '-'
|
619
|
+
string :user, :nil_on => '-'
|
620
|
+
end
|
621
|
+
|
622
|
+
data = parser.parse('123.123.123.213 kazuya test')
|
623
|
+
data.remote_host
|
624
|
+
data.user
|
625
|
+
data.inspect.should == '#<UniversalAccessLogParser::ParsedLogLine: logname: "<unparsed>", other: "<unparsed>", remote_host: #<IP::V4 123.123.123.213>, user: "test">'
|
626
|
+
end
|
627
|
+
|
628
|
+
it 'should provide nice parsed element to_s output' do
|
629
|
+
parser = UniversalAccessLogParser.new do
|
630
|
+
ip :remote_host
|
631
|
+
string :logname, :nil_on => '-'
|
632
|
+
string :user, :nil_on => '-'
|
633
|
+
end
|
634
|
+
|
635
|
+
data = parser.parse('123.123.123.213 kazuya test')
|
636
|
+
data.to_s.should =~ /^#<UniversalAccessLogParser::ParsedLogLine/
|
637
|
+
end
|
638
|
+
end
|
639
|
+
|