universal-access-log-parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +98 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/lib/common_parsers.rb +66 -0
- data/lib/universal-access-log-parser.rb +418 -0
- data/spec/common_parsers_spec.rb +312 -0
- data/spec/data/apache_access.log +178 -0
- data/spec/data/bad1.log +3 -0
- data/spec/data/bad2.log +3 -0
- data/spec/data/iis_short.log +10 -0
- data/spec/data/test1.log +3 -0
- data/spec/data/test2.log +5 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/universal-access-log-parser_spec.rb +639 -0
- data/universal-access-log-parser.gemspec +71 -0
- metadata +160 -0
data/.document
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem "ruby-ip", ">= 0"
|
4
|
+
|
5
|
+
# Add dependencies to develop your gem here.
|
6
|
+
# Include everything needed to run rake, tests, features, etc.
|
7
|
+
group :development do
|
8
|
+
gem "rspec", "~> 2.3.0"
|
9
|
+
gem "bundler", "~> 1.0.0"
|
10
|
+
gem "jeweler", "~> 1.6.4"
|
11
|
+
gem "rcov", ">= 0"
|
12
|
+
end
|
13
|
+
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
diff-lcs (1.1.3)
|
5
|
+
git (1.2.5)
|
6
|
+
jeweler (1.6.4)
|
7
|
+
bundler (~> 1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
rake (0.9.2)
|
11
|
+
rcov (0.9.10)
|
12
|
+
rspec (2.3.0)
|
13
|
+
rspec-core (~> 2.3.0)
|
14
|
+
rspec-expectations (~> 2.3.0)
|
15
|
+
rspec-mocks (~> 2.3.0)
|
16
|
+
rspec-core (2.3.1)
|
17
|
+
rspec-expectations (2.3.0)
|
18
|
+
diff-lcs (~> 1.1.2)
|
19
|
+
rspec-mocks (2.3.0)
|
20
|
+
ruby-ip (0.9.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler (~> 1.0.0)
|
27
|
+
jeweler (~> 1.6.4)
|
28
|
+
rcov
|
29
|
+
rspec (~> 2.3.0)
|
30
|
+
ruby-ip
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Jakub Pastuszek
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
= universal-access-log-parser
|
2
|
+
|
3
|
+
== Installation
|
4
|
+
|
5
|
+
gem install universal-access-log-parser
|
6
|
+
|
7
|
+
== Usage
|
8
|
+
|
9
|
+
require 'universal-access-log-parser'
|
10
|
+
|
11
|
+
# use predefined parser
|
12
|
+
parser = UniversalAccessLogParser.apache_combined
|
13
|
+
|
14
|
+
# or extend it inline
|
15
|
+
parser = UniversalAccessLogParser.new do
|
16
|
+
# reuse predefined element set
|
17
|
+
apache_combined
|
18
|
+
|
19
|
+
# add your own
|
20
|
+
string :varnish
|
21
|
+
string :varnish_status, :nil_on => '-'
|
22
|
+
string :initial_varnish_status, :nil_on => '-'
|
23
|
+
integer :cache_hits
|
24
|
+
integer :cache_ttl, :nil_on => '-'
|
25
|
+
integer :cache_age
|
26
|
+
end
|
27
|
+
|
28
|
+
# or define new parser
|
29
|
+
UniversalAccessLogParser.parser(:iis) do
|
30
|
+
skip_line '^#'
|
31
|
+
date_iis :time
|
32
|
+
ip :server_ip
|
33
|
+
string :method
|
34
|
+
string :url
|
35
|
+
string :query, :nil_on => '-'
|
36
|
+
integer :port
|
37
|
+
string :username, :nil_on => '-'
|
38
|
+
ip :client_ip
|
39
|
+
string :user_agent, :nil_on => '-', :process => lambda{|s| s.tr('+', ' ')}
|
40
|
+
integer :status
|
41
|
+
integer :substatus
|
42
|
+
integer :win32_status
|
43
|
+
integer :duration, :process => lambda{|i| i.to_f / 1000}
|
44
|
+
end
|
45
|
+
parser = UniversalAccessLogParser.iis
|
46
|
+
|
47
|
+
# and iterate entries with #each - won't raise errors
|
48
|
+
stats = parser.parse_file('access.log').each do |entry|
|
49
|
+
puts entry.time
|
50
|
+
puts entry.cache_age
|
51
|
+
end
|
52
|
+
|
53
|
+
# and get parsing stats
|
54
|
+
puts stats.failures
|
55
|
+
puts stats.successes
|
56
|
+
|
57
|
+
# or wait for exception with #each!
|
58
|
+
parser.parse_file('access.log').each! do |entry|
|
59
|
+
puts entry.time
|
60
|
+
puts entry.cache_age
|
61
|
+
end # will raise UniversalAccessLogParser::ParsingError on line parsing error
|
62
|
+
|
63
|
+
# data elements won't be parsed until accessed, so if you are not interested in some elements you won't waste time parsing them
|
64
|
+
stats = parser.parse_file('access.log').each do |entry|
|
65
|
+
# entry.time not parsed yet - Time object is not created
|
66
|
+
puts entry.time # this will parse time and create Time object - this may raise UniversalAccessLogParser::ElementParsingError!
|
67
|
+
puts entry.time # now Time object is returned from cache
|
68
|
+
puts entry.cache_age
|
69
|
+
|
70
|
+
# parse all elements
|
71
|
+
entry.parse!
|
72
|
+
|
73
|
+
# this will also parse all elements and return hash map of them
|
74
|
+
entry.to_hash
|
75
|
+
end
|
76
|
+
|
77
|
+
# iterate and parse all data with #each_parsed! - if this doesn't raise, all log lines parsed fine, including their elements
|
78
|
+
parser.parse_file('access.log').each_parsed! do |entry|
|
79
|
+
puts entry.time # already in cache
|
80
|
+
puts entry.cache_age # already in cache
|
81
|
+
end # will raise on line and element parsing error - try rescuing UniversalAccessLogParser::ParserError to catch both
|
82
|
+
|
83
|
+
== Contributing to universal-access-log-parser
|
84
|
+
|
85
|
+
* Please add more common parsers to lib/common_parsers.rb and rspec in spec/common_parsers_spec.rb or send me a gist
|
86
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
87
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
88
|
+
* Fork the project
|
89
|
+
* Start a feature/bugfix branch
|
90
|
+
* Commit and push until you are happy with your contribution
|
91
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
92
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
|
93
|
+
|
94
|
+
== Copyright
|
95
|
+
|
96
|
+
Copyright (c) 2011 Jakub Pastuszek. See LICENSE.txt for
|
97
|
+
further details.
|
98
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "universal-access-log-parser"
|
18
|
+
gem.homepage = "http://github.com/jpastuszek/universal-access-log-parser"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = 'Define your own access log parser of reuse/extend predefined common parsers'
|
21
|
+
gem.description = 'Meta parser that allows you to define new parser with ruby DSL to match access log file format you are willing to parse or use one of the predefined parsers like Apache common, IIS and more'
|
22
|
+
gem.email = "jpastuszek@gmail.com"
|
23
|
+
gem.authors = ["Jakub Pastuszek"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
task :default => :spec
|
40
|
+
|
41
|
+
require 'rake/rdoctask'
|
42
|
+
Rake::RDocTask.new do |rdoc|
|
43
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
44
|
+
|
45
|
+
rdoc.rdoc_dir = 'rdoc'
|
46
|
+
rdoc.title = "universal-access-log-parser #{version}"
|
47
|
+
rdoc.rdoc_files.include('README*')
|
48
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
49
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
@@ -0,0 +1,66 @@
|
|
1
|
+
UniversalAccessLogParser.parser(:apache_common) do
|
2
|
+
ip :remote_host
|
3
|
+
string :logname, :nil_on => '-'
|
4
|
+
string :user, :nil_on => '-'
|
5
|
+
surrounded_by '\[', '\]' do
|
6
|
+
date_ncsa :time
|
7
|
+
end
|
8
|
+
double_quoted do
|
9
|
+
optional :first_request_line do
|
10
|
+
string :method, :nil_on => ''
|
11
|
+
string :uri, :nil_on => ''
|
12
|
+
string :protocol, :nil_on => ''
|
13
|
+
end
|
14
|
+
end
|
15
|
+
integer :status
|
16
|
+
integer :response_size, :nil_on => '-'
|
17
|
+
end
|
18
|
+
|
19
|
+
UniversalAccessLogParser.parser(:apache_vhost_common) do
|
20
|
+
string :vhost
|
21
|
+
apache_common
|
22
|
+
end
|
23
|
+
|
24
|
+
UniversalAccessLogParser.parser(:apache_combined) do
|
25
|
+
apache_common
|
26
|
+
double_quoted do
|
27
|
+
string :referer, :nil_on => '-'
|
28
|
+
end
|
29
|
+
double_quoted do
|
30
|
+
string :user_agent, :nil_on => '-'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
UniversalAccessLogParser.parser(:apache_referer) do
|
35
|
+
separated_with ' -> ' do
|
36
|
+
string :referer, :nil_on => '-'
|
37
|
+
string :url
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
UniversalAccessLogParser.parser(:apache_user_agent) do
|
42
|
+
string :user_agent, :nil_on => '-', :greedy => false
|
43
|
+
end
|
44
|
+
|
45
|
+
UniversalAccessLogParser.parser(:icecast) do
|
46
|
+
apache_combined
|
47
|
+
integer :duration, :nil_on => '-'
|
48
|
+
end
|
49
|
+
|
50
|
+
UniversalAccessLogParser.parser(:iis) do
|
51
|
+
skip_line '^#'
|
52
|
+
date_iis :time
|
53
|
+
ip :server_ip
|
54
|
+
string :method
|
55
|
+
string :url
|
56
|
+
string :query, :nil_on => '-'
|
57
|
+
integer :port
|
58
|
+
string :username, :nil_on => '-'
|
59
|
+
ip :client_ip
|
60
|
+
string :user_agent, :nil_on => '-', :process => lambda{|s| s.tr('+', ' ')}
|
61
|
+
integer :status
|
62
|
+
integer :substatus
|
63
|
+
integer :win32_status
|
64
|
+
integer :duration, :process => lambda{|i| i.to_f / 1000}
|
65
|
+
end
|
66
|
+
|
@@ -0,0 +1,418 @@
|
|
1
|
+
require 'date'
require 'ip'
|
2
|
+
|
3
|
+
class UniversalAccessLogParser
|
4
|
+
class ParserError < ArgumentError
|
5
|
+
end
|
6
|
+
|
7
|
+
class ParsingError < ParserError
|
8
|
+
def initialize(msg, parser, line)
|
9
|
+
@parser = parser
|
10
|
+
@line = line
|
11
|
+
super(msg)
|
12
|
+
end
|
13
|
+
|
14
|
+
attr_reader :parser, :line
|
15
|
+
end
|
16
|
+
|
17
|
+
class ElementParsingError < ParserError
|
18
|
+
def initialize(e)
|
19
|
+
@error = e
|
20
|
+
super("argument parsing error: #{e}")
|
21
|
+
end
|
22
|
+
|
23
|
+
attr_reader :error
|
24
|
+
end
|
25
|
+
|
26
|
+
class ElementGroup < Array
|
27
|
+
# A single named value captured from a log line.
#
# Holds the element name, the regexp source that matches its raw text, an
# optional literal "nil marker" and the lambda that converts the raw text
# into the final value.
class Element
  # name   - name under which the value is exposed
  # regexp - regexp source (String) matching the raw text of the element
  # nil_on - optional literal string meaning "no value"; when the raw text
  #          equals it the parser returns nil without calling the block
  #
  # The optional block receives the raw string and returns the converted
  # value. Without a block the parser lambda returns nil.
  def initialize(name, regexp, nil_on = nil)
    @name = name
    @regexp = regexp
    @nil_on = nil_on
    @parser = lambda { |s|
      # `return` inside a lambda leaves only the lambda, not #initialize
      return nil if @nil_on && s == @nil_on
      yield s if block_given?
    }
  end

  attr_reader :name, :parser

  # Returns the source of one capturing group for this element.
  #
  # When a nil marker is set it is offered as the first alternative. The
  # marker is escaped because the parser lambda compares it literally
  # (s == @nil_on); left unescaped, a marker such as '.' would match any
  # character in the line regexp yet never be recognised as nil afterwards.
  def regexp
    return "(#{Regexp.escape(@nil_on.to_s)}|#{@regexp})" if @nil_on
    "(#{@regexp})"
  end
end
|
45
|
+
|
46
|
+
class Integrating < ElementGroup
|
47
|
+
def initialize(parent, separator, &block)
|
48
|
+
@separator = separator
|
49
|
+
super(parent, &block)
|
50
|
+
end
|
51
|
+
|
52
|
+
attr_reader :separator
|
53
|
+
end
|
54
|
+
|
55
|
+
class Root < Integrating
|
56
|
+
def initialize(separator, &block)
|
57
|
+
@skip_lines = []
|
58
|
+
super(nil, separator, &block)
|
59
|
+
end
|
60
|
+
|
61
|
+
attr_reader :skip_lines
|
62
|
+
|
63
|
+
def regexp
|
64
|
+
super + "(|#{separator}.*)"
|
65
|
+
end
|
66
|
+
|
67
|
+
def names
|
68
|
+
super << :other
|
69
|
+
end
|
70
|
+
|
71
|
+
def parsers
|
72
|
+
super << lambda{ |s|
|
73
|
+
return nil if s.empty?
|
74
|
+
s.sub(Regexp.new("^#{separator}"), '')
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
# root specific DSL
|
79
|
+
def skip_line(regexp)
|
80
|
+
@skip_lines << regexp
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
class Surrounding < ElementGroup
|
85
|
+
def initialize(parent, left, right, &block)
|
86
|
+
@left = left
|
87
|
+
@right = right
|
88
|
+
super(parent, &block)
|
89
|
+
end
|
90
|
+
|
91
|
+
def regexp
|
92
|
+
@left + super + @right
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class Optional < ElementGroup
|
97
|
+
def initialize(parent, name, options = {}, &block)
|
98
|
+
@group_name = name
|
99
|
+
@nil_on = options[:nil_on]
|
100
|
+
super(parent, &block)
|
101
|
+
end
|
102
|
+
|
103
|
+
def regexp
|
104
|
+
'(' + super + '|.*?)'
|
105
|
+
end
|
106
|
+
|
107
|
+
def names
|
108
|
+
super.unshift @group_name
|
109
|
+
end
|
110
|
+
|
111
|
+
def parsers
|
112
|
+
if @nil_on
|
113
|
+
super.unshift lambda{ |s| s == @nil_on ? nil : s }
|
114
|
+
else
|
115
|
+
super.unshift lambda{ |s| s}
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def initialize(parent, &block)
|
121
|
+
@parent = parent
|
122
|
+
instance_eval &block
|
123
|
+
end
|
124
|
+
|
125
|
+
# custom parser definition
|
126
|
+
def self.parser(name, &block)
|
127
|
+
define_method(name, &block)
|
128
|
+
end
|
129
|
+
|
130
|
+
# Returns the separator string placed between the elements of this group.
#
# A plain group has no separator of its own and asks its parent; the
# Integrating subclass overrides this reader with its own value.
#
# Raises ParserError when the group has no parent to ask. ParsingError is
# not used here because its constructor requires the parser and the log
# line, neither of which exists while the element tree is being built.
def separator
  raise ParserError, 'Integrating ElementGroup not defined in ElementGroup hierarchy' unless @parent
  @parent.separator
end
|
134
|
+
|
135
|
+
def regexp
|
136
|
+
map{|e| e.regexp}.join(separator)
|
137
|
+
end
|
138
|
+
|
139
|
+
def names
|
140
|
+
map do |e|
|
141
|
+
if e.kind_of? ElementGroup
|
142
|
+
e.names
|
143
|
+
else
|
144
|
+
e.name
|
145
|
+
end
|
146
|
+
end.flatten
|
147
|
+
end
|
148
|
+
|
149
|
+
def parsers
|
150
|
+
map do |e|
|
151
|
+
if e.kind_of? ElementGroup
|
152
|
+
e.parsers
|
153
|
+
else
|
154
|
+
e.parser
|
155
|
+
end
|
156
|
+
end.flatten
|
157
|
+
end
|
158
|
+
|
159
|
+
# core DSL
|
160
|
+
def integratin_group(separator, &block)
|
161
|
+
push ElementGroup::Integrating.new(self, separator, &block)
|
162
|
+
end
|
163
|
+
|
164
|
+
def surrounding_group(left, right, &block)
|
165
|
+
push ElementGroup::Surrounding.new(self, left, right, &block)
|
166
|
+
end
|
167
|
+
|
168
|
+
def optional(name, options = {}, &block)
|
169
|
+
push ElementGroup::Optional.new(self, name, options, &block)
|
170
|
+
end
|
171
|
+
|
172
|
+
def element(name, regexp, options = {}, &parser)
|
173
|
+
nil_on = options[:nil_on]
|
174
|
+
process = options[:process]
|
175
|
+
if process
|
176
|
+
p = lambda{|s| process.call(parser.call(s))}
|
177
|
+
else
|
178
|
+
p = parser
|
179
|
+
end
|
180
|
+
push Element.new(name, regexp, nil_on, &p)
|
181
|
+
end
|
182
|
+
|
183
|
+
# DSL
|
184
|
+
def separated_with(separator, &block)
|
185
|
+
integratin_group(separator, &block)
|
186
|
+
end
|
187
|
+
|
188
|
+
def surrounded_by(left, right, &block)
|
189
|
+
surrounding_group(left, right, &block)
|
190
|
+
end
|
191
|
+
|
192
|
+
def single_quoted(&block)
|
193
|
+
surrounded_by("'", "'", &block)
|
194
|
+
end
|
195
|
+
|
196
|
+
def double_quoted(&block)
|
197
|
+
surrounded_by('"', '"', &block)
|
198
|
+
end
|
199
|
+
|
200
|
+
def date_ncsa(name, options = {})
|
201
|
+
date(name, '%d/%b/%Y:%H:%M:%S %z', options)
|
202
|
+
end
|
203
|
+
|
204
|
+
def date_iis(name, options = {})
|
205
|
+
date(name, '%Y-%m-%d %H:%M:%S', options)
|
206
|
+
end
|
207
|
+
|
208
|
+
# Adds a timestamp element parsed with a strptime format.
#
# name    - element name
# format  - DateTime.strptime format; defaults to the NCSA format
# options - passed through to #element (e.g. :nil_on, :process)
#
# The matching regexp is derived from the format: the format is escaped and
# every %x directive becomes '.+'. The last directive is made lazy only when
# the format ends with a directive; appending '?' unconditionally would make
# a trailing literal character optional instead.
#
# The element value is a Time in UTC. DateTime comes from the 'date'
# standard library, which must be loaded.
def date(name, format = '%d/%b/%Y:%H:%M:%S %z', options = {})
  regex = Regexp.escape(format).gsub(/%./, '.+').gsub(/\//, '\\/')
  regex += '?' if regex.end_with?('.+')
  element(name, regex, options) do |match|
    utc = DateTime.strptime(match, format).new_offset(0)
    Time.utc(utc.year, utc.mon, utc.mday, utc.hour, utc.min, utc.sec + utc.sec_fraction)
  end
end
|
216
|
+
|
217
|
+
def ip(name, options = {})
|
218
|
+
greedy = true
|
219
|
+
greedy = options[:greedy] if options.member? :greedy
|
220
|
+
element(name, ".*#{greedy ? '?' : ''}", options){|s| IP.new(s)}
|
221
|
+
end
|
222
|
+
|
223
|
+
# Adds an integer element: an optional sign followed by digits, converted
# with String#to_i.
#
# name    - element name
# options - passed through to #element (e.g. :nil_on, :process)
#
# The sign class is [+-]; a '|' inside a character class is a literal
# character, so the former [\+|-] also accepted values such as "|5".
def integer(name, options = {})
  element(name, '[+-]?\d+', options){|s| s.to_i}
end
|
226
|
+
|
227
|
+
# Adds a float element: an optional sign, digits and an optional fractional
# part, converted with String#to_f.
#
# name    - element name
# options - passed through to #element (e.g. :nil_on, :process)
#
# The sign class is [+-]; a '|' inside a character class is a literal
# character, so the former [\+|-] also accepted values such as "|1.5".
def float(name, options = {})
  element(name, '[+-]?\d+\.?\d*', options){|s| s.to_f}
end
|
230
|
+
|
231
|
+
def string(name, options = {})
|
232
|
+
greedy = true
|
233
|
+
greedy = options[:greedy] if options.member? :greedy
|
234
|
+
element(name, ".*#{greedy ? '?' : ''}", options){|s| s}
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
# Iterates over the lines of an IO, handing each parsed entry to a block.
#
# Every line is stripped; lines for which the parser's #skip? is true are
# ignored and the rest are passed to the parser's #parse. When the iterator
# was created with close_io set, the IO is closed once an iteration method
# finishes, whether it returns normally or raises.
class EntryIterator
  # Counters returned by #each.
  class Stats < Struct.new(:failures, :successes)
  end

  # parser   - object responding to #skip?(line) and #parse(line)
  # io       - object responding to #each_line and #close
  # close_io - when true the io is closed after iterating
  def initialize(parser, io, close_io)
    @parser = parser
    @io = io
    @close_io = close_io
  end

  # Yields every entry whose line could be parsed.
  #
  # A ParsingError raised while handling a line is counted as a failure and
  # iteration continues. Any other exception (for example one raised by the
  # caller's block) propagates; the io is still closed first when owned, so
  # the handle opened by parse_file is not leaked.
  #
  # Returns Stats with the number of failed and successful lines.
  def each
    failures = 0
    successes = 0

    begin
      @io.each_line do |line|
        begin
          line.strip!
          next if @parser.skip?(line)
          yield @parser.parse(line)
          successes += 1
        rescue ParsingError
          failures += 1
        end
      end
    ensure
      @io.close if @close_io
    end

    Stats.new(failures, successes)
  end

  # Yields every entry; the first error raised for a line stops iteration
  # and propagates to the caller.
  def each!
    begin
      @io.each_line do |line|
        line.strip!
        next if @parser.skip?(line)
        yield @parser.parse(line)
      end
    ensure
      @io.close if @close_io
    end
  end

  # Like #each!, but calls #parse! on every entry before yielding it, so
  # errors from parsing individual elements surface here as well.
  def each_parsed!
    begin
      @io.each_line do |line|
        line.strip!
        next if @parser.skip?(line)
        yield @parser.parse(line).parse!
      end
    ensure
      @io.close if @close_io
    end
  end

  # Closes the underlying io regardless of the close_io setting.
  def close
    @io.close
  end
end
|
294
|
+
|
295
|
+
# just so parsed log line class can be tested and named
|
296
|
+
class ParsedLogLine
|
297
|
+
end
|
298
|
+
|
299
|
+
def initialize(&block)
|
300
|
+
@@parser_id ||= 0
|
301
|
+
@@parser_id += 1
|
302
|
+
|
303
|
+
@elements = ElementGroup::Root.new(' ', &block)
|
304
|
+
|
305
|
+
@skip_lines = @elements.skip_lines.map{|s| Regexp.new(s)}
|
306
|
+
@regexp = Regexp.new('^' + @elements.regexp + '$')
|
307
|
+
|
308
|
+
@names = @elements.names
|
309
|
+
|
310
|
+
@parsers = {}
|
311
|
+
@names.zip(@elements.parsers).each do |name, parser|
|
312
|
+
@parsers[name] = parser
|
313
|
+
end
|
314
|
+
|
315
|
+
@parsed_log_entry_class = Class.new(ParsedLogLine) do
|
316
|
+
def self.name
|
317
|
+
superclass.name
|
318
|
+
end
|
319
|
+
|
320
|
+
def self.make_metods(names)
|
321
|
+
names.each do |name|
|
322
|
+
class_eval """
|
323
|
+
def #{name}
|
324
|
+
return @cache[:#{name}] if @cache.member? :#{name}
|
325
|
+
begin
|
326
|
+
value = @parsers[:#{name}].call(@strings[:#{name}])
|
327
|
+
rescue => e
|
328
|
+
raise ElementParsingError.new(e)
|
329
|
+
end
|
330
|
+
@cache[:#{name}] = value
|
331
|
+
value
|
332
|
+
end
|
333
|
+
"""
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
def initialize(names, parsers, strings)
|
338
|
+
@parsers = parsers
|
339
|
+
|
340
|
+
@strings = {}
|
341
|
+
names.zip(strings).each do |name, string|
|
342
|
+
@strings[name] = string
|
343
|
+
end
|
344
|
+
|
345
|
+
@cache = {}
|
346
|
+
end
|
347
|
+
|
348
|
+
def parse!
|
349
|
+
@strings.keys.each do |name|
|
350
|
+
send(name)
|
351
|
+
end
|
352
|
+
self
|
353
|
+
end
|
354
|
+
|
355
|
+
def to_hash
|
356
|
+
parse!
|
357
|
+
@cache
|
358
|
+
end
|
359
|
+
|
360
|
+
def inspect
|
361
|
+
hash = @cache.dup
|
362
|
+
@strings.keys.each do |name|
|
363
|
+
hash[name] = '<unparsed>' unless hash.member? name
|
364
|
+
end
|
365
|
+
"#<#{self.class.name}: #{hash.keys.map{|s| s.to_s}.sort.map{|name| "#{name}: #{hash[name.to_sym].inspect}"}.join(', ')}>"
|
366
|
+
end
|
367
|
+
|
368
|
+
def to_s
|
369
|
+
"#<#{self.class.name}:#{object_id}>"
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
@parsed_log_entry_class.make_metods(@names)
|
374
|
+
end
|
375
|
+
|
376
|
+
# custom parser definition
|
377
|
+
def self.parser(name, &block)
|
378
|
+
ElementGroup.parser(name, &block)
|
379
|
+
|
380
|
+
eval """
|
381
|
+
def self.#{name}
|
382
|
+
self.new{ #{name} }
|
383
|
+
end
|
384
|
+
"""
|
385
|
+
end
|
386
|
+
|
387
|
+
def skip?(line)
|
388
|
+
@skip_lines.each do |regexp|
|
389
|
+
return true if line =~ regexp
|
390
|
+
end
|
391
|
+
return false
|
392
|
+
end
|
393
|
+
|
394
|
+
def parse(line)
|
395
|
+
matched, *strings = @regexp.match(line).to_a
|
396
|
+
|
397
|
+
raise ParsingError.new('parser regexp did not match log line', self, line) if strings.empty?
|
398
|
+
|
399
|
+
@parsed_log_entry_class.new(@names, @parsers, strings)
|
400
|
+
end
|
401
|
+
|
402
|
+
def parse_io(io, close_io = false)
|
403
|
+
EntryIterator.new(self, io, close_io)
|
404
|
+
end
|
405
|
+
|
406
|
+
def parse_file(file_path)
|
407
|
+
io = File.open(file_path)
|
408
|
+
# io will be closed after each
|
409
|
+
parse_io(io, true)
|
410
|
+
end
|
411
|
+
|
412
|
+
def inspect
|
413
|
+
"#<#{self.class.name}:#{@regexp.inspect} => #{@elements.names.join(' ')}>"
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
require 'common_parsers'
|
418
|
+
|