universal-access-log-parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "ruby-ip", ">= 0"
4
+
5
+ # Add dependencies to develop your gem here.
6
+ # Include everything needed to run rake, tests, features, etc.
7
+ group :development do
8
+ gem "rspec", "~> 2.3.0"
9
+ gem "bundler", "~> 1.0.0"
10
+ gem "jeweler", "~> 1.6.4"
11
+ gem "rcov", ">= 0"
12
+ end
13
+
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rake (0.9.2)
11
+ rcov (0.9.10)
12
+ rspec (2.3.0)
13
+ rspec-core (~> 2.3.0)
14
+ rspec-expectations (~> 2.3.0)
15
+ rspec-mocks (~> 2.3.0)
16
+ rspec-core (2.3.1)
17
+ rspec-expectations (2.3.0)
18
+ diff-lcs (~> 1.1.2)
19
+ rspec-mocks (2.3.0)
20
+ ruby-ip (0.9.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bundler (~> 1.0.0)
27
+ jeweler (~> 1.6.4)
28
+ rcov
29
+ rspec (~> 2.3.0)
30
+ ruby-ip
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Jakub Pastuszek
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,98 @@
1
+ = universal-access-log-parser
2
+
3
+ == Installation
4
+
5
+ gem install universal-access-log-parser
6
+
7
+ == Usage
8
+
9
+ require 'universal-access-log-parser'
10
+
11
+ # use predefined parser
12
+ parser = UniversalAccessLogParser.apache_combined
13
+
14
+ # or extend it inline
15
+ parser = UniversalAccessLogParser.new do
16
+ # reuse predefined element set
17
+ apache_combined
18
+
19
+ # add your own
20
+ string :varnish
21
+ string :varnish_status, :nil_on => '-'
22
+ string :initial_varnish_status, :nil_on => '-'
23
+ integer :cache_hits
24
+ integer :cache_ttl, :nil_on => '-'
25
+ integer :cache_age
26
+ end
27
+
28
+ # or define new parser
29
+ UniversalAccessLogParser.parser(:iis) do
30
+ skip_line '^#'
31
+ date_iis :time
32
+ ip :server_ip
33
+ string :method
34
+ string :url
35
+ string :query, :nil_on => '-'
36
+ integer :port
37
+ string :username, :nil_on => '-'
38
+ ip :client_ip
39
+ string :user_agent, :nil_on => '-', :process => lambda{|s| s.tr('+', ' ')}
40
+ integer :status
41
+ integer :substatus
42
+ integer :win32_status
43
+ integer :duration, :process => lambda{|i| i.to_f / 1000}
44
+ end
45
+ parser = UniversalAccessLogParser.iis
46
+
47
+ # and iterate entries with #each - won't raise errors
48
+ stats = parser.parse_file('access.log').each do |entry|
49
+ puts entry.time
50
+ puts entry.cache_age
51
+ end
52
+
53
+ # and get parsing stats
54
+ puts stats.failures
55
+ puts stats.successes
56
+
57
+ # or wait for exception with #each!
58
+ parser.parse_file('access.log').each! do |entry|
59
+ puts entry.time
60
+ puts entry.cache_age
61
+ end # will raise UniversalAccessLogParser::ParsingError on line parsing error
62
+
63
+ # data elements won't be parsed until accessed, so you don't waste time on elements you are not interested in
64
+ stats = parser.parse_file('access.log').each do |entry|
65
+ # entry.time not parsed yet - Time object is not created
66
+ puts entry.time # this will parse time and create Time object - this may raise UniversalAccessLogParser::ElementParsingError!
67
+ puts entry.time # now Time object is returned from cache
68
+ puts entry.cache_age
69
+
70
+ # parse all elements
71
+ entry.parse!
72
+
73
+ # this will also parse all elements and return hash map of them
74
+ entry.to_hash
75
+ end
76
+
77
+ # iterate and parse all data with #each_parsed! - if this doesn't raise, all log lines (including their elements) parsed fine
78
+ parser.parse_file('access.log').each_parsed! do |entry|
79
+ puts entry.time # already in cache
80
+ puts entry.cache_age # already in cache
81
+ end # will raise on line and element parsing error - try rescuing UniversalAccessLogParser::ParserError to catch both
82
+
83
+ == Contributing to universal-access-log-parser
84
+
85
+ * Please add more common parsers to lib/common_parsers.rb and rspec in spec/common_parsers_spec.rb or send me a gist
86
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
87
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
88
+ * Fork the project
89
+ * Start a feature/bugfix branch
90
+ * Commit and push until you are happy with your contribution
91
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
92
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
93
+
94
+ == Copyright
95
+
96
+ Copyright (c) 2011 Jakub Pastuszek. See LICENSE.txt for
97
+ further details.
98
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build script: gem packaging (Jeweler), specs (RSpec), coverage (rcov)
# and API documentation (RDoc).

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "universal-access-log-parser"
  gem.homepage = "http://github.com/jpastuszek/universal-access-log-parser"
  gem.license = "MIT"
  gem.summary = 'Define your own access log parser or reuse/extend predefined common parsers'
  gem.description = 'Meta parser that allows you to define new parser with ruby DSL to match access log file format you are willing to parse or use one of the predefined parsers like Apache common, IIS and more'
  gem.email = "jpastuszek@gmail.com"
  gem.authors = ["Jakub Pastuszek"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` - run the whole spec suite.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` - run the spec suite with rcov enabled.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip surrounding whitespace so a trailing newline in VERSION does not
  # end up inside the generated documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "universal-access-log-parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,66 @@
1
# Predefined parsers. Each definition registers a reusable element set
# (usable inside other definitions) and a factory of the same name on
# UniversalAccessLogParser.

# Apache "common" format: host, logname, user, [time], "request line",
# status and response size.
UniversalAccessLogParser.parser(:apache_common) do
  ip :remote_host
  string :logname, :nil_on => '-'
  string :user, :nil_on => '-'
  surrounded_by('\[', '\]') { date_ncsa :time }
  double_quoted do
    # the raw request line is kept as :first_request_line even when it
    # cannot be split into method, uri and protocol
    optional :first_request_line do
      string :method, :nil_on => ''
      string :uri, :nil_on => ''
      string :protocol, :nil_on => ''
    end
  end
  integer :status
  integer :response_size, :nil_on => '-'
end

# Apache common format prefixed with the virtual host name.
UniversalAccessLogParser.parser(:apache_vhost_common) do
  string :vhost
  apache_common
end

# Apache "combined" format: common plus quoted referer and user agent.
UniversalAccessLogParser.parser(:apache_combined) do
  apache_common
  double_quoted { string :referer, :nil_on => '-' }
  double_quoted { string :user_agent, :nil_on => '-' }
end

# Apache referer log: "referer -> url".
UniversalAccessLogParser.parser(:apache_referer) do
  separated_with(' -> ') do
    string :referer, :nil_on => '-'
    string :url
  end
end

# Apache user agent log: the whole line is the user agent.
UniversalAccessLogParser.parser(:apache_user_agent) do
  string :user_agent, :nil_on => '-', :greedy => false
end

# Icecast: Apache combined followed by the connection duration.
UniversalAccessLogParser.parser(:icecast) do
  apache_combined
  integer :duration, :nil_on => '-'
end

# IIS: lines starting with '#' are skipped.
UniversalAccessLogParser.parser(:iis) do
  skip_line '^#'
  date_iis :time
  ip :server_ip
  string :method
  string :url
  string :query, :nil_on => '-'
  integer :port
  string :username, :nil_on => '-'
  ip :client_ip
  # '+' characters in the logged user agent are turned into spaces
  string :user_agent, :nil_on => '-', :process => lambda { |s| s.tr('+', ' ') }
  integer :status
  integer :substatus
  integer :win32_status
  # the logged integer is divided by 1000 and exposed as a float
  integer :duration, :process => lambda { |i| i.to_f / 1000 }
end
66
+
@@ -0,0 +1,418 @@
1
require 'date'
require 'ip'
2
+
3
+ class UniversalAccessLogParser
4
# Base class of every error raised by the parser; rescue it to catch both
# line-level (ParsingError) and element-level (ElementParsingError) failures.
class ParserError < ArgumentError
end

# Raised when a log line cannot be parsed.
#
# +parser+ and +line+ carry the parser instance and the offending line when
# the raiser supplies them. Both are optional so the error can also be
# raised with the plain <tt>raise ParsingError, 'message'</tt> form.
class ParsingError < ParserError
  attr_reader :parser, :line

  def initialize(msg, parser = nil, line = nil)
    @parser = parser
    @line = line
    super(msg)
  end
end

# Wraps an exception raised while converting a single element's string into
# its value; the wrapped exception is available through #error.
class ElementParsingError < ParserError
  attr_reader :error

  def initialize(e)
    @error = e
    super("argument parsing error: #{e}")
  end
end
25
+
26
# Ordered list of elements and nested groups describing (part of) a log line.
#
# A group is filled by evaluating a DSL block in its own context. From its
# members it derives three parallel things: a regexp source string (one
# capture per name), the list of names, and the list of converter lambdas.
class ElementGroup < Array
  # One captured field: a name, a regexp fragment and a converter.
  class Element
    attr_reader :name, :parser

    # name    - name of the field
    # regexp  - regexp source fragment matching the raw value
    # nil_on  - literal raw value that converts to nil (optional)
    # block   - converter from raw string to value (optional; without it
    #           the converter returns nil)
    def initialize(name, regexp, nil_on = nil, &converter)
      @name = name
      @regexp = regexp
      @nil_on = nil_on
      @parser = lambda do |s|
        if @nil_on and s == @nil_on
          nil
        elsif converter
          converter.call(s)
        end
      end
    end

    # Capturing regexp source for this element. The nil marker is compared
    # literally by the converter, so it is escaped here to be matched
    # literally as well.
    def regexp
      return "(#{Regexp.escape(@nil_on.to_s)}|#{@regexp})" if @nil_on
      "(#{@regexp})"
    end
  end

  # Group that defines the separator placed between its members (and
  # between members of nested groups that do not define their own).
  class Integrating < ElementGroup
    attr_reader :separator

    def initialize(parent, separator, &block)
      # must be set before super: super evaluates the DSL block
      @separator = separator
      super(parent, &block)
    end
  end

  # Top-level group. Adds a trailing catch-all capture named :other and
  # collects skip-line patterns.
  class Root < Integrating
    attr_reader :skip_lines

    def initialize(separator, &block)
      # must be set before super: the DSL block may call #skip_line
      @skip_lines = []
      super(nil, separator, &block)
    end

    # Appends a capture that is either empty or the separator followed by
    # the rest of the line.
    def regexp
      super + "(|#{separator}.*)"
    end

    def names
      super << :other
    end

    # The :other converter returns nil for an empty capture, otherwise the
    # captured text without its leading separator.
    def parsers
      super << lambda { |s|
        s.empty? ? nil : s.sub(Regexp.new("^#{separator}"), '')
      }
    end

    # root specific DSL

    # Registers a regexp source string; see UniversalAccessLogParser#skip?.
    def skip_line(regexp)
      @skip_lines << regexp
    end
  end

  # Group wrapped in a left and a right delimiter. Both delimiters are
  # regexp source fragments and are used verbatim.
  class Surrounding < ElementGroup
    def initialize(parent, left, right, &block)
      @left = left
      @right = right
      super(parent, &block)
    end

    def regexp
      @left + super + @right
    end
  end

  # Group captured as a whole under +name+; its members are matched when
  # possible, otherwise any text is accepted lazily for the whole group.
  class Optional < ElementGroup
    def initialize(parent, name, options = {}, &block)
      @group_name = name
      @nil_on = options[:nil_on]
      super(parent, &block)
    end

    def regexp
      '(' + super + '|.*?)'
    end

    def names
      super.unshift @group_name
    end

    # The group's own converter returns the raw capture, or nil when it
    # equals the :nil_on option.
    def parsers
      if @nil_on
        super.unshift lambda { |s| s == @nil_on ? nil : s }
      else
        super.unshift lambda { |s| s }
      end
    end
  end

  # parent - enclosing group (nil for the root)
  # block  - DSL block evaluated in the context of the new group; a group
  #          created without a block is simply empty
  def initialize(parent, &block)
    @parent = parent
    instance_eval(&block) if block
  end

  # custom parser definition: makes +name+ available as a DSL method in
  # every group
  def self.parser(name, &block)
    define_method(name, &block)
  end

  # Separator inherited from the nearest enclosing Integrating group.
  #
  # Raises ParserError when no ancestor defines one.
  def separator
    raise ParserError, 'Integrating ElementGroup not defined in ElementGroup hierarhy' unless @parent
    @parent.separator
  end

  # Regexp source of all members joined with the separator.
  def regexp
    map { |e| e.regexp }.join(separator)
  end

  # Flat list of member names, nested groups included.
  def names
    map { |e| e.kind_of?(ElementGroup) ? e.names : e.name }.flatten
  end

  # Flat list of converter lambdas, in the same order as #names.
  def parsers
    map { |e| e.kind_of?(ElementGroup) ? e.parsers : e.parser }.flatten
  end

  # core DSL

  def integratin_group(separator, &block)
    push ElementGroup::Integrating.new(self, separator, &block)
  end
  # correctly spelled name; the original one is kept for compatibility
  alias_method :integrating_group, :integratin_group

  def surrounding_group(left, right, &block)
    push ElementGroup::Surrounding.new(self, left, right, &block)
  end

  def optional(name, options = {}, &block)
    push ElementGroup::Optional.new(self, name, options, &block)
  end

  # Adds a single element.
  #
  # name    - element name
  # regexp  - regexp source fragment for the raw value
  # options - :nil_on  => raw value that converts to nil
  #           :process => callable applied to the converted value
  # block   - base converter from raw string to value
  def element(name, regexp, options = {}, &parser)
    process = options[:process]
    converter =
      if process
        # without a base converter the raw string is processed directly
        lambda { |s| process.call(parser ? parser.call(s) : s) }
      else
        parser
      end
    push Element.new(name, regexp, options[:nil_on], &converter)
  end

  # DSL

  def separated_with(separator, &block)
    integratin_group(separator, &block)
  end

  def surrounded_by(left, right, &block)
    surrounding_group(left, right, &block)
  end

  def single_quoted(&block)
    surrounded_by("'", "'", &block)
  end

  def double_quoted(&block)
    surrounded_by('"', '"', &block)
  end

  def date_ncsa(name, options = {})
    date(name, '%d/%b/%Y:%H:%M:%S %z', options)
  end

  def date_iis(name, options = {})
    date(name, '%Y-%m-%d %H:%M:%S', options)
  end

  # Date/time element parsed with a strptime +format+ and converted to a
  # UTC Time.
  def date(name, format = '%d/%b/%Y:%H:%M:%S %z', options = {})
    # every %x directive becomes '.+'; the trailing '?' makes the last one lazy
    regex = Regexp.escape(format).gsub(/%./, '.+').gsub(/\//, '\\/') + '?'
    element(name, regex, options) do |match|
      utc = DateTime.strptime(match, format).new_offset(0)
      Time.utc(utc.year, utc.mon, utc.mday, utc.hour, utc.min, utc.sec + utc.sec_fraction)
    end
  end

  # IP address element converted with IP.new.
  #
  # NOTE: the :greedy option is inverted with respect to regexp
  # terminology: the default (true) produces the lazy '.*?' and
  # :greedy => false produces '.*'. Kept as is for compatibility.
  def ip(name, options = {})
    lazy = options.fetch(:greedy, true)
    element(name, ".*#{lazy ? '?' : ''}", options) { |s| IP.new(s) }
  end

  # Optionally signed integer element.
  def integer(name, options = {})
    element(name, '[+-]?\d+', options) { |s| s.to_i }
  end

  # Optionally signed decimal number element.
  def float(name, options = {})
    element(name, '[+-]?\d+\.?\d*', options) { |s| s.to_f }
  end

  # Free-text element returned as is. See #ip for the meaning of :greedy.
  def string(name, options = {})
    lazy = options.fetch(:greedy, true)
    element(name, ".*#{lazy ? '?' : ''}", options) { |s| s }
  end
end
237
+
238
# Iterates over the lines of an IO object, yielding one parsed entry per
# line. Lines are stripped first and lines for which the parser's #skip?
# returns true are ignored.
class EntryIterator
  # Counters returned by #each.
  class Stats < Struct.new(:failures, :successes)
  end

  # parser   - object responding to #skip?(line) and #parse(line)
  # io       - object responding to #each_line (and #close)
  # close_io - when true, io is closed once an iteration finishes or fails
  def initialize(parser, io, close_io)
    @parser = parser
    @io = io
    @close_io = close_io
  end

  # Yields every entry; never raises ParsingError.
  #
  # A ParsingError raised while parsing a line, or by the given block, is
  # counted as a failure and iteration continues. Any other exception
  # propagates, but the IO is still closed when close_io was requested.
  #
  # Returns Stats with the number of failures and successes.
  def each
    failures = 0
    successes = 0

    each_relevant_line do |line|
      begin
        yield @parser.parse(line)
        successes += 1
      rescue ParsingError
        failures += 1
      end
    end

    Stats.new(failures, successes)
  ensure
    @io.close if @close_io
  end

  # Yields every entry; exceptions raised by the parser propagate.
  def each!
    each_relevant_line { |line| yield @parser.parse(line) }
  ensure
    @io.close if @close_io
  end

  # Like #each! but calls #parse! on every entry before yielding it.
  def each_parsed!
    each_relevant_line { |line| yield @parser.parse(line).parse! }
  ensure
    @io.close if @close_io
  end

  # Closes the underlying IO regardless of close_io.
  def close
    @io.close
  end

  private

  # Yields each stripped line that the parser does not ask to skip.
  def each_relevant_line
    @io.each_line do |raw|
      line = raw.strip
      next if @parser.skip?(line)
      yield line
    end
  end
end
294
+
295
# Named superclass of the per-parser entry classes, so that parsed log
# lines can be tested for and named.
class ParsedLogLine; end
298
+
299
# Builds a parser from a DSL block.
#
# The block is evaluated in the context of a root element group that uses a
# single space as separator. From that group the constructor derives the
# skip-line regexps, the anchored line regexp, the element names and their
# converters, and a dedicated entry class with one lazy, caching reader per
# element name.
def initialize(&block)
  @elements = ElementGroup::Root.new(' ', &block)

  @skip_lines = @elements.skip_lines.map { |s| Regexp.new(s) }
  @regexp = Regexp.new('^' + @elements.regexp + '$')

  @names = @elements.names

  @parsers = {}
  @names.zip(@elements.parsers).each do |name, parser|
    @parsers[name] = parser
  end

  @parsed_log_entry_class = Class.new(ParsedLogLine) do
    # the class is anonymous; report the name of its superclass instead
    def self.name
      superclass.name
    end

    # Defines one reader per element name. A reader converts the raw
    # string on first access, caches the value and wraps any conversion
    # error in ElementParsingError.
    def self.make_metods(names)
      names.each do |name|
        define_method(name) do
          if @cache.member?(name)
            @cache[name]
          else
            begin
              value = @parsers[name].call(@strings[name])
            rescue => e
              raise ElementParsingError.new(e)
            end
            @cache[name] = value
          end
        end
      end
    end

    # names   - element names
    # parsers - hash of name => converter
    # strings - raw captures, in the same order as names
    def initialize(names, parsers, strings)
      @parsers = parsers

      @strings = {}
      names.zip(strings).each do |name, string|
        @strings[name] = string
      end

      @cache = {}
    end

    # Converts every element; returns self.
    def parse!
      @strings.keys.each { |name| send(name) }
      self
    end

    # Converts every element and returns the name => value cache.
    def to_hash
      parse!
      @cache
    end

    # Lists all elements sorted by name; elements that were not accessed
    # yet are shown as '<unparsed>' instead of being converted.
    def inspect
      hash = @cache.dup
      @strings.keys.each do |name|
        hash[name] = '<unparsed>' unless hash.member? name
      end
      "#<#{self.class.name}: #{hash.keys.map{|s| s.to_s}.sort.map{|name| "#{name}: #{hash[name.to_sym].inspect}"}.join(', ')}>"
    end

    def to_s
      "#<#{self.class.name}:#{object_id}>"
    end
  end

  @parsed_log_entry_class.make_metods(@names)
end
375
+
376
+ # custom parser definition
377
# Registers +name+ both as a DSL method available inside parser definitions
# and as a class-level factory, so that after
#
#   UniversalAccessLogParser.parser(:iis) { ... }
#
# UniversalAccessLogParser.iis returns a new parser built from that
# definition.
def self.parser(name, &block)
  ElementGroup.parser(name, &block)

  # The factory is defined with a closure over +name+ rather than by
  # evaluating generated source code.
  (class << self; self; end).send(:define_method, name) do
    # the inner block is evaluated in the context of the root element
    # group, where +name+ is the DSL method registered above
    new { send(name) }
  end
end
386
+
387
# Tells whether +line+ matches at least one of the skip-line regexps.
#
# Returns true or false.
def skip?(line)
  @skip_lines.any? { |pattern| line =~ pattern }
end
393
+
394
# Matches +line+ against the parser regexp and wraps the captured strings
# in an entry object; element values are not converted here.
#
# Raises ParsingError when there are no captures, i.e. the line did not
# match.
def parse(line)
  match = @regexp.match(line)
  captures = match ? match.captures : []

  if captures.empty?
    raise ParsingError.new('parser regexp did not match log line', self, line)
  end

  @parsed_log_entry_class.new(@names, @parsers, captures)
end
401
+
402
# Returns an EntryIterator over +io+ for this parser.
#
# close_io - passed on to the iterator; false by default
def parse_io(io, close_io = false)
  iterator = EntryIterator.new(self, io, close_io)
  iterator
end
405
+
406
# Opens +file_path+ and returns an iterator over it (see #parse_io).
# The file is handed over with close_io set, so the iterator is the one
# responsible for closing it.
def parse_file(file_path)
  parse_io(File.open(file_path), true)
end
411
+
412
# Shows the class name, the line regexp and the space-separated element
# names.
def inspect
  element_names = @elements.names.join(' ')
  "#<#{self.class.name}:#{@regexp.inspect} => #{element_names}>"
end
415
+ end
416
+
417
+ require 'common_parsers'
418
+