apachelogregex 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +5 -0
- data/LICENSE.rdoc +25 -0
- data/Manifest +12 -0
- data/README.rdoc +97 -0
- data/Rakefile +56 -0
- data/apachelogregex.gemspec +38 -0
- data/lib/apache_log_regex.rb +174 -0
- data/lib/apache_log_regex/version.rb +30 -0
- data/lib/apachelogregex.rb +16 -0
- data/setup.rb +1585 -0
- data/test/apache_log_regex_test.rb +110 -0
- data/test/test_all.rb +18 -0
- data/test/test_helper.rb +24 -0
- metadata +93 -0
data/CHANGELOG.rdoc
ADDED
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= License
|
2
|
+
|
3
|
+
(The MIT License)
|
4
|
+
|
5
|
+
Copyright (c) 2008-2009 Simone Carletti <weppos@weppos.net>
|
6
|
+
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
8
|
+
a copy of this software and associated documentation files (the
|
9
|
+
"Software"), to deal in the Software without restriction, including
|
10
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
11
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
12
|
+
permit persons to whom the Software is furnished to do so, subject to
|
13
|
+
the following conditions:
|
14
|
+
|
15
|
+
The above copyright notice and this permission notice shall be
|
16
|
+
included in all copies or substantial portions of the Software.
|
17
|
+
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
20
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
22
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
23
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
24
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
|
data/Manifest
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
= Apache Log Regex
|
2
|
+
|
3
|
+
Apache Log Regex is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module,
|
4
|
+
available at http://cpan.uwinnipeg.ca/~peterhi/Apache-LogRegex.
|
5
|
+
It provides functionalities to parse a line from an Apache log file into a hash.
|
6
|
+
|
7
|
+
|
8
|
+
== Dependencies
|
9
|
+
|
10
|
+
* Ruby >= 1.8.6 (not tested with previous versions)
|
11
|
+
|
12
|
+
ApacheLogRegex is compatible with Ruby 1.9.1.
|
13
|
+
|
14
|
+
|
15
|
+
== Overview
|
16
|
+
|
17
|
+
Apache Log Regex is designed to be a simple class to parse Apache log files.
|
18
|
+
|
19
|
+
It takes an Apache logging format and generates a regular expression which is used to parse a line from a log file and returns a Hash with keys corresponding to the fields defined in the log format.
|
20
|
+
The log format should match the one defined in your Apache configuration file with the LogFormat directive.
|
21
|
+
|
22
|
+
|
23
|
+
== Example Usage
|
24
|
+
|
25
|
+
The following is the simplest usage example.
|
26
|
+
It tries to parse the `access.log` file and echoes each parsed line.
|
27
|
+
|
28
|
+
format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
29
|
+
parser = ApacheLogRegex.new(format)
|
30
|
+
|
31
|
+
File.foreach('/var/apache/access.log') do |line|
|
32
|
+
begin
|
33
|
+
parser.parse!(line)
|
34
|
+
# {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
35
|
+
rescue ApacheLogRegex::ParseError => e
|
36
|
+
puts "Error parsing log file: " + e.message
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
More often, you might want to collect parsed lines and use them later in your program.
|
41
|
+
The following example iterates all log lines, parses them and returns an array of Hash with the results.
|
42
|
+
|
43
|
+
format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
44
|
+
parser = ApacheLogRegex.new(format)
|
45
|
+
|
46
|
+
File.readlines('/var/apache/access.log').collect do |line|
|
47
|
+
begin
|
48
|
+
parser.parse!(line)
|
49
|
+
# {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
50
|
+
rescue ApacheLogRegex::ParseError => e
|
51
|
+
nil
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
== Acknowledgments
|
57
|
+
|
58
|
+
This library is essentially a Ruby port of Apache::LogRegex Perl library.
|
59
|
+
A big thanks goes to Peter Hickman, the author of the original package. This Ruby gem would probably not exist without your contribution!
|
60
|
+
|
61
|
+
I also want to thank Harry Fuecks and Hamish Morgan,
|
62
|
+
the authors of the Python[http://code.google.com/p/apachelog/] and PHP[http://kitty0.org/] ports.
|
63
|
+
First of all they gave me the inspiration to translate the original Perl code to Ruby,
|
64
|
+
as soon as I discovered nobody did it before me.
|
65
|
+
Last but not least, their libraries have been extremely useful in helping me understand some obscure Perl statements
|
66
|
+
from the original package.
|
67
|
+
|
68
|
+
|
69
|
+
== Author
|
70
|
+
|
71
|
+
{Simone Carletti}[http://www.simonecarletti.com/] <weppos@weppos.net>
|
72
|
+
|
73
|
+
|
74
|
+
== Resources
|
75
|
+
|
76
|
+
* {Homepage}[http://code.simonecarletti.com/apachelogregex]
|
77
|
+
* {API}[http://apachelogregex.rubyforge.org/]
|
78
|
+
* {GitHub}[http://github.com/weppos/apachelogregex/]
|
79
|
+
* {RubyForge}[http://rubyforge.org/projects/apachelogregex/]
|
80
|
+
|
81
|
+
|
82
|
+
== FeedBack and Bug reports
|
83
|
+
|
84
|
+
Feel free to email {Simone Carletti}[mailto:weppos@weppos.net] with any questions or feedback.
|
85
|
+
|
86
|
+
Please use the {Ticket System}[http://code.simonecarletti.com/projects/show/apachelogregex] to submit bug reports or feature requests.
|
87
|
+
|
88
|
+
|
89
|
+
== Changelog
|
90
|
+
|
91
|
+
See the CHANGELOG.rdoc file for details.
|
92
|
+
|
93
|
+
|
94
|
+
== License
|
95
|
+
|
96
|
+
Copyright (c) 2008-2009 Simone Carletti, ApacheLogRegex is released under the MIT license.
|
97
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

gem 'echoe', '>= 3.1'
require 'echoe'

$LOAD_PATH.unshift(File.dirname(__FILE__) + "/lib")
require 'apache_log_regex'


# Common package properties.
# PKG_NAME and PKG_VERSION can be overridden from the environment.
PKG_NAME    = ENV['PKG_NAME'] || ApacheLogRegex::GEM

# Snapshot builds (SNAPSHOT=1) get a UTC timestamp appended to the version.
# The suffix is added with += so that a brand-new String is created:
# the base value is either the ApacheLogRegex::VERSION object itself or a
# value read from ENV, and neither must be modified in place.
pkg_version  = ENV['PKG_VERSION'] || ApacheLogRegex::VERSION
pkg_version += "." + Time.now.utc.strftime("%Y%m%d%H%M%S") if ENV['SNAPSHOT'].to_i == 1
PKG_VERSION = pkg_version

PKG_SUMMARY = "Ruby parser for Apache log files based on regular expressions."
PKG_FILES   = FileList.new("{lib,test}/**/*.rb") do |files|
  files.include %w(README.rdoc CHANGELOG.rdoc LICENSE.rdoc)
  files.include %w(Rakefile setup.rb)
end
RUBYFORGE_PROJECT = 'apachelogregex'


# Package definition: Echoe generates the gem, rdoc and release tasks.
Echoe.new(PKG_NAME, PKG_VERSION) do |p|
  p.author        = "Simone Carletti"
  p.email         = "weppos@weppos.net"
  p.summary       = PKG_SUMMARY
  p.description   = <<-EOF
    Apache Log Regex is a Ruby port \
    of Peter Hickman's Apache::LogRegex 1.4 Perl module. \
    It provides functionalities to parse a line from an Apache log file into a hash.
  EOF
  p.url           = "http://code.simonecarletti.com/apachelogregex"
  p.project       = RUBYFORGE_PROJECT

  p.need_zip      = true
  p.rcov_options  = ["--main << README.rdoc -x Rakefile -x rcov"]
  p.rdoc_pattern  = /^(lib|CHANGELOG.rdoc|README.rdoc)/

  p.development_dependencies += ["rake  >=0.8",
                                 "echoe >=3.1"]
end


# The :stats task is optional: it is only defined when the
# 'code_statistics' library can be loaded.
begin
  require 'code_statistics'
  desc "Show library's code statistics"
  task :stats do
    CodeStatistics.new(["ApacheLogRegex", "lib"],
                       ["Tests", "test"]).to_s
  end
rescue LoadError
  puts "CodeStatistics (Rails) is not available"
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for apachelogregex 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{apachelogregex}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Simone Carletti"]
  s.date = %q{2009-02-13}
  s.description = %q{Apache Log Regex is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module. It provides functionalities to parse a line from an Apache log file into a hash.}
  s.email = %q{weppos@weppos.net}
  s.extra_rdoc_files = ["CHANGELOG.rdoc", "lib/apache_log_regex/version.rb", "lib/apache_log_regex.rb", "lib/apachelogregex.rb", "README.rdoc"]
  s.files = ["CHANGELOG.rdoc", "lib/apache_log_regex/version.rb", "lib/apache_log_regex.rb", "lib/apachelogregex.rb", "LICENSE.rdoc", "Manifest", "Rakefile", "README.rdoc", "setup.rb", "test/apache_log_regex_test.rb", "test/test_all.rb", "test/test_helper.rb", "apachelogregex.gemspec"]
  # has_rdoc= and rubyforge_project= are deprecated in later RubyGems
  # releases; they are guarded like required_rubygems_version= above so the
  # specification still loads where a setter is not available.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://code.simonecarletti.com/apachelogregex}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Apachelogregex", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{apachelogregex} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Ruby parser for Apache log files based on regular expressions.}
  s.test_files = ["test/test_all.rb"]

  if s.respond_to? :specification_version then
    s.specification_version = 2

    # Newer RubyGems expose the running version as Gem::VERSION;
    # Gem::RubyGemsVersion is only used as a fallback for old installations.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    # Development dependencies are declared as such when RubyGems is recent
    # enough (>= 1.2.0), otherwise as plain dependencies.
    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rake>, [">= 0.8"])
      s.add_development_dependency(%q<echoe>, [">= 3.1"])
    else
      s.add_dependency(%q<rake>, [">= 0.8"])
      s.add_dependency(%q<echoe>, [">= 3.1"])
    end
  else
    s.add_dependency(%q<rake>, [">= 0.8"])
    s.add_dependency(%q<echoe>, [">= 3.1"])
  end
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
require 'apache_log_regex/version'
|
17
|
+
|
18
|
+
|
19
|
+
#
|
20
|
+
# = Apache Log Regex
|
21
|
+
#
|
22
|
+
# Parse a line from an Apache log file into a hash.
|
23
|
+
#
|
24
|
+
# This is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module,
|
25
|
+
# available at http://cpan.uwinnipeg.ca/~peterhi/Apache-LogRegex.
|
26
|
+
#
|
27
|
+
# == Example Usage
|
28
|
+
#
|
29
|
+
# The following one is the most simple example usage.
|
30
|
+
# It tries to parse the `access.log` file and echoes each parsed line.
|
31
|
+
#
|
32
|
+
# format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
33
|
+
# parser = ApacheLogRegex.new(format)
|
34
|
+
#
|
35
|
+
# File.foreach('/var/apache/access.log') do |line|
|
36
|
+
# begin
|
37
|
+
# parser.parse!(line)
|
38
|
+
# # {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
39
|
+
# rescue ApacheLogRegex::ParseError => e
|
40
|
+
# puts "Error parsing log file: " + e.message
|
41
|
+
# end
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# More often, you might want to collect parsed lines and use them later in your program.
|
45
|
+
# The following example iterates all log lines, parses them and returns an array of Hash with the results.
|
46
|
+
#
|
47
|
+
# format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
48
|
+
# parser = ApacheLogRegex.new(format)
|
49
|
+
#
|
50
|
+
# File.readlines('/var/apache/access.log').collect do |line|
|
51
|
+
# begin
|
52
|
+
# parser.parse!(line)
|
53
|
+
# # {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
54
|
+
# rescue ApacheLogRegex::ParseError => e
|
55
|
+
# nil
|
56
|
+
# end
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
class ApacheLogRegex

  NAME            = 'ApacheLogRegex'
  GEM             = 'apachelogregex'
  AUTHOR          = 'Simone Carletti <weppos@weppos.net>'


  #
  # = ParseError
  #
  # Raised by <tt>parse!</tt> when a log line doesn't match the current +format+.
  #
  class ParseError < RuntimeError; end


  # The normalized log file format
  # (surrounding whitespace removed, runs of spaces/tabs collapsed to one space).
  # Some common formats:
  #
  #   Common Log Format (CLF)
  #   '%h %l %u %t \"%r\" %>s %b'
  #
  #   Common Log Format with Virtual Host
  #   '%v %h %l %u %t \"%r\" %>s %b'
  #
  #   NCSA extended/combined log format
  #   '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
  #
  attr_reader :format

  # Regexp instance used for parsing a log line.
  attr_reader :regexp

  # The list of field names extracted from the log format,
  # in the same order as the capture groups of <tt>regexp</tt>.
  attr_reader :names


  # Initializes a new parser instance with given log <tt>format</tt>.
  # The <tt>format</tt> argument itself is never modified.
  def initialize(format)
    @regexp = nil
    @names  = []
    @format = parse_format(format)
  end

  # Parses <tt>line</tt> according to current log <tt>format</tt>
  # and returns a hash of log field => value on success.
  # Returns <tt>nil</tt> if <tt>line</tt> doesn't match current log <tt>format</tt>.
  #
  # The line is matched after removing leading and trailing whitespace
  # (line terminator included). The stripping is done on a copy, so the
  # caller's string is left untouched and frozen strings are accepted.
  def parse(line)
    match = regexp.match(line.to_s.strip)
    return unless match

    data = {}
    names.each_with_index { |field, index| data[field] = match[index + 1] } # [0] == line
    data
  end

  # Same as <tt>ApacheLogRegex#parse</tt> but raises a <tt>ParseError</tt>
  # if <tt>line</tt> doesn't match current <tt>format</tt>.
  #
  # ==== Raises
  #
  # ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
  #
  def parse!(line)
    # The error message reports the stripped line, i.e. the text that was matched.
    row = line.to_s.strip
    parse(row) || raise(ParseError, "Invalid format `%s` for line `%s`" % [format, row])
  end


  protected

    # Overwrite this method if you want to use some human-readable name for log fields.
    # This method is called once per field at <tt>parse_format</tt> time.
    def rename_this_name(name)
      name
    end

    # Parses log <tt>format</tt> into a suitable Regexp instance.
    #
    # Appends one entry to <tt>names</tt> per format element, stores the
    # resulting Regexp in <tt>@regexp</tt> and returns the normalized format.
    def parse_format(format)
      # strip and gsub return new strings: the argument passed by the caller
      # is not changed, and a frozen format string does not raise.
      format = format.to_s.strip.gsub(/[ \t]+/, ' ')

      pattern = format.split(' ').map do |element|
        # Quoted elements are written as \"...\" (backslash + quote),
        # exactly as they appear in a LogFormat directive.
        quoted  = !(element =~ /^\\"/).nil?
        element = element.gsub(/^\\"/, '').gsub(/\\"$/, '') if quoted

        names << rename_this_name(element)
        pattern_for(element, quoted)
      end.join(' ')

      @regexp = Regexp.new("^#{pattern}$")
      format
    end

    # Returns the regular expression source (with exactly one capture group)
    # used to match the value of a single format <tt>element</tt>.
    # <tt>quoted</tt> tells whether the element was wrapped in \"...\".
    def pattern_for(element, quoted)
      if quoted
        # Request line and Referer/User-Agent headers may contain
        # backslash-escaped quotes. The header name is matched
        # case-insensitively, so '%{User-agent}i' is handled like '%{User-Agent}i'.
        if element == '%r' || element =~ /Referer|User-Agent/i
          /"([^"\\]*(?:\\.[^"\\]*)*)"/.to_s
        else
          '\"([^\"]*)\"'
        end
      elsif element =~ /^%.*t$/
        # time fields are logged within square brackets
        '(\[[^\]]+\])'
      elsif element == '%U'
        '(.+?)'
      else
        '(\S*)'
      end
    end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
class ApacheLogRegex

  # Numeric components of the library version and their dotted form.
  module Version #:nodoc:
    MAJOR, MINOR, TINY = 0, 1, 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end

  # Dotted version string; the very same object as Version::STRING.
  VERSION = Version::STRING
  STATUS  = 'alpha'
  # First run of digits found in the (empty) source string: evaluates to nil.
  BUILD   = ''.match(/(\d+)/).to_a[0]

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
require 'apache_log_regex'
|