apachelogregex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +5 -0
- data/LICENSE.rdoc +25 -0
- data/Manifest +12 -0
- data/README.rdoc +97 -0
- data/Rakefile +56 -0
- data/apachelogregex.gemspec +38 -0
- data/lib/apache_log_regex.rb +174 -0
- data/lib/apache_log_regex/version.rb +30 -0
- data/lib/apachelogregex.rb +16 -0
- data/setup.rb +1585 -0
- data/test/apache_log_regex_test.rb +110 -0
- data/test/test_all.rb +18 -0
- data/test/test_helper.rb +24 -0
- metadata +93 -0
data/CHANGELOG.rdoc
ADDED
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
= License
|
2
|
+
|
3
|
+
(The MIT License)
|
4
|
+
|
5
|
+
Copyright (c) 2008-2009 Simone Carletti <weppos@weppos.net>
|
6
|
+
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
8
|
+
a copy of this software and associated documentation files (the
|
9
|
+
"Software"), to deal in the Software without restriction, including
|
10
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
11
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
12
|
+
permit persons to whom the Software is furnished to do so, subject to
|
13
|
+
the following conditions:
|
14
|
+
|
15
|
+
The above copyright notice and this permission notice shall be
|
16
|
+
included in all copies or substantial portions of the Software.
|
17
|
+
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
20
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
22
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
23
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
24
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
|
data/Manifest
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
= Apache Log Regex
|
2
|
+
|
3
|
+
Apache Log Regex is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module,
|
4
|
+
available at http://cpan.uwinnipeg.ca/~peterhi/Apache-LogRegex.
|
5
|
+
It provides functionalities to parse a line from an Apache log file into a hash.
|
6
|
+
|
7
|
+
|
8
|
+
== Dependencies
|
9
|
+
|
10
|
+
* Ruby >= 1.8.6 (not tested with previous versions)
|
11
|
+
|
12
|
+
ApacheLogRegex is compatible with Ruby 1.9.1.
|
13
|
+
|
14
|
+
|
15
|
+
== Overview
|
16
|
+
|
17
|
+
Apache Log Regex is designed to be a simple class to parse Apache log files.
|
18
|
+
|
19
|
+
It takes an Apache logging format and generates a regular expression which is used to parse a line from a log file and returns a Hash with keys corresponding to the fields defined in the log format.
|
20
|
+
The log format should match the one defined in your Apache configuration file with the LogFormat directive.
|
21
|
+
|
22
|
+
|
23
|
+
== Example Usage
|
24
|
+
|
25
|
+
The following is the simplest usage example.
|
26
|
+
It tries to parse the `access.log` file and echoes each parsed line.
|
27
|
+
|
28
|
+
format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
29
|
+
parser = ApacheLogRegex.new(format)
|
30
|
+
|
31
|
+
File.foreach('/var/apache/access.log') do |line|
|
32
|
+
begin
|
33
|
+
parser.parse!(line)
|
34
|
+
# {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
35
|
+
rescue ApacheLogRegex::ParseError => e
|
36
|
+
puts "Error parsing log file: " + e.message
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
More often, you might want to collect parsed lines and use them later in your program.
|
41
|
+
The following example iterates over all log lines, parses them and returns an array of hashes with the results.
|
42
|
+
|
43
|
+
format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
44
|
+
parser = ApacheLogRegex.new(format)
|
45
|
+
|
46
|
+
File.readlines('/var/apache/access.log').collect do |line|
|
47
|
+
begin
|
48
|
+
parser.parse!(line)
|
49
|
+
# {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
50
|
+
rescue ApacheLogRegex::ParseError => e
|
51
|
+
nil
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
== Acknowledgments
|
57
|
+
|
58
|
+
This library is essentially a Ruby port of Apache::LogRegex Perl library.
|
59
|
+
A big thanks goes to Peter Hickman, the author of the original package. This Ruby gem would probably not exist without your contribution!
|
60
|
+
|
61
|
+
I also want to thank Harry Fuecks and Hamish Morgan,
|
62
|
+
the authors of the Python[http://code.google.com/p/apachelog/] and PHP[http://kitty0.org/] ports.
|
63
|
+
First of all they gave me the inspiration to translate the original Perl code to Ruby,
|
64
|
+
as soon as I discovered nobody did it before me.
|
65
|
+
Last but not least, their libraries have been extremely useful in helping me understand some obscure Perl statements
|
66
|
+
from the original package.
|
67
|
+
|
68
|
+
|
69
|
+
== Author
|
70
|
+
|
71
|
+
{Simone Carletti}[http://www.simonecarletti.com/] <weppos@weppos.net>
|
72
|
+
|
73
|
+
|
74
|
+
== Resources
|
75
|
+
|
76
|
+
* {Homepage}[http://code.simonecarletti.com/apachelogregex]
|
77
|
+
* {API}[http://apachelogregex.rubyforge.org/]
|
78
|
+
* {GitHub}[http://github.com/weppos/apachelogregex/]
|
79
|
+
* {RubyForge}[http://rubyforge.org/projects/apachelogregex/]
|
80
|
+
|
81
|
+
|
82
|
+
== FeedBack and Bug reports
|
83
|
+
|
84
|
+
Feel free to email {Simone Carletti}[mailto:weppos@weppos.net] with any questions or feedback.
|
85
|
+
|
86
|
+
Please use the {Ticket System}[http://code.simonecarletti.com/projects/show/apachelogregex] to submit bug reports or feature requests.
|
87
|
+
|
88
|
+
|
89
|
+
== Changelog
|
90
|
+
|
91
|
+
See the CHANGELOG.rdoc file for details.
|
92
|
+
|
93
|
+
|
94
|
+
== License
|
95
|
+
|
96
|
+
Copyright (c) 2008-2009 Simone Carletti, ApacheLogRegex is released under the MIT license.
|
97
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
gem 'echoe', '>= 3.1'
|
5
|
+
require 'echoe'
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + "/lib")
|
8
|
+
require 'apache_log_regex'
|
9
|
+
|
10
|
+
|
11
|
+
# Common package properties.
#
# Each value can be overridden from the environment (PKG_NAME, PKG_VERSION);
# otherwise it falls back to the constants exposed by the library itself.
PKG_NAME          = ENV['PKG_NAME'] || ApacheLogRegex::GEM
PKG_SUMMARY       = "Ruby parser for Apache log files based on regular expressions."
PKG_FILES         = FileList.new("{lib,test}/**/*.rb") do |files|
  files.include %w(README.rdoc CHANGELOG.rdoc LICENSE.rdoc)
  files.include %w(Rakefile setup.rb)
end
RUBYFORGE_PROJECT = 'apachelogregex'

# Build the version string WITHOUT mutating its source: appending in place
# (<<) would either modify ApacheLogRegex::VERSION itself or fail outright
# when the value comes from ENV and that string is frozen.
# With SNAPSHOT=1 a UTC timestamp is appended, e.g. "0.1.0.20090213120000".
base_version = ENV['PKG_VERSION'] || ApacheLogRegex::VERSION
PKG_VERSION  = if ENV['SNAPSHOT'].to_i == 1
  "#{base_version}.#{Time.now.utc.strftime("%Y%m%d%H%M%S")}"
else
  base_version
end


# Gem packaging / release tasks, generated by Echoe from the properties above.
Echoe.new(PKG_NAME, PKG_VERSION) do |p|
  p.author        = "Simone Carletti"
  p.email         = "weppos@weppos.net"
  p.summary       = PKG_SUMMARY
  p.description   = <<-EOF
    Apache Log Regex is a Ruby port \
    of Peter Hickman's Apache::LogRegex 1.4 Perl module. \
    It provides functionalities to parse a line from an Apache log file into a hash.
  EOF
  p.url           = "http://code.simonecarletti.com/apachelogregex"
  p.project       = RUBYFORGE_PROJECT

  p.need_zip      = true
  p.rcov_options  = ["--main << README.rdoc -x Rakefile -x rcov"]
  p.rdoc_pattern  = /^(lib|CHANGELOG.rdoc|README.rdoc)/

  p.development_dependencies += ["rake  >=0.8",
                                 "echoe >=3.1"]
end


# Optional :stats task. It is only defined when 'code_statistics' can be
# loaded; otherwise a notice is printed and the task is skipped.
begin
  require 'code_statistics'
  desc "Show library's code statistics"
  task :stats do
    CodeStatistics.new(["ApacheLogRegex", "lib"],
                       ["Tests", "test"]).to_s
  end
rescue LoadError
  puts "CodeStatistics (Rails) is not available"
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{apachelogregex}
|
5
|
+
s.version = "0.1.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Simone Carletti"]
|
9
|
+
s.date = %q{2009-02-13}
|
10
|
+
s.description = %q{Apache Log Regex is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module. It provides functionalities to parse a line from an Apache log file into a hash.}
|
11
|
+
s.email = %q{weppos@weppos.net}
|
12
|
+
s.extra_rdoc_files = ["CHANGELOG.rdoc", "lib/apache_log_regex/version.rb", "lib/apache_log_regex.rb", "lib/apachelogregex.rb", "README.rdoc"]
|
13
|
+
s.files = ["CHANGELOG.rdoc", "lib/apache_log_regex/version.rb", "lib/apache_log_regex.rb", "lib/apachelogregex.rb", "LICENSE.rdoc", "Manifest", "Rakefile", "README.rdoc", "setup.rb", "test/apache_log_regex_test.rb", "test/test_all.rb", "test/test_helper.rb", "apachelogregex.gemspec"]
|
14
|
+
s.has_rdoc = true
|
15
|
+
s.homepage = %q{http://code.simonecarletti.com/apachelogregex}
|
16
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Apachelogregex", "--main", "README.rdoc"]
|
17
|
+
s.require_paths = ["lib"]
|
18
|
+
s.rubyforge_project = %q{apachelogregex}
|
19
|
+
s.rubygems_version = %q{1.3.1}
|
20
|
+
s.summary = %q{Ruby parser for Apache log files based on regular expressions.}
|
21
|
+
s.test_files = ["test/test_all.rb"]
|
22
|
+
|
23
|
+
if s.respond_to? :specification_version then
|
24
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
25
|
+
s.specification_version = 2
|
26
|
+
|
27
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
|
+
s.add_development_dependency(%q<rake>, [">= 0.8"])
|
29
|
+
s.add_development_dependency(%q<echoe>, [">= 3.1"])
|
30
|
+
else
|
31
|
+
s.add_dependency(%q<rake>, [">= 0.8"])
|
32
|
+
s.add_dependency(%q<echoe>, [">= 3.1"])
|
33
|
+
end
|
34
|
+
else
|
35
|
+
s.add_dependency(%q<rake>, [">= 0.8"])
|
36
|
+
s.add_dependency(%q<echoe>, [">= 3.1"])
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,174 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
require 'apache_log_regex/version'
|
17
|
+
|
18
|
+
|
19
|
+
#
|
20
|
+
# = Apache Log Regex
|
21
|
+
#
|
22
|
+
# Parse a line from an Apache log file into a hash.
|
23
|
+
#
|
24
|
+
# This is a Ruby port of Peter Hickman's Apache::LogRegex 1.4 Perl module,
|
25
|
+
# available at http://cpan.uwinnipeg.ca/~peterhi/Apache-LogRegex.
|
26
|
+
#
|
27
|
+
# == Example Usage
|
28
|
+
#
|
29
|
+
# The following is the simplest usage example.
|
30
|
+
# It tries to parse the `access.log` file and echoes each parsed line.
|
31
|
+
#
|
32
|
+
# format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
33
|
+
# parser = ApacheLogRegex.new(format)
|
34
|
+
#
|
35
|
+
# File.foreach('/var/apache/access.log') do |line|
|
36
|
+
# begin
|
37
|
+
# parser.parse!(line)
|
38
|
+
# # {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
39
|
+
# rescue ApacheLogRegex::ParseError => e
|
40
|
+
# puts "Error parsing log file: " + e.message
|
41
|
+
# end
|
42
|
+
# end
|
43
|
+
#
|
44
|
+
# More often, you might want to collect parsed lines and use them later in your program.
|
45
|
+
# The following example iterates all log lines, parses them and returns an array of Hash with the results.
|
46
|
+
#
|
47
|
+
# format = '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
|
48
|
+
# parser = ApacheLogRegex.new(format)
|
49
|
+
#
|
50
|
+
# File.readlines('/var/apache/access.log').collect do |line|
|
51
|
+
# begin
|
52
|
+
# parser.parse!(line)
|
53
|
+
# # {"%r"=>"GET /blog/index.xml HTTP/1.1", "%h"=>"87.18.183.252", ... }
|
54
|
+
# rescue ApacheLogRegex::ParseError => e
|
55
|
+
# nil
|
56
|
+
# end
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
class ApacheLogRegex

  NAME            = 'ApacheLogRegex'
  GEM             = 'apachelogregex'
  AUTHOR          = 'Simone Carletti <weppos@weppos.net>'


  #
  # = ParseError
  #
  # Raised by <tt>parse!</tt> when a log line doesn't match the current +format+.
  #
  class ParseError < RuntimeError; end


  # The normalized log file format
  # (trimmed, with runs of spaces/tabs collapsed to a single space).
  # Some common formats:
  #
  #   Common Log Format (CLF)
  #   '%h %l %u %t \"%r\" %>s %b'
  #
  #   Common Log Format with Virtual Host
  #   '%v %h %l %u %t \"%r\" %>s %b'
  #
  #   NCSA extended/combined log format
  #   '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"'
  #
  attr_reader :format

  # Regexp instance used for parsing a log line.
  attr_reader :regexp

  # The list of field names extracted from the log format,
  # in the same order as the capture groups of <tt>regexp</tt>.
  attr_reader :names


  # Initializes a new parser instance with given log <tt>format</tt>.
  # The <tt>format</tt> argument itself is never modified.
  def initialize(format)
    @regexp = nil
    @names  = []
    @format = parse_format(format)
  end

  # Parses <tt>line</tt> according to current log <tt>format</tt>
  # and returns a hash of log field => value on success.
  # Returns <tt>nil</tt> if <tt>line</tt> doesn't match current log <tt>format</tt>.
  #
  # <tt>line</tt> is converted with <tt>to_s</tt> and stripped on a copy,
  # so the caller's object is left untouched and frozen strings are accepted.
  def parse(line)
    # strip also removes the trailing line terminator, so no separate chomp is needed
    match = regexp.match(line.to_s.strip)
    return unless match

    data = {}
    names.each_with_index { |field, index| data[field] = match[index + 1] } # [0] == line
    data
  end

  # Same as <tt>ApacheLogRegex#parse</tt> but raises a <tt>ParseError</tt>
  # if <tt>line</tt> doesn't match current <tt>format</tt>.
  #
  # ==== Raises
  #
  # ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
  #
  def parse!(line)
    parse(line) || raise(ParseError, "Invalid format `%s` for line `%s`" % [format, line])
  end


  protected

    # Overwrite this method if you want to use some human-readable name for log fields.
    # This method is called once per field at <tt>parse_format</tt> time.
    def rename_this_name(name)
      name
    end

    # Parses log <tt>format</tt> into a suitable Regexp instance.
    #
    # Stores the compiled expression in <tt>@regexp</tt>, the field names in
    # <tt>@names</tt>, and returns the normalized format string.
    # Works on a copy: the <tt>format</tt> argument is not modified.
    def parse_format(format)
      format = format.to_s.strip.gsub(/[ \t]+/, ' ') # trim, then collapse tabs/spaces

      @names = [] # start clean so the names always line up with the capture groups

      pattern = format.split(' ').map do |element|
        # a quoted field is written as \"...\" (backslash + quote) in the format string
        quoted  = !!(element =~ /^\\"/)
        element = element.sub(/^\\"/, '').sub(/\\"$/, '') if quoted

        @names << rename_this_name(element)
        field_pattern(element, quoted)
      end.join(' ')

      @regexp = Regexp.new("^#{pattern}$")
      format
    end

    # Returns the regexp fragment (with exactly one capture group)
    # used to match the log field described by <tt>element</tt>.
    # <tt>quoted</tt> tells whether the field was wrapped in \"...\" in the format.
    def field_pattern(element, quoted)
      if quoted
        # Request line, referer and user agent may contain backslash-escaped quotes.
        # The header name is matched case-insensitively so that both
        # %{User-Agent}i and %{User-agent}i get the escape-aware pattern.
        if element == '%r' || element =~ /Referer|User-Agent/i
          /"([^"\\]*(?:\\.[^"\\]*)*)"/
        else
          '\"([^\"]*)\"'
        end
      elsif element =~ /^%.*t$/
        # directives ending in "t" (e.g. %t): a value wrapped in square brackets
        '(\[[^\]]+\])'
      elsif element == '%U'
        '(.+?)'
      else
        '(\S*)'
      end
    end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
class ApacheLogRegex

  # Components of the library version number.
  module Version #:nodoc:
    MAJOR = 0
    MINOR = 1
    TINY  = 0

    # Dotted version string built from the components above, e.g. "0.1.0".
    STRING = [MAJOR, MINOR, TINY].join('.')
  end

  # Library version as a dotted string (same object as Version::STRING).
  VERSION         = Version::STRING
  # Release status label.
  STATUS          = 'alpha'
  # First run of digits found in the build string, or nil when there is none
  # (the build string is empty here, so BUILD is nil).
  BUILD           = ''[/\d+/]

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#
|
2
|
+
# = Apache Log Regex
|
3
|
+
#
|
4
|
+
# Ruby parser for Apache log files based on regular expressions.
|
5
|
+
#
|
6
|
+
# Category::
|
7
|
+
# Package:: ApacheLogRegex
|
8
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
9
|
+
# License:: MIT License
|
10
|
+
#
|
11
|
+
#--
|
12
|
+
# SVN: $Id$
|
13
|
+
#++
|
14
|
+
|
15
|
+
|
16
|
+
require 'apache_log_regex'
|