apache_log_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.rdoc +49 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/apache_log_parser.gemspec +55 -0
- data/lib/apache_log_parser.rb +54 -0
- data/spec/apache_log_parser_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +76 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Joel Watson
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
= Getting Started
|
2
|
+
|
3
|
+
This is a library designed to easily parse and access standard Apache log files.
|
4
|
+
|
5
|
+
To get started, require the library:
|
6
|
+
|
7
|
+
require 'apache_log_parser'
|
8
|
+
|
9
|
+
From there, you would use it as follows:
|
10
|
+
|
11
|
+
ApacheLogParser.parse(logfile, rules) do |parsed|
|
12
|
+
parsed[:ip] #=> "12.12.12.12"
|
13
|
+
parsed[:date] #=> "21/Jan/2010"
|
14
|
+
parsed[:day] #=> 21
|
15
|
+
parsed[:month] #=> "Jan"
|
16
|
+
parsed[:year] #=> 2010
|
17
|
+
parsed[:hour] #=> 14
|
18
|
+
parsed[:zone] #=> "-0800"
|
19
|
+
parsed[:method] #=> "GET"
|
20
|
+
parsed[:resource] #=> "/some/page.php"
|
21
|
+
parsed[:status] #=> 200
|
22
|
+
parsed[:size] #=> "7047"
|
23
|
+
parsed[:referer] #=> "-"
|
24
|
+
parsed[:user_agent] #=> "Mozilla/5.0 (Macintosh; U; Intel..."
|
25
|
+
end
|
26
|
+
|
27
|
+
The logfile parameter is simply the path to the logfile in question and the rules
|
28
|
+
parameter is a hash of rules to filter the logfile with.
|
29
|
+
|
30
|
+
= Using Rules
|
31
|
+
|
32
|
+
To use rules, simply build a hash with options you want to filter with as follows:
|
33
|
+
|
34
|
+
rules = {}
|
35
|
+
rules[:hour] = 11..13 # only accept hits between 11:00 and 13:59 hours
|
36
|
+
rules[:day] = 21 # only accept hits where the day is 21
|
37
|
+
rules[:date] = "12/Jan/2010" # only accept hits on Jan 12, 2010
|
38
|
+
rules[:method] = "GET" # only accept hits where the request method is GET
|
39
|
+
rules[:status] = 404 # only accept hits where the status response is 404
|
40
|
+
|
41
|
+
Rules are combined with AND, so only hits where ALL rules are met will be kept. From there,
|
42
|
+
you would simply pass the rules hash into the parse method as shown above. The logfile
|
43
|
+
is read one line at a time, so the memory footprint is quite small and can easily handle
|
44
|
+
large logfiles (sizes as large as 6GB have been tested). The larger the file, the longer
|
45
|
+
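the parse process will take, though. Note that when an :hour rule is given, parsing stops at the first hit whose hour is later than the end of the range, so the logfile is assumed to be in chronological order.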
|
46
|
+
|
47
|
+
== Copyright
|
48
|
+
|
49
|
+
Copyright (c) 2010 Joel Watson. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# Rake build script: gem packaging tasks via Jeweler, RSpec spec/rcov tasks, and RDoc generation.
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Jeweler is optional: if it (or one of its dependencies) fails to load, the
# rescue below prints an install hint and the rest of this file still loads.
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "apache_log_parser"
|
8
|
+
gem.summary = %Q{Library to easily parse standard Apache log files.}
|
9
|
+
gem.description = %Q{Library to easily parse standard Apache log files.}
|
10
|
+
gem.email = "watsonian@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/watsonian/apache_log_parser"
|
12
|
+
gem.authors = ["watsonian"]
|
13
|
+
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
+
end
|
16
|
+
Jeweler::GemcutterTasks.new
|
17
|
+
rescue LoadError
|
18
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'spec/rake/spectask'
|
22
|
+
# `rake spec`: puts lib and spec on the load path and runs every file matching spec/**/*_spec.rb.
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
+
spec.libs << 'lib' << 'spec'
|
24
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
+
end
|
26
|
+
|
27
|
+
# `rake rcov`: the same spec files, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
+
spec.libs << 'lib' << 'spec'
|
29
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
+
spec.rcov = true
|
31
|
+
end
|
32
|
+
|
33
|
+
# NOTE(review): :check_dependencies is not defined in this file — presumably it is
# supplied by the Jeweler tasks above; verify `rake spec` when Jeweler failed to load.
task :spec => :check_dependencies
|
34
|
+
|
35
|
+
task :default => :spec
|
36
|
+
|
37
|
+
require 'rake/rdoctask'
|
38
|
+
Rake::RDocTask.new do |rdoc|
|
39
|
+
# Use the contents of the VERSION file (when it exists) in the generated RDoc title.
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
40
|
+
|
41
|
+
rdoc.rdoc_dir = 'rdoc'
|
42
|
+
rdoc.title = "apache_log_parser #{version}"
|
43
|
+
rdoc.rdoc_files.include('README*')
|
44
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/apache_log_parser.gemspec
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{apache_log_parser}
|
8
|
+
s.version = "1.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["watsonian"]
|
12
|
+
s.date = %q{2010-01-27}
|
13
|
+
s.description = %q{Library to easily parse standard Apache log files.}
|
14
|
+
s.email = %q{watsonian@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".gitignore",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"apache_log_parser.gemspec",
|
27
|
+
"lib/apache_log_parser.rb",
|
28
|
+
"spec/apache_log_parser_spec.rb",
|
29
|
+
"spec/spec.opts",
|
30
|
+
"spec/spec_helper.rb"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/watsonian/apache_log_parser}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.5}
|
36
|
+
s.summary = %q{Library to easily parse standard Apache log files.}
|
37
|
+
s.test_files = [
|
38
|
+
"spec/apache_log_parser_spec.rb",
|
39
|
+
"spec/spec_helper.rb"
|
40
|
+
]
|
41
|
+
|
42
|
+
if s.respond_to? :specification_version then
|
43
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
44
|
+
s.specification_version = 3
|
45
|
+
|
46
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
47
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
48
|
+
else
|
49
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
50
|
+
end
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
data/lib/apache_log_parser.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# Parses Apache access log files one line at a time and yields each hit as a
# hash of fields, optionally keeping only the hits that satisfy a set of rules.
class ApacheLogParser
  # Pattern for a single log line. Capture groups, in order: ip, date, day,
  # month, year, time, hour, zone, method, resource, status, size, referer,
  # user agent.
  LINE_PATTERN = /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*?(([0-9]{1,2})\/(.*?)\/([0-9]{4})):(([0-9]{2}):[0-9]{2}:[0-9]{2})\s*(.*?)\]\s*"([\w]*)\s(.*?)\s.*?"\s([0-9]{3})\s(.*?)\s*"(.*?)"\s*"(.*?)"/

  # Parses +filename+ and yields one hash per line to the block.
  #
  # filename - path of the log file to read.
  # rules    - hash mapping a field (:hour, :day, :date, :method, :status, ...)
  #            to the accepted value, or to a range/array of accepted values.
  #            A hit is yielded only when every rule matches. The hash is not
  #            modified; nil is treated as "no rules".
  #
  # Without a block an Enumerator over the same hits is returned.
  def self.parse(filename, rules={}, &block)
    return enum_for(:parse, filename, rules) unless block

    parse_file(filename, process_rules(rules), &block)
  end

  # The helpers below are internal. They were declared after a bare `private`,
  # which has no effect on `def self.` methods, so they have always been
  # callable from outside; they are left callable to stay backward-compatible.

  # Splits one log line into its fields.
  #
  # Returns a hash with the keys :ip, :date, :day, :month, :year, :time, :hour,
  # :zone, :method, :resource, :status, :size, :referer and :user_agent
  # (:day, :year, :hour and :status as integers, everything else as strings),
  # or an empty hash when the line does not match LINE_PATTERN.
  def self.parse_line(line)
    m = line.match(LINE_PATTERN)
    return {} unless m

    {:ip         => m[1],
     :date       => m[2],
     :day        => m[3].to_i,
     :month      => m[4],
     :year       => m[5].to_i,
     :time       => m[6],
     :hour       => m[7].to_i,
     :zone       => m[8],
     :method     => m[9],
     :resource   => m[10],
     :status     => m[11].to_i,
     :size       => m[12],
     :referer    => m[13],
     :user_agent => m[14]}
  end

  # Reads +filename+ line by line and yields every parsed line that satisfies
  # all of +rules+ (already normalised by process_rules).
  #
  # With no rules every line is yielded, including an empty hash for lines
  # that could not be parsed. With rules, unparseable lines are skipped.
  # When an :hour rule is present, reading stops at the first hit whose hour
  # is greater than the last accepted hour.
  def self.parse_file(filename, rules={}, &block)
    return enum_for(:parse_file, filename, rules) unless block

    # Expand every rule to its list of accepted values once, not once per line.
    filters = rules.map { |field, accepted| [field, Array(accepted)] }
    last_hour = rules[:hour] ? Array(rules[:hour]).last : nil

    File.foreach(filename) do |line|
      parsed = parse_line(line)
      unless filters.empty?
        # An unparseable line has no fields to compare; previously it raised
        # NoMethodError (nil > Integer) whenever an :hour rule was given.
        next if parsed.empty?

        # stop parsing the file if we're past the designated hour range
        break if last_hour && parsed[:hour] > last_hour

        # go to the next line unless every rule is matched by this line
        next unless filters.all? { |field, accepted| accepted.include?(parsed[field]) }
      end
      yield parsed
    end
  end

  # Normalises the rules hash: a :date rule ("dd/Mon/yyyy") is expanded into
  # additional :day (integer), :month (string) and :year (integer) rules.
  #
  # Works on a copy so the caller's hash is left untouched, and treats nil as
  # an empty rule set. Returns the normalised copy.
  def self.process_rules(rules)
    rules = (rules || {}).dup
    if rules[:date]
      day, month, year = rules[:date].split("/")
      rules[:day]   = day.to_i
      rules[:month] = month
      rules[:year]  = year.to_i
    end
    rules
  end
end
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: apache_log_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- watsonian
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-27 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rspec
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.9
|
24
|
+
version:
|
25
|
+
description: Library to easily parse standard Apache log files.
|
26
|
+
email: watsonian@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README.rdoc
|
34
|
+
files:
|
35
|
+
- .document
|
36
|
+
- .gitignore
|
37
|
+
- LICENSE
|
38
|
+
- README.rdoc
|
39
|
+
- Rakefile
|
40
|
+
- VERSION
|
41
|
+
- apache_log_parser.gemspec
|
42
|
+
- lib/apache_log_parser.rb
|
43
|
+
- spec/apache_log_parser_spec.rb
|
44
|
+
- spec/spec.opts
|
45
|
+
- spec/spec_helper.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://github.com/watsonian/apache_log_parser
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --charset=UTF-8
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 1.3.5
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: Library to easily parse standard Apache log files.
|
74
|
+
test_files:
|
75
|
+
- spec/apache_log_parser_spec.rb
|
76
|
+
- spec/spec_helper.rb
|