apache_log_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Joel Watson
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,49 @@
1
+ = Getting Started
2
+
3
+ This is a library designed to easily parse and access standard Apache log files.
4
+
5
+ To get started, require the library:
6
+
7
+ require 'apache_log_parser'
8
+
9
+ From there, you would use it as follows:
10
+
11
+ ApacheLogParser.parse(logfile, rules) do |parsed|
12
+ parsed[:ip] #=> "12.12.12.12"
13
+ parsed[:date] #=> "21/Jan/2010"
14
+ parsed[:day] #=> 21
15
+ parsed[:month] #=> "Jan"
16
+ parsed[:year] #=> 2010
17
+ parsed[:hour] #=> 14
18
+ parsed[:zone] #=> "-0800"
19
+ parsed[:method] #=> "GET"
20
+ parsed[:resource] #=> "/some/page.php"
21
+ parsed[:status] #=> 200
22
+ parsed[:size] #=> "7047"
23
+ parsed[:referer] #=> "-"
24
+ parsed[:user_agent] #=> "Mozilla/5.0 (Macintosh; U; Intel..."
25
+ end
26
+
27
+ The logfile parameter is simply the path to the logfile in question and the rules
28
+ parameter is a hash of rules to filter the logfile with.
29
+
30
+ = Using Rules
31
+
32
+ To use rules, simply build a hash with options you want to filter with as follows:
33
+
34
+ rules = {}
35
+ rules[:hour] = 11..13 # only accept hits between 11:00 and 13:59 hours
36
+ rules[:day] = 21 # only accept hits where the day is 21
37
+ rules[:date] = "12/Jan/2010" # only accept hits on Jan 12, 2010
38
+ rules[:method] = "GET" # only accept hits where the request method is GET
39
+ rules[:status] = 404 # only accept hits where the status response is 404
40
+
41
+ Rules are combined with AND, so only hits where ALL rules are met will be kept. From there,
42
+ you would simply pass the rules hash into the parse method as shown above. The logfile
43
+ is read one line at a time, so the memory footprint is quite small and can easily handle
44
+ large logfiles (sizes as large as 6GB have been tested). The larger the file, the longer
45
+ the parse process will take though.
46
+
47
+ == Copyright
48
+
49
+ Copyright (c) 2010 Joel Watson. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "apache_log_parser"
8
+ gem.summary = %Q{Library to easily parse standard Apache log files.}
9
+ gem.description = %Q{Library to easily parse standard Apache log files.}
10
+ gem.email = "watsonian@gmail.com"
11
+ gem.homepage = "http://github.com/watsonian/apache_log_parser"
12
+ gem.authors = ["watsonian"]
13
+ gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
+ end
20
+
21
+ require 'spec/rake/spectask'
22
+ Spec::Rake::SpecTask.new(:spec) do |spec|
23
+ spec.libs << 'lib' << 'spec'
24
+ spec.spec_files = FileList['spec/**/*_spec.rb']
25
+ end
26
+
27
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
28
+ spec.libs << 'lib' << 'spec'
29
+ spec.pattern = 'spec/**/*_spec.rb'
30
+ spec.rcov = true
31
+ end
32
+
33
+ task :spec => :check_dependencies
34
+
35
+ task :default => :spec
36
+
37
+ require 'rake/rdoctask'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "apache_log_parser #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,55 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{apache_log_parser}
8
+ s.version = "1.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["watsonian"]
12
+ s.date = %q{2010-01-27}
13
+ s.description = %q{Library to easily parse standard Apache log files.}
14
+ s.email = %q{watsonian@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "apache_log_parser.gemspec",
27
+ "lib/apache_log_parser.rb",
28
+ "spec/apache_log_parser_spec.rb",
29
+ "spec/spec.opts",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/watsonian/apache_log_parser}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Library to easily parse standard Apache log files.}
37
+ s.test_files = [
38
+ "spec/apache_log_parser_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
48
+ else
49
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
50
+ end
51
+ else
52
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
53
+ end
54
+ end
55
+
@@ -0,0 +1,54 @@
1
# Parses Apache access-log files one line at a time and yields a hash of
# fields for every line that satisfies an optional set of filter rules.
#
#   ApacheLogParser.parse("access.log", :status => 404) do |hit|
#     puts hit[:resource]
#   end
class ApacheLogParser
  # Pattern for a single log entry. Capture groups, in order: ip, date, day,
  # month, year, time, hour, zone, method, resource, status, size, referer,
  # user agent. Only dotted-quad (IPv4-style) client addresses are matched.
  LINE_PATTERN = /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*?(([0-9]{1,2})\/(.*?)\/([0-9]{4})):(([0-9]{2}):[0-9]{2}:[0-9]{2})\s*(.*?)\]\s*"([\w]*)\s(.*?)\s.*?"\s([0-9]{3})\s(.*?)\s*"(.*?)"\s*"(.*?)"/

  # Keys that together identify one calendar day.
  DATE_KEYS = [:day, :month, :year]

  # Parses +filename+ and yields one hash per accepted line.
  #
  # filename - path of the log file to read.
  # rules    - hash of field => accepted value(s). A value may be a scalar,
  #            an Array or a Range; a line is kept only when EVERY rule
  #            matches. A :date rule ("21/Jan/2010") is expanded into :day,
  #            :month and :year rules. The hash passed in is not modified.
  #
  # Returns an Enumerator when no block is given.
  def self.parse(filename, rules={}, &block)
    return enum_for(:parse, filename, rules) unless block
    parse_file(filename, process_rules(rules), &block)
  end

  # NOTE: the helpers below are internal. The original source placed a bare
  # `private` here, but `private` does not apply to methods defined with
  # `def self.`, so they have always been callable from outside. They are left
  # callable to avoid breaking existing callers.

  # Splits one log line into its fields.
  #
  # Returns a hash with :ip, :date, :day, :month, :year, :time, :hour, :zone,
  # :method, :resource, :status, :size, :referer and :user_agent. :day, :year,
  # :hour and :status are Integers, everything else is a String. Returns an
  # empty hash when the line does not match LINE_PATTERN.
  def self.parse_line(line)
    m = LINE_PATTERN.match(line)
    return {} unless m

    {:ip         => m[1],
     :date       => m[2],
     :day        => m[3].to_i,
     :month      => m[4],
     :year       => m[5].to_i,
     :time       => m[6],
     :hour       => m[7].to_i,
     :zone       => m[8],
     :method     => m[9],
     :resource   => m[10],
     :status     => m[11].to_i,
     :size       => m[12],
     :referer    => m[13],
     :user_agent => m[14]}
  end

  # Streams +filename+ line by line, yielding the parsed hash of every line
  # that satisfies all of +rules+ (already expanded by process_rules).
  #
  # With no rules every line is yielded, including an empty hash for lines
  # that could not be parsed. With rules, unparseable lines are skipped.
  def self.parse_file(filename, rules={}, &block)
    # Normalise each rule to an array of accepted values once, instead of
    # rebuilding it for every line of the file.
    allowed = {}
    rules.each { |key, value| allowed[key] = Array(value) }

    max_hour = allowed[:hour] && allowed[:hour].max

    # Reading may stop early only when the rules pin a single calendar day:
    # once a line on that day is past the last accepted hour, no later line
    # can match. This assumes entries are written in chronological order.
    # Without a pinned day, a later day may still contain matching hours, so
    # the whole file has to be scanned.
    single_day = DATE_KEYS.all? { |key| allowed[key] && allowed[key].size == 1 }
    can_stop_early = max_hour && single_day

    File.foreach(filename) do |line|
      parsed = parse_line(line)

      unless allowed.empty?
        # An unparseable line cannot satisfy any rule (and has no :hour to
        # compare against).
        next if parsed.empty?

        if can_stop_early &&
           DATE_KEYS.all? { |key| allowed[key].first == parsed[key] } &&
           parsed[:hour] > max_hour
          break
        end

        # Skip the line unless every rule accepts the corresponding field.
        next unless allowed.all? { |key, values| values.include?(parsed[key]) }
      end

      yield parsed
    end
  end

  # Returns a copy of +rules+ in which a :date rule ("dd/Mon/yyyy") has been
  # expanded into :day (Integer), :month (String) and :year (Integer) rules,
  # overriding any such rules already present. The argument is left untouched;
  # nil is treated as "no rules".
  def self.process_rules(rules)
    rules = (rules || {}).dup
    if rules[:date]
      day, month, year = rules[:date].split("/")
      rules[:day]   = day.to_i
      rules[:month] = month
      rules[:year]  = year.to_i
    end
    rules
  end
end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "ApacheLogParser" do
4
+ # it "fails" do
5
+ # fail "hey buddy, you should probably rename this file and start specing for real"
6
+ # end
7
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'apache_log_parser'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apache_log_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - watsonian
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-27 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Library to easily parse standard Apache log files.
26
+ email: watsonian@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - apache_log_parser.gemspec
42
+ - lib/apache_log_parser.rb
43
+ - spec/apache_log_parser_spec.rb
44
+ - spec/spec.opts
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/watsonian/apache_log_parser
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --charset=UTF-8
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Library to easily parse standard Apache log files.
74
+ test_files:
75
+ - spec/apache_log_parser_spec.rb
76
+ - spec/spec_helper.rb