http-log-parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +30 -0
- data/Rakefile +36 -0
- data/lib/http/parser.rb +96 -0
- data/lib/http_log_parser.rb +4 -0
- metadata +65 -0
data/README.rdoc
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
= LogParser
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
This gem provides an easy-to-use parser for various HTTP log formats.
|
6
|
+
|
7
|
+
== Installation
|
8
|
+
|
9
|
+
Just run:
|
10
|
+
|
11
|
+
gem install http-log-parser
|
12
|
+
|
13
|
+
That should take care of it.
|
14
|
+
|
15
|
+
== Usage
|
16
|
+
|
17
|
+
require 'http_log_parser'
|
18
|
+
|
19
|
+
parser = HttpLogParser.new
|
20
|
+
|
21
|
+
File.open('/path/to.log', 'r') do |file|
|
22
|
+
while(line = file.gets)
|
23
|
+
parsed_data = parser.parse_line(line)
|
24
|
+
p parsed_data
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
== License
|
29
|
+
|
30
|
+
This code is made available under the MIT license. It is based on code from Jan Wikholm.
|
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'rubygems'
# NOTE(review): 'rake/gempackagetask' and 'rake/rdoctask' are the task
# libraries this project was written against; newer Rake releases ship the
# equivalents as 'rubygems/package_task' and 'rdoc/task'. Confirm which Rake
# version the build uses before switching.
require 'rake/gempackagetask'
require 'rake/rdoctask'

# Gem specification shared by the packaging task and the default task below.
spec = Gem::Specification.new do |s|
  s.name = "http-log-parser"
  s.version = "0.0.1"
  s.author = "Torsten Curdt"
  s.email = "tcurdt at vafer.org"
  s.homepage = "http://github.com/tcurdt/http-log-parser"
  s.description = "HTTP log file parser"
  s.summary = "A package for parsing web server logs."

  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README.rdoc"]

  s.require_path = "lib"
  # Ship the README, this Rakefile and everything below lib/.
  s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
end

# Defines the packaging tasks for the gem; a tarball is requested as well.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# Defines the :rdoc task, writing API documentation to ./rdoc.
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'HttpLogParser'
  rdoc.options << '--line-numbers' << '--inline-source'
  # The project's README is README.rdoc (see s.files above); the bare name
  # 'README' matched no file, so the README was left out of the docs.
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Default task: depend on the packaged gem file so it is built when missing.
task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
  puts "generated latest version"
end
|
data/lib/http/parser.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# Compiles an Apache-style log format string (for example
# '%h %l %u %t \"%r\" %>s %b') into:
#
# * +format_symbols+ - the field names, in the order their directives appear
# * +format_regex+   - a regular expression with one capture group per field
#
# +name+ and +format+ are stored unchanged for callers to inspect.
class HttpLogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  # Directive character => [field name, pattern that captures the field].
  # The table is consulted every time a format is parsed.
  DIRECTIVES = {
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],
    'l' => [:auth, /.*?/],
    'u' => [:username, /.*?/],
    't' => [:datetime, /\[.*?\]/],
    'r' => [:request, /.*?/],
    's' => [:status, /\d+/],
    'b' => [:bytecount, /-|\d+/],
    'v' => [:domain, /.*?/],
    'i' => [:header_lines, /.*?/],
    'e' => [:errorlevel, /\[.*?\]/],
  }

  # name   - identifier for this format (e.g. :common)
  # format - the format string to compile
  def initialize(name, format)
    @name, @format = name, format
    parse_format(format)
  end

  # Scans +format+ for %-directives and (re)builds +format_symbols+ and
  # +format_regex+ from them.
  #
  # Each directive may carry a condition (such as the '>' in '%>s') and a
  # '{...}' subdirective. Whitespace, backslashes and double quotes that
  # follow a directive are carried into the regex as the separator between
  # capture groups.
  def parse_format(format)
    # Build a real character class from the directive keys. Interpolating the
    # keys as an Array (as was done before) stringifies to '["h|l|..."]' on
    # Ruby >= 1.9, which also admits '"', '|' and friends as directives.
    directive_chars = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_chars}])([\s\\"]*)/

    symbols = []
    pieces = []
    format.scan(format_directive) do |condition, subdirective, directive_char, ignored|
      symbol, pattern = process_directive(directive_char, subdirective, condition)
      # Every whitespace character in the separator becomes a literal '\s'
      # regex token; the block form avoids replacement-string escaping rules
      # and leaves the scanned string untouched.
      separator = ignored.to_s.gsub(/\s/) { '\s' }
      symbols << symbol
      pieces << "(#{pattern})#{separator}"
    end
    @format_symbols = symbols
    @format_regex = /^#{pieces.join}/
  end

  # Resolves one directive to [field name, regex source].
  #
  # directive_char - single character key into DIRECTIVES
  # subdirective   - the '{...}' part including braces, or nil
  # condition      - text between '%' and the directive; currently unused
  #
  # For 'i' with a subdirective the field name is derived from the header
  # name ('{User-agent}' => :user_agent). A bare '%i' falls back to the name
  # from DIRECTIVES instead of failing on a nil subdirective.
  #
  # Raises ArgumentError when +directive_char+ is not in DIRECTIVES.
  def process_directive(directive_char, subdirective, condition)
    field_name, pattern = DIRECTIVES.fetch(directive_char) do
      raise ArgumentError, "unknown log format directive: #{directive_char.inspect}"
    end

    if directive_char == 'i' && subdirective
      field_name = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end

    [field_name, pattern.source]
  end
end
|
48
|
+
|
49
|
+
# Parses single lines of HTTP server logs against a set of known formats and
# returns the captured fields as a Hash.
class HttpLogParser

  # Format name => Apache-style format string. Each entry is compiled into an
  # HttpLogFormat when a parser is created.
  LOG_FORMATS = {
    :common => '%h %l %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %l %u %t \"%r\" %>s %b',
    :combined => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"'
  }

  # Hash of format name => HttpLogFormat used for matching.
  attr_reader :known_formats

  def initialize
    initialize_known_formats
  end

  # (Re)builds +known_formats+ from LOG_FORMATS.
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = HttpLogFormat.new(name, format)
    end
  end

  # Returns the name of the first known format whose regex matches +line+,
  # or :unknown when none does. Formats with the longest regex source are
  # tried first.
  def check_format(line)
    match_known_format(line).first
  end

  # Parses one log line.
  #
  # Returns a Hash keyed by the matching format's field symbols, plus:
  # * :datetime - with its first and last character (the brackets) removed
  # * :domain   - set to the :ip value when the format captured no domain
  # * :format   - the name of the format that matched
  #
  # Raises ArgumentError when +line+ matches none of the known formats.
  def parse_line(line)
    # One pass both identifies the format and yields the captures; the line
    # is no longer re-matched to validate and then again to extract.
    @format, captures = match_known_format(line)
    log_format = @known_formats[@format]
    if log_format.nil?
      raise ArgumentError, "line does not match any known log format: #{line.inspect}"
    end

    parsed_data = Hash[log_format.format_symbols.zip(captures)]

    parsed_data[:datetime] = parsed_data[:datetime][1...-1] if parsed_data[:datetime]
    parsed_data[:domain] = parsed_data[:ip] unless parsed_data[:domain]
    parsed_data[:format] = @format

    parsed_data
  end

  private

  # Tries the known formats, longest regex source first, and returns
  # [format name, captures] for the first match, or [:unknown, nil].
  # The ordering is recomputed on every call so that formats added to
  # +known_formats+ after construction are honoured.
  def match_known_format(line)
    by_length = @known_formats.sort_by { |_key, fmt| fmt.format_regex.source.size }
    by_length.reverse_each do |key, fmt|
      match = fmt.format_regex.match(line)
      return [key, match.captures] if match
    end
    [:unknown, nil]
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: http-log-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Torsten Curdt
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-04-22 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: HTTP log file parser
|
22
|
+
email: tcurdt at vafer.org
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
files:
|
30
|
+
- README.rdoc
|
31
|
+
- Rakefile
|
32
|
+
- lib/http/parser.rb
|
33
|
+
- lib/http_log_parser.rb
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/tcurdt/http-log-parser
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
segments:
|
48
|
+
- 0
|
49
|
+
version: "0"
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.3.6
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: A package for parsing web server logs.
|
64
|
+
test_files: []
|
65
|
+
|