http-log-parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,30 @@
1
+ = LogParser
2
+
3
+ == Introduction
4
+
5
+ This gem provides an easy-to-use parser for various HTTP log formats.
6
+
7
+ == Installation
8
+
9
+ Just run:
10
+
11
+ gem install http-log-parser
12
+
13
+ That should take care of it.
14
+
15
+ == Usage
16
+
17
+ require 'http_log_parser'
18
+
19
+ parser = HttpLogParser.new
20
+
21
+ File.open('/path/to.log', 'r') do |file|
22
+ while(line = file.gets)
23
+ parsed_data = parser.parse_line(line)
24
+ p parsed_data
25
+ end
26
+ end
27
+
28
+ == License
29
+
30
+ This code is made available under the MIT license. It is based on code from Jan Wikholm.
@@ -0,0 +1,36 @@
1
# Build script for the http-log-parser gem: defines the gem/tar packaging
# tasks, the rdoc task, and a default task that builds the gem.
require 'rubygems'
# The old rake/gempackagetask and rake/rdoctask files are no longer shipped
# with Rake; these are their replacements.
require 'rubygems/package_task'
require 'rdoc/task'

spec = Gem::Specification.new do |s|
  s.name = "http-log-parser"
  s.version = "0.0.1"
  s.author = "Torsten Curdt"
  s.email = "tcurdt at vafer.org"
  s.homepage = "http://github.com/tcurdt/http-log-parser"
  s.description = "HTTP log file parser"
  s.summary = "A package for parsing web server logs."

  s.platform = Gem::Platform::RUBY
  s.extra_rdoc_files = ["README.rdoc"]

  s.require_path = "lib"
  s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
end

# Defines the package tasks, including the file task for
# "pkg/<name>-<version>.gem" that the default task depends on.
Gem::PackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'HttpLogParser'
  rdoc.options << '--line-numbers' << '--inline-source'
  # The README is shipped as README.rdoc (see s.files above).
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
  puts "generated latest version"
end
@@ -0,0 +1,96 @@
1
# Describes a single HTTP log format given as an Apache-style format string
# (e.g. '%h %l %u %t \"%r\" %>s %b') and compiles it into a regular
# expression plus the ordered list of field names for its capture groups.
class HttpLogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  # Maps a directive letter to [field name, pattern used to capture it].
  DIRECTIVES = {
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],
    'l' => [:auth, /.*?/],
    'u' => [:username, /.*?/],
    't' => [:datetime, /\[.*?\]/],
    'r' => [:request, /.*?/],
    's' => [:status, /\d+/],
    'b' => [:bytecount, /-|\d+/],
    'v' => [:domain, /.*?/],
    'i' => [:header_lines, /.*?/],
    'e' => [:errorlevel, /\[.*?\]/],
  }.freeze

  # @param name [Object] identifier of the format (stored as-is)
  # @param format [String] the format string to compile
  def initialize(name, format)
    @name = name
    @format = format
    parse_format(format)
  end

  # Compiles +format+ and stores the result in @format_symbols (one field
  # name per capture group, in order) and @format_regex.
  #
  # @param format [String] the format string to compile
  # @return [Regexp] the compiled expression
  def parse_format(format)
    # The character class must contain exactly the directive letters.
    # Interpolating an Array here would insert its #inspect form and let
    # characters such as '"' and '|' pass as directive letters.
    directive_letters = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_letters}])([\s\\"]*)/

    compiled = format.scan(format_directive).map do |condition, subdirective, directive_char, ignored|
      symbol, match_regex = process_directive(directive_char, subdirective, condition)
      # Whitespace between directives becomes \s; the remaining separator
      # characters (backslash-escaped quotes) are copied into the pattern.
      separator = ignored.to_s.gsub(/\s/, '\\s')
      [symbol, "(#{match_regex})#{separator}"]
    end

    @format_symbols = compiled.map(&:first)
    # Anchored at the start of a line only; the end is deliberately left open.
    @format_regex = /^#{compiled.map(&:last).join}/
  end

  # Resolves one directive to its field name and pattern source.
  #
  # @param directive_char [String] the directive letter, a key of DIRECTIVES
  # @param subdirective [String, nil] the "{...}" part including braces, if any
  # @param condition [String, nil] text between '%' and the directive (unused)
  # @return [Array(Symbol, String)] field name and regex source
  # @raise [ArgumentError] if +directive_char+ is not a known directive
  def process_directive(directive_char, subdirective, condition)
    directive = DIRECTIVES[directive_char]
    raise ArgumentError, "unknown log format directive: %#{directive_char}" if directive.nil?

    symbol, pattern = directive
    if directive_char == 'i'
      # "%{User-agent}i" is named after the header (:user_agent). A bare
      # "%i" or an empty "{}" keeps the generic name from DIRECTIVES.
      header = subdirective.to_s[1...-1].to_s
      symbol = header.downcase.tr('-', '_').to_sym unless header.empty?
    end
    [symbol, pattern.source]
  end
end
48
+
49
# Parses single lines of HTTP server logs. Each line is tried against a set
# of known formats (see LOG_FORMATS) and returned as a Hash of named fields.
class HttpLogParser

  # Format strings for the log formats recognised out of the box.
  LOG_FORMATS = {
    :common => '%h %l %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %l %u %t \"%r\" %>s %b',
    :combined => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"'
  }.freeze

  # Hash of format name => HttpLogFormat.
  attr_reader :known_formats

  def initialize
    # Not read anywhere in this class; kept so the object's state is unchanged.
    @log_format = []
    initialize_known_formats
  end

  # Builds one HttpLogFormat per LOG_FORMATS entry and stores them in
  # @known_formats, keyed by format name.
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = HttpLogFormat.new(name, format)
    end
  end

  # Finds the known format whose expression matches +line+.
  #
  # Formats are tried from the longest regex source to the shortest: the
  # expressions are not anchored at the end, so a shorter format would also
  # match lines written in a longer one.
  #
  # @param line [String] one log line (nil is treated as an empty line)
  # @return [Symbol] the format name, or :unknown when nothing matches
  def check_format(line)
    text = line.to_s
    by_length = @known_formats.sort_by { |_name, log_format| log_format.format_regex.source.size }
    by_length.reverse_each do |name, log_format|
      return name if log_format.format_regex =~ text
    end
    :unknown
  end

  # Parses one log line into a Hash of field name => captured String.
  #
  # Besides the captured fields the Hash contains :format (the detected
  # format name). :datetime has its surrounding brackets removed and :domain
  # falls back to the value of :ip when the format has no domain field.
  #
  # @param line [String] one log line
  # @return [Hash{Symbol => Object}] the parsed fields
  # @raise [ArgumentError] if the line matches none of the known formats
  def parse_line(line)
    @format = check_format(line)
    log_format = @known_formats[@format]
    match = log_format && log_format.format_regex.match(line.to_s)
    unless match
      raise ArgumentError, "line does not match any known log format: #{line.inspect}"
    end

    # Pair each field name with the corresponding capture of the first match.
    parsed_data = {}
    log_format.format_symbols.zip(match.captures) do |symbol, value|
      parsed_data[symbol] = value
    end

    # Drop the "[" and "]" captured around the timestamp.
    parsed_data[:datetime] = parsed_data[:datetime][1...-1] if parsed_data[:datetime]
    parsed_data[:domain] ||= parsed_data[:ip]
    parsed_data[:format] = @format

    parsed_data
  end
end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+ # module HttpLogParser # :doc:
3
+ require 'http/parser'
4
+ # end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http-log-parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Torsten Curdt
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-22 00:00:00 +02:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: HTTP log file parser
22
+ email: tcurdt at vafer.org
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ files:
30
+ - README.rdoc
31
+ - Rakefile
32
+ - lib/http/parser.rb
33
+ - lib/http_log_parser.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/tcurdt/http-log-parser
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.3.6
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: A package for parsing web server logs.
64
+ test_files: []
65
+