http-log-parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ = LogParser
2
+
3
+ == Introduction
4
+
5
+ This gem provides an easy-to-use parser for various HTTP log formats.
6
+
7
+ == Installation
8
+
9
+ Just run:
10
+
11
+ gem install http-log-parser
12
+
13
+ That should take care of it.
14
+
15
+ == Usage
16
+
17
+ require 'http_log_parser'
18
+
19
+ parser = HttpLogParser.new
20
+
21
+ File.open('/path/to.log', 'r') do |file|
22
+ while(line = file.gets)
23
+ parsed_data = parser.parse_line(line)
24
+ p parsed_data
25
+ end
26
+ end
27
+
28
+ == License
29
+
30
+ This code is made available under the MIT license. It is based on code from Jan Wikholm.
@@ -0,0 +1,36 @@
1
# Build script for the http-log-parser gem: declares the gem specification
# and wires up the packaging, RDoc and default tasks.
require 'rubygems'
require 'rake/gempackagetask'
require 'rake/rdoctask'

# Gem metadata. The packaged files are the README, this Rakefile and
# everything found below lib/.
spec = Gem::Specification.new do |s|
  s.name = "http-log-parser"
  s.version = "0.0.1"
  s.author = "Torsten Curdt"
  s.email = "tcurdt at vafer.org"
  s.homepage = "http://github.com/tcurdt/http-log-parser"
  s.description = "HTTP log file parser"
  s.summary = "A package for parsing web server logs."

  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README.rdoc"]

  s.require_path = "lib"
  s.files = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
end

# Packaging task built from the specification above; a tar archive is
# requested in addition to the .gem file.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_tar = true
end

# API documentation task (`rake rdoc`), written to the rdoc/ directory.
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'HttpLogParser'
  rdoc.options << '--line-numbers' << '--inline-source'
  # The README shipped with the gem is README.rdoc (see s.extra_rdoc_files);
  # including plain 'README' matched no file, so the README was left out of
  # the generated documentation.
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Default task: depends on the packaged .gem file and reports when done.
task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
  puts "generated latest version"
end
@@ -0,0 +1,96 @@
1
# Describes one HTTP log format given as an Apache-style format string
# (e.g. '%h %l %u %t \"%r\" %>s %b') and compiles it into
#
# * +format_symbols+ - the field names, in the order they appear, and
# * +format_regex+   - a regular expression with one capture group per field.
class HttpLogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  # Directive character => [field name, pattern matching that field's value].
  DIRECTIVES = {
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],
    'l' => [:auth, /.*?/],
    'u' => [:username, /.*?/],
    't' => [:datetime, /\[.*?\]/],
    'r' => [:request, /.*?/],
    's' => [:status, /\d+/],
    'b' => [:bytecount, /-|\d+/],
    'v' => [:domain, /.*?/],
    'i' => [:header_lines, /.*?/],
    'e' => [:errorlevel, /\[.*?\]/],
  }

  # name::   identifier of the format (the parser uses symbols such as :common)
  # format:: the format string to compile
  def initialize(name, format)
    @name, @format = name, format
    parse_format(format)
  end

  # Scans +format+ for directives of the shape
  # %<condition>{<subdirective>}<char><separators> and builds
  # @format_symbols and @format_regex from them.
  #
  # Only whitespace, backslashes and double quotes following a directive are
  # carried over into the regex; other literal text in +format+ is ignored.
  def parse_format(format)
    # The character class must list the directive characters only. The
    # previous form interpolated an Array, which on Ruby >= 1.9 expands to
    # its #inspect output and so also admitted '"' and '|' as directive
    # characters, leading to a nil lookup in process_directive.
    directive_chars = Regexp.escape(DIRECTIVES.keys.join)
    format_directive = /%(.*?)(\{.*?\})?([#{directive_chars}])([\s\\"]*)/

    log_format_symbols = []
    fragments = format.scan(format_directive).map do |condition, subdirective, directive_char, ignored|
      log_format, match_regex = process_directive(directive_char, subdirective, condition)
      log_format_symbols << log_format
      # Any whitespace in the separator becomes a literal "\s" in the regex
      # source; backslash-escaped quotes are kept as they are.
      separator = ignored.to_s.gsub(/\s/) { '\\s' }
      "(#{match_regex})#{separator}"
    end

    @format_symbols = log_format_symbols
    @format_regex = /^#{fragments.join}/
  end

  # Returns [field_symbol, regex_source] for a single directive.
  #
  # For the header directive ('i') the field name is derived from the
  # sub-directive: '{User-agent}' becomes :user_agent. A bare '%i' without a
  # sub-directive falls back to the generic :header_lines name instead of
  # failing on a nil sub-directive. +condition+ (e.g. '>') is accepted but
  # not used.
  def process_directive(directive_char, subdirective, condition)
    default_symbol, matcher = DIRECTIVES[directive_char]
    symbol =
      if directive_char == 'i' && subdirective
        subdirective[1...-1].downcase.tr('-', '_').to_sym
      else
        default_symbol
      end
    [symbol, matcher.source]
  end
end
48
+
49
# Parses single lines of HTTP server logs. Each line is tested against a set
# of known formats (see LOG_FORMATS) and, on success, broken up into a hash
# of named fields.
class HttpLogParser

  # Format name => Apache-style format string.
  LOG_FORMATS = {
    :common => '%h %l %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %l %u %t \"%r\" %>s %b',
    :combined => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"'
  }

  # Hash of format name => HttpLogFormat for every entry of LOG_FORMATS.
  attr_reader :known_formats

  def initialize
    @log_format = []
    initialize_known_formats
  end

  # Compiles every entry of LOG_FORMATS into an HttpLogFormat.
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = HttpLogFormat.new(name, format)
    end
  end

  # Returns the name of the first known format whose regex matches +line+,
  # or :unknown when none does.
  #
  # Formats are tried from the longest regex source to the shortest: the
  # regexes are only anchored at the start of the line, so a short format
  # would otherwise also match the prefix of a line written in a longer one.
  def check_format(line)
    by_length = @known_formats.sort_by { |_key, log_format| log_format.format_regex.source.size }
    by_length.reverse_each do |key, log_format|
      return key if line.match(log_format.format_regex)
    end
    :unknown
  end

  # Parses one log line into a hash keyed by the field symbols of the
  # detected format.
  #
  # On top of the captured fields:
  # * :datetime has its surrounding brackets removed,
  # * :domain falls back to :ip when the format has no (or an empty match
  #   for the) domain field,
  # * :format holds the name of the detected format.
  #
  # The detected format name is also stored in @format.
  #
  # Raises ArgumentError when the line matches none of the known formats.
  def parse_line(line)
    @format = check_format(line)
    log_format = @known_formats[@format]
    # Match once and reuse the captures instead of re-running the regex.
    match = log_format && log_format.format_regex.match(line)
    raise ArgumentError, "line does not match any known log format: #{line.inspect}" unless match

    parsed_data = Hash[log_format.format_symbols.zip(match.captures)]

    parsed_data[:datetime] = parsed_data[:datetime][1...-1] if parsed_data[:datetime]
    parsed_data[:domain] = parsed_data[:ip] unless parsed_data[:domain]
    parsed_data[:format] = @format

    parsed_data
  end
end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+ # module HttpLogParser # :doc:
3
+ require 'http/parser'
4
+ # end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http-log-parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - Torsten Curdt
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-22 00:00:00 +02:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: HTTP log file parser
22
+ email: tcurdt at vafer.org
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ files:
30
+ - README.rdoc
31
+ - Rakefile
32
+ - lib/http/parser.rb
33
+ - lib/http_log_parser.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/tcurdt/http-log-parser
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ segments:
48
+ - 0
49
+ version: "0"
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.3.6
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: A package for parsing web server logs.
64
+ test_files: []
65
+