logparser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +46 -0
- data/lib/logparser.rb +3 -0
- data/lib/logparser/line.rb +69 -0
- data/lib/logparser/template.rb +29 -0
- data/lib/logparser/version.rb +9 -0
- data/test/parsing_test.rb +104 -0
- data/test/template_test.rb +20 -0
- metadata +61 -0
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require "rake/rdoctask"
|
4
|
+
|
5
|
+
desc 'Default: run unit tests.'
task :default => :test

desc 'Test the library.'
Rake::TestTask.new(:test) do |t|
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# Gem packaging tasks. These are optional: if the packaging support or the
# version file cannot be loaded, the test tasks above still work and a warning
# explains why the packaging tasks are missing.
begin
  # Newer Rake releases no longer ship rake/gempackagetask; RubyGems provides
  # the replacement. Fall back to the old task where the new one is missing.
  begin
    require "rubygems/package_task"
    package_task_class = Gem::PackageTask
  rescue LoadError
    require "rake/gempackagetask"
    package_task_class = Rake::GemPackageTask
  end
  require File.dirname(__FILE__) + "/lib/logparser/version"

  spec = Gem::Specification.new do |s|
    # Change these as appropriate
    s.name     = "logparser"
    s.version  = LogParser::VERSION::STRING
    s.summary  = "Parse log files using a simple syntax."
    s.author   = "Paul Battley"
    s.email    = "pbattley@gmail.com"
    s.homepage = "http://github.com/threedaymonk/logparser"

    # has_rdoc= is deprecated in later RubyGems and may not be defined at all,
    # so only call it where the setter still exists.
    s.has_rdoc = false if s.respond_to?(:has_rdoc=)

    # Add any extra files to include in the gem (like your README)
    s.files = %w(Rakefile) + Dir.glob("{test,lib}/**/*")

    s.require_paths = ["lib"]

    # If you want to depend on other gems, add them here, along with any
    # relevant versions
    # s.add_dependency("some_other_gem", "~> 0.1.0")

    # If your tests use any gems, include them here
    # s.add_development_dependency("mocha")
  end

  package_task_class.new(spec) do |pkg|
    pkg.gem_spec = spec
  end
rescue LoadError => e
  # Packaging is best-effort, but say so instead of failing silently.
  warn "Gem packaging tasks are unavailable: #{e.message}"
end
|
data/lib/logparser.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module LogParser
  # One line of a log file, interpreted through a template.
  #
  # The template is any object responding to +apply(string)+ that returns a
  # Hash of symbol keys to captured strings, or nil when the line does not
  # match. Typed accessors (host, timestamp, status, ...) convert the raw
  # captures on demand.
  #
  # A field that is missing from the template, or any field of a line that
  # does not match the template, reads as nil. The exceptions are +status+ and
  # +bytes+, which read as 0 because they are converted with +to_i+.
  class Line
    # strptime format for timestamps such as "02/Nov/2006:13:41:41 +0000".
    TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S %Z'.freeze

    # template:: object responding to +apply+
    # raw::      the log line; surrounding whitespace (including a trailing
    #            newline) is stripped before matching
    def initialize(template, raw)
      @template = template
      @raw = raw.strip
    end

    # The Hash of captured strings produced by the template, or nil when the
    # line does not match. A nil result is not cached, so the template is
    # applied again on the next call.
    def raw_fields
      @raw_fields ||= @template.apply(@raw)
    end

    # Host as a String, or nil when blank, "-" or absent.
    def host
      @host ||= decode_string(field(:host))
    end

    # Domain as a String, or nil when blank, "-" or absent.
    def domain
      @domain ||= decode_string(field(:domain))
    end

    # Timestamp parsed into a DateTime, or nil when absent.
    # Raises whatever DateTime.strptime raises for a malformed value.
    def timestamp
      @timestamp ||= begin
        value = field(:timestamp)
        value && DateTime.strptime(value, TIMESTAMP_FORMAT)
      end
    end

    # Request verb as a lower-case Symbol (e.g. :get), or nil when absent.
    def verb
      @verb ||= begin
        value = field(:verb)
        value && value.downcase.to_sym
      end
    end

    # Request path exactly as captured, or nil when absent.
    def path
      @path ||= field(:path)
    end

    # Protocol exactly as captured, or nil when absent.
    def protocol
      @protocol ||= field(:protocol)
    end

    # Status as an Integer; 0 when absent or non-numeric (String#to_i).
    def status
      @status ||= field(:status).to_i
    end

    # Byte count as an Integer; 0 when absent or non-numeric (e.g. "-").
    def bytes
      @bytes ||= field(:bytes).to_i
    end

    # Referrer as a String, or nil when blank, "-" or absent.
    def referrer
      @referrer ||= decode_string(field(:referrer))
    end

    # User agent as a String, or nil when blank, "-" or absent.
    def user_agent
      @user_agent ||= decode_string(field(:user_agent))
    end

    # Time taken as a Float, or nil when absent.
    def time_taken
      @time_taken ||= begin
        value = field(:time_taken)
        value && value.to_f
      end
    end

    private

    # Looks up one raw capture, tolerating a line that did not match the
    # template (raw_fields is nil in that case).
    def field(name)
      fields = raw_fields
      fields && fields[name]
    end

    # Maps the log placeholders "-" and "" to nil; any other value (including
    # nil) is returned unchanged.
    def decode_string(str)
      case str
      when '-', ''
        nil
      else
        str
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module LogParser
  # Compiles a pattern such as ':host [:timestamp]' into a regexp and uses it
  # to split a log line into named fields.
  #
  # Every ":name" placeholder (lower-case letters, digits and underscores, not
  # starting with a digit) becomes a non-greedy capture group; all other text
  # in the pattern is matched literally.
  class Template
    # Matches one ":name" placeholder in the (already escaped) pattern.
    FIELD_PLACEHOLDER = /:([a-z_][a-z0-9_]*)/

    # fields:: placeholder names as Symbols, in pattern order
    # regexp:: the compiled Regexp, anchored with \A and \Z
    attr_reader :fields, :regexp

    # pattern:: String describing the layout of a log line
    def initialize(pattern)
      build_regexp(pattern)
    end

    # (Re)builds +fields+ and +regexp+ from +pattern+. Returns the new Regexp.
    def build_regexp(pattern)
      @fields = []
      # Escape first so literal characters such as "[" are matched verbatim;
      # escaping leaves ":name" untouched, so placeholders can be found after.
      body = Regexp.escape(pattern).gsub(FIELD_PLACEHOLDER) do
        @fields << Regexp.last_match(1).to_sym
        "(.*?)"
      end
      # Interpolate rather than appending to a string literal, which would
      # fail when string literals are frozen.
      @regexp = Regexp.new("\\A#{body}\\Z")
    end

    # Matches +str+ against the template.
    #
    # Returns a Hash mapping each field name to its captured String, or nil
    # when +str+ does not match (or is nil). If a name occurs more than once
    # in the pattern, the last capture wins.
    def apply(str)
      matches = @regexp.match(str)
      return nil unless matches

      Hash[@fields.zip(matches.captures)]
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
2
|
+
require 'test/unit'
|
3
|
+
require 'logparser'
|
4
|
+
|
5
|
+
# Exercises LogParser::Line's typed accessors, one field at a time and then
# against a complete sample log line.
class GeneralParsingTest < Test::Unit::TestCase
  include LogParser

  # Request-line pattern and sample shared by the verb/path/protocol tests.
  REQUEST_PATTERN = '":verb :path :protocol"'
  REQUEST_SAMPLE  = '"GET /javascripts/effects.js?1161276768 HTTP/1.0"'

  def test_should_extract_host
    line = Line.new(Template.new(':host'), '83.148.169.161')
    assert_equal '83.148.169.161', line.host
  end

  def test_should_extract_blank_data
    line = Line.new(Template.new('":host"'), '""')
    assert_nil line.host
  end

  def test_should_extract_domain
    line = Line.new(Template.new(':domain'), 'www.reevoo.com')
    assert_equal 'www.reevoo.com', line.domain
  end

  def test_should_extract_timestamp
    line = Line.new(Template.new('[:timestamp]'), '[02/Nov/2006:13:41:41 +0000]')
    assert_equal DateTime.parse('2006-11-02T13:41:41Z'), line.timestamp
  end

  def test_should_extract_verb
    line = Line.new(Template.new(REQUEST_PATTERN), REQUEST_SAMPLE)
    assert_equal :get, line.verb
  end

  def test_should_extract_path
    line = Line.new(Template.new(REQUEST_PATTERN), REQUEST_SAMPLE)
    assert_equal '/javascripts/effects.js?1161276768', line.path
  end

  def test_should_extract_protocol
    line = Line.new(Template.new(REQUEST_PATTERN), REQUEST_SAMPLE)
    assert_equal 'HTTP/1.0', line.protocol
  end

  def test_should_extract_status
    line = Line.new(Template.new(':status'), '200')
    assert_equal 200, line.status
  end

  def test_should_extract_bytes
    line = Line.new(Template.new(':bytes'), '32871')
    assert_equal 32871, line.bytes
  end

  def test_should_extract_referrer
    line = Line.new(Template.new('":referrer"'), '"http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p"')
    assert_equal 'http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p', line.referrer
  end

  def test_should_extract_user_agent
    line = Line.new(Template.new('":user_agent"'), '"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"')
    assert_equal 'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)', line.user_agent
  end

  def test_should_have_nil_referrer_when_log_contains_hyphen_placeholder
    line = Line.new(Template.new('":referrer"'), '"-"')
    assert_nil line.referrer
  end

  def test_should_extract_time_taken
    line = Line.new(Template.new(':time_taken msec'), '0.004 msec')
    assert_in_delta 0.004, line.time_taken, 0.00001
  end

  def test_should_give_time_taken_as_nil_when_not_given
    line = Line.new(Template.new(''), '')
    assert_nil line.time_taken
  end

  def test_should_extract_all_fields_from_sample_line
    sample = '83.148.169.161 www.reevoo.com - [02/Nov/2006:13:41:41 +0000] '+
             '"GET /javascripts/effects.js?1161276768 HTTP/1.0" 200 32871 '+
             '"http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p" '+
             '"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"'
    line = Line.new(
      Template.new(':host :domain :unknown [:timestamp] ":verb :path :protocol" :status :bytes ":referrer" ":user_agent"'),
      sample
    )
    assert_equal '83.148.169.161', line.host
    assert_equal 'www.reevoo.com', line.domain
    assert_equal DateTime.parse('2006-11-02T13:41:41Z'), line.timestamp
    assert_equal :get, line.verb
    # The path was previously asserted twice in a row; once is enough.
    assert_equal '/javascripts/effects.js?1161276768', line.path
    assert_equal 'HTTP/1.0', line.protocol
    assert_equal 200, line.status
    assert_equal 32871, line.bytes
    assert_equal 'http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p', line.referrer
    assert_equal 'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)', line.user_agent
  end

  def test_should_ignore_trailing_newline
    line = Line.new(Template.new('":referrer"'), %{"foo"\n})
    assert_equal 'foo', line.referrer
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
2
|
+
require 'test/unit'
|
3
|
+
require 'logparser/template'
|
4
|
+
|
5
|
+
# Unit tests for LogParser::Template: placeholder discovery, regexp
# construction and field extraction.
class TemplateTest < Test::Unit::TestCase
  # Placeholders are reported as symbols in the order they appear.
  def test_should_extract_fields_from_pattern
    found = LogParser::Template.new(":a :b :c").fields
    assert_equal [:a, :b, :c], found
  end

  # Literal characters with regexp meaning (here the brackets) are escaped.
  def test_should_escape_regexp_properly
    compiled = LogParser::Template.new("[:a]").regexp
    assert_equal(/\A\[(.*?)\]\Z/, compiled)
  end

  # Applying a template yields a hash of field name to captured text.
  def test_should_extract_data_according_to_pattern
    expected = {:foo => 'Foo', :bar => 'Bar', :baz => 'Baz'}
    actual = LogParser::Template.new(":foo :bar [:baz]").apply('Foo Bar [Baz]')
    assert_equal(expected, actual)
  end
end
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Battley
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-11 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: pbattley@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- test/template_test.rb
|
27
|
+
- test/parsing_test.rb
|
28
|
+
- lib/logparser.rb
|
29
|
+
- lib/logparser/line.rb
|
30
|
+
- lib/logparser/template.rb
|
31
|
+
- lib/logparser/version.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/threedaymonk/logparser
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.3.5
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Parse log files using a simple syntax.
|
60
|
+
test_files: []
|
61
|
+
|