logparser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +46 -0
- data/lib/logparser.rb +3 -0
- data/lib/logparser/line.rb +69 -0
- data/lib/logparser/template.rb +29 -0
- data/lib/logparser/version.rb +9 -0
- data/test/parsing_test.rb +104 -0
- data/test/template_test.rb +20 -0
- metadata +61 -0
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Build script for the logparser gem. Defines the unit-test task (the default)
# and, when a gem packaging library can be loaded, the gem packaging tasks.
require 'rake'
require 'rake/testtask'

# Nothing below defines an RDoc task, and newer Rake releases no longer ship
# rake/rdoctask. Load it only where it still exists so that a missing file
# cannot stop the test task from being defined.
begin
  require "rake/rdoctask"
rescue LoadError
  # Not available in this Rake; safe to ignore because it is unused here.
end

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the library.'
Rake::TestTask.new(:test) do |t|
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

begin
  # Prefer the packaging task shipped with RubyGems; fall back to the legacy
  # Rake task on installations that only have the old one.
  package_task_class =
    begin
      require "rubygems/package_task"
      Gem::PackageTask
    rescue LoadError
      require "rake/gempackagetask"
      Rake::GemPackageTask
    end
  require File.dirname(__FILE__)+"/lib/logparser/version"

  spec = Gem::Specification.new do |s|
    # Change these as appropriate
    s.name     = "logparser"
    s.version  = LogParser::VERSION::STRING
    s.summary  = "Parse log files using a simple syntax."
    s.author   = "Paul Battley"
    s.email    = "pbattley@gmail.com"
    s.homepage = "http://github.com/threedaymonk/logparser"

    # has_rdoc= is deprecated in newer RubyGems; only call it where it exists.
    s.has_rdoc = false if s.respond_to?(:has_rdoc=)

    # Add any extra files to include in the gem (like your README)
    s.files = %w(Rakefile) + Dir.glob("{test,lib}/**/*")

    s.require_paths = ["lib"]

    # If you want to depend on other gems, add them here, along with any
    # relevant versions
    # s.add_dependency("some_other_gem", "~> 0.1.0")

    # If your tests use any gems, include them here
    # s.add_development_dependency("mocha")
  end

  package_task_class.new(spec) do |pkg|
    pkg.gem_spec = spec
  end
rescue LoadError
  # Packaging is optional: if neither packaging library (or the version file)
  # can be loaded, only the test tasks above are defined.
end
|
data/lib/logparser/line.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module LogParser
  # One line of a log file, interpreted through a template.
  #
  # The template (any object responding to +apply(string)+) splits the
  # stripped raw text into a hash of named string fields, or returns nil when
  # the line does not match. The accessors below convert those strings into
  # richer values.
  #
  # A field counts as absent when the template did not produce it or when the
  # line did not match the template at all. Accessors for absent fields return
  # nil, except +status+ and +bytes+, which return 0.
  class Line
    # strptime format for the timestamp field, e.g. "02/Nov/2006:13:41:41 +0000".
    TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S %Z'.freeze

    # template:: object whose +apply+ extracts the fields from a string
    # raw::      the log line; surrounding whitespace is stripped before parsing
    def initialize(template, raw)
      @template = template
      @raw = raw.strip
    end

    # Hash of field name (Symbol) => raw String as produced by the template,
    # or nil when the template does not match the line.
    def raw_fields
      # defined? rather than ||= so that a nil (no match) result is cached too.
      return @raw_fields if defined?(@raw_fields)
      @raw_fields = @template.apply(@raw)
    end

    # True when the template matched this line.
    def matched?
      !raw_fields.nil?
    end

    # Host field, or nil when absent or given as "-" or "".
    def host
      @host ||= decode_string(field(:host))
    end

    # Domain field, or nil when absent or given as "-" or "".
    def domain
      @domain ||= decode_string(field(:domain))
    end

    # Timestamp field parsed into a DateTime, or nil when absent.
    def timestamp
      return @timestamp if @timestamp
      text = field(:timestamp)
      @timestamp = DateTime.strptime(text, TIMESTAMP_FORMAT) if text
    end

    # Verb field as a lower-case Symbol (e.g. :get), or nil when absent.
    def verb
      return @verb if @verb
      text = field(:verb)
      @verb = text.downcase.to_sym if text
    end

    # Path field exactly as it appears in the line, or nil when absent.
    def path
      @path ||= field(:path)
    end

    # Protocol field exactly as it appears in the line, or nil when absent.
    def protocol
      @protocol ||= field(:protocol)
    end

    # Status field as an Integer (0 when absent or non-numeric).
    def status
      @status ||= field(:status).to_i
    end

    # Bytes field as an Integer (0 when absent or non-numeric).
    def bytes
      @bytes ||= field(:bytes).to_i
    end

    # Referrer field, or nil when absent or given as "-" or "".
    def referrer
      @referrer ||= decode_string(field(:referrer))
    end

    # User-agent field, or nil when absent or given as "-" or "".
    def user_agent
      @user_agent ||= decode_string(field(:user_agent))
    end

    # Time-taken field as a Float, or nil when absent.
    def time_taken
      return @time_taken if @time_taken
      text = field(:time_taken)
      @time_taken = text.to_f if text
    end

  private

    # Raw string for +name+, or nil when the field is absent or the line did
    # not match the template.
    def field(name)
      fields = raw_fields
      fields && fields[name]
    end

    # Maps the "no value" placeholders ("-" and the empty string) to nil and
    # passes every other value through unchanged.
    def decode_string(str)
      case str
      when '-', '' then nil
      else str
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module LogParser
  # Compiles a log-format pattern such as ':host [:timestamp] ":verb :path"'
  # into a regular expression.
  #
  # Each :name placeholder (lower-case letters, digits and underscores, not
  # starting with a digit) becomes a non-greedy capture group; every other
  # character of the pattern is matched literally. The expression is anchored
  # at both ends of the string.
  class Template
    # Matches one :name placeholder; group 1 is the field name.
    PLACEHOLDER = /:([a-z_][a-z0-9_]*)/

    # fields:: placeholder names (Symbols) in order of appearance
    # regexp:: the compiled, anchored regular expression
    attr_reader :fields, :regexp

    # pattern:: the template string to compile
    def initialize(pattern)
      build_regexp(pattern)
    end

    # Compiles +pattern+, replacing any previously built fields and regexp.
    # Returns the new Regexp.
    def build_regexp(pattern)
      @fields = []
      # Escape first so literal text such as "[" is matched verbatim; ":" and
      # the name characters are not touched by escaping, so placeholders are
      # still recognisable afterwards.
      body = Regexp.escape(pattern).gsub(PLACEHOLDER) do
        @fields << Regexp.last_match(1).to_sym
        "(.*?)"
      end
      # Interpolate rather than appending to a string literal, so this keeps
      # working when string literals are frozen.
      @regexp = Regexp.new("\\A#{body}\\Z")
    end

    # Matches +str+ against the compiled expression.
    # Returns a Hash of field name => captured String, or nil when +str+ does
    # not match.
    def apply(str)
      matches = str.match(@regexp)
      return nil unless matches
      Hash[@fields.zip(matches.captures)]
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
2
|
+
require 'test/unit'
|
3
|
+
require 'logparser'
|
4
|
+
|
5
|
+
# Exercises LogParser::Line field extraction: one test per field, the blank
# and "-" placeholder cases, and finally a complete sample log line.
class GeneralParsingTest < Test::Unit::TestCase
  include LogParser

  # Request portion shared by the verb, path and protocol tests.
  REQUEST_TEMPLATE = '":verb :path :protocol"'
  REQUEST_LINE     = '"GET /javascripts/effects.js?1161276768 HTTP/1.0"'

  def test_should_extract_host
    line = parse(':host', '83.148.169.161')
    assert_equal '83.148.169.161', line.host
  end

  def test_should_extract_blank_data
    line = parse('":host"', '""')
    assert_nil line.host
  end

  def test_should_extract_domain
    line = parse(':domain', 'www.reevoo.com')
    assert_equal 'www.reevoo.com', line.domain
  end

  def test_should_extract_timestamp
    line = parse('[:timestamp]', '[02/Nov/2006:13:41:41 +0000]')
    assert_equal DateTime.parse('2006-11-02T13:41:41Z'), line.timestamp
  end

  def test_should_extract_verb
    line = parse(REQUEST_TEMPLATE, REQUEST_LINE)
    assert_equal :get, line.verb
  end

  def test_should_extract_path
    line = parse(REQUEST_TEMPLATE, REQUEST_LINE)
    assert_equal '/javascripts/effects.js?1161276768', line.path
  end

  def test_should_extract_protocol
    line = parse(REQUEST_TEMPLATE, REQUEST_LINE)
    assert_equal 'HTTP/1.0', line.protocol
  end

  def test_should_extract_status
    line = parse(':status', '200')
    assert_equal 200, line.status
  end

  def test_should_extract_bytes
    line = parse(':bytes', '32871')
    assert_equal 32871, line.bytes
  end

  def test_should_extract_referrer
    line = parse('":referrer"', '"http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p"')
    assert_equal 'http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p', line.referrer
  end

  def test_should_extract_user_agent
    line = parse('":user_agent"', '"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"')
    assert_equal 'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)', line.user_agent
  end

  def test_should_have_nil_referrer_when_log_contains_hyphen_placeholder
    line = parse('":referrer"', '"-"')
    assert_nil line.referrer
  end

  def test_should_extract_time_taken
    line = parse(':time_taken msec', '0.004 msec')
    assert_in_delta 0.004, line.time_taken, 0.00001
  end

  def test_should_give_time_taken_as_nil_when_not_given
    line = parse('', '')
    assert_nil line.time_taken
  end

  def test_should_extract_all_fields_from_sample_line
    sample = '83.148.169.161 www.reevoo.com - [02/Nov/2006:13:41:41 +0000] '+
             '"GET /javascripts/effects.js?1161276768 HTTP/1.0" 200 32871 '+
             '"http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p" '+
             '"Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"'
    line = parse(
      ':host :domain :unknown [:timestamp] ":verb :path :protocol" :status :bytes ":referrer" ":user_agent"',
      sample
    )
    assert_equal '83.148.169.161', line.host
    assert_equal 'www.reevoo.com', line.domain
    assert_equal DateTime.parse('2006-11-02T13:41:41Z'), line.timestamp
    assert_equal :get, line.verb
    assert_equal '/javascripts/effects.js?1161276768', line.path
    assert_equal 'HTTP/1.0', line.protocol
    assert_equal 200, line.status
    assert_equal 32871, line.bytes
    assert_equal 'http://www.reevoo.com/reviews/mpn/hotpoint/fdw60p', line.referrer
    assert_equal 'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)', line.user_agent
  end

  def test_should_ignore_trailing_newline
    line = parse('":referrer"', %{"foo"\n})
    assert_equal 'foo', line.referrer
  end

private

  # Builds a Line for +raw+ using a Template compiled from +pattern+.
  def parse(pattern, raw)
    Line.new(Template.new(pattern), raw)
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
$:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
2
|
+
require 'test/unit'
|
3
|
+
require 'logparser/template'
|
4
|
+
|
5
|
+
# Unit tests for LogParser::Template: placeholder discovery, regexp
# construction and field extraction.
class TemplateTest < Test::Unit::TestCase
  # Placeholders are reported as symbols in the order they appear.
  def test_should_extract_fields_from_pattern
    expected = [:a, :b, :c]
    assert_equal expected, LogParser::Template.new(":a :b :c").fields
  end

  # Literal characters with regexp meaning must be escaped in the result.
  def test_should_escape_regexp_properly
    expected = /\A\[(.*?)\]\Z/
    assert_equal(expected, LogParser::Template.new("[:a]").regexp)
  end

  # Applying a template yields a hash keyed by placeholder name.
  def test_should_extract_data_according_to_pattern
    expected = {:foo => 'Foo', :bar => 'Bar', :baz => 'Baz'}
    actual = LogParser::Template.new(":foo :bar [:baz]").apply('Foo Bar [Baz]')
    assert_equal(expected, actual)
  end
end
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logparser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Battley
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-11 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: pbattley@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- test/template_test.rb
|
27
|
+
- test/parsing_test.rb
|
28
|
+
- lib/logparser.rb
|
29
|
+
- lib/logparser/line.rb
|
30
|
+
- lib/logparser/template.rb
|
31
|
+
- lib/logparser/version.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/threedaymonk/logparser
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.3.5
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Parse log files using a simple syntax.
|
60
|
+
test_files: []
|
61
|
+
|