server_log_parser 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +36 -2
- data/lib/server_log_parser/parser.rb +61 -0
- data/lib/server_log_parser/version.rb +1 -1
- data/server_log_parser.gemspec +2 -2
- data/test/{parse_test.rb → parser_test.rb} +43 -0
- metadata +5 -7
- data/test/fixtures/log.log +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99d0578102face3c2664838e749c0de0192322ab
|
4
|
+
data.tar.gz: 8d2030582ca02f1e9cd98ee0f773be0baeb0e7d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a677d61e5c9ffadf121726262b38b05b19c49883ce5f1b0f4bcb223ce3c04dfdd9c4489179c510213e54931255c9b90140fad2dcc985190dd9ee3af5aabf8d15
|
7
|
+
data.tar.gz: 34176519e78657e8e67c4639d4baa1a7faf92af489e8e2d26027cda42fe3cfad8a57703acc0e6d193af53fcb510a8380d612e08187541cfdea6b5522591421d4
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -48,8 +48,42 @@ end
|
|
48
48
|
`ServerLogParser#parse` will silently ignore errors, but if you'd prefer,
|
49
49
|
`ServerLogParser#parse!` will raise a `ParseError` exception.
|
50
50
|
|
51
|
-
|
52
|
-
|
51
|
+
### Handling
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
File.foreach('/var/log/apache/access.log') do |line|
|
55
|
+
parsed = parser.handle(line)
|
56
|
+
# {
|
57
|
+
# '%h' => '212.74.15.68',
|
58
|
+
# '%l' => nil,
|
59
|
+
# '%u' => nil,
|
60
|
+
# '%t' => DateTime.new(2004, 1, 23, 11, 36, 20, '+0'),
|
61
|
+
# '%r' => {"method" => "GET", "resource" => "/images/previous.png", "protocol" => "HTTP/1.1"},
|
62
|
+
# '%>s' => 200,
|
63
|
+
# '%b' => 2607,
|
64
|
+
# '%{Referer}i' => 'http://peterhi.dyndns.org/bandwidth/index.html',
|
65
|
+
# '%{User-Agent}i' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'
|
66
|
+
# }
|
67
|
+
end
|
68
|
+
```
|
69
|
+
|
70
|
+
Apache log files use `-` to mean no data is present and these are replaced with `nil`,
|
71
|
+
like the `%l` and `%u` values above. The request line (`%r`) is split into a nested hash with the keys `method`, `resource` and `protocol`.
|
72
|
+
|
73
|
+
The following fields are stored as `Integer`: `%B`, `%b`, `%k`, `%p`, `%{format}p`,
|
74
|
+
`%P`, `%{format}P`, `%s`, `%>s`, `%I`, `%O`.
|
75
|
+
|
76
|
+
The following fields are stored as `Float`: `%D`, `%T`.
|
77
|
+
|
78
|
+
The following fields are stored as `DateTime`: `%t`.
|
79
|
+
Note: `%{format}t` is currently stored as `String`.
|
80
|
+
|
81
|
+
The field `%r` is special, see above.
|
82
|
+
|
83
|
+
All other fields are stored as `String`.
|
84
|
+
|
85
|
+
`ServerLogParser#handle` will silently ignore errors, but if you'd prefer,
|
86
|
+
`ServerLogParser#handle!` will raise a `ParseError` exception.
|
53
87
|
|
54
88
|
### Log Formats
|
55
89
|
|
@@ -41,6 +41,40 @@ module ServerLogParser
|
|
41
41
|
parse(line) || raise(ParseError, "Invalid format `%s` for line `%s`" % [@format, line])
|
42
42
|
end
|
43
43
|
|
44
|
+
# Parses <tt>line</tt> with the current log <tt>format</tt> and converts
# the matched fields to typed values.
#
# Returns a hash of log field => typed value on success, or <tt>nil</tt>
# if <tt>line</tt> doesn't match the current log <tt>format</tt>.
def handle(line)
  fields = parse(line)

  # A falsy parse result means the line did not match; the method then
  # evaluates to nil.
  handle_parsed(fields) if fields
end
|
53
|
+
|
54
|
+
# Strict variant of <tt>ServerLogParser#parse</tt>: returns whatever
# <tt>parse</tt> returns when <tt>line</tt> matches the current
# <tt>format</tt>.
#
# ==== Raises
#
# ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
#
def parse!(line)
  matched = parse(line)
  return matched if matched

  raise(ParseError, "Invalid format `%s` for line `%s`" % [@format, line])
end
|
64
|
+
|
65
|
+
# Strict variant of <tt>ServerLogParser#handle</tt>: parses <tt>line</tt>
# with <tt>parse!</tt> and converts the result to typed values.
#
# ==== Raises
#
# ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
#
def handle!(line)
  # parse! raises before handle_parsed is reached when the line is invalid.
  handle_parsed(parse!(line))
end
|
77
|
+
|
44
78
|
|
45
79
|
protected
|
46
80
|
|
@@ -88,6 +122,33 @@ module ServerLogParser
|
|
88
122
|
format
|
89
123
|
end
|
90
124
|
|
125
|
+
# Converts a hash of raw field values into a hash of typed values.
#
# +parsed+ maps log format fields (e.g. <tt>'%h'</tt>, <tt>'%>s'</tt>) to the
# strings captured from the log line (presumably the result of
# <tt>parse</tt> -- confirm against the caller).
#
# A value of <tt>'-'</tt> becomes <tt>nil</tt>; every other value is
# converted according to its field (see +cast_field_value+).
#
# Returns a new hash with the same keys; +parsed+ is not modified.
#
# ==== Raises
#
# ArgumentError:: if a numeric or time field holds a value that cannot be
#                 converted
#
def handle_parsed(parsed)
  parsed.each_with_object({}) do |(field, value), data|
    data[field] = value == '-' ? nil : cast_field_value(field, value)
  end
end

# Casts a single raw +value+ to the type associated with +field+:
# Integer for counters/ports/status codes, Float for durations,
# DateTime for <tt>%t</tt>, a nested hash for <tt>%r</tt>, and the
# unchanged string for everything else.
def cast_field_value(field, value)
  case field
  when '%B', '%b', '%k', '%p', /%\{\S+\}p/, '%P', /%\{\S+\}P/, '%s', '%>s', '%I', '%O'
    # Explicit base 10: without it Kernel#Integer honours radix prefixes,
    # so a zero-padded "0123" would be read as octal (83).
    Integer(value, 10)
  when '%D', '%T'
    Float(value)
  when '%t'
    DateTime.strptime(value, '[%d/%b/%Y:%H:%M:%S %Z]')
  when '%r'
    split_request_line(value)
  else
    value
  end
end

# Splits a request line such as <tt>GET /index.html HTTP/1.1</tt> into a
# hash with the keys <tt>'method'</tt>, <tt>'resource'</tt> and
# <tt>'protocol'</tt>. A part that cannot be found is <tt>nil</tt>.
def split_request_line(request)
  { 'method'   => request[/^(\w*)/, 1],
    # The resource is the second whitespace-delimited token. Searching for
    # the first "/" instead would cut an absolute-URI target
    # ("http://host/path") down to "//host/path" and miss targets such
    # as "*".
    'resource' => request[/\A\S+ (\S+)/, 1],
    'protocol' => request[/.* (.*)$/, 1] }
end
|
151
|
+
|
91
152
|
end
|
92
153
|
|
93
154
|
end
|
data/server_log_parser.gemspec
CHANGED
@@ -5,8 +5,8 @@ Gem::Specification.new do |spec|
|
|
5
5
|
spec.version = ServerLogParser::VERSION
|
6
6
|
spec.authors = ["Alexander Kurakin"]
|
7
7
|
spec.email = ["kuraga333@mail.ru"]
|
8
|
-
spec.summary = %q{Ruby library to parse
|
9
|
-
spec.description = %q{ServerLogParser provides a high-level Ruby library for parsing server server log files (common log format, with or without virtual hosts and combined log format) as used by
|
8
|
+
spec.summary = %q{Ruby library to parse Apache server log files using regular expressions.}
|
9
|
+
spec.description = %q{ServerLogParser provides a high-level Ruby library for parsing server server log files (common log format, with or without virtual hosts and combined log format) as used by Apache, Nginx and others.}
|
10
10
|
spec.homepage = "https://github.com/kuraga/server_log_parser"
|
11
11
|
spec.license = "MIT"
|
12
12
|
|
@@ -93,6 +93,49 @@ describe ServerLogParser::Parser do
|
|
93
93
|
|
94
94
|
end
|
95
95
|
|
96
|
+
describe "#handle" do

  # A well-formed line is returned as a Hash of typed values.
  it "should handle line" do
    actual = @parser.handle(read_testcase('line.log'))
    wanted = { '%h'             => '212.74.15.68',
               '%l'             => nil,
               '%u'             => nil,
               '%t'             => DateTime.new(2004, 1, 23, 11, 36, 20, '+0'),
               '%r'             => {"method" => "GET", "resource" => "/images/previous.png", "protocol" => "HTTP/1.1"},
               '%>s'            => 200,
               '%b'             => 2607,
               '%{Referer}i'    => 'http://peterhi.dyndns.org/bandwidth/index.html',
               '%{User-Agent}i' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202' }

    assert_kind_of(Hash, actual)
    assert_match_expected_hash(wanted, actual)
  end

  # A line that does not match the format yields nil instead of raising.
  it "return nil on invalid format" do
    assert_nil(@parser.handle('foobar'))
  end

end
|
121
|
+
|
122
|
+
describe "#handle!" do

  # For a valid line the strict variant must agree with #handle.
  it "should work" do
    line = read_testcase('line.log')

    assert_equal(@parser.handle(line), @parser.handle!(line))
  end

  # An invalid line raises ParseError with a descriptive message.
  it "should raise on invalid format" do
    raised = assert_raises(ServerLogParser::ParseError) { @parser.handle!('foobar') }

    assert_match(/Invalid format/, raised.message)
  end

end
|
96
139
|
|
97
140
|
protected
|
98
141
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: server_log_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Kurakin
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '5.8'
|
55
55
|
description: ServerLogParser provides a high-level Ruby library for parsing server
|
56
56
|
server log files (common log format, with or without virtual hosts and combined
|
57
|
-
log format) as used by
|
57
|
+
log format) as used by Apache, Nginx and others.
|
58
58
|
email:
|
59
59
|
- kuraga333@mail.ru
|
60
60
|
executables: []
|
@@ -74,8 +74,7 @@ files:
|
|
74
74
|
- test/fixtures/line-with-slash-quote-in-referer.log
|
75
75
|
- test/fixtures/line-with-slash-quote-in-request.log
|
76
76
|
- test/fixtures/line.log
|
77
|
-
- test/
|
78
|
-
- test/parse_test.rb
|
77
|
+
- test/parser_test.rb
|
79
78
|
- test/server_log_parser_test.rb
|
80
79
|
- test/test_helper.rb
|
81
80
|
homepage: https://github.com/kuraga/server_log_parser
|
@@ -101,12 +100,11 @@ rubyforge_project:
|
|
101
100
|
rubygems_version: 2.4.5.1
|
102
101
|
signing_key:
|
103
102
|
specification_version: 4
|
104
|
-
summary: Ruby library to parse
|
103
|
+
summary: Ruby library to parse Apache server log files using regular expressions.
|
105
104
|
test_files:
|
106
105
|
- test/fixtures/line-with-slash-quote-in-referer.log
|
107
106
|
- test/fixtures/line-with-slash-quote-in-request.log
|
108
107
|
- test/fixtures/line.log
|
109
|
-
- test/
|
110
|
-
- test/parse_test.rb
|
108
|
+
- test/parser_test.rb
|
111
109
|
- test/server_log_parser_test.rb
|
112
110
|
- test/test_helper.rb
|
data/test/fixtures/log.log
DELETED
@@ -1,5 +0,0 @@
|
|
1
|
-
87.18.183.252 - - [13/Aug/2008:00:50:49 -0700] "GET /blog/index.xml HTTP/1.1" 302 527 "-" "Feedreader 3.13 (Powered by Newsbrain)"
|
2
|
-
79.28.16.191 - - [13/Aug/2008:00:50:55 -0700] "GET /blog/public/2008/08/gmail-offline-dati-a-rischio/gmaildown.png HTTP/1.1" 304 283 "-" "FeedDemon/2.7 (http://www.newsgator.com/; Microsoft Windows XP)"
|
3
|
-
79.28.16.191 - - [13/Aug/2008:00:50:55 -0700] "GET /blog/public/2008/08/nuovo-microsoft-webmaster-center/overview.png HTTP/1.1" 304 283 "-" "FeedDemon/2.7 (http://www.newsgator.com/; Microsoft Windows XP)"
|
4
|
-
69.150.40.169 - - [13/Aug/2008:00:51:06 -0700] "POST http://www.simonecarletti.com/mt4/mt-ttb.cgi/563 HTTP/1.1" 404 610 "-" "-"
|
5
|
-
217.220.110.75 - - [13/Aug/2008:00:51:02 -0700] "GET /blog/2007/05/microsoft-outlook-pst.php HTTP/1.1" 200 82331 "http://www.google.it/search?hl=it&q=outlook+pst+file+4+GB&meta=" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"
|