server_log_parser 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +36 -2
- data/lib/server_log_parser/parser.rb +61 -0
- data/lib/server_log_parser/version.rb +1 -1
- data/server_log_parser.gemspec +2 -2
- data/test/{parse_test.rb → parser_test.rb} +43 -0
- metadata +5 -7
- data/test/fixtures/log.log +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99d0578102face3c2664838e749c0de0192322ab
|
4
|
+
data.tar.gz: 8d2030582ca02f1e9cd98ee0f773be0baeb0e7d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a677d61e5c9ffadf121726262b38b05b19c49883ce5f1b0f4bcb223ce3c04dfdd9c4489179c510213e54931255c9b90140fad2dcc985190dd9ee3af5aabf8d15
|
7
|
+
data.tar.gz: 34176519e78657e8e67c4639d4baa1a7faf92af489e8e2d26027cda42fe3cfad8a57703acc0e6d193af53fcb510a8380d612e08187541cfdea6b5522591421d4
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -48,8 +48,42 @@ end
|
|
48
48
|
`ServerLogParser#parse` will silently ignore errors, but if you'd prefer,
|
49
49
|
`ServerLogParser#parse!` will raise a `ParseError` exception.
|
50
50
|
|
51
|
-
|
52
|
-
|
51
|
+
### Handling
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
File.foreach('/var/log/apache/access.log') do |line|
|
55
|
+
parsed = parser.handle(line)
|
56
|
+
# {
|
57
|
+
# '%h' => '212.74.15.68',
|
58
|
+
# '%l' => nil,
|
59
|
+
# '%u' => nil,
|
60
|
+
# '%t' => DateTime.new(2004, 1, 23, 11, 36, 20, '+0'),
|
61
|
+
# '%r' => {"method" => "GET", "resource" => "/images/previous.png", "protocol" => "HTTP/1.1"},
|
62
|
+
# '%>s' => 200,
|
63
|
+
# '%b' => 2607,
|
64
|
+
# '%{Referer}i' => 'http://peterhi.dyndns.org/bandwidth/index.html',
|
65
|
+
# '%{User-Agent}i' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'
|
66
|
+
# }
|
67
|
+
end
|
68
|
+
```
|
69
|
+
|
70
|
+
Apache log files use `-` to mean no data is present and these are replaced with `nil`,
|
71
|
+
like the `%l` and `%u` values above. The request line (`%r`) is split into a nested hash with `method`, `resource` and `protocol` keys.
|
72
|
+
|
73
|
+
The following fields are stored as `Integer`: `%B`, `%b`, `%k`, `%p`, `%{format}p`,
|
74
|
+
`%P`, `%{format}P`, `%s`, `%>s`, `%I`, `%O`.
|
75
|
+
|
76
|
+
The following fields are stored as `Float`: `%D`, `%T`.
|
77
|
+
|
78
|
+
The following fields are stored as `DateTime`: `%t`.
|
79
|
+
Note: `%{format}t` is currently stored as `String`.
|
80
|
+
|
81
|
+
The field `%r` is special, see above.
|
82
|
+
|
83
|
+
All other fields are stored as `String`.
|
84
|
+
|
85
|
+
`ServerLogParser#handle` will silently ignore errors, but if you'd prefer,
|
86
|
+
`ServerLogParser#handle!` will raise a `ParseError` exception.
|
53
87
|
|
54
88
|
### Log Formats
|
55
89
|
|
@@ -41,6 +41,40 @@ module ServerLogParser
|
|
41
41
|
parse(line) || raise(ParseError, "Invalid format `%s` for line `%s`" % [@format, line])
|
42
42
|
end
|
43
43
|
|
44
|
+
# Parses <tt>line</tt> according to the current log <tt>format</tt> and
# converts the resulting field values to typed Ruby values.
#
# Returns a hash of log field => typed value on success.
# Returns <tt>nil</tt> if <tt>line</tt> doesn't match the current log <tt>format</tt>.
def handle(line)
  fields = parse(line)

  # A falsy result from +parse+ means the line did not match; pass that on as nil.
  fields ? handle_parsed(fields) : nil
end
|
53
|
+
|
54
|
+
# Same as <tt>ServerLogParser#parse</tt> but raises a <tt>ParseError</tt>
# instead of returning a falsy value when <tt>line</tt> doesn't match the
# current <tt>format</tt>.
#
# NOTE(review): a method with the same body appears to be defined just above
# +handle+ as well -- confirm, and keep only one definition.
#
# ==== Raises
#
# ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
#
def parse!(line)
  result = parse(line)
  return result if result

  raise ParseError, "Invalid format `%s` for line `%s`" % [@format, line]
end
|
64
|
+
|
65
|
+
# Same as <tt>ServerLogParser#handle</tt> but raises a <tt>ParseError</tt>
# instead of returning <tt>nil</tt> when <tt>line</tt> doesn't match the
# current <tt>format</tt>.
#
# ==== Raises
#
# ParseError:: if <tt>line</tt> doesn't match current <tt>format</tt>
#
def handle!(line)
  # +parse!+ raises on a mismatch, so only a successful result reaches the conversion.
  handle_parsed(parse!(line))
end
|
77
|
+
|
44
78
|
|
45
79
|
protected
|
46
80
|
|
@@ -88,6 +122,33 @@ module ServerLogParser
|
|
88
122
|
format
|
89
123
|
end
|
90
124
|
|
125
|
+
# Converts the raw string values of a parsed log line into typed values.
#
# ==== Parameters
#
# parsed:: hash of log field (e.g. <tt>'%b'</tt>) => raw <tt>String</tt> value
#
# ==== Returns
#
# A new hash with the same keys where
# * <tt>'-'</tt> (Apache's "no data" marker) becomes <tt>nil</tt>,
# * <tt>%B %b %k %p %{format}p %P %{format}P %s %>s %I %O</tt> become <tt>Integer</tt>,
# * <tt>%D %T</tt> become <tt>Float</tt>,
# * <tt>%t</tt> becomes <tt>DateTime</tt>,
# * <tt>%r</tt> becomes a hash with 'method', 'resource' and 'protocol' keys,
# * every other value is returned unchanged.
#
# ==== Raises
#
# ArgumentError:: (or a subclass) if a numeric or time field holds a value
#                 that cannot be converted
#
def handle_parsed(parsed)
  parsed.each_pair.with_object({}) do |(field, value), data|
    data[field] = value == '-' ? nil : coerce_field(field, value)
  end
end

# Converts a single non-empty raw <tt>value</tt> according to its log <tt>field</tt>.
def coerce_field(field, value)
  case field
  # The pattern is anchored so that only a field which *is* <tt>%{format}p</tt> or
  # <tt>%{format}P</tt> is coerced, not one that merely contains such text.
  when '%B', '%b', '%k', '%p', '%P', '%s', '%>s', '%I', '%O', /\A%\{\S+\}[pP]\z/
    # Explicit base 10: without it "0100" is read as octal (64) and "08" raises.
    Integer(value, 10)
  when '%D', '%T'
    Float(value)
  when '%t'
    DateTime.strptime(value, '[%d/%b/%Y:%H:%M:%S %Z]')
  when '%r'
    split_request(value)
  else
    value
  end
end

# Splits a request line into its 'method', 'resource' and 'protocol' parts.
#
# A well-formed line ("METHOD SP request-target SP HTTP-version") is split on
# its two separators, so absolute-form targets (<tt>http://host/path</tt>),
# <tt>*</tt> and authority-form targets (<tt>host:443</tt>) are kept intact.
# Any other input falls back to the previous best-effort extraction.
def split_request(request)
  well_formed = /\A(\S+) (\S+) (\S+)\z/.match(request)

  if well_formed
    { 'method'   => well_formed[1],
      'resource' => well_formed[2],
      'protocol' => well_formed[3] }
  else
    { 'method'   => request[/^(\w*)/, 1],
      'resource' => request[%r{(/\S*) }, 1],
      'protocol' => request[/.* (.*)$/, 1] }
  end
end
|
151
|
+
|
91
152
|
end
|
92
153
|
|
93
154
|
end
|
data/server_log_parser.gemspec
CHANGED
@@ -5,8 +5,8 @@ Gem::Specification.new do |spec|
|
|
5
5
|
spec.version = ServerLogParser::VERSION
|
6
6
|
spec.authors = ["Alexander Kurakin"]
|
7
7
|
spec.email = ["kuraga333@mail.ru"]
|
8
|
-
spec.summary = %q{Ruby library to parse
|
9
|
-
spec.description = %q{ServerLogParser provides a high-level Ruby library for parsing server server log files (common log format, with or without virtual hosts and combined log format) as used by
|
8
|
+
spec.summary = %q{Ruby library to parse Apache server log files using regular expressions.}
|
9
|
+
spec.description = %q{ServerLogParser provides a high-level Ruby library for parsing server server log files (common log format, with or without virtual hosts and combined log format) as used by Apache, Nginx and others.}
|
10
10
|
spec.homepage = "https://github.com/kuraga/server_log_parser"
|
11
11
|
spec.license = "MIT"
|
12
12
|
|
@@ -93,6 +93,49 @@ describe ServerLogParser::Parser do
|
|
93
93
|
|
94
94
|
end
|
95
95
|
|
96
|
+
# ServerLogParser::Parser#handle: typed conversion of a parsed line.
describe "#handle" do

  it "should handle line" do
    line = read_testcase('line.log')
    expected = {
      '%h' => '212.74.15.68',
      '%l' => nil,
      '%u' => nil,
      '%t' => DateTime.new(2004, 1, 23, 11, 36, 20, '+0'),
      '%r' => {"method" => "GET", "resource" => "/images/previous.png", "protocol" => "HTTP/1.1"},
      '%>s' => 200,
      '%b' => 2607,
      '%{Referer}i' => 'http://peterhi.dyndns.org/bandwidth/index.html',
      '%{User-Agent}i' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'
    }

    results = @parser.handle(line)

    assert_kind_of(Hash, results)
    assert_match_expected_hash(expected, results)
  end

  it "return nil on invalid format" do
    assert_nil(@parser.handle('foobar'))
  end

end
|
121
|
+
|
122
|
+
# ServerLogParser::Parser#handle!: same result as #handle, but raises on mismatch.
describe "#handle!" do

  it "should work" do
    line = read_testcase('line.log')

    # The non-raising variant is evaluated first and serves as the reference.
    reference = @parser.handle(line)
    actual = @parser.handle!(line)

    assert_equal(reference, actual)
  end

  it "should raise on invalid format" do
    raised = assert_raises(ServerLogParser::ParseError) do
      @parser.handle!('foobar')
    end

    assert_match(/Invalid format/, raised.message)
  end

end
|
96
139
|
|
97
140
|
protected
|
98
141
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: server_log_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Kurakin
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '5.8'
|
55
55
|
description: ServerLogParser provides a high-level Ruby library for parsing server
|
56
56
|
server log files (common log format, with or without virtual hosts and combined
|
57
|
-
log format) as used by
|
57
|
+
log format) as used by Apache, Nginx and others.
|
58
58
|
email:
|
59
59
|
- kuraga333@mail.ru
|
60
60
|
executables: []
|
@@ -74,8 +74,7 @@ files:
|
|
74
74
|
- test/fixtures/line-with-slash-quote-in-referer.log
|
75
75
|
- test/fixtures/line-with-slash-quote-in-request.log
|
76
76
|
- test/fixtures/line.log
|
77
|
-
- test/
|
78
|
-
- test/parse_test.rb
|
77
|
+
- test/parser_test.rb
|
79
78
|
- test/server_log_parser_test.rb
|
80
79
|
- test/test_helper.rb
|
81
80
|
homepage: https://github.com/kuraga/server_log_parser
|
@@ -101,12 +100,11 @@ rubyforge_project:
|
|
101
100
|
rubygems_version: 2.4.5.1
|
102
101
|
signing_key:
|
103
102
|
specification_version: 4
|
104
|
-
summary: Ruby library to parse
|
103
|
+
summary: Ruby library to parse Apache server log files using regular expressions.
|
105
104
|
test_files:
|
106
105
|
- test/fixtures/line-with-slash-quote-in-referer.log
|
107
106
|
- test/fixtures/line-with-slash-quote-in-request.log
|
108
107
|
- test/fixtures/line.log
|
109
|
-
- test/
|
110
|
-
- test/parse_test.rb
|
108
|
+
- test/parser_test.rb
|
111
109
|
- test/server_log_parser_test.rb
|
112
110
|
- test/test_helper.rb
|
data/test/fixtures/log.log
DELETED
@@ -1,5 +0,0 @@
|
|
1
|
-
87.18.183.252 - - [13/Aug/2008:00:50:49 -0700] "GET /blog/index.xml HTTP/1.1" 302 527 "-" "Feedreader 3.13 (Powered by Newsbrain)"
|
2
|
-
79.28.16.191 - - [13/Aug/2008:00:50:55 -0700] "GET /blog/public/2008/08/gmail-offline-dati-a-rischio/gmaildown.png HTTP/1.1" 304 283 "-" "FeedDemon/2.7 (http://www.newsgator.com/; Microsoft Windows XP)"
|
3
|
-
79.28.16.191 - - [13/Aug/2008:00:50:55 -0700] "GET /blog/public/2008/08/nuovo-microsoft-webmaster-center/overview.png HTTP/1.1" 304 283 "-" "FeedDemon/2.7 (http://www.newsgator.com/; Microsoft Windows XP)"
|
4
|
-
69.150.40.169 - - [13/Aug/2008:00:51:06 -0700] "POST http://www.simonecarletti.com/mt4/mt-ttb.cgi/563 HTTP/1.1" 404 610 "-" "-"
|
5
|
-
217.220.110.75 - - [13/Aug/2008:00:51:02 -0700] "GET /blog/2007/05/microsoft-outlook-pst.php HTTP/1.1" 200 82331 "http://www.google.it/search?hl=it&q=outlook+pst+file+4+GB&meta=" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"
|