ncsa-parser 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +12 -0
- data/Gemfile +3 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +23 -0
- data/Rakefile +37 -0
- data/lib/ncsa-parser.rb +22 -0
- data/lib/ncsa-parser/helper.rb +20 -0
- data/lib/ncsa-parser/log.rb +41 -0
- data/lib/ncsa-parser/parsed_line.rb +152 -0
- data/lib/ncsa-parser/parser.rb +111 -0
- data/lib/ncsa-parser/version.rb +5 -0
- data/ncsa-parser.gemspec +27 -0
- data/test/ncsa_parser_tests.rb +175 -0
- data/test/resources/access_log +7 -0
- data/test/test_helper.rb +25 -0
- metadata +127 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 J Smith <dark.panda@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
= NCSA Parser
|
3
|
+
|
4
|
+
Here's a quick little library for reading NCSA-style web server logs. Quick
|
5
|
+
usage:
|
6
|
+
|
7
|
+
NCSAParser.each_line(File.open('/var/log/httpd/access_log'), :pattern => %w{
|
8
|
+
host ident username datetime request
|
9
|
+
status bytes referer ua
|
10
|
+
outstream instream ratio
|
11
|
+
}) do |b|
|
12
|
+
puts b.inspect
|
13
|
+
end
|
14
|
+
|
15
|
+
parser = NCSAParser::Parser.new
|
16
|
+
parsed = parser.parse_line('...')
|
17
|
+
|
18
|
+
There are more examples available in the tests.
|
19
|
+
|
20
|
+
== License
|
21
|
+
|
22
|
+
This gem is licensed under an MIT-style license. See the +MIT-LICENSE+ file for
|
23
|
+
details.
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
gem 'rdoc', '~> 3.12'
|
6
|
+
|
7
|
+
require 'rubygems/package_task'
|
8
|
+
require 'rake/testtask'
|
9
|
+
require 'rdoc/task'
|
10
|
+
require 'bundler/gem_tasks'
|
11
|
+
|
12
|
+
if RUBY_VERSION >= '1.9'
|
13
|
+
begin
|
14
|
+
gem 'psych'
|
15
|
+
rescue Exception => e
|
16
|
+
# it's okay, fall back on the bundled psych
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
$:.push 'lib'
|
21
|
+
|
22
|
+
version = NCSAParser::VERSION
|
23
|
+
|
24
|
+
desc 'Test NCSA parser library'
|
25
|
+
Rake::TestTask.new(:test) do |t|
|
26
|
+
t.test_files = FileList['test/**/*_tests.rb']
|
27
|
+
t.verbose = !!ENV['VERBOSE_TESTS']
|
28
|
+
t.warning = !!ENV['WARNINGS']
|
29
|
+
end
|
30
|
+
|
31
|
+
desc 'Build docs'
|
32
|
+
Rake::RDocTask.new do |t|
|
33
|
+
t.title = "NCSA Parser #{version}"
|
34
|
+
t.main = 'README.rdoc'
|
35
|
+
t.rdoc_dir = 'doc'
|
36
|
+
t.rdoc_files.include('README.rdoc', 'MIT-LICENSE', 'lib/**/*.rb')
|
37
|
+
end
|
data/lib/ncsa-parser.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'ncsa-parser/version'
|
3
|
+
require 'ncsa-parser/parser'
|
4
|
+
require 'ncsa-parser/helper'
|
5
|
+
require 'ncsa-parser/parsed_line'
|
6
|
+
require 'ncsa-parser/log'
|
7
|
+
|
8
|
+
module NCSAParser
  class << self
    # Opens a log and iterates through its lines, yielding one parsed line
    # per log line.
    #
    # +log+ is either a path (String) or an already opened, line-oriented
    # IO-like object. +options+ is handed on to NCSAParser.open unchanged.
    #
    # When +log+ is a path, the file handle opened on the caller's behalf
    # is closed once iteration finishes or raises. An IO object supplied by
    # the caller is never closed here; it remains the caller's to manage.
    #
    # Returns whatever the log's +each+ returns.
    def each_line(log, options = {}, &block)
      parsed_log = self.open(log, options)

      begin
        parsed_log.each(&block)
      ensure
        # Only clean up handles this method caused to be opened.
        if log.is_a?(String)
          io = parsed_log.log
          already_closed = io.respond_to?(:closed?) && io.closed?
          io.close if io.respond_to?(:close) && !already_closed
        end
      end
    end
    alias :foreach :each_line

    # Opens a log file for parsing. This is a convenience method that proxies
    # to NCSAParser::Log.open. The caller is responsible for closing the
    # underlying IO of the returned object.
    def open(log, options = {})
      Log.open(log, options)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # Stateless utility functions shared by the parser classes.
  module Helper
    # Escapes the characters that most often make a logged URI unparseable:
    # spaces become "+", backslash-escaped double quotes (\") become "%22"
    # and commas become "%2C".
    #
    # Returns a new String; +uri+ itself is not modified.
    def self.clean_uri(uri)
      uri.
        gsub(/ /, '+').
        gsub(/\\"/, '%22').
        gsub(/,/, '%2C')
    end

    # Returns a new Hash in which every key that responds to +to_sym+ has
    # been converted to a Symbol. Values that are Hashes are converted
    # recursively; all other values are carried over untouched. +hash+
    # itself is not modified.
    def self.deep_symbolize_keys(hash)
      hash.inject({}) do |memo, (key, value)|
        # Keys that cannot be converted (no #to_sym, or #to_sym raising) are
        # kept exactly as they are.
        symbolized = begin
          key.respond_to?(:to_sym) ? key.to_sym : key
        rescue StandardError
          key
        end

        # Recurse through this module's own method.
        value = deep_symbolize_keys(value) if value.is_a?(Hash)

        memo[symbolized] = value
        memo
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # Wraps a line-oriented IO object and parses each of its lines with an
  # NCSAParser::Parser built from the given options.
  class Log
    include Enumerable

    # +log+ is the underlying IO object; +parser+ is the Parser used on
    # every line.
    attr_reader :log, :parser

    # +log+ must respond to +each+ (and to +gets+ if +next_line+ is used).
    # +options+ is passed straight to NCSAParser::Parser.new.
    def initialize(log, options = {})
      @log = log
      @parser = Parser.new(options)
    end

    # Builds a Log from either a file path (String) or an already opened
    # IO-like object.
    #
    # Without a block the new Log is returned and the caller is responsible
    # for closing its underlying IO. With a block the Log is yielded, the
    # block's value is returned, and a file that was opened here from a path
    # is closed afterwards, even if the block raises. An IO object supplied
    # by the caller is never closed here.
    def self.open(file, options = {})
      opened_here = file.is_a?(String)
      io = opened_here ? File.open(file) : file
      log = new(io, options)

      return log unless block_given?

      begin
        yield log
      ensure
        io.close if opened_here && !io.closed?
      end
    end

    # With a block, yields the parsed form of every line in the log.
    # Without a block, returns an Array holding the parsed form of every
    # line.
    def each
      if block_given?
        self.log.each do |l|
          yield self.parser.parse_line(l)
        end
      else
        self.log.collect do |l|
          self.parser.parse_line(l)
        end
      end
    end

    # Reads the next line with +gets+, parses it and returns the result,
    # yielding it first when a block is given. Whatever +gets+ returns is
    # handed to the parser unchanged, including its end-of-input value.
    def next_line
      self.parser.parse_line(self.log.gets).tap { |parsed|
        yield parsed if block_given?
      }
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # NCSAParser::ParsedLine handles some token conversions and the like on the
  # fly after a successful line parse. You can add your own token conversions
  # or override existing ones by passing along a +:token_conversions+ option
  # that contains converters in the same manner as those found in
  # NCSAParser::ParsedLine::TOKEN_CONVERSIONS.
  #
  # Each converter is a proc that receives the ParsedLine itself and the
  # options Hash, and returns the converted value.
  #
  # To access a parsed value without any sort of token conversion, use the
  # +attributes+ method. The +[]+ method will perform the token conversion
  # on the fly for you.
  #
  # For token converters that handle URIs, the Symbol :bad_uri will be returned
  # if the URI parser fails for whatever reason.
  class ParsedLine
    TOKEN_CONVERSIONS = {
      # The raw datetime token parsed using the +:datetime_format+ option.
      :datetime => proc { |match, options|
        DateTime.strptime(match.attributes[:datetime], options[:datetime_format])
      },

      # The requested resource as a URI rooted at the +:domain+ option.
      :request_uri => proc { |match, options|
        begin
          request = match.attributes[:request].scan(/^"[A-Z]+ (.+) HTTP\/\d+\.\d+"$/).flatten[0]
          URI.parse("http://#{options[:domain]}#{request}")
        rescue
          :bad_uri
        end if match.attributes[:request]
      },

      # The requested path without its query string.
      :request_path => proc { |match, options|
        match.attributes[:request].scan(/^"[A-Z]+ ([^?]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      # The HTTP verb, e.g. "GET".
      :http_method => proc { |match, options|
        match.attributes[:request].scan(/^"([A-Z]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      # The HTTP protocol version, e.g. "1.1".
      :http_version => proc { |match, options|
        match.attributes[:request].scan(/(\d+\.\d+)"$/).flatten[0] rescue nil if match.attributes[:request]
      },

      # The query string of +:request_uri+ parsed by CGI.parse; an empty
      # Hash when the request has no query string.
      :query_string => proc { |match, options|
        uri = match[:request_uri]

        if uri
          uri.query ? CGI.parse(uri.query) : Hash.new
        end
      },

      # The referer as a URI, "-" when the log recorded no referer, or
      # :bad_uri when the value cannot be parsed even after clean-up.
      :referer_uri => proc { |match, options|
        if match[:referer]
          if match[:referer] != '"-"'
            referer = match[:referer].sub(/^"(.+)"$/, '\1')
            # clean_uri returns a new String, so its result must be kept;
            # otherwise the clean-up has no effect on what gets parsed.
            referer = NCSAParser::Helper.clean_uri(referer)

            begin
              URI.parse(referer)
            rescue
              :bad_uri
            end
          else
            '-'
          end
        end
      },

      # The result of querying the +:browscap+ object with the unquoted user
      # agent; +nil+ when no +:browscap+ option was given.
      :browscap => proc { |match, options|
        options[:browscap].query(match[:ua].sub(/^"(.+)"$/, '\1')) if options[:browscap]
      },

      # The ratio token (e.g. "45%") as a fraction (0.45).
      :ratio => proc { |match, options|
        match.attributes[:ratio].to_f / 100 rescue nil if match.attributes[:ratio]
      },

      # The +host+ token, falling back on the first address listed in
      # +host_proxy+.
      :host => proc { |match, options|
        if match.attributes[:host]
          match.attributes[:host]
        elsif match.attributes[:host_proxy]
          match.attributes[:host_proxy].split(',')[0].strip
        end
      }
    }

    # Integer conversions for the plain numeric tokens. A token that is not
    # present converts to +nil+.
    %w{ status instream outstream bytes }.each do |field|
      key = field.to_sym

      TOKEN_CONVERSIONS[key] = proc { |match, options|
        match.attributes[key].to_i rescue nil if match.attributes[key]
      }
    end

    # The raw, unconverted values keyed by token name.
    attr_reader :attributes

    # +attributes+ is the Hash of raw token values and +options+ the parser
    # options. Raises ArgumentError when a +:browscap+ option is given that
    # does not respond to +query+.
    def initialize(attributes, options = {})
      @attributes, @options = attributes, options
      @parsed_attributes = {}

      if options[:browscap] && !options[:browscap].respond_to?(:query)
        raise ArgumentError.new("The :browscap object should respond to the #query method.")
      end
    end

    # Accesses either an attribute or an attribute that has been passed
    # through a token converter. You can access the raw, unconverted attributes
    # via the +attributes+ method. Converted values are cached, so each
    # converter runs at most once per key.
    #
    # Converters given in the +:token_conversions+ option take precedence
    # over the built-in ones. If a built-in converter fails for whatever
    # reason, a value of +:bad_conversion+ is returned; an exception raised
    # by a converter from +:token_conversions+ is not rescued here.
    def [](key)
      key = key.to_sym unless key.is_a?(Symbol)

      if @parsed_attributes.has_key?(key)
        @parsed_attributes[key]
      elsif @options[:token_conversions] && @options[:token_conversions][key]
        @parsed_attributes[key] = @options[:token_conversions][key].call(self, @options)
      elsif TOKEN_CONVERSIONS[key]
        @parsed_attributes[key] = (TOKEN_CONVERSIONS[key].call(self, @options) rescue :bad_conversion)
      else
        @attributes[key]
      end
    end

    # Gathers up the requested attributes and spits them out into a Hash.
    # The +values+ argument determines what gets inserted into the Hash:
    #
    # * +:all+ - both attributes and parsed attributes. In cases where
    #   the values share the same names, the parsed attribute wins out.
    # * +:attributes+ - unparsed attributes only.
    # * +:parsed+ - parsed attributes only.
    #
    # The default value is +:all+. Any +nil+ values are automatically stripped
    # from the Hash.
    #
    # Every built-in converter is evaluated here; values from converters
    # given in +:token_conversions+ are included only if they have already
    # been accessed through +[]+.
    def to_hash(values = :all)
      retval = {}

      if values == :all || values == :attributes
        retval.merge!(@attributes)
      end

      if values == :all || values == :parsed
        # Force every built-in conversion so the cache is fully populated.
        TOKEN_CONVERSIONS.each { |t, v| self[t] }
        retval.merge!(@parsed_attributes)
      end

      retval.reject! { |k, v|
        v.nil?
      }

      retval
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
require 'uri'
|
3
|
+
require 'date'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
module NCSAParser
  # Raised when a line does not match the parser's pattern.
  #
  # Inherits from StandardError so that it is handled by an ordinary
  # +rescue+; rescuing NCSAParser::BadLogLine by name works as before.
  class BadLogLine < StandardError
    # +line+ is the offending input; +pattern+ is the list of token names
    # (an Array, or a single space-delimited String) the line was matched
    # against.
    def initialize(line, pattern)
      super("Bad log line. Pattern: |#{Array(pattern).join(' ')}| Line: |#{line}|")
    end
  end

  # A line parser for a log file. Lines are parsed via Regexps. You can
  # inject new tokens or override existing ones by passing along a
  # +:tokens+ option and adding the keys to the +:pattern+ option
  # accordingly.
  class Parser
    IP_ADDRESS = '\d+\.\d+\.\d+\.\d+|unknown'

    # Regexp source fragments keyed by token name. Each fragment is wrapped
    # in one capturing group when the line matcher is built.
    TOKENS = {
      :host => "(?:#{IP_ADDRESS}|-|::1)",
      :host_proxy => "(?:#{IP_ADDRESS})(?:,\\s+#{IP_ADDRESS})*|-",
      :ident => '[^\s]+',
      :username => '[^\s]+',
      :datetime => '\[[^\]]+\]',
      :request => '".+"',
      :status => '\d+',
      :bytes => '\d+|-',
      :referer => '".*"',
      :ua => '".*"',
      :usertrack => "(?:#{IP_ADDRESS})[^ ]+|-",
      :outstream => '\d+|-',
      :instream => '\d+|-',
      :ratio => '\d+%|-%'
    }

    LOG_FORMAT_COMMON = %w{
      host ident username datetime request status bytes
    }

    LOG_FORMAT_COMBINED = %w{
      host ident username datetime request status bytes referer ua
    }

    # +pattern+ is the Array of token names, +re+ the generated Regexp
    # source String and +matcher+ the compiled Regexp.
    attr_reader :pattern, :matcher, :re

    # Creates a new Parser object.
    #
    # == Options
    #
    # * +:domain+ - when parsing query strings, use this domain as the URL's
    #   domain. The default is +"www.example.com"+.
    # * +:datetime_format+ - sets the datetime format for when tokens are
    #   converted in NCSAParser::ParsedLine. The default is +"[%d/%b/%Y:%H:%M:%S %Z]"+.
    # * +:pattern+ - the default log line format to use. The default is
    #   +LOG_FORMAT_COMBINED+, which matches the "combined" log format in
    #   Apache. The value for +:pattern+ can be either a space-delimited
    #   String of token names or an Array of token names.
    # * +:browscap+ - a browser capabilities object to use when sniffing out
    #   user agents. This object should be able to respond to the +query+
    #   method. Several browscap extensions are available for Ruby, and the
    #   version by the author of this extension is called Browscapper and is
    #   available at https://github.com/dark-panda/browscapper .
    # * +:token_conversions+ - converters to pass along to the line parser.
    #   See NCSAParser::ParsedLine for details.
    # * +:tokens+ - tokens to add to the generated Regexp.
    #
    # Raises ArgumentError when the pattern names a token that is found in
    # neither +:tokens+ nor +TOKENS+.
    def initialize(options = {})
      options = {
        :domain => 'www.example.com',
        :datetime_format => '[%d/%b/%Y:%H:%M:%S %Z]',
        :pattern => LOG_FORMAT_COMBINED
      }.merge(options)

      @options = options
      @pattern = if options[:pattern].is_a?(Array)
        options[:pattern]
      else
        # Argument-less split ignores leading whitespace, so a padded String
        # does not yield an empty (and therefore unknown) token name.
        options[:pattern].to_s.split
      end

      @re = '^' + @pattern.collect { |tk|
        tk = tk.to_sym
        # Tokens supplied by the caller take precedence over the built-ins.
        token = if options[:tokens] && options[:tokens][tk]
          options[:tokens][tk]
        elsif TOKENS[tk]
          TOKENS[tk]
        else
          raise ArgumentError.new("Token :#{tk} not found!")
        end

        "(#{token})"
      }.join(' ') + '$'
      @matcher = Regexp.new(@re)
    end

    # Parses a single line and returns an NCSAParser::ParsedLine object
    # whose attributes hold one raw value per token in the pattern plus the
    # stripped input line under +:original+.
    #
    # Raises NCSAParser::BadLogLine when the line does not match the
    # pattern.
    def parse_line(line)
      md = @matcher.match(line)

      # Report the normalized token list: the raw :pattern option may be a
      # String, which has no #join.
      raise BadLogLine.new(line, @pattern) unless md

      match = Hash.new
      @pattern.each_with_index do |k, j|
        # Capture groups are 1-based and follow the order of the pattern.
        match[k.to_sym] = md[j + 1]
      end
      match[:original] = line.strip

      ParsedLine.new(match, @options)
    end
    alias :parse :parse_line
  end
end
|
data/ncsa-parser.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path('../lib/ncsa-parser/version', __FILE__)
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "ncsa-parser"
|
7
|
+
s.version = NCSAParser::VERSION
|
8
|
+
|
9
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
10
|
+
s.authors = ["J Smith"]
|
11
|
+
s.description = "A simple NCSA-style log file parser."
|
12
|
+
s.summary = s.description
|
13
|
+
s.email = "dark.panda@gmail.com"
|
14
|
+
s.extra_rdoc_files = [
|
15
|
+
"README.rdoc"
|
16
|
+
]
|
17
|
+
s.files = `git ls-files`.split($\)
|
18
|
+
s.executables = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
|
19
|
+
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
20
|
+
s.homepage = "http://github.com/dark-panda/ncsa-parser"
|
21
|
+
s.require_paths = ["lib"]
|
22
|
+
|
23
|
+
s.add_dependency("rdoc")
|
24
|
+
s.add_dependency("rake", ["~> 0.9"])
|
25
|
+
s.add_dependency("minitest")
|
26
|
+
s.add_dependency("turn")
|
27
|
+
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
|
2
|
+
$: << File.dirname(__FILE__)
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
class NCSAParserTests < MiniTest::Unit::TestCase
|
6
|
+
include TestHelper
|
7
|
+
|
8
|
+
def test_format_default
|
9
|
+
parser = NCSAParser::Parser.new
|
10
|
+
parsed = parser.parse_line(LOG_COMBINED)
|
11
|
+
|
12
|
+
assert_equal({
|
13
|
+
:host => %{123.123.123.123},
|
14
|
+
:ident => %{-},
|
15
|
+
:username => %{-},
|
16
|
+
:datetime => %{[08/Oct/2012:14:36:07 -0400]},
|
17
|
+
:request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
|
18
|
+
:status => %{200},
|
19
|
+
:bytes => %{923},
|
20
|
+
:referer => %{"http://www.example.com/referer"},
|
21
|
+
:ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
|
22
|
+
:original => LOG_COMBINED,
|
23
|
+
}, parsed.attributes)
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_format_common
|
27
|
+
parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
|
28
|
+
|
29
|
+
parsed = parser.parse_line(LOG_COMMON)
|
30
|
+
assert_equal({
|
31
|
+
:host => %{123.123.123.123},
|
32
|
+
:ident => %{-},
|
33
|
+
:username => %{-},
|
34
|
+
:datetime => %{[08/Oct/2012:14:36:07 -0400]},
|
35
|
+
:request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
|
36
|
+
:status => %{200},
|
37
|
+
:bytes => %{923},
|
38
|
+
:original => LOG_COMMON,
|
39
|
+
}, parsed.attributes)
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_format_usertrack
|
43
|
+
parser = NCSAParser::Parser.new(:pattern => %w{
|
44
|
+
host ident username datetime request status bytes referer ua usertrack
|
45
|
+
})
|
46
|
+
|
47
|
+
parsed = parser.parse_line(LOG_USERTRACK)
|
48
|
+
|
49
|
+
assert_equal({
|
50
|
+
:host => %{123.123.123.123},
|
51
|
+
:ident => %{-},
|
52
|
+
:username => %{-},
|
53
|
+
:datetime => %{[08/Oct/2012:14:36:07 -0400]},
|
54
|
+
:request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
|
55
|
+
:status => %{200},
|
56
|
+
:bytes => %{923},
|
57
|
+
:referer => %{"http://www.example.com/referer"},
|
58
|
+
:ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
|
59
|
+
:usertrack => %{123.123.123.123.1349718542489266},
|
60
|
+
:original => LOG_USERTRACK
|
61
|
+
}, parsed.attributes)
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_format_deflate
|
65
|
+
parser = NCSAParser::Parser.new(:pattern => %w{
|
66
|
+
host ident username datetime request status bytes referer ua instream outstream ratio
|
67
|
+
})
|
68
|
+
|
69
|
+
parsed = parser.parse_line(LOG_DEFLATE)
|
70
|
+
|
71
|
+
assert_equal({
|
72
|
+
:host => %{123.123.123.123},
|
73
|
+
:ident => %{-},
|
74
|
+
:username => %{-},
|
75
|
+
:datetime => %{[08/Oct/2012:14:36:07 -0400]},
|
76
|
+
:request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
|
77
|
+
:status => %{200},
|
78
|
+
:bytes => %{923},
|
79
|
+
:referer => %{"http://www.example.com/referer"},
|
80
|
+
:ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
|
81
|
+
:instream => %{905},
|
82
|
+
:outstream => %{1976},
|
83
|
+
:ratio => %{45%},
|
84
|
+
:original => LOG_DEFLATE
|
85
|
+
}, parsed.attributes)
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_format_bad
|
89
|
+
parser = NCSAParser::Parser.new
|
90
|
+
|
91
|
+
assert_raises(NCSAParser::BadLogLine) do
|
92
|
+
parser.parse_line('what happen')
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_open_file
|
97
|
+
log = NCSAParser.open('./test/resources/access_log', :pattern => %w{
|
98
|
+
host ident username datetime request status bytes referer ua usertrack instream outstream ratio
|
99
|
+
})
|
100
|
+
|
101
|
+
expect = {
|
102
|
+
:host => %{123.123.123.123},
|
103
|
+
:ident => %{-},
|
104
|
+
:username => %{-},
|
105
|
+
:datetime => %{[08/Oct/2012:14:36:07 -0400]},
|
106
|
+
:request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
|
107
|
+
:status => %{200},
|
108
|
+
:bytes => %{923},
|
109
|
+
:referer => %{"http://www.example.com/referer"},
|
110
|
+
:ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
|
111
|
+
:usertrack => %{123.123.123.123.1349718542489266},
|
112
|
+
:instream => %{905},
|
113
|
+
:outstream => %{1976},
|
114
|
+
:ratio => %{45%},
|
115
|
+
:original => %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%}
|
116
|
+
}
|
117
|
+
|
118
|
+
log.each do |parsed|
|
119
|
+
assert_equal(expect, parsed.attributes)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_token_conversions
|
124
|
+
line = %{[08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" "http://www.example.com/referer" 1000 100 10% 123.123.123.123 200 110}
|
125
|
+
|
126
|
+
parser = NCSAParser::Parser.new(:pattern => %w{
|
127
|
+
datetime request referer instream outstream ratio host_proxy status bytes
|
128
|
+
})
|
129
|
+
|
130
|
+
parsed = parser.parse_line(line)
|
131
|
+
|
132
|
+
assert_equal(DateTime.strptime('[08/Oct/2012:14:36:07 -0400]', '[%d/%b/%Y:%H:%M:%S %Z]'), parsed[:datetime])
|
133
|
+
assert_equal(URI.parse('http://www.example.com/path/to/something?foo=bar&hello=world'), parsed[:request_uri])
|
134
|
+
assert_equal('/path/to/something', parsed[:request_path])
|
135
|
+
assert_equal('GET', parsed[:http_method])
|
136
|
+
assert_equal('1.1', parsed[:http_version])
|
137
|
+
assert_equal(URI.parse('http://www.example.com/referer'), parsed[:referer_uri])
|
138
|
+
assert_equal(0.1, parsed[:ratio])
|
139
|
+
assert_equal('123.123.123.123', parsed[:host])
|
140
|
+
assert_equal(200, parsed[:status])
|
141
|
+
assert_equal(1000, parsed[:instream])
|
142
|
+
assert_equal(100, parsed[:outstream])
|
143
|
+
assert_equal(110, parsed[:bytes])
|
144
|
+
end
|
145
|
+
|
146
|
+
def test_custom_tokens_and_conversion
|
147
|
+
parser = NCSAParser::Parser.new(
|
148
|
+
:pattern => 'email',
|
149
|
+
|
150
|
+
:tokens => {
|
151
|
+
:email => '[^@]+@[^@]+'
|
152
|
+
},
|
153
|
+
|
154
|
+
:token_conversions => {
|
155
|
+
:email => proc { |match, options|
|
156
|
+
URI.parse(match.attributes[:email])
|
157
|
+
}
|
158
|
+
}
|
159
|
+
)
|
160
|
+
|
161
|
+
parsed = parser.parse_line('test@example.com')
|
162
|
+
assert_equal(URI.parse('test@example.com'), parsed[:email])
|
163
|
+
end
|
164
|
+
|
165
|
+
def test_to_hash
|
166
|
+
parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
|
167
|
+
parsed = parser.parse_line(LOG_COMMON)
|
168
|
+
|
169
|
+
assert_equal([
|
170
|
+
:host, :ident, :username, :datetime, :request, :status, :bytes,
|
171
|
+
:original, :request_uri, :request_path, :http_method,
|
172
|
+
:http_version, :query_string
|
173
|
+
].sort_by(&:to_s), parsed.to_hash.keys.sort_by(&:to_s))
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
2
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
3
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
4
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
5
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
6
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
7
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'turn/autorun'
|
5
|
+
require File.join(File.dirname(__FILE__), %w{ .. lib ncsa-parser })
|
6
|
+
|
7
|
+
puts "NCSAParser version #{NCSAParser::VERSION}"
|
8
|
+
|
9
|
+
module TestHelper
|
10
|
+
LOG_COMMON = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923}
|
11
|
+
|
12
|
+
LOG_COMBINED = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"}
|
13
|
+
|
14
|
+
LOG_USERTRACK = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266}
|
15
|
+
|
16
|
+
LOG_DEFLATE = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 905 1976 45%}
|
17
|
+
end
|
18
|
+
|
19
|
+
if ENV['autotest']
|
20
|
+
module Turn::Colorize
|
21
|
+
def self.color_supported?
|
22
|
+
true
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ncsa-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- J Smith
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdoc
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.9'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.9'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: minitest
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: turn
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
description: A simple NCSA-style log file parser.
|
79
|
+
email: dark.panda@gmail.com
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files:
|
83
|
+
- README.rdoc
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- MIT-LICENSE
|
88
|
+
- README.rdoc
|
89
|
+
- Rakefile
|
90
|
+
- lib/ncsa-parser.rb
|
91
|
+
- lib/ncsa-parser/helper.rb
|
92
|
+
- lib/ncsa-parser/log.rb
|
93
|
+
- lib/ncsa-parser/parsed_line.rb
|
94
|
+
- lib/ncsa-parser/parser.rb
|
95
|
+
- lib/ncsa-parser/version.rb
|
96
|
+
- ncsa-parser.gemspec
|
97
|
+
- test/ncsa_parser_tests.rb
|
98
|
+
- test/resources/access_log
|
99
|
+
- test/test_helper.rb
|
100
|
+
homepage: http://github.com/dark-panda/ncsa-parser
|
101
|
+
licenses: []
|
102
|
+
post_install_message:
|
103
|
+
rdoc_options: []
|
104
|
+
require_paths:
|
105
|
+
- lib
|
106
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 1.8.24
|
121
|
+
signing_key:
|
122
|
+
specification_version: 3
|
123
|
+
summary: A simple NCSA-style log file parser.
|
124
|
+
test_files:
|
125
|
+
- test/ncsa_parser_tests.rb
|
126
|
+
- test/resources/access_log
|
127
|
+
- test/test_helper.rb
|