ncsa-parser 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +12 -0
- data/Gemfile +3 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +23 -0
- data/Rakefile +37 -0
- data/lib/ncsa-parser.rb +22 -0
- data/lib/ncsa-parser/helper.rb +20 -0
- data/lib/ncsa-parser/log.rb +41 -0
- data/lib/ncsa-parser/parsed_line.rb +152 -0
- data/lib/ncsa-parser/parser.rb +111 -0
- data/lib/ncsa-parser/version.rb +5 -0
- data/ncsa-parser.gemspec +27 -0
- data/test/ncsa_parser_tests.rb +175 -0
- data/test/resources/access_log +7 -0
- data/test/test_helper.rb +25 -0
- metadata +127 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 J Smith <dark.panda@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
|
2
|
+
= NCSA Parser
|
3
|
+
|
4
|
+
Here's a quick little library for reading NCSA-style web server logs. Quick
|
5
|
+
usage:
|
6
|
+
|
7
|
+
NCSAParser.each_line(File.open('/var/log/httpd/access_log'), :pattern => %w{
|
8
|
+
host ident username datetime request
|
9
|
+
status bytes referer ua
|
10
|
+
outstream instream ratio
|
11
|
+
}) do |b|
|
12
|
+
puts b.inspect
|
13
|
+
end
|
14
|
+
|
15
|
+
parser = NCSAParser::Parser.new
|
16
|
+
parsed = parser.parse_line('...')
|
17
|
+
|
18
|
+
There are more examples available in the tests.
|
19
|
+
|
20
|
+
== License
|
21
|
+
|
22
|
+
This gem is licensed under an MIT-style license. See the +MIT-LICENSE+ file for
|
23
|
+
details.
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
gem 'rdoc', '~> 3.12'
|
6
|
+
|
7
|
+
require 'rubygems/package_task'
|
8
|
+
require 'rake/testtask'
|
9
|
+
require 'rdoc/task'
|
10
|
+
require 'bundler/gem_tasks'
|
11
|
+
|
12
|
+
# On Ruby 1.9+ try to activate the psych gem. A failed activation raises a
# LoadError subclass, which is the only failure we want to ignore here.
if RUBY_VERSION >= '1.9'
  begin
    gem 'psych'
  rescue LoadError
    # it's okay, fall back on the bundled psych
  end
end

$:.push 'lib'

# NOTE(review): nothing in this file requires 'ncsa-parser/version' directly;
# presumably NCSAParser::VERSION is defined as a side effect of
# 'bundler/gem_tasks' evaluating the gemspec -- confirm.
version = NCSAParser::VERSION

desc 'Test NCSA parser library'
Rake::TestTask.new(:test) do |t|
  t.test_files = FileList['test/**/*_tests.rb']
  # Both switches are driven by environment variables; any value enables them.
  t.verbose = !!ENV['VERBOSE_TESTS']
  t.warning = !!ENV['WARNINGS']
end

desc 'Build docs'
Rake::RDocTask.new do |t|
  t.title = "NCSA Parser #{version}"
  t.main = 'README.rdoc'
  t.rdoc_dir = 'doc'
  t.rdoc_files.include('README.rdoc', 'MIT-LICENSE', 'lib/**/*.rb')
end
|
data/lib/ncsa-parser.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'ncsa-parser/version'
|
3
|
+
require 'ncsa-parser/parser'
|
4
|
+
require 'ncsa-parser/helper'
|
5
|
+
require 'ncsa-parser/parsed_line'
|
6
|
+
require 'ncsa-parser/log'
|
7
|
+
|
8
|
+
module NCSAParser
  class << self
    # Opens a log and iterates through its parsed lines.
    #
    # +log+ is either a path String or an already opened IO-like object and
    # +options+ are handed through to NCSAParser.open. Each parsed line is
    # yielded to the block; the return value is whatever the log's +each+
    # returns.
    #
    # When +log+ is a path String the file is opened on the caller's behalf,
    # so it is closed again once iteration finishes (or fails). An object
    # supplied by the caller is left open, since the caller owns it.
    def each_line(log, options = {}, &block)
      parsed_log = self.open(log, options)
      parsed_log.each(&block)
    ensure
      if parsed_log && log.is_a?(String)
        handle = parsed_log.log
        handle.close if handle.respond_to?(:close) && !handle.closed?
      end
    end
    alias :foreach :each_line

    # Opens a log file for parsing. This is a convenience method that proxies
    # to NCSAParser::Log.open.
    def open(log, options = {})
      Log.open(log, options)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # Stateless helper methods used by the parser classes.
  module Helper
    # Returns a copy of +uri+ with spaces replaced by "+", backslash-escaped
    # double quotes (\") replaced by "%22" and commas replaced by "%2C".
    # The argument itself is not modified.
    def self.clean_uri(uri)
      uri.
        gsub(/ /, '+').
        gsub(/\\"/, '%22').
        gsub(/,/, '%2C')
    end

    # Returns a new Hash in which every key that responds to +to_sym+ has been
    # converted to a Symbol. Values that are Hashes are converted recursively;
    # all other values are copied over untouched.
    def self.deep_symbolize_keys(hash)
      hash.inject({}) do |memo, (key, value)|
        key = begin
          key.respond_to?(:to_sym) ? key.to_sym : key
        rescue StandardError
          # A key that cannot be interned is kept as it is.
          key
        end

        # Recurse through this module; the previous reference to NCSAHelper
        # named a constant that does not exist.
        value = deep_symbolize_keys(value) if value.is_a?(Hash)
        memo[key] = value
        memo
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # Wraps a line-oriented log source together with the Parser used to parse
  # its lines.
  class Log
    include Enumerable

    attr_reader :log, :parser

    # +log+ is the object lines are read from (it must respond to +each+,
    # +collect+ and +gets+ for the methods below). +options+ are passed to
    # NCSAParser::Parser.new.
    def initialize(log, options = {})
      @log = log
      @parser = Parser.new(options)
    end

    # Builds a Log from +file+. A String is treated as a path and opened with
    # File.open; anything else is used as it is. A file opened here is not
    # closed by this class -- close it through the +log+ reader when done.
    def self.open(file, options = {})
      file = File.open(file) if file.is_a?(String)
      new(file, options)
    end

    # With a block, yields every parsed line in turn. Without a block, returns
    # an Array of all parsed lines.
    def each
      if block_given?
        log.each { |line| yield parser.parse_line(line) }
      else
        log.collect { |line| parser.parse_line(line) }
      end
    end

    # Reads and parses the next line of the log. The parsed line is yielded to
    # the block, if one is given, and then returned.
    #
    # Returns +nil+ once the log is exhausted instead of passing the +nil+
    # from +gets+ on to the parser, which would report it as a bad log line.
    def next_line
      line = log.gets
      return nil if line.nil?

      parsed = parser.parse_line(line)
      yield parsed if block_given?
      parsed
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
|
2
|
+
module NCSAParser
  # NCSAParser::ParsedLine handles some token conversions and the like on the
  # fly after a successful line parse. You can add your own token conversions
  # or override existing ones by passing along a +:token_conversions+ option
  # that contains converters in the same manner as those found in
  # NCSAParser::ParsedLine::TOKEN_CONVERSIONS.
  #
  # To access a parsed value without any sort of token conversion, use the
  # +attributes+ method. The +[]+ method will perform the token conversion
  # on the fly for you.
  #
  # For token converters that handle URIs, the Symbol :bad_uri will be returned
  # if the URI parser fails for whatever reason.
  class ParsedLine
    # Built-in converters keyed by token name. Each one is called with the
    # ParsedLine itself and the options Hash.
    TOKEN_CONVERSIONS = {
      # The raw datetime parsed with the +:datetime_format+ option.
      :datetime => proc { |match, options|
        DateTime.strptime(match.attributes[:datetime], options[:datetime_format])
      },

      # The request target as a URI on the +:domain+ option's host.
      :request_uri => proc { |match, options|
        if match.attributes[:request]
          begin
            request = match.attributes[:request].scan(/^"[A-Z]+ (.+) HTTP\/\d+\.\d+"$/).flatten[0]
            URI.parse("http://#{options[:domain]}#{request}")
          rescue
            :bad_uri
          end
        end
      },

      # The request target up to, but not including, the query string.
      :request_path => proc { |match, options|
        if match.attributes[:request]
          begin
            path = match.attributes[:request].scan(/^"[A-Z]+ ([^?]+)/).flatten[0]
            # Without a "?" the capture runs to the end of the request, so the
            # trailing protocol version and closing quote have to be removed.
            path && path.sub(/(?: HTTP\/\d+\.\d+)?"$/, '')
          rescue
            nil
          end
        end
      },

      :http_method => proc { |match, options|
        if match.attributes[:request]
          begin
            match.attributes[:request].scan(/^"([A-Z]+)/).flatten[0]
          rescue
            nil
          end
        end
      },

      :http_version => proc { |match, options|
        if match.attributes[:request]
          begin
            match.attributes[:request].scan(/(\d+\.\d+)"$/).flatten[0]
          rescue
            nil
          end
        end
      },

      # The query string of +:request_uri+ parsed with CGI.parse, or an empty
      # Hash when the request has no query string.
      :query_string => proc { |match, options|
        uri = match[:request_uri]
        if uri
          uri.query ? CGI.parse(uri.query) : Hash.new
        end
      },

      # The referer as a URI, the String "-" when the referer is "-", or
      # :bad_uri when it cannot be parsed.
      :referer_uri => proc { |match, options|
        if match[:referer]
          if match[:referer] != '"-"'
            referer = match[:referer].sub(/^"(.+)"$/, '\1')
            # clean_uri returns a new String; keep it, otherwise the cleaning
            # has no effect on what gets parsed.
            referer = NCSAParser::Helper.clean_uri(referer)

            begin
              URI.parse(referer)
            rescue
              :bad_uri
            end
          else
            '-'
          end
        end
      },

      # The result of querying the +:browscap+ option with the unquoted user
      # agent.
      :browscap => proc { |match, options|
        options[:browscap].query(match[:ua].sub(/^"(.+)"$/, '\1')) if options[:browscap]
      },

      # The percentage in the raw ratio as a fraction, e.g. "45%" gives 0.45.
      :ratio => proc { |match, options|
        if match.attributes[:ratio]
          begin
            match.attributes[:ratio].to_f / 100
          rescue
            nil
          end
        end
      },

      # The +host+ attribute, or failing that the first entry of +host_proxy+.
      :host => proc { |match, options|
        if match.attributes[:host]
          match.attributes[:host]
        elsif match.attributes[:host_proxy]
          match.attributes[:host_proxy].split(',')[0].strip
        end
      }
    }

    # Integer converters for the purely numeric tokens.
    %w{ status instream outstream bytes }.each do |field|
      key = field.to_sym

      TOKEN_CONVERSIONS[key] = proc { |match, options|
        if match.attributes[key]
          begin
            match.attributes[key].to_i
          rescue
            nil
          end
        end
      }
    end

    attr_reader :attributes

    # +attributes+ is the Hash of raw values keyed by token name and +options+
    # is the parser's options Hash. Raises ArgumentError when a +:browscap+
    # option is given that does not respond to +query+.
    def initialize(attributes, options = {})
      @attributes, @options = attributes, options
      @parsed_attributes = {}

      if options[:browscap] && !options[:browscap].respond_to?(:query)
        raise ArgumentError.new("The :browscap object should respond to the #query method.")
      end
    end

    # Accesses either an attribute or an attribute that has been passed
    # through a token converter. You can access the raw, unconverted attributes
    # via the +attributes+ method. If a built-in converter fails for whatever
    # reason, a value of +:bad_conversion+ is returned; errors raised by
    # converters supplied through +:token_conversions+ are not rescued here.
    # Converted values are remembered, so each converter runs at most once
    # per key.
    def [](key)
      key = key.to_sym unless key.is_a?(Symbol)

      if @parsed_attributes.has_key?(key)
        @parsed_attributes[key]
      elsif @options[:token_conversions] && @options[:token_conversions][key]
        @parsed_attributes[key] = @options[:token_conversions][key].call(self, @options)
      elsif TOKEN_CONVERSIONS[key]
        @parsed_attributes[key] = begin
          TOKEN_CONVERSIONS[key].call(self, @options)
        rescue
          :bad_conversion
        end
      else
        @attributes[key]
      end
    end

    # Gathers up the requested attributes and spits them out into a Hash.
    # The +values+ argument determines what gets inserted into the Hash:
    #
    # * +:all+ - both attributes and parsed attributes. In cases where
    #   the values share the same names, the parsed attribute wins out.
    # * +:attributes+ - unparsed attributes only.
    # * +:parsed+ - parsed attributes only.
    #
    # The default value is +:all+. Any +nil+ values are automatically stripped
    # from the Hash.
    def to_hash(values = :all)
      retval = {}

      retval.merge!(@attributes) if values == :all || values == :attributes

      if values == :all || values == :parsed
        # Touch every built-in converter so that its result is memoized.
        TOKEN_CONVERSIONS.each { |token, _| self[token] }
        retval.merge!(@parsed_attributes)
      end

      retval.reject! { |_, value| value.nil? }
      retval
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
|
2
|
+
require 'uri'
|
3
|
+
require 'date'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
module NCSAParser
  # Raised by NCSAParser::Parser#parse_line when a line does not match the
  # parser's pattern.
  #
  # NOTE(review): this inherits from Exception rather than StandardError, so a
  # bare +rescue+ does not catch it. Left unchanged here because callers may
  # depend on that.
  class BadLogLine < Exception
    # +line+ is the offending line; +pattern+ is the list of token names (an
    # Array, or a single space-delimited String) that it failed to match.
    def initialize(line, pattern)
      super("Bad log line. Pattern: |#{Array(pattern).join(' ')}| Line: |#{line}|")
    end
  end

  # A line parser for a log file. Lines are parsed via Regexps. You can
  # inject new tokens or override existing ones by passing along a +:tokens+
  # option and adding the keys to the +:pattern+ option accordingly.
  class Parser
    IP_ADDRESS = '\d+\.\d+\.\d+\.\d+|unknown'

    # Regexp source for every built-in token, keyed by token name.
    TOKENS = {
      :host => "(?:#{IP_ADDRESS}|-|::1)",
      :host_proxy => "(?:#{IP_ADDRESS})(?:,\\s+#{IP_ADDRESS})*|-",
      :ident => '[^\s]+',
      :username => '[^\s]+',
      :datetime => '\[[^\]]+\]',
      :request => '".+"',
      :status => '\d+',
      :bytes => '\d+|-',
      :referer => '".*"',
      :ua => '".*"',
      :usertrack => "(?:#{IP_ADDRESS})[^ ]+|-",
      :outstream => '\d+|-',
      :instream => '\d+|-',
      :ratio => '\d+%|-%'
    }

    LOG_FORMAT_COMMON = %w{
      host ident username datetime request status bytes
    }

    LOG_FORMAT_COMBINED = %w{
      host ident username datetime request status bytes referer ua
    }

    attr_reader :pattern, :matcher, :re

    # Creates a new Parser object.
    #
    # == Options
    #
    # * +:domain+ - when parsing query strings, use this domain as the URL's
    #   domain. The default is +"www.example.com"+.
    # * +:datetime_format+ - sets the datetime format for when tokens are
    #   converted in NCSAParser::ParsedLine. The default is +"[%d/%b/%Y:%H:%M:%S %Z]"+.
    # * +:pattern+ - the default log line format to use. The default is
    #   +LOG_FORMAT_COMBINED+, which matches the "combined" log format in
    #   Apache. The value for +:pattern+ can be either a space-delimited
    #   String of token names or an Array of token names.
    # * +:browscap+ - a browser capabilities object to use when sniffing out
    #   user agents. This object should be able to respond to the +query+
    #   method. Several browscap extensions are available for Ruby, and the
    #   author of this extension's version is called Browscapper and is
    #   available at https://github.com/dark-panda/browscapper .
    # * +:token_conversions+ - converters to pass along to the line parser.
    #   See NCSAParser::ParsedLine for details.
    # * +:tokens+ - tokens to add to the generated Regexp. Every token is
    #   wrapped in a capture group and read back by position, so custom
    #   token patterns should only use non-capturing groups.
    #
    # Raises ArgumentError when +:pattern+ names a token that is found in
    # neither +:tokens+ nor +TOKENS+.
    def initialize(options = {})
      options = {
        :domain => 'www.example.com',
        :datetime_format => '[%d/%b/%Y:%H:%M:%S %Z]',
        :pattern => LOG_FORMAT_COMBINED
      }.merge(options)

      @options = options
      @pattern = if options[:pattern].is_a?(Array)
        options[:pattern]
      else
        options[:pattern].to_s.split(/\s+/)
      end

      groups = @pattern.collect { |tk|
        tk = tk.to_sym
        token = if options[:tokens] && options[:tokens][tk]
          options[:tokens][tk]
        elsif TOKENS[tk]
          TOKENS[tk]
        else
          raise ArgumentError.new("Token :#{tk} not found!")
        end

        "(#{token})"
      }

      @re = '^' + groups.join(' ') + '$'
      @matcher = Regexp.new(@re)
    end

    # Parses a single line and returns an NCSAParser::ParsedLine object whose
    # attributes hold the raw text of every token plus the stripped line under
    # +:original+. Raises NCSAParser::BadLogLine when the line does not match.
    def parse_line(line)
      md = @matcher.match(line)

      # Report the normalised token list: the raw +:pattern+ option may be a
      # String, which cannot be joined when building the error message.
      raise BadLogLine.new(line, @pattern) unless md

      match = Hash.new
      @pattern.each_with_index do |k, j|
        match[k.to_sym] = md[j + 1]
      end
      match[:original] = line.strip

      ParsedLine.new(match, @options)
    end
    alias :parse :parse_line
  end
end
|
data/ncsa-parser.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path('../lib/ncsa-parser/version', __FILE__)
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
  s.name = "ncsa-parser"
  s.version = NCSAParser::VERSION

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["J Smith"]
  s.description = "A simple NCSA-style log file parser."
  s.summary = s.description
  s.email = "dark.panda@gmail.com"
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  # Split on newlines only, so a tracked path containing a space stays intact.
  s.files = `git ls-files`.split("\n")
  s.executables = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.homepage = "http://github.com/dark-panda/ncsa-parser"
  s.require_paths = ["lib"]

  # These are only used by the Rakefile and the test suite, so they are
  # development dependencies rather than something every consumer of the
  # library has to install.
  s.add_development_dependency("rdoc")
  s.add_development_dependency("rake", ["~> 0.9"])
  s.add_development_dependency("minitest")
  s.add_development_dependency("turn")
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
|
2
|
+
$: << File.dirname(__FILE__)
|
3
|
+
require 'test_helper'
|
4
|
+
|
5
|
+
# Exercises NCSAParser::Parser, NCSAParser::ParsedLine and NCSAParser.open
# against the sample log lines defined in TestHelper.
class NCSAParserTests < MiniTest::Unit::TestCase
  include TestHelper

  # The default pattern is the "combined" log format.
  def test_format_default
    parser = NCSAParser::Parser.new
    parsed = parser.parse_line(LOG_COMBINED)

    assert_equal({
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :referer => %{"http://www.example.com/referer"},
      :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
      :original => LOG_COMBINED,
    }, parsed.attributes)
  end

  def test_format_common
    parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)

    parsed = parser.parse_line(LOG_COMMON)
    assert_equal({
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :original => LOG_COMMON,
    }, parsed.attributes)
  end

  def test_format_usertrack
    parser = NCSAParser::Parser.new(:pattern => %w{
      host ident username datetime request status bytes referer ua usertrack
    })

    parsed = parser.parse_line(LOG_USERTRACK)

    assert_equal({
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :referer => %{"http://www.example.com/referer"},
      :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
      :usertrack => %{123.123.123.123.1349718542489266},
      :original => LOG_USERTRACK
    }, parsed.attributes)
  end

  def test_format_deflate
    parser = NCSAParser::Parser.new(:pattern => %w{
      host ident username datetime request status bytes referer ua instream outstream ratio
    })

    parsed = parser.parse_line(LOG_DEFLATE)

    assert_equal({
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :referer => %{"http://www.example.com/referer"},
      :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
      :instream => %{905},
      :outstream => %{1976},
      :ratio => %{45%},
      :original => LOG_DEFLATE
    }, parsed.attributes)
  end

  def test_format_bad
    parser = NCSAParser::Parser.new

    assert_raises(NCSAParser::BadLogLine) do
      parser.parse_line('what happen')
    end
  end

  def test_open_file
    # Resolve the fixture relative to this file so the test does not depend
    # on the directory the suite is started from.
    path = File.join(File.dirname(__FILE__), 'resources', 'access_log')

    log = NCSAParser.open(path, :pattern => %w{
      host ident username datetime request status bytes referer ua usertrack instream outstream ratio
    })

    expect = {
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :referer => %{"http://www.example.com/referer"},
      :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
      :usertrack => %{123.123.123.123.1349718542489266},
      :instream => %{905},
      :outstream => %{1976},
      :ratio => %{45%},
      :original => %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%}
    }

    begin
      parsed_lines = 0

      log.each do |parsed|
        assert_equal(expect, parsed.attributes)
        parsed_lines += 1
      end

      # Guards against the loop passing vacuously on an empty fixture; the
      # access_log fixture holds seven identical lines.
      assert_equal(7, parsed_lines)
    ensure
      log.log.close
    end
  end

  def test_token_conversions
    line = %{[08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" "http://www.example.com/referer" 1000 100 10% 123.123.123.123 200 110}

    parser = NCSAParser::Parser.new(:pattern => %w{
      datetime request referer instream outstream ratio host_proxy status bytes
    })

    parsed = parser.parse_line(line)

    assert_equal(DateTime.strptime('[08/Oct/2012:14:36:07 -0400]', '[%d/%b/%Y:%H:%M:%S %Z]'), parsed[:datetime])
    assert_equal(URI.parse('http://www.example.com/path/to/something?foo=bar&hello=world'), parsed[:request_uri])
    assert_equal('/path/to/something', parsed[:request_path])
    assert_equal('GET', parsed[:http_method])
    assert_equal('1.1', parsed[:http_version])
    assert_equal(URI.parse('http://www.example.com/referer'), parsed[:referer_uri])
    assert_equal(0.1, parsed[:ratio])
    assert_equal('123.123.123.123', parsed[:host])
    assert_equal(200, parsed[:status])
    assert_equal(1000, parsed[:instream])
    assert_equal(100, parsed[:outstream])
    assert_equal(110, parsed[:bytes])
  end

  def test_custom_tokens_and_conversion
    parser = NCSAParser::Parser.new(
      :pattern => 'email',

      :tokens => {
        :email => '[^@]+@[^@]+'
      },

      :token_conversions => {
        :email => proc { |match, options|
          URI.parse(match.attributes[:email])
        }
      }
    )

    parsed = parser.parse_line('test@example.com')
    assert_equal(URI.parse('test@example.com'), parsed[:email])
  end

  def test_to_hash
    parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
    parsed = parser.parse_line(LOG_COMMON)

    assert_equal([
      :host, :ident, :username, :datetime, :request, :status, :bytes,
      :original, :request_uri, :request_path, :http_method,
      :http_version, :query_string
    ].sort_by(&:to_s), parsed.to_hash.keys.sort_by(&:to_s))
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
2
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
3
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
4
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
5
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
6
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
7
|
+
123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'turn/autorun'
|
5
|
+
require File.join(File.dirname(__FILE__), %w{ .. lib ncsa-parser })
|
6
|
+
|
7
|
+
puts "NCSAParser version #{NCSAParser::VERSION}"
|
8
|
+
|
9
|
+
# Sample log lines shared by the tests. Each longer format extends a shorter
# one with extra trailing fields, so the lines are built on top of each other
# rather than repeating the same literal. They are frozen so that one test
# cannot alter a fixture another test reads.
module TestHelper
  # A line in the "common" log format.
  LOG_COMMON = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923}.freeze

  # The common line followed by the quoted referer and user agent.
  LOG_COMBINED = %{#{LOG_COMMON} "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"}.freeze

  # The combined line followed by a usertrack value.
  LOG_USERTRACK = %{#{LOG_COMBINED} 123.123.123.123.1349718542489266}.freeze

  # The combined line followed by the instream, outstream and ratio fields.
  LOG_DEFLATE = %{#{LOG_COMBINED} 905 1976 45%}.freeze
end
|
18
|
+
|
19
|
+
# When the "autotest" environment variable is set, reopen Turn::Colorize and
# define color_supported? on it so that it always answers true.
if ENV['autotest']
  module Turn::Colorize
    class << self
      def color_supported?
        true
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ncsa-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- J Smith
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdoc
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0.9'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0.9'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: minitest
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: turn
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
description: A simple NCSA-style log file parser.
|
79
|
+
email: dark.panda@gmail.com
|
80
|
+
executables: []
|
81
|
+
extensions: []
|
82
|
+
extra_rdoc_files:
|
83
|
+
- README.rdoc
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- MIT-LICENSE
|
88
|
+
- README.rdoc
|
89
|
+
- Rakefile
|
90
|
+
- lib/ncsa-parser.rb
|
91
|
+
- lib/ncsa-parser/helper.rb
|
92
|
+
- lib/ncsa-parser/log.rb
|
93
|
+
- lib/ncsa-parser/parsed_line.rb
|
94
|
+
- lib/ncsa-parser/parser.rb
|
95
|
+
- lib/ncsa-parser/version.rb
|
96
|
+
- ncsa-parser.gemspec
|
97
|
+
- test/ncsa_parser_tests.rb
|
98
|
+
- test/resources/access_log
|
99
|
+
- test/test_helper.rb
|
100
|
+
homepage: http://github.com/dark-panda/ncsa-parser
|
101
|
+
licenses: []
|
102
|
+
post_install_message:
|
103
|
+
rdoc_options: []
|
104
|
+
require_paths:
|
105
|
+
- lib
|
106
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ! '>='
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: '0'
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 1.8.24
|
121
|
+
signing_key:
|
122
|
+
specification_version: 3
|
123
|
+
summary: A simple NCSA-style log file parser.
|
124
|
+
test_files:
|
125
|
+
- test/ncsa_parser_tests.rb
|
126
|
+
- test/resources/access_log
|
127
|
+
- test/test_helper.rb
|