ncsa-parser 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+ *.orig
2
+ .*.swp
3
+ .*.swo
4
+ *.tmp
5
+ *.patch
6
+ *.kpf
7
+ *~
8
+ .DS_Store
9
+ Thumbs.db
10
+ /doc
11
+ /pkg
12
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 J Smith <dark.panda@#gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,23 @@
1
+
2
+ = NCSA Parser
3
+
4
+ Here's a quick little library for reading NCSA-style web server logs. Quick
5
+ usage:
6
+
7
+ NCSAParser.each_line(File.open('/var/log/httpd/access_log'), :pattern => %w{
8
+ host ident username datetime request
9
+ status bytes referer ua
10
+ outstream instream ratio
11
+ }) do |b|
12
+ puts b.inspect
13
+ end
14
+
15
+ parser = NCSAParser::Parser.new
16
+ parsed = parser.parse_line('...')
17
+
18
+ There are more examples available in the tests.
19
+
20
+ == License
21
+
22
+ This gem is licensed under an MIT-style license. See the +MIT-LICENSE+ file for
23
+ details.
@@ -0,0 +1,37 @@
1
# -*- ruby -*-
#
# Rake tasks for the ncsa-parser gem: running the test suite and building the
# RDoc documentation. Gem packaging tasks come from bundler/gem_tasks.

require 'rubygems'

gem 'rdoc', '~> 3.12'

require 'rubygems/package_task'
require 'rake/testtask'
require 'rdoc/task'
require 'bundler/gem_tasks'

if RUBY_VERSION >= '1.9'
  begin
    gem 'psych'
  rescue LoadError
    # It's okay, fall back on the bundled psych. Gem activation failures are
    # LoadError subclasses, so there is no need to rescue Exception (which
    # would also swallow interrupts and exits).
  end
end

$:.push 'lib'

# Load the version constant explicitly instead of relying on another require
# having pulled it in as a side effect.
require 'ncsa-parser/version'

version = NCSAParser::VERSION

desc 'Test NCSA parser library'
Rake::TestTask.new(:test) do |t|
  t.test_files = FileList['test/**/*_tests.rb']
  t.verbose = !!ENV['VERBOSE_TESTS']
  t.warning = !!ENV['WARNINGS']
end

desc 'Build docs'
Rake::RDocTask.new do |t|
  t.title = "NCSA Parser #{version}"
  t.main = 'README.rdoc'
  t.rdoc_dir = 'doc'
  t.rdoc_files.include('README.rdoc', 'MIT-LICENSE', 'lib/**/*.rb')
end
@@ -0,0 +1,22 @@
1
+
2
+ require 'ncsa-parser/version'
3
+ require 'ncsa-parser/parser'
4
+ require 'ncsa-parser/helper'
5
+ require 'ncsa-parser/parsed_line'
6
+ require 'ncsa-parser/log'
7
+
8
module NCSAParser
  class << self
    # Wraps +log+ in an NCSAParser::Log. This simply forwards both arguments
    # to NCSAParser::Log.open.
    def open(log, options = {})
      Log.open(log, options)
    end

    # Opens +log+ with the given +options+ and hands the block to the
    # resulting log object's +each+, so the block receives every parsed line.
    def each_line(log, options = {}, &block)
      parsed_log = self.open(log, options)
      parsed_log.each(&block)
    end
    alias :foreach :each_line
  end
end
@@ -0,0 +1,20 @@
1
+
2
module NCSAParser
  # Stateless utility functions used by the parsing classes.
  module Helper
    # Returns a copy of +uri+ with spaces replaced by "+", backslash-escaped
    # double quotes replaced by "%22" and commas replaced by "%2C". The
    # argument itself is not modified.
    def self.clean_uri(uri)
      uri.
        gsub(/ /, '+').
        gsub(/\\"/, '%22').
        gsub(/,/, '%2C')
    end

    # Returns a new Hash in which every key that responds to +to_sym+ has
    # been converted to a Symbol. Values that are Hashes are converted
    # recursively; all other values are copied over untouched.
    def self.deep_symbolize_keys(hash)
      hash.inject({}) do |memo, (key, value)|
        begin
          key = key.to_sym if key.respond_to?(:to_sym)
        rescue StandardError
          # Keep the original key when it cannot be converted.
        end

        # Recurse through this module itself; the previous reference to an
        # undefined NCSAHelper constant raised NameError on nested Hashes.
        value = deep_symbolize_keys(value) if value.is_a?(Hash)
        memo[key] = value
        memo
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+
2
module NCSAParser
  # An Enumerable wrapper around a line-oriented log source. Each line read
  # from the source is run through an NCSAParser::Parser built from the
  # options given at construction time.
  class Log
    include Enumerable

    attr_reader :log, :parser

    # +log+ is the object lines are read from; it must respond to +each+ and,
    # for #next_line, to +gets+. +options+ are passed to NCSAParser::Parser.new.
    def initialize(log, options = {})
      @log = log
      @parser = Parser.new(options)
    end

    # Builds a Log from +file+. A String is treated as a path and opened with
    # File.open; anything else is used as the log source directly.
    #
    # NOTE: a file opened here is never closed by this class; callers that
    # care should close +log+ themselves when done.
    def self.open(file, options = {})
      file = File.open(file) if file.is_a?(String)
      new(file, options)
    end

    # With a block, yields the parsed form of every line in the log. Without
    # a block, returns an Array of the parsed lines.
    def each
      if block_given?
        log.each { |line| yield parser.parse_line(line) }
      else
        log.collect { |line| parser.parse_line(line) }
      end
    end

    # Reads and parses the next line from the log. The parsed line is yielded
    # to the block, if one is given, and returned. Returns +nil+ (without
    # yielding) once the log is exhausted, rather than handing +nil+ to the
    # parser and raising a misleading bad-line error.
    def next_line
      line = log.gets
      return nil if line.nil?

      parser.parse_line(line).tap { |parsed|
        yield parsed if block_given?
      }
    end
  end
end
@@ -0,0 +1,152 @@
1
+
2
module NCSAParser
  # NCSAParser::ParsedLine handles some token conversions and the like on the
  # fly after a successful line parse. You can add your own token conversions
  # or override existing ones by passing along a +:token_conversions+ option
  # that contains converters in the same manner as those found in
  # NCSAParser::ParsedLine::TOKEN_CONVERSIONS.
  #
  # To access a parsed value without any sort of token conversion, use the
  # +attributes+ method. The +[]+ method will perform the token conversion
  # on the fly for you.
  #
  # For token converters that handle URIs, the Symbol :bad_uri will be returned
  # if the URI parser fails for whatever reason.
  class ParsedLine
    # Built-in converters, keyed by the name used with #[]. Each converter is
    # called with the ParsedLine itself and the options Hash, and returns
    # +nil+ when the raw attribute it depends on is absent.
    TOKEN_CONVERSIONS = {
      # Parses the raw timestamp with the +:datetime_format+ option. Guarded
      # on presence like the other converters so that patterns without a
      # datetime token yield nil instead of a failed conversion.
      :datetime => proc { |match, options|
        if match.attributes[:datetime]
          DateTime.strptime(match.attributes[:datetime], options[:datetime_format])
        end
      },

      # Builds a URI from the +:domain+ option and the request target.
      :request_uri => proc { |match, options|
        begin
          request = match.attributes[:request].scan(/^"[A-Z]+ (.+) HTTP\/\d+\.\d+"$/).flatten[0]
          URI.parse("http://#{options[:domain]}#{request}")
        rescue
          :bad_uri
        end if match.attributes[:request]
      },

      # The request target up to, but not including, any "?".
      :request_path => proc { |match, options|
        match.attributes[:request].scan(/^"[A-Z]+ ([^?]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      :http_method => proc { |match, options|
        match.attributes[:request].scan(/^"([A-Z]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      :http_version => proc { |match, options|
        match.attributes[:request].scan(/(\d+\.\d+)"$/).flatten[0] rescue nil if match.attributes[:request]
      },

      # The query string of the converted request URI as parsed by CGI.parse,
      # or an empty Hash when the URI has no query.
      :query_string => proc { |match, options|
        if match[:request_uri]
          if match[:request_uri].query
            CGI.parse(match[:request_uri].query)
          else
            Hash.new
          end
        end
      },

      # The referer as a URI, or the String "-" when the log recorded none.
      :referer_uri => proc { |match, options|
        if match[:referer]
          if match[:referer] != '"-"'
            # clean_uri returns a new String, so its result has to be kept;
            # previously it was discarded and the raw referer was parsed.
            referer = NCSAParser::Helper.clean_uri(match[:referer].sub(/^"(.+)"$/, '\1'))

            begin
              URI.parse(referer)
            rescue
              :bad_uri
            end
          else
            '-'
          end
        end
      },

      # Looks the unquoted user agent up in the +:browscap+ object, if any.
      :browscap => proc { |match, options|
        options[:browscap].query(match[:ua].sub(/^"(.+)"$/, '\1')) if options[:browscap]
      },

      # The percentage as a fraction, e.g. "45%" becomes 0.45.
      :ratio => proc { |match, options|
        match.attributes[:ratio].to_f / 100 rescue nil if match.attributes[:ratio]
      },

      # The +host+ attribute, or else the first address in +host_proxy+.
      :host => proc { |match, options|
        if match.attributes[:host]
          match.attributes[:host]
        elsif match.attributes[:host_proxy]
          match.attributes[:host_proxy].split(',')[0].strip
        end
      }
    }

    # Integer converters for the purely numeric fields.
    %w{ status instream outstream bytes }.each do |field|
      key = field.to_sym
      TOKEN_CONVERSIONS[key] = proc { |match, options|
        match.attributes[key].to_i rescue nil if match.attributes[key]
      }
    end

    attr_reader :attributes

    # +attributes+ is the Hash of raw captured values; +options+ is handed to
    # every converter. Raises ArgumentError if a +:browscap+ option is given
    # that does not respond to +query+.
    def initialize(attributes, options = {})
      @attributes, @options = attributes, options
      @parsed_attributes = {}

      if options[:browscap] && !options[:browscap].respond_to?(:query)
        raise ArgumentError.new("The :browscap object should respond to the #query method.")
      end
    end

    # Accesses either an attribute or an attribute that has been passed
    # through a token converter. You can access the raw, unconverted attributes
    # via the +attributes+ method. If a built-in converter fails for whatever
    # reason, a value of +:bad_conversion+ is returned. Converted values are
    # cached, and converters supplied via +:token_conversions+ take precedence
    # over the built-in ones; errors they raise are not rescued here.
    def [](key)
      key = key.to_sym unless key.is_a?(Symbol)

      if @parsed_attributes.has_key?(key)
        @parsed_attributes[key]
      elsif @options[:token_conversions] && @options[:token_conversions][key]
        @parsed_attributes[key] = @options[:token_conversions][key].call(self, @options)
      elsif TOKEN_CONVERSIONS[key]
        @parsed_attributes[key] = (TOKEN_CONVERSIONS[key].call(self, @options) rescue :bad_conversion)
      else
        @attributes[key]
      end
    end

    # Gathers up the requested attributes and spits them out into a Hash.
    # The +values+ argument determines what gets inserted into the Hash:
    #
    # * +:all+ - both attributes and parsed attributes. In cases where
    #   the values share the same names, the parsed attribute wins out.
    # * +:attributes+ - unparsed attributes only.
    # * +:parsed+ - parsed attributes only.
    #
    # The default value is +:all+. Any +nil+ values are automatically stripped
    # from the Hash.
    def to_hash(values = :all)
      retval = {}

      if values == :all || values == :attributes
        retval.merge!(@attributes)
      end

      if values == :all || values == :parsed
        # Touch every built-in converter so its result lands in the cache.
        TOKEN_CONVERSIONS.each { |t, v| self[t] }
        retval.merge!(@parsed_attributes)
      end

      retval.reject! { |k, v|
        v.nil?
      }

      retval
    end
  end
end
@@ -0,0 +1,111 @@
1
+
2
+ require 'uri'
3
+ require 'date'
4
+ require 'cgi'
5
+
6
module NCSAParser
  # Raised by NCSAParser::Parser#parse_line when a line does not match the
  # parser's pattern. Derives from StandardError so that a plain +rescue+
  # catches it. +pattern+ may be an Array of token names or a String.
  class BadLogLine < StandardError
    def initialize(line, pattern)
      super("Bad log line. Pattern: |#{Array(pattern).join(' ')}| Line: |#{line}|")
    end
  end

  # A line parser for a log file. Lines are parsed via Regexps. You can
  # inject new tokens or override existing ones by passing along a +:tokens+
  # option and adding the keys to the +:pattern+ option accordingly.
  #
  # Every token is wrapped in its own capturing group and matched values are
  # read back by position, so token expressions should only use
  # non-capturing groups.
  class Parser
    IP_ADDRESS = '\d+\.\d+\.\d+\.\d+|unknown'

    # Regexp source for each known token name.
    TOKENS = {
      :host => "(?:#{IP_ADDRESS}|-|::1)",
      # The repeated address is wrapped in its own group so that the
      # alternation inside IP_ADDRESS stays scoped to a single list entry.
      :host_proxy => "(?:#{IP_ADDRESS})(?:,\\s+(?:#{IP_ADDRESS}))*|-",
      :ident => '[^\s]+',
      :username => '[^\s]+',
      :datetime => '\[[^\]]+\]',
      :request => '".+"',
      :status => '\d+',
      :bytes => '\d+|-',
      :referer => '".*"',
      :ua => '".*"',
      :usertrack => "(?:#{IP_ADDRESS})[^ ]+|-",
      :outstream => '\d+|-',
      :instream => '\d+|-',
      :ratio => '\d+%|-%'
    }

    LOG_FORMAT_COMMON = %w{
      host ident username datetime request status bytes
    }

    LOG_FORMAT_COMBINED = %w{
      host ident username datetime request status bytes referer ua
    }

    attr_reader :pattern, :matcher, :re

    # Creates a new Parser object.
    #
    # == Options
    #
    # * +:domain+ - when parsing query strings, use this domain as the URL's
    #   domain. The default is +"www.example.com"+.
    # * +:datetime_format+ - sets the datetime format for when tokens are
    #   converted in NCSAParser::ParsedLine. The default is +"[%d/%b/%Y:%H:%M:%S %Z]"+.
    # * +:pattern+ - the default log line format to use. The default is
    #   +LOG_FORMAT_COMBINED+, which matches the "combined" log format in
    #   Apache. The value for +:pattern+ can be either a space-delimited
    #   String of token names or an Array of token names.
    # * +:browscap+ - a browser capabilities object to use when sniffing out
    #   user agents. This object should be able to respond to the +query+
    #   method. Several browscap extensions are available for Ruby, and the
    #   author of this extension's version is called Browscapper and is
    #   available at https://github.com/dark-panda/browscapper .
    # * +:token_conversions+ - converters to pass along to the line parser.
    #   See NCSAParser::ParsedLine for details.
    # * +:tokens+ - tokens to add to the generated Regexp.
    #
    # Raises ArgumentError when the pattern names a token that is found in
    # neither +:tokens+ nor +TOKENS+.
    def initialize(options = {})
      options = {
        :domain => 'www.example.com',
        :datetime_format => '[%d/%b/%Y:%H:%M:%S %Z]',
        :pattern => LOG_FORMAT_COMBINED
      }.merge(options)

      @options = options
      @pattern = if options[:pattern].is_a?(Array)
        options[:pattern]
      else
        options[:pattern].to_s.split(/\s+/)
      end

      @re = '^' + @pattern.collect { |tk|
        tk = tk.to_sym
        token = if options[:tokens] && options[:tokens][tk]
          options[:tokens][tk]
        elsif TOKENS[tk]
          TOKENS[tk]
        else
          raise ArgumentError.new("Token :#{tk} not found!")
        end

        "(#{token})"
      }.join(' ') + '$'
      @matcher = Regexp.new(@re)
    end

    # Parses a single line and returns an NCSAParser::ParsedLine object whose
    # attributes hold one entry per pattern token plus +:original+, the
    # stripped input line. Raises NCSAParser::BadLogLine when the line does
    # not match the pattern.
    def parse_line(line)
      md = @matcher.match(line)

      # Report the normalized token list rather than the raw option, which
      # may be a String.
      raise BadLogLine.new(line, @pattern) unless md

      match = Hash.new
      @pattern.each_with_index do |k, j|
        match[k.to_sym] = md[j + 1]
      end
      match[:original] = line.strip

      ParsedLine.new(match, @options)
    end
    alias :parse :parse_line
  end
end
@@ -0,0 +1,5 @@
1
+
2
+ module NCSAParser
3
+ VERSION = '0.0.2'
4
+ end
5
+
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.expand_path('../lib/ncsa-parser/version', __FILE__)
4
+
5
# Gem specification for ncsa-parser. The file lists are taken from git, so
# the gem must be built from within a git checkout.
Gem::Specification.new do |s|
  s.name = "ncsa-parser"
  s.version = NCSAParser::VERSION

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["J Smith"]
  s.description = "A simple NCSA-style log file parser."
  s.summary = s.description
  s.email = "dark.panda@gmail.com"
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  # Split on the input record separator (newline); the output separator $\
  # is nil by default, which splits on any whitespace.
  s.files = `git ls-files`.split($/)
  s.executables = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.homepage = "http://github.com/dark-panda/ncsa-parser"
  s.require_paths = ["lib"]

  # These are only needed to build, document and test the gem, so they are
  # development dependencies rather than something every consumer installs.
  s.add_development_dependency("rdoc")
  s.add_development_dependency("rake", ["~> 0.9"])
  s.add_development_dependency("minitest")
  s.add_development_dependency("turn")
end
@@ -0,0 +1,175 @@
1
+
2
+ $: << File.dirname(__FILE__)
3
+ require 'test_helper'
4
+
5
# Exercises NCSAParser::Parser, NCSAParser::ParsedLine and NCSAParser.open
# against the sample lines in TestHelper and test/resources/access_log.
class NCSAParserTests < MiniTest::Unit::TestCase
  include TestHelper

  # The default parser uses the "combined" format.
  def test_format_default
    parsed = NCSAParser::Parser.new.parse_line(LOG_COMBINED)

    assert_equal(combined_attributes(LOG_COMBINED), parsed.attributes)
  end

  def test_format_common
    parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)

    assert_equal(common_attributes(LOG_COMMON), parser.parse_line(LOG_COMMON).attributes)
  end

  def test_format_usertrack
    parser = NCSAParser::Parser.new(:pattern => %w{
      host ident username datetime request status bytes referer ua usertrack
    })

    expected = combined_attributes(LOG_USERTRACK).merge(
      :usertrack => %{123.123.123.123.1349718542489266}
    )

    assert_equal(expected, parser.parse_line(LOG_USERTRACK).attributes)
  end

  def test_format_deflate
    parser = NCSAParser::Parser.new(:pattern => %w{
      host ident username datetime request status bytes referer ua instream outstream ratio
    })

    expected = combined_attributes(LOG_DEFLATE).merge(
      :instream => %{905},
      :outstream => %{1976},
      :ratio => %{45%}
    )

    assert_equal(expected, parser.parse_line(LOG_DEFLATE).attributes)
  end

  def test_format_bad
    parser = NCSAParser::Parser.new

    assert_raises(NCSAParser::BadLogLine) { parser.parse_line('what happen') }
  end

  # Every line of the fixture file is identical, so each one must produce
  # the same attributes.
  def test_open_file
    log = NCSAParser.open('./test/resources/access_log', :pattern => %w{
      host ident username datetime request status bytes referer ua usertrack instream outstream ratio
    })

    original = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%}

    expect = combined_attributes(original).merge(
      :usertrack => %{123.123.123.123.1349718542489266},
      :instream => %{905},
      :outstream => %{1976},
      :ratio => %{45%}
    )

    log.each do |parsed|
      assert_equal(expect, parsed.attributes)
    end
  end

  def test_token_conversions
    parser = NCSAParser::Parser.new(:pattern => %w{
      datetime request referer instream outstream ratio host_proxy status bytes
    })

    parsed = parser.parse_line(
      %{[08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" "http://www.example.com/referer" 1000 100 10% 123.123.123.123 200 110}
    )

    assert_equal(DateTime.strptime('[08/Oct/2012:14:36:07 -0400]', '[%d/%b/%Y:%H:%M:%S %Z]'), parsed[:datetime])
    assert_equal(URI.parse('http://www.example.com/path/to/something?foo=bar&hello=world'), parsed[:request_uri])
    assert_equal('/path/to/something', parsed[:request_path])
    assert_equal('GET', parsed[:http_method])
    assert_equal('1.1', parsed[:http_version])
    assert_equal(URI.parse('http://www.example.com/referer'), parsed[:referer_uri])
    assert_equal(0.1, parsed[:ratio])
    assert_equal('123.123.123.123', parsed[:host])
    assert_equal(200, parsed[:status])
    assert_equal(1000, parsed[:instream])
    assert_equal(100, parsed[:outstream])
    assert_equal(110, parsed[:bytes])
  end

  def test_custom_tokens_and_conversion
    email_converter = proc { |match, options|
      URI.parse(match.attributes[:email])
    }

    parser = NCSAParser::Parser.new(
      :pattern => 'email',
      :tokens => { :email => '[^@]+@[^@]+' },
      :token_conversions => { :email => email_converter }
    )

    parsed = parser.parse_line('test@example.com')
    assert_equal(URI.parse('test@example.com'), parsed[:email])
  end

  def test_to_hash
    parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
    actual_keys = parser.parse_line(LOG_COMMON).to_hash.keys

    expected_keys = [
      :host, :ident, :username, :datetime, :request, :status, :bytes,
      :original, :request_path, :request_uri, :http_method,
      :http_version, :query_string
    ]

    assert_equal(expected_keys.sort_by(&:to_s), actual_keys.sort_by(&:to_s))
  end

  private

  # Raw attributes shared by every sample line in the "common" format.
  def common_attributes(original)
    {
      :host => %{123.123.123.123},
      :ident => %{-},
      :username => %{-},
      :datetime => %{[08/Oct/2012:14:36:07 -0400]},
      :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
      :status => %{200},
      :bytes => %{923},
      :original => original
    }
  end

  # The common attributes plus the referer and user agent of the "combined"
  # format.
  def combined_attributes(original)
    common_attributes(original).merge(
      :referer => %{"http://www.example.com/referer"},
      :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"}
    )
  end
end
@@ -0,0 +1,7 @@
1
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
2
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
3
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
4
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
5
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
6
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
7
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
@@ -0,0 +1,25 @@
1
+
2
+ require 'rubygems'
3
+ require 'minitest/autorun'
4
+ require 'turn/autorun'
5
+ require File.join(File.dirname(__FILE__), %w{ .. lib ncsa-parser })
6
+
7
+ puts "NCSAParser version #{NCSAParser::VERSION}"
8
+
9
# Sample log lines shared by the tests. Each longer format is the previous
# one with extra space-separated fields appended.
module TestHelper
  # "common" format: host, ident, username, datetime, request, status, bytes.
  LOG_COMMON = '123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923'

  # "combined" format: the common fields followed by referer and user agent.
  LOG_COMBINED = LOG_COMMON + ' "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"'

  # Combined format followed by a usertrack value.
  LOG_USERTRACK = LOG_COMBINED + ' 123.123.123.123.1349718542489266'

  # Combined format followed by instream, outstream and ratio values.
  LOG_DEFLATE = LOG_COMBINED + ' 905 1976 45%'
end
18
+
19
+ if ENV['autotest']
20
+ module Turn::Colorize
21
+ def self.color_supported?
22
+ true
23
+ end
24
+ end
25
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ncsa-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - J Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rdoc
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '0.9'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '0.9'
46
+ - !ruby/object:Gem::Dependency
47
+ name: minitest
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: turn
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A simple NCSA-style log file parser.
79
+ email: dark.panda@gmail.com
80
+ executables: []
81
+ extensions: []
82
+ extra_rdoc_files:
83
+ - README.rdoc
84
+ files:
85
+ - .gitignore
86
+ - Gemfile
87
+ - MIT-LICENSE
88
+ - README.rdoc
89
+ - Rakefile
90
+ - lib/ncsa-parser.rb
91
+ - lib/ncsa-parser/helper.rb
92
+ - lib/ncsa-parser/log.rb
93
+ - lib/ncsa-parser/parsed_line.rb
94
+ - lib/ncsa-parser/parser.rb
95
+ - lib/ncsa-parser/version.rb
96
+ - ncsa-parser.gemspec
97
+ - test/ncsa_parser_tests.rb
98
+ - test/resources/access_log
99
+ - test/test_helper.rb
100
+ homepage: http://github.com/dark-panda/ncsa-parser
101
+ licenses: []
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.8.24
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: A simple NCSA-style log file parser.
124
+ test_files:
125
+ - test/ncsa_parser_tests.rb
126
+ - test/resources/access_log
127
+ - test/test_helper.rb