ncsa-parser 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ *.orig
2
+ .*.swp
3
+ .*.swo
4
+ *.tmp
5
+ *.patch
6
+ *.kpf
7
+ *~
8
+ .DS_Store
9
+ Thumbs.db
10
+ /doc
11
+ /pkg
12
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 J Smith <dark.panda@#gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,23 @@
1
+
2
+ = NCSA Parser
3
+
4
+ Here's a quick little library for reading NCSA-style web server logs. Quick
5
+ usage:
6
+
7
+ NCSAParser.each_line(File.open('/var/log/httpd/access_log'), :pattern => %w{
8
+ host ident username datetime request
9
+ status bytes referer ua
10
+ outstream instream ratio
11
+ }) do |b|
12
+ puts b.inspect
13
+ end
14
+
15
+ parser = NCSAParser::Parser.new
16
+ parsed = parser.parse_line('...')
17
+
18
+ There are more examples available in the tests.
19
+
20
+ == License
21
+
22
+ This gem is licensed under an MIT-style license. See the +MIT-LICENSE+ file for
23
+ details.
@@ -0,0 +1,37 @@
1
# -*- ruby -*-
#
# Rake tasks for the ncsa-parser gem: running the test suite, building the
# RDoc documentation and (via bundler/gem_tasks) building/releasing the gem.

require 'rubygems'

gem 'rdoc', '~> 3.12'

require 'rubygems/package_task'
require 'rake/testtask'
require 'rdoc/task'
require 'bundler/gem_tasks'

if RUBY_VERSION >= '1.9'
  begin
    gem 'psych'
  rescue LoadError
    # It's okay, fall back on the bundled psych. Kernel#gem reports a missing
    # or conflicting gem with Gem::LoadError, which is a LoadError; rescuing
    # only that keeps interrupts and exits from being swallowed.
  end
end

$:.push 'lib'

# Load the version explicitly (same absolute path the gemspec uses, so the
# file is only loaded once) rather than relying on bundler/gem_tasks having
# evaluated the gemspec as a side effect.
require File.expand_path('../lib/ncsa-parser/version', __FILE__)

version = NCSAParser::VERSION

desc 'Test NCSA parser library'
Rake::TestTask.new(:test) do |t|
  t.test_files = FileList['test/**/*_tests.rb']
  t.verbose = !!ENV['VERBOSE_TESTS']
  t.warning = !!ENV['WARNINGS']
end

desc 'Build docs'
RDoc::Task.new do |t|
  t.title = "NCSA Parser #{version}"
  t.main = 'README.rdoc'
  t.rdoc_dir = 'doc'
  t.rdoc_files.include('README.rdoc', 'MIT-LICENSE', 'lib/**/*.rb')
end
@@ -0,0 +1,22 @@
1
+
2
+ require 'ncsa-parser/version'
3
+ require 'ncsa-parser/parser'
4
+ require 'ncsa-parser/helper'
5
+ require 'ncsa-parser/parsed_line'
6
+ require 'ncsa-parser/log'
7
+
8
module NCSAParser
  # Opens a log for parsing. This simply hands both arguments to
  # NCSAParser::Log.open and returns whatever that returns.
  def self.open(log, options = {})
    Log.open(log, options)
  end

  # Opens a log via NCSAParser.open and iterates through its lines, passing
  # the given block on to the opened log's +each+ method.
  def self.each_line(log, options = {}, &block)
    opened = self.open(log, options)
    opened.each(&block)
  end

  class << self
    # NCSAParser.foreach is another name for NCSAParser.each_line.
    alias_method :foreach, :each_line
  end
end
@@ -0,0 +1,20 @@
1
+
2
module NCSAParser
  # Stateless utility functions used by the parser classes.
  module Helper
    # Returns a copy of +uri+ in which spaces are replaced with "+",
    # backslash-escaped double quotes (\") with "%22" and commas with "%2C".
    def self.clean_uri(uri)
      uri.
        gsub(/ /, '+').
        gsub(/\\"/, '%22').
        gsub(/,/, '%2C')
    end

    # Returns a new Hash in which every key that responds to +to_sym+ has been
    # converted to a Symbol. Values that are themselves Hashes are converted
    # recursively; all other values are copied over untouched. Keys that
    # cannot be converted are kept as they are.
    def self.deep_symbolize_keys(hash)
      hash.each_with_object({}) do |(key, value), memo|
        symbolized = begin
          key.respond_to?(:to_sym) ? key.to_sym : key
        rescue StandardError
          # Conversion failed; fall back on the original key.
          key
        end

        # Recurse through this module (the original referenced an undefined
        # NCSAHelper constant and raised NameError on any nested Hash).
        value = deep_symbolize_keys(value) if value.is_a?(Hash)
        memo[symbolized] = value
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+
2
module NCSAParser
  # Wraps a line-oriented log source (anything that responds to +each+ and
  # +gets+, such as a File or StringIO) together with an NCSAParser::Parser,
  # and hands back one parsed line per line of input.
  class Log
    include Enumerable

    attr_reader :log, :parser

    # Creates a Log around +log+. The +options+ are passed straight through to
    # NCSAParser::Parser.new.
    def initialize(log, options = {})
      @log = log
      @parser = Parser.new(options)
    end

    # Builds a Log from +file+. A String is treated as a path and opened with
    # File.open; any other object is used as the log source as-is.
    #
    # Without a block the new Log is returned and the caller is responsible
    # for calling +close+ on it. With a block, the Log is yielded, the block's
    # value is returned, and a file that was opened here is closed afterwards.
    def self.open(file, options = {})
      opened_here = file.is_a?(String)
      io = opened_here ? File.open(file) : file

      begin
        log = new(io, options)
      rescue StandardError
        # Don't leak the handle when the parser options are rejected.
        io.close if opened_here
        raise
      end

      return log unless block_given?

      begin
        yield log
      ensure
        io.close if opened_here
      end
    end

    # With a block, yields the parsed form of every line in the log. Without
    # a block, returns an Array holding the parsed form of every line.
    def each
      if block_given?
        log.each do |line|
          yield parser.parse_line(line)
        end
      else
        log.collect do |line|
          parser.parse_line(line)
        end
      end
    end

    # Reads and parses the next line of the log. The parsed line is yielded
    # to the block, if one is given, and returned. Returns +nil+ once the
    # underlying source has no more lines, rather than feeding +nil+ to the
    # parser.
    def next_line
      line = log.gets
      return nil if line.nil?

      parser.parse_line(line).tap { |parsed|
        yield parsed if block_given?
      }
    end

    # Closes the underlying log source if it supports being closed.
    def close
      log.close if log.respond_to?(:close)
    end
  end
end
@@ -0,0 +1,152 @@
1
+
2
module NCSAParser
  # NCSAParser::ParsedLine handles some token conversions and the like on the
  # fly after a successful line parse. You can add your own token conversions
  # or override existing ones by passing along a +:token_conversions+ option
  # that contains converters in the same manner as those found in
  # NCSAParser::ParsedLine::TOKEN_CONVERSIONS.
  #
  # To access a parsed value without any sort of token conversion, use the
  # +attributes+ method. The +[]+ method will perform the token conversion
  # on the fly for you.
  #
  # For token converters that handle URIs, the Symbol :bad_uri will be returned
  # if the URI parser fails for whatever reason.
  class ParsedLine
    # Built-in converters, keyed by the name used with +[]+. Each converter is
    # called with the ParsedLine itself and the options Hash, and returns
    # +nil+ when the attribute it depends on is missing from the line.
    TOKEN_CONVERSIONS = {
      :datetime => proc { |match, options|
        raw = match.attributes[:datetime]
        # Guarded like every other converter so that patterns without a
        # datetime token produce nil instead of a failed conversion.
        DateTime.strptime(raw, options[:datetime_format]) if raw
      },

      :request_uri => proc { |match, options|
        raw = match.attributes[:request]
        if raw
          begin
            request = raw.scan(/^"[A-Z]+ (.+) HTTP\/\d+\.\d+"$/).flatten[0]
            URI.parse("http://#{options[:domain]}#{request}")
          rescue
            :bad_uri
          end
        end
      },

      :request_path => proc { |match, _options|
        # Stop at the query string *or* at the whitespace before the protocol,
        # so requests without a "?" don't drag ' HTTP/1.1"' into the path.
        match.attributes[:request].scan(/^"[A-Z]+ ([^?\s]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      :http_method => proc { |match, _options|
        match.attributes[:request].scan(/^"([A-Z]+)/).flatten[0] rescue nil if match.attributes[:request]
      },

      :http_version => proc { |match, _options|
        match.attributes[:request].scan(/(\d+\.\d+)"$/).flatten[0] rescue nil if match.attributes[:request]
      },

      :query_string => proc { |match, _options|
        uri = match[:request_uri]
        if uri
          uri.query ? CGI.parse(uri.query) : Hash.new
        end
      },

      :referer_uri => proc { |match, _options|
        raw = match[:referer]
        if raw
          if raw != '"-"'
            # Strip the surrounding quotes, then escape the characters that
            # URI.parse would choke on. The cleaned value has to be kept;
            # clean_uri returns a new String rather than modifying in place.
            referer = NCSAParser::Helper.clean_uri(raw.sub(/^"(.+)"$/, '\1'))

            begin
              URI.parse(referer)
            rescue
              :bad_uri
            end
          else
            '-'
          end
        end
      },

      :browscap => proc { |match, options|
        options[:browscap].query(match[:ua].sub(/^"(.+)"$/, '\1')) if options[:browscap] && match[:ua]
      },

      :ratio => proc { |match, _options|
        match.attributes[:ratio].to_f / 100 rescue nil if match.attributes[:ratio]
      },

      :host => proc { |match, _options|
        if match.attributes[:host]
          match.attributes[:host]
        elsif match.attributes[:host_proxy]
          # Use the first entry of the comma-separated proxy list.
          match.attributes[:host_proxy].split(',')[0].strip
        end
      }
    }

    # Integer-valued tokens all share the same converter shape.
    %w{ status instream outstream bytes }.each do |field|
      name = field.to_sym
      TOKEN_CONVERSIONS[name] = proc { |match, _options|
        match.attributes[name].to_i rescue nil if match.attributes[name]
      }
    end

    attr_reader :attributes

    # Creates a ParsedLine from the raw +attributes+ Hash and the parser
    # +options+. Raises ArgumentError if a +:browscap+ option is given that
    # does not respond to +query+.
    def initialize(attributes, options = {})
      @attributes, @options = attributes, options
      @parsed_attributes = {}

      if options[:browscap] && !options[:browscap].respond_to?(:query)
        raise ArgumentError, "The :browscap object should respond to the #query method."
      end
    end

    # Accesses either an attribute or an attribute that has been passed
    # through a token converter. You can access the raw, unconverted attributes
    # via the +attributes+ method. Converted values are cached, so each
    # converter runs at most once per key.
    #
    # Converters supplied through the +:token_conversions+ option take
    # precedence over the built-in ones, and any exception they raise is
    # passed on to the caller. If a built-in converter fails for whatever
    # reason, a value of +:bad_conversion+ is returned.
    def [](key)
      key = key.to_sym unless key.is_a?(Symbol)

      if @parsed_attributes.has_key?(key)
        @parsed_attributes[key]
      elsif @options[:token_conversions] && @options[:token_conversions][key]
        @parsed_attributes[key] = @options[:token_conversions][key].call(self, @options)
      elsif TOKEN_CONVERSIONS[key]
        @parsed_attributes[key] = (TOKEN_CONVERSIONS[key].call(self, @options) rescue :bad_conversion)
      else
        @attributes[key]
      end
    end

    # Gathers up the requested attributes and spits them out into a Hash.
    # The +values+ argument determines what gets inserted into the Hash:
    #
    # * +:all+ - both attributes and parsed attributes. In cases where
    #   the values share the same names, the parsed attribute wins out.
    # * +:attributes+ - unparsed attributes only.
    # * +:parsed+ - parsed attributes only. Every key in TOKEN_CONVERSIONS is
    #   evaluated, along with any other converted value that has already
    #   been accessed through +[]+.
    #
    # The default value is +:all+. Any +nil+ values are automatically stripped
    # from the Hash.
    def to_hash(values = :all)
      retval = {}

      if values == :all || values == :attributes
        retval.merge!(@attributes)
      end

      if values == :all || values == :parsed
        # Force every built-in conversion so the cache is fully populated.
        TOKEN_CONVERSIONS.each_key { |token| self[token] }
        retval.merge!(@parsed_attributes)
      end

      retval.delete_if { |_key, value| value.nil? }
      retval
    end
  end
end
@@ -0,0 +1,111 @@
1
+
2
+ require 'uri'
3
+ require 'date'
4
+ require 'cgi'
5
+
6
module NCSAParser
  # Raised by NCSAParser::Parser#parse_line when a line does not match the
  # configured pattern. Inherits from StandardError so that a plain +rescue+
  # catches it.
  class BadLogLine < StandardError
    # +line+ is the offending input; +pattern+ is the list of token names
    # (an Array, or a single space-delimited String) it was matched against.
    def initialize(line, pattern)
      super("Bad log line. Pattern: |#{Array(pattern).join(' ')}| Line: |#{line}|")
    end
  end

  # A line parser for a log file. Lines are parsed via Regexps. You can
  # inject new tokens or override existing ones by passing along a +:tokens+
  # option and adding the keys to the +:pattern+ option accordingly.
  class Parser
    IP_ADDRESS = '\d+\.\d+\.\d+\.\d+|unknown'

    # Regexp source fragments for each known token. IP_ADDRESS contains a
    # top-level alternation, so it is always wrapped in its own group here.
    TOKENS = {
      :host => "(?:#{IP_ADDRESS}|-|::1)",
      :host_proxy => "(?:#{IP_ADDRESS})(?:,\\s+(?:#{IP_ADDRESS}))*|-",
      :ident => '[^\s]+',
      :username => '[^\s]+',
      :datetime => '\[[^\]]+\]',
      :request => '".+"',
      :status => '\d+',
      :bytes => '\d+|-',
      :referer => '".*"',
      :ua => '".*"',
      :usertrack => "(?:#{IP_ADDRESS})[^ ]+|-",
      :outstream => '\d+|-',
      :instream => '\d+|-',
      :ratio => '\d+%|-%'
    }

    LOG_FORMAT_COMMON = %w{
      host ident username datetime request status bytes
    }

    LOG_FORMAT_COMBINED = %w{
      host ident username datetime request status bytes referer ua
    }

    attr_reader :pattern, :matcher, :re

    # Creates a new Parser object.
    #
    # == Options
    #
    # * +:domain+ - when parsing query strings, use this domain as the URL's
    #   domain. The default is +"www.example.com"+.
    # * +:datetime_format+ - sets the datetime format for when tokens are
    #   converted in NCSAParser::ParsedLine. The default is +"[%d/%b/%Y:%H:%M:%S %Z]"+.
    # * +:pattern+ - the default log line format to use. The default is
    #   +LOG_FORMAT_COMBINED+, which matches the "combined" log format in
    #   Apache. The value for +:pattern+ can be either a space-delimited
    #   String of token names or an Array of token names.
    # * +:browscap+ - a browser capabilities object to use when sniffing out
    #   user agents. This object should be able to respond to the +query+
    #   method. Several browscap extensions are available for Ruby; the one
    #   written by the author of this library is called Browscapper and is
    #   available at https://github.com/dark-panda/browscapper .
    # * +:token_conversions+ - converters to pass along to the line parser.
    #   See NCSAParser::ParsedLine for details.
    # * +:tokens+ - tokens to add to the generated Regexp.
    #
    # Raises ArgumentError if the pattern names a token that is found neither
    # in +:tokens+ nor in TOKENS.
    def initialize(options = {})
      options = {
        :domain => 'www.example.com',
        :datetime_format => '[%d/%b/%Y:%H:%M:%S %Z]',
        :pattern => LOG_FORMAT_COMBINED
      }.merge(options)

      @options = options
      @pattern = if options[:pattern].is_a?(Array)
        options[:pattern]
      else
        options[:pattern].to_s.split(/\s+/)
      end

      custom_tokens = options[:tokens] || {}

      # One capture group per token, in pattern order, separated by a single
      # space; parse_line relies on that ordering to name the captures.
      groups = @pattern.collect { |tk|
        name = tk.to_sym
        token = custom_tokens[name] || TOKENS[name]
        raise ArgumentError, "Token :#{name} not found!" unless token

        "(#{token})"
      }

      @re = '^' + groups.join(' ') + '$'
      @matcher = Regexp.new(@re)
    end

    # Parses a single line and returns an NCSAParser::ParsedLine object.
    # Raises NCSAParser::BadLogLine if the line does not match the pattern.
    def parse_line(line)
      md = line && @matcher.match(line)

      # Report the normalized token list; the raw :pattern option may be a
      # String.
      raise BadLogLine.new(line, @pattern) unless md

      match = Hash.new
      @pattern.each_with_index do |k, j|
        match[k.to_sym] = md[j + 1]
      end
      match[:original] = line.strip

      ParsedLine.new(match, @options)
    end
    alias :parse :parse_line
  end
end
@@ -0,0 +1,5 @@
1
+
2
module NCSAParser
  # The gem's version string. Frozen so the constant cannot be modified in
  # place by code that receives it.
  VERSION = '0.0.2'.freeze
end
5
+
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.expand_path('../lib/ncsa-parser/version', __FILE__)
4
+
5
# Gem specification for ncsa-parser. The version comes from
# NCSAParser::VERSION, which must already be loaded when this is evaluated.
Gem::Specification.new do |s|
  s.name = "ncsa-parser"
  s.version = NCSAParser::VERSION

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["J Smith"]
  s.description = "A simple NCSA-style log file parser."
  s.summary = s.description
  s.email = "dark.panda@gmail.com"
  s.homepage = "http://github.com/dark-panda/ncsa-parser"
  # The bundled MIT-LICENSE file carries the MIT license text.
  s.licenses = ["MIT"]
  s.extra_rdoc_files = [
    "README.rdoc"
  ]

  # Split on newlines only: `git ls-files` prints one path per line, and
  # splitting on the (normally nil) output record separator would also break
  # paths apart at spaces.
  s.files = `git ls-files`.split("\n")
  s.executables = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # These are only needed to build the docs and run the tests, so they are
  # development dependencies rather than something every user must install.
  s.add_development_dependency("rdoc")
  s.add_development_dependency("rake", ["~> 0.9"])
  s.add_development_dependency("minitest")
  s.add_development_dependency("turn")
end
@@ -0,0 +1,175 @@
1
+
2
+ $: << File.dirname(__FILE__)
3
+ require 'test_helper'
4
+
5
+ class NCSAParserTests < MiniTest::Unit::TestCase
6
+ include TestHelper
7
+
8
+ def test_format_default
9
+ parser = NCSAParser::Parser.new
10
+ parsed = parser.parse_line(LOG_COMBINED)
11
+
12
+ assert_equal({
13
+ :host => %{123.123.123.123},
14
+ :ident => %{-},
15
+ :username => %{-},
16
+ :datetime => %{[08/Oct/2012:14:36:07 -0400]},
17
+ :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
18
+ :status => %{200},
19
+ :bytes => %{923},
20
+ :referer => %{"http://www.example.com/referer"},
21
+ :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
22
+ :original => LOG_COMBINED,
23
+ }, parsed.attributes)
24
+ end
25
+
26
+ def test_format_common
27
+ parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
28
+
29
+ parsed = parser.parse_line(LOG_COMMON)
30
+ assert_equal({
31
+ :host => %{123.123.123.123},
32
+ :ident => %{-},
33
+ :username => %{-},
34
+ :datetime => %{[08/Oct/2012:14:36:07 -0400]},
35
+ :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
36
+ :status => %{200},
37
+ :bytes => %{923},
38
+ :original => LOG_COMMON,
39
+ }, parsed.attributes)
40
+ end
41
+
42
+ def test_format_usertrack
43
+ parser = NCSAParser::Parser.new(:pattern => %w{
44
+ host ident username datetime request status bytes referer ua usertrack
45
+ })
46
+
47
+ parsed = parser.parse_line(LOG_USERTRACK)
48
+
49
+ assert_equal({
50
+ :host => %{123.123.123.123},
51
+ :ident => %{-},
52
+ :username => %{-},
53
+ :datetime => %{[08/Oct/2012:14:36:07 -0400]},
54
+ :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
55
+ :status => %{200},
56
+ :bytes => %{923},
57
+ :referer => %{"http://www.example.com/referer"},
58
+ :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
59
+ :usertrack => %{123.123.123.123.1349718542489266},
60
+ :original => LOG_USERTRACK
61
+ }, parsed.attributes)
62
+ end
63
+
64
+ def test_format_deflate
65
+ parser = NCSAParser::Parser.new(:pattern => %w{
66
+ host ident username datetime request status bytes referer ua instream outstream ratio
67
+ })
68
+
69
+ parsed = parser.parse_line(LOG_DEFLATE)
70
+
71
+ assert_equal({
72
+ :host => %{123.123.123.123},
73
+ :ident => %{-},
74
+ :username => %{-},
75
+ :datetime => %{[08/Oct/2012:14:36:07 -0400]},
76
+ :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
77
+ :status => %{200},
78
+ :bytes => %{923},
79
+ :referer => %{"http://www.example.com/referer"},
80
+ :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
81
+ :instream => %{905},
82
+ :outstream => %{1976},
83
+ :ratio => %{45%},
84
+ :original => LOG_DEFLATE
85
+ }, parsed.attributes)
86
+ end
87
+
88
+ def test_format_bad
89
+ parser = NCSAParser::Parser.new
90
+
91
+ assert_raises(NCSAParser::BadLogLine) do
92
+ parser.parse_line('what happen')
93
+ end
94
+ end
95
+
96
+ def test_open_file
97
+ log = NCSAParser.open('./test/resources/access_log', :pattern => %w{
98
+ host ident username datetime request status bytes referer ua usertrack instream outstream ratio
99
+ })
100
+
101
+ expect = {
102
+ :host => %{123.123.123.123},
103
+ :ident => %{-},
104
+ :username => %{-},
105
+ :datetime => %{[08/Oct/2012:14:36:07 -0400]},
106
+ :request => %{"GET /path/to/something?foo=bar&hello=world HTTP/1.1"},
107
+ :status => %{200},
108
+ :bytes => %{923},
109
+ :referer => %{"http://www.example.com/referer"},
110
+ :ua => %{"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"},
111
+ :usertrack => %{123.123.123.123.1349718542489266},
112
+ :instream => %{905},
113
+ :outstream => %{1976},
114
+ :ratio => %{45%},
115
+ :original => %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%}
116
+ }
117
+
118
+ log.each do |parsed|
119
+ assert_equal(expect, parsed.attributes)
120
+ end
121
+ end
122
+
123
+ def test_token_conversions
124
+ line = %{[08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" "http://www.example.com/referer" 1000 100 10% 123.123.123.123 200 110}
125
+
126
+ parser = NCSAParser::Parser.new(:pattern => %w{
127
+ datetime request referer instream outstream ratio host_proxy status bytes
128
+ })
129
+
130
+ parsed = parser.parse_line(line)
131
+
132
+ assert_equal(DateTime.strptime('[08/Oct/2012:14:36:07 -0400]', '[%d/%b/%Y:%H:%M:%S %Z]'), parsed[:datetime])
133
+ assert_equal(URI.parse('http://www.example.com/path/to/something?foo=bar&hello=world'), parsed[:request_uri])
134
+ assert_equal('/path/to/something', parsed[:request_path])
135
+ assert_equal('GET', parsed[:http_method])
136
+ assert_equal('1.1', parsed[:http_version])
137
+ assert_equal(URI.parse('http://www.example.com/referer'), parsed[:referer_uri])
138
+ assert_equal(0.1, parsed[:ratio])
139
+ assert_equal('123.123.123.123', parsed[:host])
140
+ assert_equal(200, parsed[:status])
141
+ assert_equal(1000, parsed[:instream])
142
+ assert_equal(100, parsed[:outstream])
143
+ assert_equal(110, parsed[:bytes])
144
+ end
145
+
146
+ def test_custom_tokens_and_conversion
147
+ parser = NCSAParser::Parser.new(
148
+ :pattern => 'email',
149
+
150
+ :tokens => {
151
+ :email => '[^@]+@[^@]+'
152
+ },
153
+
154
+ :token_conversions => {
155
+ :email => proc { |match, options|
156
+ URI.parse(match.attributes[:email])
157
+ }
158
+ }
159
+ )
160
+
161
+ parsed = parser.parse_line('test@example.com')
162
+ assert_equal(URI.parse('test@example.com'), parsed[:email])
163
+ end
164
+
165
+ def test_to_hash
166
+ parser = NCSAParser::Parser.new(:pattern => NCSAParser::Parser::LOG_FORMAT_COMMON)
167
+ parsed = parser.parse_line(LOG_COMMON)
168
+
169
+ assert_equal([
170
+ :host, :ident, :username, :datetime, :request, :status, :bytes,
171
+ :original, :request_uri, :request_path, :http_method,
172
+ :http_version, :query_string
173
+ ].sort_by(&:to_s), parsed.to_hash.keys.sort_by(&:to_s))
174
+ end
175
+ end
@@ -0,0 +1,7 @@
1
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
2
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
3
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
4
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
5
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
6
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
7
+ 123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266 905 1976 45%
@@ -0,0 +1,25 @@
1
+
2
+ require 'rubygems'
3
+ require 'minitest/autorun'
4
+ require 'turn/autorun'
5
+ require File.join(File.dirname(__FILE__), %w{ .. lib ncsa-parser })
6
+
7
+ puts "NCSAParser version #{NCSAParser::VERSION}"
8
+
9
+ module TestHelper
10
+ LOG_COMMON = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923}
11
+
12
+ LOG_COMBINED = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4"}
13
+
14
+ LOG_USERTRACK = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 123.123.123.123.1349718542489266}
15
+
16
+ LOG_DEFLATE = %{123.123.123.123 - - [08/Oct/2012:14:36:07 -0400] "GET /path/to/something?foo=bar&hello=world HTTP/1.1" 200 923 "http://www.example.com/referer" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4" 905 1976 45%}
17
+ end
18
+
19
+ if ENV['autotest']
20
+ module Turn::Colorize
21
+ def self.color_supported?
22
+ true
23
+ end
24
+ end
25
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ncsa-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - J Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rdoc
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '0.9'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '0.9'
46
+ - !ruby/object:Gem::Dependency
47
+ name: minitest
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: turn
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A simple NCSA-style log file parser.
79
+ email: dark.panda@gmail.com
80
+ executables: []
81
+ extensions: []
82
+ extra_rdoc_files:
83
+ - README.rdoc
84
+ files:
85
+ - .gitignore
86
+ - Gemfile
87
+ - MIT-LICENSE
88
+ - README.rdoc
89
+ - Rakefile
90
+ - lib/ncsa-parser.rb
91
+ - lib/ncsa-parser/helper.rb
92
+ - lib/ncsa-parser/log.rb
93
+ - lib/ncsa-parser/parsed_line.rb
94
+ - lib/ncsa-parser/parser.rb
95
+ - lib/ncsa-parser/version.rb
96
+ - ncsa-parser.gemspec
97
+ - test/ncsa_parser_tests.rb
98
+ - test/resources/access_log
99
+ - test/test_helper.rb
100
+ homepage: http://github.com/dark-panda/ncsa-parser
101
+ licenses: []
102
+ post_install_message:
103
+ rdoc_options: []
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ required_rubygems_version: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.8.24
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: A simple NCSA-style log file parser.
124
+ test_files:
125
+ - test/ncsa_parser_tests.rb
126
+ - test/resources/access_log
127
+ - test/test_helper.rb