apache_log-parser 2.0.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -16
- data/apache_log-parser.gemspec +3 -3
- data/benchmark.rb +18 -0
- data/lib/apache_log/parser.rb +35 -35
- data/lib/apache_log/parser/version.rb +2 -2
- data/spec/apache_log/parser_spec.rb +17 -11
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 338af4fdaec6a5a65bf000f5dc549f73ba1db221
|
4
|
+
data.tar.gz: b5366447d647a4da67dbce66468730fa993fbd15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe677bb02e77958f076a1b68a1b6cf8f5c619c4b7f7c551e6e8ef9b7496617a13b7b25ff268881d0f6f9f3a3f978c2ee6165500a2477b507d8cb91d5eba79c13
|
7
|
+
data.tar.gz: 2ee1ab1f75310b24e9ad8cdcfabcaa69607e76ce83534301519cfba81640ef3ebe80be56a35b7efde54f2b71c3d599a0c0eb443c341d670769b58551dfabf48a
|
data/README.md
CHANGED
@@ -1,18 +1,8 @@
|
|
1
|
-
# ApacheLog::Parser
|
2
|
-
[![Build Status](https://travis-ci.org/takady/apache_log-parser.svg?branch=master)](https://travis-ci.org/takady/apache_log-parser) [![Code Climate](https://codeclimate.com/github/takady/apache_log-parser/badges/gpa.svg)](https://codeclimate.com/github/takady/apache_log-parser)
|
3
|
-
Gem to parse apache log including common, combined and customized format.
|
1
|
+
# ApacheLog::Parser [![Build Status](https://travis-ci.org/takady/apache_log-parser.svg?branch=master)](https://travis-ci.org/takady/apache_log-parser) [![Code Climate](https://codeclimate.com/github/takady/apache_log-parser/badges/gpa.svg)](https://codeclimate.com/github/takady/apache_log-parser)
|
4
2
|
|
5
|
-
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'apache_log-parser'
|
3
|
+
Parse apache log including common, combined and customized format
|
10
4
|
|
11
|
-
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
5
|
+
## Installation
|
16
6
|
|
17
7
|
$ gem install apache_log-parser
|
18
8
|
|
@@ -22,19 +12,22 @@ Or install it yourself as:
|
|
22
12
|
require 'apache_log/parser'
|
23
13
|
|
24
14
|
# common format
|
25
|
-
|
15
|
+
parser = ApacheLog::Parser.new('common')
|
16
|
+
common_log = parser.parse(log_line)
|
26
17
|
common_log[:remote_host] #=> remote host
|
27
18
|
common_log[:datetime] #=> datetime
|
28
19
|
common_log[:request] #=> request
|
29
20
|
|
30
21
|
# combined format
|
31
|
-
|
22
|
+
parser = ApacheLog::Parser.new('combined')
|
23
|
+
combined_log = parser.parse(log_line)
|
32
24
|
combined_log[:referer] #=> referer
|
33
25
|
combined_log[:user_agent] #=> user_agent
|
34
26
|
|
35
27
|
# custom format(additional fields after 'combined')
|
36
28
|
# e.g. "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%v\" \"%{cookie}n\" %D"
|
37
|
-
|
29
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack request_duration))
|
30
|
+
custom_log = parser.parse(log_line)
|
38
31
|
custom_log[:user_agent] #=> user_agent
|
39
32
|
custom_log[:vhost] #=> vhost
|
40
33
|
custom_log[:usertrack] #=> usertrack
|
data/apache_log-parser.gemspec
CHANGED
@@ -7,9 +7,9 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "apache_log-parser"
|
8
8
|
spec.version = ApacheLog::Parser::VERSION
|
9
9
|
spec.authors = ["Yuichi Takada"]
|
10
|
-
spec.email = ["
|
11
|
-
spec.summary = "
|
12
|
-
spec.description =
|
10
|
+
spec.email = ["takadyuichi@gmail.com"]
|
11
|
+
spec.summary = "Parse apache log including common, combined and customized format"
|
12
|
+
spec.description = spec.summary
|
13
13
|
spec.homepage = "https://github.com/takady/apache_log-parser"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/benchmark.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
require 'apache_log/parser'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
common_line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
|
6
|
+
combined_line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
|
7
|
+
customized_line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593'
|
8
|
+
|
9
|
+
common_parser = ApacheLog::Parser.new('common')
|
10
|
+
combined_parser = ApacheLog::Parser.new('combined')
|
11
|
+
customized_parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
12
|
+
|
13
|
+
n = 1_000_000
|
14
|
+
Benchmark.bm(12) do |x|
|
15
|
+
x.report('common:') { (1..n).each{common_parser.parse(common_line)} }
|
16
|
+
x.report('combined:') { (1..n).each{combined_parser.parse(combined_line)} }
|
17
|
+
x.report('customized:') { (1..n).each{customized_parser.parse(customized_line)} }
|
18
|
+
end
|
data/lib/apache_log/parser.rb
CHANGED
@@ -2,9 +2,8 @@ require 'apache_log/parser/version'
|
|
2
2
|
require 'date'
|
3
3
|
|
4
4
|
module ApacheLog
|
5
|
-
|
6
|
-
|
7
|
-
def self.parse(line, format, additional_fields=[])
|
5
|
+
class Parser
|
6
|
+
def initialize(format, additional_fields=[])
|
8
7
|
common_fields = %w(remote_host identity_check user datetime request status size)
|
9
8
|
combined_fields = common_fields + %w(referer user_agent)
|
10
9
|
|
@@ -18,52 +17,53 @@ module ApacheLog
|
|
18
17
|
|
19
18
|
case format
|
20
19
|
when 'common'
|
21
|
-
fields = common_fields + additional_fields
|
22
|
-
pattern = /^#{common_pattern}#{additional_pattern}$/
|
20
|
+
@fields = common_fields + additional_fields
|
21
|
+
@pattern = /^#{common_pattern}#{additional_pattern}$/
|
23
22
|
when 'combined'
|
24
|
-
fields = combined_fields + additional_fields
|
25
|
-
pattern = /^#{combined_pattern}#{additional_pattern}$/
|
23
|
+
@fields = combined_fields + additional_fields
|
24
|
+
@pattern = /^#{combined_pattern}#{additional_pattern}$/
|
26
25
|
else
|
27
26
|
raise "format error\n no such format: <#{format}> \n"
|
28
27
|
end
|
28
|
+
end
|
29
29
|
|
30
|
-
|
30
|
+
def parse(line)
|
31
|
+
matched = @pattern.match(line)
|
31
32
|
raise "parse error\n at line: <#{line}> \n" if matched.nil?
|
32
|
-
|
33
|
-
generate_hash(fields, matched.to_a)
|
33
|
+
generate_hash(@fields, matched.to_a)
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
|
-
def self.generate_hash(keys, values)
|
38
|
-
hash = {}
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
case key
|
43
|
-
when :datetime
|
44
|
-
hash[key] = to_datetime(values[idx+1])
|
45
|
-
when :request
|
46
|
-
hash[key] = parse_request(values[idx+1])
|
47
|
-
else
|
48
|
-
hash[key] = values[idx+1]
|
49
|
-
end
|
50
|
-
end
|
38
|
+
def generate_hash(keys, values)
|
39
|
+
hash = {}
|
51
40
|
|
52
|
-
|
41
|
+
keys.each.with_index(1) do |key, idx|
|
42
|
+
key = key.to_sym
|
43
|
+
case key
|
44
|
+
when :datetime
|
45
|
+
hash[key] = to_datetime(values[idx])
|
46
|
+
when :request
|
47
|
+
hash[key] = parse_request(values[idx])
|
48
|
+
else
|
49
|
+
hash[key] = values[idx]
|
50
|
+
end
|
53
51
|
end
|
54
52
|
|
55
|
-
|
56
|
-
|
57
|
-
end
|
53
|
+
hash
|
54
|
+
end
|
58
55
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
method: method,
|
63
|
-
path: path,
|
64
|
-
protocol: protocol,
|
65
|
-
}
|
66
|
-
end
|
56
|
+
def to_datetime(str)
|
57
|
+
DateTime.strptime(str, '%d/%b/%Y:%T %z')
|
58
|
+
end
|
67
59
|
|
60
|
+
def parse_request(str)
|
61
|
+
method, path, protocol = str.split
|
62
|
+
{
|
63
|
+
method: method,
|
64
|
+
path: path,
|
65
|
+
protocol: protocol,
|
66
|
+
}
|
67
|
+
end
|
68
68
|
end
|
69
69
|
end
|
@@ -3,15 +3,14 @@ require 'date'
|
|
3
3
|
|
4
4
|
describe ApacheLog::Parser do
|
5
5
|
|
6
|
-
before { @parser = ApacheLog::Parser }
|
7
|
-
|
8
6
|
it 'has a version number' do
|
9
|
-
expect(
|
7
|
+
expect(ApacheLog::Parser::VERSION).not_to be nil
|
10
8
|
end
|
11
9
|
|
12
10
|
it 'can parse common format log' do
|
13
11
|
line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
|
14
|
-
|
12
|
+
parser = ApacheLog::Parser.new('common')
|
13
|
+
entity = parser.parse(line.chomp)
|
15
14
|
expect = {remote_host: '127.0.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2014, 5, 20, 20, 04, 04, 0.375),
|
16
15
|
request: {method: 'GET', path: '/test/indx.html', protocol: 'HTTP/1.1'}, status: '200', size: '4576'}
|
17
16
|
expect(entity).to eq(expect)
|
@@ -19,7 +18,8 @@ describe ApacheLog::Parser do
|
|
19
18
|
|
20
19
|
it 'can parse tab separated common format log' do
|
21
20
|
line = "192.168.0.1\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/net/0000/ HTTP/1.1\"\t200\t9891";
|
22
|
-
|
21
|
+
parser = ApacheLog::Parser.new('common')
|
22
|
+
entity = parser.parse(line.chomp)
|
23
23
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 07, 10, 59, 59, 0.375),
|
24
24
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891'}
|
25
25
|
expect(entity).to eq(expect)
|
@@ -27,7 +27,8 @@ describe ApacheLog::Parser do
|
|
27
27
|
|
28
28
|
it 'can parse combined format log' do
|
29
29
|
line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
|
30
|
-
|
30
|
+
parser = ApacheLog::Parser.new('combined')
|
31
|
+
entity = parser.parse(line.chomp)
|
31
32
|
expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
|
32
33
|
request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
|
33
34
|
user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}
|
@@ -36,7 +37,8 @@ describe ApacheLog::Parser do
|
|
36
37
|
|
37
38
|
it 'can parse combined format log' do
|
38
39
|
line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)"';
|
39
|
-
|
40
|
+
parser = ApacheLog::Parser.new('combined')
|
41
|
+
entity = parser.parse(line.chomp)
|
40
42
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
41
43
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
|
42
44
|
user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)'}
|
@@ -45,7 +47,8 @@ describe ApacheLog::Parser do
|
|
45
47
|
|
46
48
|
it 'can parse tab separated combined format log' do
|
47
49
|
line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"-\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"";
|
48
|
-
|
50
|
+
parser = ApacheLog::Parser.new('combined')
|
51
|
+
entity = parser.parse(line.chomp)
|
49
52
|
expect = {remote_host: '203.0.113.254', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
50
53
|
request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: '-',
|
51
54
|
user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)'}
|
@@ -54,7 +57,8 @@ describe ApacheLog::Parser do
|
|
54
57
|
|
55
58
|
it 'can parse custom format log based on combined format' do
|
56
59
|
line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1" "example.com" "192.168.0.1201102091208001" "901"'
|
57
|
-
|
60
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack request_duration))
|
61
|
+
entity = parser.parse(line.chomp)
|
58
62
|
expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
|
59
63
|
request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
|
60
64
|
user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1', vhost: 'example.com',
|
@@ -64,7 +68,8 @@ describe ApacheLog::Parser do
|
|
64
68
|
|
65
69
|
it 'can parse custom format log based on combined format' do
|
66
70
|
line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593';
|
67
|
-
|
71
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
72
|
+
entity = parser.parse(line.chomp)
|
68
73
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
69
74
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
|
70
75
|
user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)', vhost: 'virtualhost.example.jp',
|
@@ -74,7 +79,8 @@ describe ApacheLog::Parser do
|
|
74
79
|
|
75
80
|
it 'can parse tab separated custom format log based on combined format' do
|
76
81
|
line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"http://headlines.yahoo.co.jp/hl\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"\t\"virtualhost.example.jp\"\t\"192.0.2.16794832933550\"\t\"09011112222333_xx.ezweb.ne.jp\"\t533593";
|
77
|
-
|
82
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
83
|
+
entity = parser.parse(line.chomp)
|
78
84
|
expect = {remote_host: '203.0.113.254', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
79
85
|
request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: 'http://headlines.yahoo.co.jp/hl',
|
80
86
|
user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)', vhost: 'virtualhost.example.jp',
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apache_log-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuichi Takada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,9 +52,9 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description:
|
55
|
+
description: Parse apache log including common, combined and customized format
|
56
56
|
email:
|
57
|
-
-
|
57
|
+
- takadyuichi@gmail.com
|
58
58
|
executables: []
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- README.md
|
67
67
|
- Rakefile
|
68
68
|
- apache_log-parser.gemspec
|
69
|
+
- benchmark.rb
|
69
70
|
- lib/apache_log/parser.rb
|
70
71
|
- lib/apache_log/parser/version.rb
|
71
72
|
- spec/apache_log/parser_spec.rb
|
@@ -90,10 +91,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
91
|
version: '0'
|
91
92
|
requirements: []
|
92
93
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.
|
94
|
+
rubygems_version: 2.4.5
|
94
95
|
signing_key:
|
95
96
|
specification_version: 4
|
96
|
-
summary:
|
97
|
+
summary: Parse apache log including common, combined and customized format
|
97
98
|
test_files:
|
98
99
|
- spec/apache_log/parser_spec.rb
|
99
100
|
- spec/spec_helper.rb
|