apache_log-parser 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -16
- data/apache_log-parser.gemspec +3 -3
- data/benchmark.rb +18 -0
- data/lib/apache_log/parser.rb +35 -35
- data/lib/apache_log/parser/version.rb +2 -2
- data/spec/apache_log/parser_spec.rb +17 -11
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 338af4fdaec6a5a65bf000f5dc549f73ba1db221
|
4
|
+
data.tar.gz: b5366447d647a4da67dbce66468730fa993fbd15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fe677bb02e77958f076a1b68a1b6cf8f5c619c4b7f7c551e6e8ef9b7496617a13b7b25ff268881d0f6f9f3a3f978c2ee6165500a2477b507d8cb91d5eba79c13
|
7
|
+
data.tar.gz: 2ee1ab1f75310b24e9ad8cdcfabcaa69607e76ce83534301519cfba81640ef3ebe80be56a35b7efde54f2b71c3d599a0c0eb443c341d670769b58551dfabf48a
|
data/README.md
CHANGED
@@ -1,18 +1,8 @@
|
|
1
|
-
# ApacheLog::Parser
|
2
|
-
[Build Status](https://travis-ci.org/takady/apache_log-parser) [Code Climate](https://codeclimate.com/github/takady/apache_log-parser)
|
3
|
-
Gem to parse apache log including common, combined and customized format.
|
1
|
+
# ApacheLog::Parser [Build Status](https://travis-ci.org/takady/apache_log-parser) [Code Climate](https://codeclimate.com/github/takady/apache_log-parser)
|
4
2
|
|
5
|
-
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'apache_log-parser'
|
3
|
+
Parse apache log including common, combined and customized format
|
10
4
|
|
11
|
-
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
5
|
+
## Installation
|
16
6
|
|
17
7
|
$ gem install apache_log-parser
|
18
8
|
|
@@ -22,19 +12,22 @@ Or install it yourself as:
|
|
22
12
|
require 'apache_log/parser'
|
23
13
|
|
24
14
|
# common format
|
25
|
-
|
15
|
+
parser = ApacheLog::Parser.new('common')
|
16
|
+
common_log = parser.parse(log_line)
|
26
17
|
common_log[:remote_host] #=> remote host
|
27
18
|
common_log[:datetime] #=> datetime
|
28
19
|
common_log[:request] #=> request
|
29
20
|
|
30
21
|
# combined format
|
31
|
-
|
22
|
+
parser = ApacheLog::Parser.new('combined')
|
23
|
+
combined_log = parser.parse(log_line)
|
32
24
|
combined_log[:referer] #=> referer
|
33
25
|
combined_log[:user_agent] #=> user_agent
|
34
26
|
|
35
27
|
# custom format(additional fields after 'combined')
|
36
28
|
# e.g. "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%v\" \"%{cookie}n\" %D"
|
37
|
-
|
29
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack request_duration))
|
30
|
+
custom_log = parser.parse(log_line)
|
38
31
|
custom_log[:user_agent] #=> user_agent
|
39
32
|
custom_log[:vhost] #=> vhost
|
40
33
|
custom_log[:usertrack] #=> usertrack
|
data/apache_log-parser.gemspec
CHANGED
@@ -7,9 +7,9 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "apache_log-parser"
|
8
8
|
spec.version = ApacheLog::Parser::VERSION
|
9
9
|
spec.authors = ["Yuichi Takada"]
|
10
|
-
spec.email = ["
|
11
|
-
spec.summary = "
|
12
|
-
spec.description =
|
10
|
+
spec.email = ["takadyuichi@gmail.com"]
|
11
|
+
spec.summary = "Parse apache log including common, combined and customized format"
|
12
|
+
spec.description = spec.summary
|
13
13
|
spec.homepage = "https://github.com/takady/apache_log-parser"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/benchmark.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
require 'apache_log/parser'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
common_line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
|
6
|
+
combined_line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
|
7
|
+
customized_line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593'
|
8
|
+
|
9
|
+
common_parser = ApacheLog::Parser.new('common')
|
10
|
+
combined_parser = ApacheLog::Parser.new('combined')
|
11
|
+
customized_parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
12
|
+
|
13
|
+
n = 1_000_000
|
14
|
+
Benchmark.bm(12) do |x|
|
15
|
+
x.report('common:') { (1..n).each{common_parser.parse(common_line)} }
|
16
|
+
x.report('combined:') { (1..n).each{combined_parser.parse(combined_line)} }
|
17
|
+
x.report('customized:') { (1..n).each{customized_parser.parse(customized_line)} }
|
18
|
+
end
|
data/lib/apache_log/parser.rb
CHANGED
@@ -2,9 +2,8 @@ require 'apache_log/parser/version'
|
|
2
2
|
require 'date'
|
3
3
|
|
4
4
|
module ApacheLog
|
5
|
-
|
6
|
-
|
7
|
-
def self.parse(line, format, additional_fields=[])
|
5
|
+
class Parser
|
6
|
+
def initialize(format, additional_fields=[])
|
8
7
|
common_fields = %w(remote_host identity_check user datetime request status size)
|
9
8
|
combined_fields = common_fields + %w(referer user_agent)
|
10
9
|
|
@@ -18,52 +17,53 @@ module ApacheLog
|
|
18
17
|
|
19
18
|
case format
|
20
19
|
when 'common'
|
21
|
-
fields = common_fields + additional_fields
|
22
|
-
pattern = /^#{common_pattern}#{additional_pattern}$/
|
20
|
+
@fields = common_fields + additional_fields
|
21
|
+
@pattern = /^#{common_pattern}#{additional_pattern}$/
|
23
22
|
when 'combined'
|
24
|
-
fields = combined_fields + additional_fields
|
25
|
-
pattern = /^#{combined_pattern}#{additional_pattern}$/
|
23
|
+
@fields = combined_fields + additional_fields
|
24
|
+
@pattern = /^#{combined_pattern}#{additional_pattern}$/
|
26
25
|
else
|
27
26
|
raise "format error\n no such format: <#{format}> \n"
|
28
27
|
end
|
28
|
+
end
|
29
29
|
|
30
|
-
|
30
|
+
def parse(line)
|
31
|
+
matched = @pattern.match(line)
|
31
32
|
raise "parse error\n at line: <#{line}> \n" if matched.nil?
|
32
|
-
|
33
|
-
generate_hash(fields, matched.to_a)
|
33
|
+
generate_hash(@fields, matched.to_a)
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
|
-
def self.generate_hash(keys, values)
|
38
|
-
hash = {}
|
39
37
|
|
40
|
-
|
41
|
-
|
42
|
-
case key
|
43
|
-
when :datetime
|
44
|
-
hash[key] = to_datetime(values[idx+1])
|
45
|
-
when :request
|
46
|
-
hash[key] = parse_request(values[idx+1])
|
47
|
-
else
|
48
|
-
hash[key] = values[idx+1]
|
49
|
-
end
|
50
|
-
end
|
38
|
+
def generate_hash(keys, values)
|
39
|
+
hash = {}
|
51
40
|
|
52
|
-
|
41
|
+
keys.each.with_index(1) do |key, idx|
|
42
|
+
key = key.to_sym
|
43
|
+
case key
|
44
|
+
when :datetime
|
45
|
+
hash[key] = to_datetime(values[idx])
|
46
|
+
when :request
|
47
|
+
hash[key] = parse_request(values[idx])
|
48
|
+
else
|
49
|
+
hash[key] = values[idx]
|
50
|
+
end
|
53
51
|
end
|
54
52
|
|
55
|
-
|
56
|
-
|
57
|
-
end
|
53
|
+
hash
|
54
|
+
end
|
58
55
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
method: method,
|
63
|
-
path: path,
|
64
|
-
protocol: protocol,
|
65
|
-
}
|
66
|
-
end
|
56
|
+
def to_datetime(str)
|
57
|
+
DateTime.strptime(str, '%d/%b/%Y:%T %z')
|
58
|
+
end
|
67
59
|
|
60
|
+
def parse_request(str)
|
61
|
+
method, path, protocol = str.split
|
62
|
+
{
|
63
|
+
method: method,
|
64
|
+
path: path,
|
65
|
+
protocol: protocol,
|
66
|
+
}
|
67
|
+
end
|
68
68
|
end
|
69
69
|
end
|
@@ -3,15 +3,14 @@ require 'date'
|
|
3
3
|
|
4
4
|
describe ApacheLog::Parser do
|
5
5
|
|
6
|
-
before { @parser = ApacheLog::Parser }
|
7
|
-
|
8
6
|
it 'has a version number' do
|
9
|
-
expect(
|
7
|
+
expect(ApacheLog::Parser::VERSION).not_to be nil
|
10
8
|
end
|
11
9
|
|
12
10
|
it 'can parse common format log' do
|
13
11
|
line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
|
14
|
-
|
12
|
+
parser = ApacheLog::Parser.new('common')
|
13
|
+
entity = parser.parse(line.chomp)
|
15
14
|
expect = {remote_host: '127.0.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2014, 5, 20, 20, 04, 04, 0.375),
|
16
15
|
request: {method: 'GET', path: '/test/indx.html', protocol: 'HTTP/1.1'}, status: '200', size: '4576'}
|
17
16
|
expect(entity).to eq(expect)
|
@@ -19,7 +18,8 @@ describe ApacheLog::Parser do
|
|
19
18
|
|
20
19
|
it 'can parse tab separated common format log' do
|
21
20
|
line = "192.168.0.1\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/net/0000/ HTTP/1.1\"\t200\t9891";
|
22
|
-
|
21
|
+
parser = ApacheLog::Parser.new('common')
|
22
|
+
entity = parser.parse(line.chomp)
|
23
23
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 07, 10, 59, 59, 0.375),
|
24
24
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891'}
|
25
25
|
expect(entity).to eq(expect)
|
@@ -27,7 +27,8 @@ describe ApacheLog::Parser do
|
|
27
27
|
|
28
28
|
it 'can parse combined format log' do
|
29
29
|
line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
|
30
|
-
|
30
|
+
parser = ApacheLog::Parser.new('combined')
|
31
|
+
entity = parser.parse(line.chomp)
|
31
32
|
expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
|
32
33
|
request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
|
33
34
|
user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}
|
@@ -36,7 +37,8 @@ describe ApacheLog::Parser do
|
|
36
37
|
|
37
38
|
it 'can parse combined format log' do
|
38
39
|
line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)"';
|
39
|
-
|
40
|
+
parser = ApacheLog::Parser.new('combined')
|
41
|
+
entity = parser.parse(line.chomp)
|
40
42
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
41
43
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
|
42
44
|
user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)'}
|
@@ -45,7 +47,8 @@ describe ApacheLog::Parser do
|
|
45
47
|
|
46
48
|
it 'can parse tab separated combined format log' do
|
47
49
|
line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"-\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"";
|
48
|
-
|
50
|
+
parser = ApacheLog::Parser.new('combined')
|
51
|
+
entity = parser.parse(line.chomp)
|
49
52
|
expect = {remote_host: '203.0.113.254', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
50
53
|
request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: '-',
|
51
54
|
user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)'}
|
@@ -54,7 +57,8 @@ describe ApacheLog::Parser do
|
|
54
57
|
|
55
58
|
it 'can parse custom format log based on combined format' do
|
56
59
|
line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1" "example.com" "192.168.0.1201102091208001" "901"'
|
57
|
-
|
60
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack request_duration))
|
61
|
+
entity = parser.parse(line.chomp)
|
58
62
|
expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
|
59
63
|
request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
|
60
64
|
user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1', vhost: 'example.com',
|
@@ -64,7 +68,8 @@ describe ApacheLog::Parser do
|
|
64
68
|
|
65
69
|
it 'can parse custom format log based on combined format' do
|
66
70
|
line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593';
|
67
|
-
|
71
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
72
|
+
entity = parser.parse(line.chomp)
|
68
73
|
expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
69
74
|
request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
|
70
75
|
user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)', vhost: 'virtualhost.example.jp',
|
@@ -74,7 +79,8 @@ describe ApacheLog::Parser do
|
|
74
79
|
|
75
80
|
it 'can parse tab separated custom format log based on combined format' do
|
76
81
|
line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"http://headlines.yahoo.co.jp/hl\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"\t\"virtualhost.example.jp\"\t\"192.0.2.16794832933550\"\t\"09011112222333_xx.ezweb.ne.jp\"\t533593";
|
77
|
-
|
82
|
+
parser = ApacheLog::Parser.new('combined', %w(vhost usertrack mobileid request_duration))
|
83
|
+
entity = parser.parse(line.chomp)
|
78
84
|
expect = {remote_host: '203.0.113.254', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
|
79
85
|
request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: 'http://headlines.yahoo.co.jp/hl',
|
80
86
|
user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)', vhost: 'virtualhost.example.jp',
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apache_log-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuichi Takada
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-02-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,9 +52,9 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description:
|
55
|
+
description: Parse apache log including common, combined and customized format
|
56
56
|
email:
|
57
|
-
-
|
57
|
+
- takadyuichi@gmail.com
|
58
58
|
executables: []
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- README.md
|
67
67
|
- Rakefile
|
68
68
|
- apache_log-parser.gemspec
|
69
|
+
- benchmark.rb
|
69
70
|
- lib/apache_log/parser.rb
|
70
71
|
- lib/apache_log/parser/version.rb
|
71
72
|
- spec/apache_log/parser_spec.rb
|
@@ -90,10 +91,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
90
91
|
version: '0'
|
91
92
|
requirements: []
|
92
93
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.
|
94
|
+
rubygems_version: 2.4.5
|
94
95
|
signing_key:
|
95
96
|
specification_version: 4
|
96
|
-
summary:
|
97
|
+
summary: Parse apache log including common, combined and customized format
|
97
98
|
test_files:
|
98
99
|
- spec/apache_log/parser_spec.rb
|
99
100
|
- spec/spec_helper.rb
|