apache_log-parser 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 909728a9cf9d9f8fe6f3a5dc600da0f69a964759
4
- data.tar.gz: 16d92e4ce7963c67a22cd92ead416d59e65ff3b4
3
+ metadata.gz: f3c8a20f2a6c50f76a758f8e44f8da1e56e8b531
4
+ data.tar.gz: 49e38fea3f0fb27216ed3c32f0fb40e2aed93662
5
5
  SHA512:
6
- metadata.gz: 947dfe5dd1af740c8ed4ab20aeaca12ce547e4084bca890280a8ad90a209613761d25263fc13bd96648d371ef2743495c72a5b6f882cc5b184a529d0a76538de
7
- data.tar.gz: 78cf579ef4fd553a0d6a6d6252af4c1511df5acd9871cda9f281c833445717a66f41e4a90765788a7741d918b9437ed5de70279a07bb5e3e727574e9b3ec6fec
6
+ metadata.gz: febb514e9f8f95b380be58d6b7eaf7dfe3b9b164e61aae1bd289f9e3745f27ac0748cb9d901318d1aea3524d6bbad7c258eae3e00001b424d8e9275d1da8bf35
7
+ data.tar.gz: a8e2766fd986b0af077bd89577649818e9e2f667d2e5d44126b5a3b81e694be9f4ca3cdb452603c4f731bf9565176aa166c52de2a898667471e58479dc34d044
data/.gitignore CHANGED
@@ -21,4 +21,5 @@ tmp
21
21
  *.a
22
22
  mkmf.log
23
23
  bin/
24
+ vendor/
24
25
  apache_log-parser-*
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # ApacheLog::Parser
2
2
 
3
- Gem to parse popular format apache log files.
3
+ Gem to parse Apache logs in common, combined, and customized formats.
4
4
 
5
5
  ## Installation
6
6
 
@@ -21,12 +21,24 @@ Or install it yourself as:
21
21
  ```ruby
22
22
  require 'apache_log/parser'
23
23
 
24
- parser = ApacheLog::Parser.getParser(format)
25
- entity = []
26
-
27
- File.foreach(logfile) do |line|
28
- entity << parser.parse(line.chomp)
29
- end
24
+ # common format
25
+ common_log = ApacheLog::Parser.parse(log_line, 'common')
26
+ common_log[:remote_host] #=> remote host
27
+ common_log[:datetime] #=> datetime
28
+ common_log[:request] #=> request
29
+
30
+ # combined format
31
+ common_log = ApacheLog::Parser.parse(log_line, 'combined')
32
+ common_log[:referer] #=> referer
33
+ common_log[:user_agent] #=> user agent
34
+
35
+ # custom format(additional fields after 'combined')
36
+ # custom format: LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%v\" \"%{cookie}n\" %D"
37
+ common_log = ApacheLog::Parser.parse(log_line, 'combined', %w(vhost usertrack request_duration))
38
+ common_log[:user_agent] #=> user agent
39
+ common_log[:vhost] #=> vhost
40
+ common_log[:usertrack] #=> usertrack
41
+ common_log[:request_duration] #=> request_duration
30
42
  ```
31
43
 
32
44
  The format parameter must be 'common' or 'combined'.
data/Rakefile CHANGED
@@ -1,2 +1,6 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
2
3
 
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = ApacheLog::Parser::VERSION
9
9
  spec.authors = ["Yuichi Takada"]
10
10
  spec.email = ["takadyy@gmail.com"]
11
- spec.summary = "Gem to parse popular format apache log files."
12
- spec.description = "You can parse common or combined format log."
11
+ spec.summary = "Gem to parse apache log including common, combined and customized format."
12
+ spec.description = "You can parse common, combined and customized format apache log."
13
13
  spec.homepage = "https://github.com/takady/apache_log-parser"
14
14
  spec.license = "MIT"
15
15
 
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.6"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
23
24
  end
@@ -1,18 +1,65 @@
1
1
  require "apache_log/parser/version"
2
- require "apache_log/parser/common"
3
- require "apache_log/parser/combined"
4
2
 
5
3
  module ApacheLog
6
4
  module Parser
7
- def self.getParser(format)
5
+
6
+ def self.parse(line, format, additional_fields=[])
7
+
8
+ common_fields = %w(remote_host identity_check user datetime request status size)
9
+ combined_fields = common_fields + %w(referer user_agent)
10
+
11
+ common_pattern = '(\S+)\s+(\S+)\s+(\S+)\s+\[(\d{2}\/.*\d{4}:\d{2}:\d{2}:\d{2}\s.*)\]\s+"(\S+\s\S+\s\S+)"\s+(\S+)\s+(\S+)'
12
+ combined_pattern = common_pattern + '\s+"([^"]*)"\s+"([^"]*)"'
13
+ additional_pattern = ''
14
+
15
+ additional_fields.each do
16
+ additional_pattern += '\s+"?([^"]*)"?'
17
+ end
18
+
8
19
  case format
9
20
  when 'common'
10
- ApacheLog::Parser::Common
21
+ fields = common_fields + additional_fields
22
+ pattern = /^#{common_pattern}#{additional_pattern}$/
11
23
  when 'combined'
12
- ApacheLog::Parser::Combined
24
+ fields = combined_fields + additional_fields
25
+ pattern = /^#{combined_pattern}#{additional_pattern}$/
13
26
  else
14
27
  raise "format error\n no such format: <#{format}> \n"
15
28
  end
29
+
30
+ match = pattern.match(line)
31
+ raise "parse error\n at line: <#{line}> \n" if match.nil?
32
+
33
+ columns = match.to_a
34
+
35
+ parsed_hash = {}
36
+ fields.each.with_index do |val, idx|
37
+ val = val.to_sym
38
+ if val == :datetime
39
+ parsed_hash[val] = to_datetime(columns[idx+1])
40
+ elsif val == :request
41
+ parsed_hash[val] = parse_request(columns[idx+1])
42
+ else
43
+ parsed_hash[val] = columns[idx+1]
44
+ end
45
+ end
46
+
47
+ parsed_hash
16
48
  end
49
+
50
+ private
51
+ def self.to_datetime(str)
52
+ DateTime.strptime( str, '%d/%b/%Y:%T %z')
53
+ end
54
+
55
+ def self.parse_request(str)
56
+ method, path, protocol = str.split
57
+ {
58
+ method: method,
59
+ path: path,
60
+ protocol: protocol,
61
+ }
62
+ end
63
+
17
64
  end
18
65
  end
@@ -1,5 +1,5 @@
1
1
  module ApacheLog
2
2
  module Parser
3
- VERSION = "1.0.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -0,0 +1,84 @@
1
+ require 'spec_helper'
2
+ require 'date'
3
+
4
+ describe ApacheLog::Parser do
5
+
6
+ before { @parser = ApacheLog::Parser }
7
+
8
+ it 'has a version number' do
9
+ expect(@parser::VERSION).not_to be nil
10
+ end
11
+
12
+ it 'can parse common format log' do
13
+ line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
14
+ entity = @parser.parse(line.chomp, 'common')
15
+ expect = {remote_host: '127.0.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2014, 5, 20, 20, 04, 04, 0.375),
16
+ request: {method: 'GET', path: '/test/indx.html', protocol: 'HTTP/1.1'}, status: '200', size: '4576'}
17
+ expect(entity).to eq(expect)
18
+ end
19
+
20
+ it 'can parse tab separated common format log' do
21
+ line = "192.168.0.1\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/net/0000/ HTTP/1.1\"\t200\t9891";
22
+ entity = @parser.parse(line.chomp, 'common')
23
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 07, 10, 59, 59, 0.375),
24
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891'}
25
+ expect(entity).to eq(expect)
26
+ end
27
+
28
+ it 'can parse combined format log' do
29
+ line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
30
+ entity = @parser.parse(line.chomp, 'combined')
31
+ expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
32
+ request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
33
+ user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}
34
+ expect(entity).to eq(expect)
35
+ end
36
+
37
+ it 'can parse combined format log' do
38
+ line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)"';
39
+ entity = @parser.parse(line.chomp, 'combined')
40
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
41
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
42
+ user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)'}
43
+ expect(entity).to eq(expect)
44
+ end
45
+
46
+ it 'can parse tab separated combined format log' do
47
+ line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"-\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"";
48
+ entity = @parser.parse(line.chomp, 'combined')
49
+ expect = {remote_host: '203.0.113.254', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
50
+ request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: '-',
51
+ user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)'}
52
+ expect(entity).to eq(expect)
53
+ end
54
+
55
+ it 'can parse custom format log based on combined format' do
56
+ line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1" "example.com" "192.168.0.1201102091208001" "901"'
57
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack request_duration))
58
+ expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
59
+ request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
60
+ user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1', vhost: 'example.com',
61
+ usertrack: '192.168.0.1201102091208001', request_duration: '901'}
62
+ expect(entity).to eq(expect)
63
+ end
64
+
65
+ it 'can parse custom format log based on combined format' do
66
+ line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593';
67
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack mobileid request_duration))
68
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
69
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
70
+ user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)', vhost: 'virtualhost.example.jp',
71
+ usertrack: '192.0.2.16794832933550', mobileid: '09011112222333_xx.ezweb.ne.jp', request_duration: '533593'}
72
+ expect(entity).to eq(expect)
73
+ end
74
+
75
+ it 'can parse tab separated custom format log based on combined format' do
76
+ line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"http://headlines.yahoo.co.jp/hl\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"\t\"virtualhost.example.jp\"\t\"192.0.2.16794832933550\"\t\"09011112222333_xx.ezweb.ne.jp\"\t533593";
77
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack mobileid request_duration))
78
+ expect = {remote_host: '203.0.113.254', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
79
+ request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: 'http://headlines.yahoo.co.jp/hl',
80
+ user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)', vhost: 'virtualhost.example.jp',
81
+ usertrack: '192.0.2.16794832933550', mobileid: '09011112222333_xx.ezweb.ne.jp', request_duration: '533593'}
82
+ expect(entity).to eq(expect)
83
+ end
84
+ end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'apache_log/parser'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apache_log-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuichi Takada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-14 00:00:00.000000000 Z
11
+ date: 2014-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,7 +38,21 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: You can parse common or combined format log.
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: You can parse common, combined and customized format apache log.
42
56
  email:
43
57
  - takadyy@gmail.com
44
58
  executables: []
@@ -52,10 +66,9 @@ files:
52
66
  - Rakefile
53
67
  - apache_log-parser.gemspec
54
68
  - lib/apache_log/parser.rb
55
- - lib/apache_log/parser/combined.rb
56
- - lib/apache_log/parser/common.rb
57
- - lib/apache_log/parser/format.rb
58
69
  - lib/apache_log/parser/version.rb
70
+ - spec/apache_log/parser_spec.rb
71
+ - spec/spec_helper.rb
59
72
  homepage: https://github.com/takady/apache_log-parser
60
73
  licenses:
61
74
  - MIT
@@ -79,5 +92,7 @@ rubyforge_project:
79
92
  rubygems_version: 2.2.0
80
93
  signing_key:
81
94
  specification_version: 4
82
- summary: Gem to parse popular format apache log files.
83
- test_files: []
95
+ summary: Gem to parse apache log including common, combined and customized format.
96
+ test_files:
97
+ - spec/apache_log/parser_spec.rb
98
+ - spec/spec_helper.rb
@@ -1,53 +0,0 @@
1
- require "apache_log/parser/format"
2
-
3
- module ApacheLog
4
- module Parser
5
- class Combined < Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- match = log_pattern.match(line)
11
- raise "parse error\n at line: <#{line}> \n" if match.nil?
12
-
13
- columns = match.to_a.values_at(1..9)
14
- {
15
- remote_host: columns[0],
16
- identity_check: columns[1],
17
- user: columns[2],
18
- datetime: to_datetime(columns[3]),
19
- request: parse_request(columns[4]),
20
- status: columns[5],
21
- size: columns[6],
22
- referer: columns[7],
23
- user_agent: columns[8],
24
- }
25
- end
26
-
27
- def self.log_pattern
28
- /^
29
- (\S+) # remote_host
30
- \s+
31
- (\S+) # identity_check
32
- \s+
33
- (\S+) # user
34
- \s+
35
- \[ (\d{2}\/.*?\d{4}:\d{2}:\d{2}:\d{2}\s.*?) \] # date
36
- \s+
37
- " (.*?\s.*?\s.*?) " # request
38
- \s+
39
- (\S+) # status
40
- \s+
41
- (\S+) # size
42
- \s+
43
- " (.*?) " # referer
44
- \s+
45
- " (.*?) " # user_agent
46
- $/x
47
- end
48
-
49
- private_class_method :log_pattern
50
- end
51
-
52
- end
53
- end
@@ -1,47 +0,0 @@
1
- require "apache_log/parser/format"
2
-
3
- module ApacheLog
4
- module Parser
5
- class Common < Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- match = log_pattern.match(line)
11
- raise "parse error\n at line: <#{line}> \n" if match.nil?
12
-
13
- columns = match.to_a.values_at(1..7)
14
- {
15
- remote_host: columns[0],
16
- identity_check: columns[1],
17
- user: columns[2],
18
- datetime: to_datetime(columns[3]),
19
- request: parse_request(columns[4]),
20
- status: columns[5],
21
- size: columns[6],
22
- }
23
- end
24
-
25
- def self.log_pattern
26
- /^
27
- (\S+) # remote_host
28
- \s+
29
- (\S+) # identity_check
30
- \s+
31
- (\S+) # user
32
- \s+
33
- \[ (\d{2}\/.*?\d{4}:\d{2}:\d{2}:\d{2}\s.*?) \] # date
34
- \s+
35
- " (.*?\s.*?\s.*?) " # request
36
- \s+
37
- (\S+) # status
38
- \s+
39
- (\S+) # size
40
- $/x
41
- end
42
-
43
- private_class_method :log_pattern
44
- end
45
-
46
- end
47
- end
@@ -1,32 +0,0 @@
1
- require 'date'
2
-
3
- module ApacheLog
4
- module Parser
5
- class Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- end
11
-
12
- def self.log_pattern
13
- end
14
-
15
- def self.to_datetime(str)
16
- DateTime.strptime( str, '%d/%b/%Y:%T %z')
17
- end
18
-
19
- def self.parse_request(str)
20
- method, path, protocol = str.split
21
- {
22
- method: method,
23
- path: path,
24
- protocol: protocol,
25
- }
26
- end
27
-
28
- private_class_method :to_datetime, :parse_request
29
- end
30
-
31
- end
32
- end