apache_log-parser 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 909728a9cf9d9f8fe6f3a5dc600da0f69a964759
4
- data.tar.gz: 16d92e4ce7963c67a22cd92ead416d59e65ff3b4
3
+ metadata.gz: f3c8a20f2a6c50f76a758f8e44f8da1e56e8b531
4
+ data.tar.gz: 49e38fea3f0fb27216ed3c32f0fb40e2aed93662
5
5
  SHA512:
6
- metadata.gz: 947dfe5dd1af740c8ed4ab20aeaca12ce547e4084bca890280a8ad90a209613761d25263fc13bd96648d371ef2743495c72a5b6f882cc5b184a529d0a76538de
7
- data.tar.gz: 78cf579ef4fd553a0d6a6d6252af4c1511df5acd9871cda9f281c833445717a66f41e4a90765788a7741d918b9437ed5de70279a07bb5e3e727574e9b3ec6fec
6
+ metadata.gz: febb514e9f8f95b380be58d6b7eaf7dfe3b9b164e61aae1bd289f9e3745f27ac0748cb9d901318d1aea3524d6bbad7c258eae3e00001b424d8e9275d1da8bf35
7
+ data.tar.gz: a8e2766fd986b0af077bd89577649818e9e2f667d2e5d44126b5a3b81e694be9f4ca3cdb452603c4f731bf9565176aa166c52de2a898667471e58479dc34d044
data/.gitignore CHANGED
@@ -21,4 +21,5 @@ tmp
21
21
  *.a
22
22
  mkmf.log
23
23
  bin/
24
+ vendor/
24
25
  apache_log-parser-*
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # ApacheLog::Parser
2
2
 
3
- Gem to parse popular format apache log files.
3
+ Gem to parse Apache logs in common, combined, and customized formats.
4
4
 
5
5
  ## Installation
6
6
 
@@ -21,12 +21,24 @@ Or install it yourself as:
21
21
  ```ruby
22
22
  require 'apache_log/parser'
23
23
 
24
- parser = ApacheLog::Parser.getParser(format)
25
- entity = []
26
-
27
- File.foreach(logfile) do |line|
28
- entity << parser.parse(line.chomp)
29
- end
24
+ # common format
25
+ common_log = ApacheLog::Parser.parse(log_line, 'common')
26
+ common_log[:remote_host] #=> remote host
27
+ common_log[:datetime] #=> datetime
28
+ common_log[:request] #=> request
29
+
30
+ # combined format
31
+ common_log = ApacheLog::Parser.parse(log_line, 'combined')
32
+ common_log[:referer] #=> referer
33
+ common_log[:user_agent] #=> user agent
34
+
35
+ # custom format (additional fields after 'combined')
36
+ # custom format: LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%v\" \"%{cookie}n\" %D"
37
+ common_log = ApacheLog::Parser.parse(log_line, 'combined', %w(vhost usertrack request_duration))
38
+ common_log[:user_agent] #=> user agent
39
+ common_log[:vhost] #=> vhost
40
+ common_log[:usertrack] #=> usertrack
41
+ common_log[:request_duration] #=> request_duration
30
42
  ```
31
43
 
32
44
  The format parameter must be 'common' or 'combined'.
data/Rakefile CHANGED
@@ -1,2 +1,6 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
2
3
 
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = ApacheLog::Parser::VERSION
9
9
  spec.authors = ["Yuichi Takada"]
10
10
  spec.email = ["takadyy@gmail.com"]
11
- spec.summary = "Gem to parse popular format apache log files."
12
- spec.description = "You can parse common or combined format log."
11
+ spec.summary = "Gem to parse apache log including common, combined and customized format."
12
+ spec.description = "You can parse common, combined and customized format apache log."
13
13
  spec.homepage = "https://github.com/takady/apache_log-parser"
14
14
  spec.license = "MIT"
15
15
 
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.6"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "rspec"
23
24
  end
@@ -1,18 +1,65 @@
1
1
  require "apache_log/parser/version"
2
- require "apache_log/parser/common"
3
- require "apache_log/parser/combined"
4
2
 
5
3
  module ApacheLog
6
4
  module Parser
7
- def self.getParser(format)
5
+
6
+ def self.parse(line, format, additional_fields=[])
7
+
8
+ common_fields = %w(remote_host identity_check user datetime request status size)
9
+ combined_fields = common_fields + %w(referer user_agent)
10
+
11
+ common_pattern = '(\S+)\s+(\S+)\s+(\S+)\s+\[(\d{2}\/.*\d{4}:\d{2}:\d{2}:\d{2}\s.*)\]\s+"(\S+\s\S+\s\S+)"\s+(\S+)\s+(\S+)'
12
+ combined_pattern = common_pattern + '\s+"([^"]*)"\s+"([^"]*)"'
13
+ additional_pattern = ''
14
+
15
+ additional_fields.each do
16
+ additional_pattern += '\s+"?([^"]*)"?'
17
+ end
18
+
8
19
  case format
9
20
  when 'common'
10
- ApacheLog::Parser::Common
21
+ fields = common_fields + additional_fields
22
+ pattern = /^#{common_pattern}#{additional_pattern}$/
11
23
  when 'combined'
12
- ApacheLog::Parser::Combined
24
+ fields = combined_fields + additional_fields
25
+ pattern = /^#{combined_pattern}#{additional_pattern}$/
13
26
  else
14
27
  raise "format error\n no such format: <#{format}> \n"
15
28
  end
29
+
30
+ match = pattern.match(line)
31
+ raise "parse error\n at line: <#{line}> \n" if match.nil?
32
+
33
+ columns = match.to_a
34
+
35
+ parsed_hash = {}
36
+ fields.each.with_index do |val, idx|
37
+ val = val.to_sym
38
+ if val == :datetime
39
+ parsed_hash[val] = to_datetime(columns[idx+1])
40
+ elsif val == :request
41
+ parsed_hash[val] = parse_request(columns[idx+1])
42
+ else
43
+ parsed_hash[val] = columns[idx+1]
44
+ end
45
+ end
46
+
47
+ parsed_hash
16
48
  end
49
+
50
+ private
51
+ def self.to_datetime(str)
52
+ DateTime.strptime( str, '%d/%b/%Y:%T %z')
53
+ end
54
+
55
+ def self.parse_request(str)
56
+ method, path, protocol = str.split
57
+ {
58
+ method: method,
59
+ path: path,
60
+ protocol: protocol,
61
+ }
62
+ end
63
+
17
64
  end
18
65
  end
@@ -1,5 +1,5 @@
1
1
  module ApacheLog
2
2
  module Parser
3
- VERSION = "1.0.0"
3
+ VERSION = "2.0.0"
4
4
  end
5
5
  end
@@ -0,0 +1,84 @@
1
+ require 'spec_helper'
2
+ require 'date'
3
+
4
+ describe ApacheLog::Parser do
5
+
6
+ before { @parser = ApacheLog::Parser }
7
+
8
+ it 'has a version number' do
9
+ expect(@parser::VERSION).not_to be nil
10
+ end
11
+
12
+ it 'can parse common format log' do
13
+ line = '127.0.0.1 - - [20/May/2014:20:04:04 +0900] "GET /test/indx.html HTTP/1.1" 200 4576'
14
+ entity = @parser.parse(line.chomp, 'common')
15
+ expect = {remote_host: '127.0.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2014, 5, 20, 20, 04, 04, 0.375),
16
+ request: {method: 'GET', path: '/test/indx.html', protocol: 'HTTP/1.1'}, status: '200', size: '4576'}
17
+ expect(entity).to eq(expect)
18
+ end
19
+
20
+ it 'can parse tab separated common format log' do
21
+ line = "192.168.0.1\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/net/0000/ HTTP/1.1\"\t200\t9891";
22
+ entity = @parser.parse(line.chomp, 'common')
23
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 07, 10, 59, 59, 0.375),
24
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891'}
25
+ expect(entity).to eq(expect)
26
+ end
27
+
28
+ it 'can parse combined format log' do
29
+ line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
30
+ entity = @parser.parse(line.chomp, 'combined')
31
+ expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
32
+ request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
33
+ user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'}
34
+ expect(entity).to eq(expect)
35
+ end
36
+
37
+ it 'can parse combined format log' do
38
+ line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)"';
39
+ entity = @parser.parse(line.chomp, 'combined')
40
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
41
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
42
+ user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)'}
43
+ expect(entity).to eq(expect)
44
+ end
45
+
46
+ it 'can parse tab separated combined format log' do
47
+ line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"-\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"";
48
+ entity = @parser.parse(line.chomp, 'combined')
49
+ expect = {remote_host: '203.0.113.254', identity_check: '-', user:'-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
50
+ request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: '-',
51
+ user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)'}
52
+ expect(entity).to eq(expect)
53
+ end
54
+
55
+ it 'can parse custom format log based on combined format' do
56
+ line = '104.24.160.39 - - [07/Jun/2014:14:58:55 +0900] "GET /category/electronics HTTP/1.1" 200 128 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1" "example.com" "192.168.0.1201102091208001" "901"'
57
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack request_duration))
58
+ expect = {remote_host: '104.24.160.39', identity_check: '-', user: '-', datetime: DateTime.new(2014, 6, 7, 14, 58, 55, 0.375),
59
+ request: {method: 'GET', path: '/category/electronics', protocol: 'HTTP/1.1'}, status: '200', size: '128', referer: '-',
60
+ user_agent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0.1) Gecko/20100101 Firefox/9.0.1', vhost: 'example.com',
61
+ usertrack: '192.168.0.1201102091208001', request_duration: '901'}
62
+ expect(entity).to eq(expect)
63
+ end
64
+
65
+ it 'can parse custom format log based on combined format' do
66
+ line = '192.168.0.1 - - [07/Feb/2011:10:59:59 +0900] "GET /x/i.cgi/net/0000/ HTTP/1.1" 200 9891 "-" "DoCoMo/2.0 P03B(c500;TB;W24H16)" virtualhost.example.jp "192.0.2.16794832933550" "09011112222333_xx.ezweb.ne.jp" 533593';
67
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack mobileid request_duration))
68
+ expect = {remote_host: '192.168.0.1', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
69
+ request: {method: 'GET', path: '/x/i.cgi/net/0000/', protocol: 'HTTP/1.1'}, status: '200', size: '9891', referer: '-',
70
+ user_agent: 'DoCoMo/2.0 P03B(c500;TB;W24H16)', vhost: 'virtualhost.example.jp',
71
+ usertrack: '192.0.2.16794832933550', mobileid: '09011112222333_xx.ezweb.ne.jp', request_duration: '533593'}
72
+ expect(entity).to eq(expect)
73
+ end
74
+
75
+ it 'can parse tab separated custom format log based on combined format' do
76
+ line = "203.0.113.254\t-\t-\t[07/Feb/2011:10:59:59 +0900]\t\"GET /x/i.cgi/movie/0001/-0002 HTTP/1.1\"\t200\t14462\t\"http://headlines.yahoo.co.jp/hl\"\t\"DoCoMo/2.0 F08A3(c500;TB;W30H20)\"\t\"virtualhost.example.jp\"\t\"192.0.2.16794832933550\"\t\"09011112222333_xx.ezweb.ne.jp\"\t533593";
77
+ entity = @parser.parse(line.chomp, 'combined', %w(vhost usertrack mobileid request_duration))
78
+ expect = {remote_host: '203.0.113.254', identity_check: '-', user: '-', datetime: DateTime.new(2011, 2, 7, 10, 59, 59, 0.375),
79
+ request: {method: 'GET', path: '/x/i.cgi/movie/0001/-0002', protocol: 'HTTP/1.1'}, status: '200', size: '14462', referer: 'http://headlines.yahoo.co.jp/hl',
80
+ user_agent: 'DoCoMo/2.0 F08A3(c500;TB;W30H20)', vhost: 'virtualhost.example.jp',
81
+ usertrack: '192.0.2.16794832933550', mobileid: '09011112222333_xx.ezweb.ne.jp', request_duration: '533593'}
82
+ expect(entity).to eq(expect)
83
+ end
84
+ end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'apache_log/parser'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apache_log-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuichi Takada
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-14 00:00:00.000000000 Z
11
+ date: 2014-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,7 +38,21 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
- description: You can parse common or combined format log.
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: You can parse common, combined and customized format apache log.
42
56
  email:
43
57
  - takadyy@gmail.com
44
58
  executables: []
@@ -52,10 +66,9 @@ files:
52
66
  - Rakefile
53
67
  - apache_log-parser.gemspec
54
68
  - lib/apache_log/parser.rb
55
- - lib/apache_log/parser/combined.rb
56
- - lib/apache_log/parser/common.rb
57
- - lib/apache_log/parser/format.rb
58
69
  - lib/apache_log/parser/version.rb
70
+ - spec/apache_log/parser_spec.rb
71
+ - spec/spec_helper.rb
59
72
  homepage: https://github.com/takady/apache_log-parser
60
73
  licenses:
61
74
  - MIT
@@ -79,5 +92,7 @@ rubyforge_project:
79
92
  rubygems_version: 2.2.0
80
93
  signing_key:
81
94
  specification_version: 4
82
- summary: Gem to parse popular format apache log files.
83
- test_files: []
95
+ summary: Gem to parse apache log including common, combined and customized format.
96
+ test_files:
97
+ - spec/apache_log/parser_spec.rb
98
+ - spec/spec_helper.rb
@@ -1,53 +0,0 @@
1
- require "apache_log/parser/format"
2
-
3
- module ApacheLog
4
- module Parser
5
- class Combined < Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- match = log_pattern.match(line)
11
- raise "parse error\n at line: <#{line}> \n" if match.nil?
12
-
13
- columns = match.to_a.values_at(1..9)
14
- {
15
- remote_host: columns[0],
16
- identity_check: columns[1],
17
- user: columns[2],
18
- datetime: to_datetime(columns[3]),
19
- request: parse_request(columns[4]),
20
- status: columns[5],
21
- size: columns[6],
22
- referer: columns[7],
23
- user_agent: columns[8],
24
- }
25
- end
26
-
27
- def self.log_pattern
28
- /^
29
- (\S+) # remote_host
30
- \s+
31
- (\S+) # identity_check
32
- \s+
33
- (\S+) # user
34
- \s+
35
- \[ (\d{2}\/.*?\d{4}:\d{2}:\d{2}:\d{2}\s.*?) \] # date
36
- \s+
37
- " (.*?\s.*?\s.*?) " # request
38
- \s+
39
- (\S+) # status
40
- \s+
41
- (\S+) # size
42
- \s+
43
- " (.*?) " # referer
44
- \s+
45
- " (.*?) " # user_agent
46
- $/x
47
- end
48
-
49
- private_class_method :log_pattern
50
- end
51
-
52
- end
53
- end
@@ -1,47 +0,0 @@
1
- require "apache_log/parser/format"
2
-
3
- module ApacheLog
4
- module Parser
5
- class Common < Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- match = log_pattern.match(line)
11
- raise "parse error\n at line: <#{line}> \n" if match.nil?
12
-
13
- columns = match.to_a.values_at(1..7)
14
- {
15
- remote_host: columns[0],
16
- identity_check: columns[1],
17
- user: columns[2],
18
- datetime: to_datetime(columns[3]),
19
- request: parse_request(columns[4]),
20
- status: columns[5],
21
- size: columns[6],
22
- }
23
- end
24
-
25
- def self.log_pattern
26
- /^
27
- (\S+) # remote_host
28
- \s+
29
- (\S+) # identity_check
30
- \s+
31
- (\S+) # user
32
- \s+
33
- \[ (\d{2}\/.*?\d{4}:\d{2}:\d{2}:\d{2}\s.*?) \] # date
34
- \s+
35
- " (.*?\s.*?\s.*?) " # request
36
- \s+
37
- (\S+) # status
38
- \s+
39
- (\S+) # size
40
- $/x
41
- end
42
-
43
- private_class_method :log_pattern
44
- end
45
-
46
- end
47
- end
@@ -1,32 +0,0 @@
1
- require 'date'
2
-
3
- module ApacheLog
4
- module Parser
5
- class Format
6
- def initialize
7
- end
8
-
9
- def self.parse(line)
10
- end
11
-
12
- def self.log_pattern
13
- end
14
-
15
- def self.to_datetime(str)
16
- DateTime.strptime( str, '%d/%b/%Y:%T %z')
17
- end
18
-
19
- def self.parse_request(str)
20
- method, path, protocol = str.split
21
- {
22
- method: method,
23
- path: path,
24
- protocol: protocol,
25
- }
26
- end
27
-
28
- private_class_method :to_datetime, :parse_request
29
- end
30
-
31
- end
32
- end