haproxy_log_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,11 @@
1
+ = haproxy_log_parser
2
+
3
+ haproxy_log_parser is a gem that uses Treetop to parse HAProxy logs in
4
+ HAProxy's HTTP log format.
5
+
6
+ == Example
7
+
8
+ require 'haproxy_log_parser'
9
+ result = HAProxyLogParser.parse('Aug 9 20:30:46 localhost haproxy[2022]: 10.0.8.2:34028 [09/Aug/2011:20:30:46.429] proxy-out proxy-out/cache1 1/0/2/126/+128 301 +223 - - ---- 617/523/336/168/0 0/0 {www.sytadin.equipement.gouv.fr||http://trafic.1wt.eu/} {Apache|230|||http://www.sytadin.} "GET http://www.sytadin.equipement.gouv.fr/ HTTP/1.1"')
10
+ result.client_ip # => "10.0.8.2"
11
+ result.captured_response_headers # => ["Apache", "230", "", "", "http://www.sytadin."]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,19 @@
1
# Gem specification for haproxy_log_parser.
Gem::Specification.new do |s|
  s.name = 'haproxy_log_parser'
  # Read VERSION relative to this gemspec (the same way the library does)
  # instead of relative to the process's working directory, so evaluating the
  # gemspec from another directory does not raise Errno::ENOENT.
  s.version = File.read(File.expand_path('../VERSION', __FILE__)).chomp
  s.authors = ['Toby Hsieh']
  s.homepage = 'https://github.com/tobyhs/haproxy_log_parser'
  s.summary = 'Parser for HAProxy logs in the HTTP log format'
  s.description = s.summary

  s.add_dependency 'treetop'

  s.add_development_dependency 'rspec'

  # These globs are evaluated against the current working directory.
  s.files = Dir.glob('lib/**/*') + [
    'README.rdoc',
    'VERSION',
    'haproxy_log_parser.gemspec'
  ]
  s.test_files = Dir.glob('spec/**/*')
end
@@ -0,0 +1,92 @@
1
+ require 'treetop'
2
+
3
+ require 'haproxy_log_parser/entry'
4
+
5
+ Treetop.load(File.expand_path('haproxy_log_parser/line.treetop', File.dirname(__FILE__)))
6
+
7
+ module HAProxyLogParser
8
+ VERSION = IO.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp.freeze
9
+
10
+ @parser = LineParser.new
11
+
12
+ class << self
13
# Builds an Entry from one HAProxy HTTP-format log +line+. If the grammar
# does not accept the +line+, +nil+ is returned.
#
# @param [String] line a line from an HAProxy log
# @return [Entry, nil]
def parse(line)
  tree = @parser.parse(line)
  return nil unless tree

  entry = Entry.new
  # Looks up a labelled node in the parse tree and returns its matched text.
  text_of = lambda { |name| tree.send(name).text_value }

  # Fields stored exactly as they appear in the log line.
  verbatim_fields = [
    :client_ip, :frontend_name, :backend_name, :server_name,
    :termination_state
  ]
  verbatim_fields.each do |name|
    entry.send("#{name}=", text_of.call(name))
  end

  # Numeric fields, converted with String#to_i.
  numeric_fields = [
    :client_port, :tq, :tw, :tc, :tr, :tt, :status_code, :bytes_read,
    :actconn, :feconn, :beconn, :srv_conn, :retries, :srv_queue,
    :backend_queue
  ]
  numeric_fields.each do |name|
    entry.send("#{name}=", text_of.call(name).to_i)
  end

  entry.accept_date = parse_accept_date(text_of.call(:accept_date))

  [:captured_request_cookie, :captured_response_cookie].each do |name|
    entry.send("#{name}=", decode_captured_cookie(text_of.call(name)))
  end
  [:captured_request_headers, :captured_response_headers].each do |name|
    entry.send("#{name}=", decode_captured_headers(text_of.call(name)))
  end

  entry.http_request = unescape(text_of.call(:http_request))
  entry
end
50
+
51
# Reverses HAProxy's "#XX" escaping: every "#" followed by two hexadecimal
# digits is replaced by the character with that code. See the "Logging >
# Non-printable characters" section in HAProxy documentation.
#
# @param [String] string
# @return [String]
def unescape(string)
  string.gsub(/#[[:xdigit:]]{2}/) { |escaped| escaped[1, 2].hex.chr }
end
61
+
62
# Converts the value of an accept_date field (for example
# "09/Aug/2011:20:30:46.429") to a Time object in the local time zone.
#
# The fractional part of the field is a three-digit millisecond count, while
# the seventh argument of Time.local is a microsecond count, so the fraction
# is multiplied by 1000 before being passed on.
#
# @param [String] string
# @return [Time]
def parse_accept_date(string)
  day, month, year, hour, minute, second, millisecond =
    string.split(/[\/:.]/)
  Time.local(year, month, day, hour, minute, second, millisecond.to_i * 1000)
end
70
+
71
# Converts a captured cookie string to a Hash.
#
# A lone "-" produces an empty Hash. Otherwise the string is split at the
# first "=" into a name and a value, and both are un-escaped. A string with
# no "=" at all yields the name mapped to an empty string rather than raising
# on the missing value.
#
# @param [String] string
# @return [Hash{String => String}]
def decode_captured_cookie(string)
  return {} if string == '-'

  key, value = string.split('=', 2)
  # value is nil when the capture contains no "="; to_s turns that into "".
  { unescape(key) => unescape(value.to_s) }
end
83
+
84
# Splits a captured headers string on "|" and un-escapes every piece.
#
# @param [String] string
# @return [Array<String>]
def decode_captured_headers(string)
  # The negative limit keeps trailing empty fields in the result.
  raw_headers = string.split('|', -1)
  raw_headers.map { |raw| unescape(raw) }
end
91
+ end
92
+ end
@@ -0,0 +1,84 @@
1
module HAProxyLogParser
  # Plain data holder for the fields of a single line/entry of an HAProxy log
  # in the HTTP format. Attribute names follow the "Logging > Log formats >
  # HTTP log format" section in HAProxy's configuration.txt, which documents
  # what each field means.
  class Entry
    # Client address and proxy/server names.
    # @return [String]
    attr_accessor :client_ip, :frontend_name, :backend_name, :server_name

    # @return [Integer]
    attr_accessor :client_port

    # @return [Time]
    attr_accessor :accept_date

    # The five timer fields of the log line.
    # @return [Integer]
    attr_accessor :tq, :tw, :tc, :tr, :tt

    # @return [Integer]
    attr_accessor :status_code, :bytes_read

    # @return [Hash{String => String}]
    attr_accessor :captured_request_cookie, :captured_response_cookie

    # @return [String]
    attr_accessor :termination_state

    # Connection counters and the retries count.
    # @return [Integer]
    attr_accessor :actconn, :feconn, :beconn, :srv_conn, :retries

    # Queue positions.
    # @return [Integer]
    attr_accessor :srv_queue, :backend_queue

    # @return [Array<String>]
    attr_accessor :captured_request_headers, :captured_response_headers

    # @return [String]
    attr_accessor :http_request
  end
end
@@ -0,0 +1,58 @@
1
module HAProxyLogParser
  # Treetop grammar for a single HAProxy log line in the HTTP log format.
  grammar Line
    # A whole log line: a syslog prefix ending in "[<integer>]: ", followed by
    # the space-separated HTTP log fields, and an optional trailing newline.
    # Labelled elements (label:expression) are the nodes the Ruby side reads
    # by name.
    rule line
      syslog_portion:([^\[]+ '[' integer ']: ')
      client_ip:ip4_address ':' client_port:integer ' '
      '[' accept_date '] '
      frontend_name:proxy_name ' '
      backend_name:proxy_name '/' server_name ' '
      tq:integer '/' tw:integer '/' tc:integer '/' tr:integer '/' tt:integer ' '
      status_code:integer ' '
      bytes_read:integer ' '
      captured_request_cookie:([^ ]+) ' '
      captured_response_cookie:([^ ]+) ' '
      termination_state ' '
      actconn:integer '/' feconn:integer '/' beconn:integer '/'
      srv_conn:integer '/' retries:integer ' '
      srv_queue:integer '/' backend_queue:integer ' '
      '{' captured_request_headers:captured_headers '} '
      '{' captured_response_headers:captured_headers '} '
      '"' http_request:[^"]+ '"'
      "\n"?
    end

    # An optional "-" or "+" sign followed by one or more decimal digits.
    rule integer
      ('-' / '+')? [0-9]+
    end

    # Three two-digit groups separated by colons (HH:MM:SS shape; values are
    # not range-checked).
    rule time
      ([0-9] 2..2 ':') 2..2 ([0-9] 2..2)
    end

    # Four groups of one to three digits separated by dots (values are not
    # range-checked).
    rule ip4_address
      ([0-9] 1..3 '.') 3..3 ([0-9] 1..3)
    end

    # Two digits, "/", a capitalised three-letter word, "/", one or more
    # digits, ":", a time, ".", and exactly three digits, as in
    # 09/Aug/2011:20:30:46.429.
    rule accept_date
      [0-9] 2..2 '/' [A-Z] [a-z] 2..2 '/' [0-9]+ ':' time '.' ([0-9] 3..3)
    end

    # One or more letters, digits, or any of - _ . :
    rule proxy_name
      [-_A-Za-z0-9.:]+
    end

    # Either a proxy_name or one of the literal markers <NOSRV> / <STATS>.
    rule server_name
      proxy_name / '<NOSRV>' / '<STATS>'
    end

    # Exactly four characters, each drawn from its own fixed set.
    rule termination_state
      [-CSPRIcs] [-RQCHDLT] [-NIDVEO] [-NIUPRD]
    end

    # Everything up to (not including) the closing brace; may be empty.
    rule captured_headers
      [^}]*
    end
  end
end
57
+
58
+ # vim:ai
@@ -0,0 +1,72 @@
1
+ require 'haproxy_log_parser'
2
+
3
# Specs for HAProxyLogParser.parse, driven by the lines of spec/sample.log.
describe HAProxyLogParser do
  # TODO Use something better instead of LINES[0], LINES[1], ...
  # Each element is one raw line (trailing newline included) of sample.log.
  LINES = IO.readlines(File.join(File.dirname(__FILE__), 'sample.log'))

  describe '.parse' do
    # Line without captured cookies ("-") and with "+"-prefixed Tt/bytes_read.
    it 'parses LINES[0] correctly' do
      entry = HAProxyLogParser.parse(LINES[0])
      entry.client_ip.should == '10.0.8.2'
      entry.client_port.should == 34028
      # NOTE(review): the seventh Time.local argument is microseconds, while
      # the ".429" in the log line is a millisecond fraction -- confirm that
      # 429 (rather than 429_000) is the intended expectation.
      entry.accept_date.should == Time.local(2011, 8, 9, 20, 30, 46, 429)
      entry.frontend_name.should == 'proxy-out'
      entry.backend_name.should == 'proxy-out'
      entry.server_name.should == 'cache1'
      entry.tq.should == 1
      entry.tw.should == 0
      entry.tc.should == 2
      entry.tr.should == 126
      entry.tt.should == 128
      entry.status_code.should == 301
      entry.bytes_read.should == 223
      entry.captured_request_cookie.should == {}
      entry.captured_response_cookie.should == {}
      entry.termination_state.should == '----'
      entry.actconn.should == 617
      entry.feconn.should == 523
      entry.beconn.should == 336
      entry.srv_conn.should == 168
      entry.retries.should == 0
      entry.srv_queue.should == 0
      entry.backend_queue.should == 0
      entry.captured_request_headers.should == ['www.sytadin.equipement.gouv.fr', '', 'http://trafic.1wt.eu/']
      entry.captured_response_headers.should == ['Apache', '230', '', '', 'http://www.sytadin.']
      entry.http_request.should == 'GET http://www.sytadin.equipement.gouv.fr/ HTTP/1.1'
    end

    # Line with captured cookies and "#XX"-escaped characters in the captured
    # request headers.
    it 'parses LINES[1] correctly' do
      entry = HAProxyLogParser.parse(LINES[1])
      entry.client_ip.should == '192.168.1.215'
      entry.client_port.should == 50679
      # NOTE(review): same microsecond/millisecond question as for LINES[0].
      entry.accept_date.should == Time.local(2012, 5, 21, 1, 35, 46, 146)
      entry.frontend_name.should == 'webapp'
      entry.backend_name.should == 'webapp_backend'
      entry.server_name.should == 'web09'
      entry.tq.should == 27
      entry.tw.should == 0
      entry.tc.should == 1
      entry.tr.should == 0
      entry.tt.should == 217
      entry.status_code.should == 200
      entry.bytes_read.should == 1367
      entry.captured_request_cookie.should == {'session' => 'abc'}
      entry.captured_response_cookie.should == {'session' => 'xyz'}
      entry.termination_state.should == '----'
      entry.actconn.should == 600
      entry.feconn.should == 529
      entry.beconn.should == 336
      entry.srv_conn.should == 158
      entry.retries.should == 0
      entry.srv_queue.should == 0
      entry.backend_queue.should == 0
      entry.captured_request_headers.should == ['|| {5F41}', 'http://google.com/', '']
      entry.captured_response_headers.should == ['1270925568', '', '']
      entry.http_request.should == 'GET /images/image.gif HTTP/1.1'
    end

    it 'returns nil if the line is invalid' do
      HAProxyLogParser.parse('asdf jkl;').should be_nil
    end
  end
end
data/spec/sample.log ADDED
@@ -0,0 +1,2 @@
1
+ Aug 9 20:30:46 localhost haproxy[2022]: 10.0.8.2:34028 [09/Aug/2011:20:30:46.429] proxy-out proxy-out/cache1 1/0/2/126/+128 301 +223 - - ---- 617/523/336/168/0 0/0 {www.sytadin.equipement.gouv.fr||http://trafic.1wt.eu/} {Apache|230|||http://www.sytadin.} "GET http://www.sytadin.equipement.gouv.fr/ HTTP/1.1"
2
+ May 21 01:35:46 10.18.237.5 haproxy[26747]: 192.168.1.215:50679 [21/May/2012:01:35:46.146] webapp webapp_backend/web09 27/0/1/0/217 200 1367 session=abc session=xyz ---- 600/529/336/158/0 0/0 {#7C#7C #7B5F41#7D|http://google.com/|} {1270925568||} "GET /images/image.gif HTTP/1.1"
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: haproxy_log_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Toby Hsieh
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-12-19 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: treetop
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: rspec
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: Parser for HAProxy logs in the HTTP log format
50
+ email:
51
+ executables: []
52
+
53
+ extensions: []
54
+
55
+ extra_rdoc_files: []
56
+
57
+ files:
58
+ - lib/haproxy_log_parser/entry.rb
59
+ - lib/haproxy_log_parser/line.treetop
60
+ - lib/haproxy_log_parser.rb
61
+ - README.rdoc
62
+ - VERSION
63
+ - haproxy_log_parser.gemspec
64
+ - spec/haproxy_log_parser_spec.rb
65
+ - spec/sample.log
66
+ has_rdoc: true
67
+ homepage: https://github.com/tobyhs/haproxy_log_parser
68
+ licenses: []
69
+
70
+ post_install_message:
71
+ rdoc_options: []
72
+
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 3
90
+ segments:
91
+ - 0
92
+ version: "0"
93
+ requirements: []
94
+
95
+ rubyforge_project:
96
+ rubygems_version: 1.3.7
97
+ signing_key:
98
+ specification_version: 3
99
+ summary: Parser for HAProxy logs in the HTTP log format
100
+ test_files:
101
+ - spec/haproxy_log_parser_spec.rb
102
+ - spec/sample.log