logfile_interval 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ module LogfileInterval
2
+ VERSION = "1.0.0"
3
+ end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'logfile_interval/version'
5
+
6
+ Gem::Specification.new do |spec| # packaging metadata for the logfile_interval gem
7
+ spec.name = "logfile_interval"
8
+ spec.version = LogfileInterval::VERSION # read from lib/logfile_interval/version.rb, required above
9
+ spec.authors = ["Philippe Le Rohellec"]
10
+ spec.email = ["philippe@lerohellec.com"]
11
+ spec.description = %q{Logfile parser and aggregator}
12
+ spec.summary = %q{Aggregate logfile data into intervals}
13
+ spec.homepage = "" # NOTE(review): homepage is blank - fill in the project URL before the next release
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/) # package exactly the files tracked by git (needs git at build time)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # basenames of every tracked file under bin/
18
+ spec.test_files = spec.files.grep(%r{^spec/}) # every tracked file under spec/
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency(%q<debugger>, [">= 0"])
23
+ spec.add_development_dependency(%q<rspec>, ["~> 2.14.0"]) # the specs use the RSpec 2 `should` syntax
24
+ spec.add_development_dependency "rake"
25
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+ require File.join(File.dirname(__FILE__), '..', 'support/lib/timing_log')
3
+
4
+ module LogfileInterval
5
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
6
+
7
+ describe IntervalBuilder do
8
+ before :each do
9
+ @logfiles = ["#{data_dir}/timing.log", "#{data_dir}/timing.log.1" ]
10
+ @set = LogfileSet.new(@logfiles, LineParser::TimingLog)
11
+ @builder = IntervalBuilder.new(@set, 300)
12
+ end
13
+
14
+ context :each_interval do
15
+ before :each do
16
+ Time.stub(:now).and_return(Time.new(2013,12,01,16,0,0,'-08:00'))
17
+ @intervals = []
18
+ @builder.each_interval do |interval|
19
+ @intervals << interval
20
+ end
21
+ end
22
+
23
+ it 'finds intervals from all logfiles' do
24
+ @intervals.size.should == 2
25
+ end
26
+
27
+ context 'first interval' do
28
+ it 'got records from both logfiles' do
29
+ @intervals.first.size.should == 4
30
+ @intervals.first[:total_time].should == 700.0/4
31
+ @intervals.first[:num_bytes].should == 52000
32
+ @intervals.first[:action]['posts#index'] = 1
33
+ @intervals.first[:action]['posts#create'] = 1
34
+ @intervals.first[:action]['posts#show'] = 2
35
+ end
36
+ end
37
+
38
+ context 'second interval' do
39
+ it 'got records from second logfile only' do
40
+ @intervals.last.size.should == 2
41
+ @intervals.last[:total_time].should == 300
42
+ @intervals.last[:num_bytes].should == 41000
43
+ @intervals.last[:action]['posts#index'] = 1
44
+ @intervals.last[:action]['posts#show'] = 1
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+ require File.join(File.dirname(__FILE__), '..', 'support/lib/timing_log')
3
+
4
+ module LogfileInterval
5
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
6
+
7
+ describe Interval do
8
+ it 'gets instantiated with empty data' do
9
+ end_time = Time.new(2013, 12, 01, 16, 00, 00, '-08:00')
10
+ interval = Interval.new(end_time, 300, LineParser::TimingLog)
11
+ interval.size.should == 0
12
+ interval[:total_time].should == 0
13
+ interval[:num_bytes].should == 0
14
+ interval[:action].should be_a(Hash)
15
+ end
16
+
17
+ context :add_record do
18
+ before :each do
19
+ @end_time = Time.new(2013, 12, 01, 16, 00, 00, '-08:00')
20
+ @length = 300
21
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
22
+ end
23
+
24
+ it 'rejects record out of interval' do
25
+ oor_record = LineParser::TimingLog.create_record('1385942450, posts#index, 100, 20000')
26
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
27
+ end
28
+
29
+ it 'rejects record at interval start_time' do
30
+ oor_record = LineParser::TimingLog.create_record('1385942100, posts#index, 100, 20000')
31
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
32
+ end
33
+
34
+ it 'adds 1 record to interval' do
35
+ record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
36
+ @interval.add_record(record1)
37
+
38
+ @interval.size.should == 1
39
+ @interval[:total_time].should == 100
40
+ @interval[:num_bytes].should == 20000
41
+ @interval[:action]['posts#index'].should == 1
42
+ end
43
+
44
+ context '3 records' do
45
+ before :each do
46
+ record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
47
+ @interval.add_record(record1)
48
+ record2 = LineParser::TimingLog.create_record('1385942300, posts#show, 50, 10000')
49
+ @interval.add_record(record2)
50
+ record3 = LineParser::TimingLog.create_record('1385942200, posts#show, 60, 12000')
51
+ @interval.add_record(record3)
52
+ end
53
+
54
+ it 'increments size' do
55
+ @interval.size.should == 3
56
+ end
57
+
58
+ it 'averages columns with average agg_function' do
59
+ @interval[:total_time].should == 70
60
+ end
61
+
62
+ it 'sums up columns with sum agg_function' do
63
+ @interval[:num_bytes].should == 42000
64
+ end
65
+
66
+ it 'groups and counts columns with group agg_function' do
67
+ @interval[:action]['posts#index'].should == 1
68
+ @interval[:action]['posts#show'].should == 2
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,47 @@
1
+ require 'spec_helper'
2
+
3
+ module LogfileInterval
4
+ module LineParser
5
+ module Aggregator
6
+ describe Aggregator do
7
+ it 'finds the aggregator class' do
8
+ Aggregator.klass(:sum).should == Sum
9
+ Aggregator.klass(:average).should == Average
10
+ Aggregator.klass(:group).should == Group
11
+ end
12
+ end
13
+
14
+ describe Sum do
15
+ it 'sums up values' do
16
+ sum = Sum.new
17
+ sum.add(3)
18
+ sum.add(5)
19
+ sum.value.should == 8
20
+ end
21
+ end
22
+
23
+ describe Average do
24
+ it 'averages values' do
25
+ sum = Average.new
26
+ sum.add(3)
27
+ sum.add(5)
28
+ sum.value.should == 4
29
+ end
30
+ end
31
+
32
+ describe Group do
33
+ it 'groups values and increment counters' do
34
+ g = Group.new
35
+ g.add('200')
36
+ g.add('500')
37
+ g.add('301')
38
+ g.add('200')
39
+ g.value.should be_a(Hash)
40
+ g.value.should include({'200' => 2})
41
+ g.value.should include({'301' => 1})
42
+ g.value.should include({'500' => 1})
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,84 @@
1
+ require 'spec_helper'
2
+
3
+ module LogfileInterval
4
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
5
+
6
+ module LineParser
7
+
8
+ describe Base do
9
+ before :each do
10
+ @line = '74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"'
11
+ end
12
+
13
+ it 'parses an access.log line' do
14
+ parsed_line = AccessLog.new(@line)
15
+ parsed_line.ip.should == '74.75.19.145'
16
+ parsed_line.length.should == 7855
17
+ parsed_line.timestamp.should == '31/Mar/2013:06:54:12 -0700'
18
+ parsed_line.time.should == Time.strptime('31/Mar/2013:06:54:12 -0700', '%d/%b/%Y:%H:%M:%S %z')
19
+ end
20
+
21
+ it 'returns an invalid record if line is malformed' do
22
+ line = 'abcdef'
23
+ record = 'unset'
24
+ lambda { record = AccessLog.new(line) }.should_not raise_error
25
+ record.valid?.should be_false
26
+ end
27
+
28
+ context :create_record do
29
+
30
+ it 'instanciates a new AccessLog object' do
31
+ record = AccessLog.create_record(@line)
32
+ record.should be_a(AccessLog)
33
+ record.ip.should == '74.75.19.145'
34
+ end
35
+
36
+ it 'returns nil if line is malformed' do
37
+ line = 'abcdef'
38
+ record = AccessLog.create_record(line)
39
+ record.should be_nil
40
+ end
41
+ end
42
+ end
43
+
44
+ describe 'Broken parsers' do
45
+ class NoRegexLog < Base
46
+ add_column :name => 'ip', :pos => 1, :agg_function => :group
47
+ end
48
+
49
+ class NoColumnLog < Base
50
+ set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
51
+ end
52
+
53
+ before :each do
54
+ @line = '74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"'
55
+ end
56
+
57
+ it 'must fail unless a regex is set' do
58
+ lambda { NoRegexLog.new(@line) }.should raise_error ConfigurationError
59
+ end
60
+
61
+ it 'must fail unless a column is configured'do
62
+ lambda { NoColumnLog.new(@line) }.should raise_error ConfigurationError
63
+ end
64
+ end
65
+
66
+ describe TimingLog do
67
+ before :each do
68
+ # 1385942400 = 2013/12/01 16:00:00
69
+ @line = '1385942400, posts#index, 100, 20000'
70
+ end
71
+
72
+ it 'parses a timing line' do
73
+ record = TimingLog.create_record(@line)
74
+ record.should_not be_nil
75
+ record.time.should == Time.new(2013, 12, 01, 16, 00, 00, '-08:00')
76
+ record.action.should == 'posts#index'
77
+ record.total_time.should == 100
78
+ record.num_bytes.should == 20000
79
+ end
80
+ end
81
+ end
82
+ end
83
+
84
+
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+ require File.join(File.dirname(__FILE__), '..', 'support/lib/access_log')
3
+
4
+ module LogfileInterval
5
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
6
+
7
+ describe LogfileSet do
8
+ before :each do
9
+ @logfiles = ["#{data_dir}/access.log.2", "#{data_dir}/access.log.1"]
10
+ @set = LogfileSet.new(@logfiles, LineParser::AccessLog)
11
+ end
12
+
13
+ it 'ordered_filenames should return the most recent file first' do
14
+ @set.ordered_filenames.should == @logfiles.reverse
15
+ end
16
+
17
+ it 'each_line should enumerate each line in file backwards' do
18
+ lines = []
19
+ @set.each_line do |line|
20
+ lines << line
21
+ end
22
+
23
+ lines.first.should == '66.249.67.176 - - [23/Jun/2013:17:00:01 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"'
24
+ lines.last.should == '12.24.48.96 - - [23/Jun/2013:16:49:00 -0800] "GET /package/core/raring/universe/proposed/bash HTTP/1.1" 200 4555 "-" "Bing)"'
25
+ end
26
+
27
+ it 'each_parsed_line should enumerate each line backwards' do
28
+ records = []
29
+ @set.each_parsed_line do |record|
30
+ records << record
31
+ end
32
+
33
+ records.first.time.should == Time.new(2013, 06, 23, 17, 00, 01, '-08:00')
34
+ records.first.code.should == '200'
35
+ records.last.time.should == Time.new(2013, 06, 23, 16, 49, 00, '-08:00')
36
+ records.last.code.should == '200'
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,39 @@
1
+ require 'spec_helper'
2
+ require File.join(File.dirname(__FILE__), '..', 'support/lib/access_log')
3
+
4
+ module LogfileInterval
5
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
6
+
7
+ describe Logfile do
8
+ before :each do
9
+ @alf = Logfile.new("#{data_dir}/access.log", LineParser::AccessLog)
10
+ end
11
+
12
+ it 'first_timestamp returns time of first line in file' do
13
+ #01/Jan/2012:00:57:47 -0800
14
+ @alf.first_timestamp.should == Time.new(2012, 01, 01, 00, 57, 47, '-08:00')
15
+ end
16
+
17
+ it 'each_line should enumerate each line in file backwards' do
18
+ lines = []
19
+ @alf.each_line do |line|
20
+ lines << line
21
+ end
22
+
23
+ lines.first.should == '78.54.172.146 - - [01/Jan/2012:16:30:51 -0800] "GET /package/core/oneiric/main/base/abrowser-6.0 HTTP/1.1" 200 6801 "http://www.google.com/url?sa=t&rct=j&q=abrowser 6.0&esrc=s&source=web&cd=4&sqi=2&ved=0CDYQFjAD&url=http%3A%2F%2Fwww.ubuntuupdates.org%2Fpackages%2Fshow%2F268762&ei=s-QlT8vJFon1sgb54unBDg&usg=AFQjCNHCHC0bxTf6aXAfUwT6Erjta6WLaQ&sig2=ceCi1odtaB8Vcf6IWg2a3w" "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"'
24
+ lines.last.should == '66.249.67.176 - - [01/Jan/2012:00:57:47 -0800] "GET /packages/show/1 HTTP/1.1" 301 185 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"'
25
+ end
26
+
27
+ it 'each_parsed_line should enumerate each line backwards' do
28
+ records = []
29
+ @alf.each_parsed_line do |record|
30
+ records << record
31
+ end
32
+
33
+ records.first.time.should == Time.new(2012, 01, 01, 16, 30, 51, '-08:00')
34
+ records.first.code.should == '200'
35
+ records.last.time.should == Time.new(2012, 01, 01, 00, 57, 47, '-08:00')
36
+ records.last.code.should == '301'
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require 'rspec'
5
+ require 'logfile_interval'
6
+
7
+ # Requires supporting ruby files with custom matchers and macros, etc,
8
+ # in spec/support/ and its subdirectories.
9
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
@@ -0,0 +1,22 @@
1
+ module LogfileInterval
2
+ data_dir = File.join(File.dirname(__FILE__), '..', 'support/logfiles')
3
+
4
+ module LineParser
5
+
6
+ class AccessLog < Base
7
+ # Example line:
8
+ # 74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"
9
+
10
+ set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
11
+
12
+ add_column :name => 'ip', :pos => 1, :agg_function => :group
13
+ add_column :name => 'timestamp', :pos => 2, :agg_function => :timestamp
14
+ add_column :name => 'code', :pos => 4, :agg_function => :group
15
+ add_column :name => 'length', :pos => 5, :agg_function => :average, :conversion => :integer
16
+
17
+ def time
18
+ Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,19 @@
1
+ module LogfileInterval
2
+ module LineParser
3
+ class TimingLog < Base # spec-support parser for the comma-separated "timing" log fixtures
4
+ # Line format (timestamp is in epoch seconds, see #time):
5
+ # timestamp, controller#action, total_time, bytes
6
+
7
+ set_regex /^(\d+),\s*(\w+#\w+),\s*(\d+),\s*(\d+)$/ # four captures; each :pos below picks one of them
8
+
9
+ add_column :name => :timestamp, :pos => 1, :agg_function => :timestamp
10
+ add_column :name => :action, :pos => 2, :agg_function => :group # counted per distinct action
11
+ add_column :name => :total_time, :pos => 3, :agg_function => :average, :conversion => :integer
12
+ add_column :name => :num_bytes, :pos => 4, :agg_function => :sum, :conversion => :integer
13
+
14
+ def time # Time for this record, reading the timestamp column as seconds since the Unix epoch
15
+ Time.at(self.timestamp.to_i)
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,7 @@
1
+ 66.249.67.176 - - [01/Jan/2012:00:57:47 -0800] "GET /packages/show/1 HTTP/1.1" 301 185 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
2
+ 173.192.238.51 - - [01/Jan/2012:00:59:31 -0800] "GET /?dist=all+releases&noppa=2&format=atom HTTP/1.1" 301 185 "-" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.19; aggregator:Spinn3r (Spinn3r 3.1);"
3
+ 66.249.68.148 - - [01/Jan/2012:00:04:04 -0800] "GET /package/core/hardy/main/updates/libldap2-dev HTTP/1.1" 200 7058 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
4
+ 78.54.172.146 - - [01/Jan/2012:16:30:51 -0800] "GET /package/core/oneiric/main/base/abrowser HTTP/1.1" 200 6801 "http://www.google.com/url?sa=t&rct=j&q=abrowser&esrc=s&source=web&cd=4&sqi=2&ved=0CDYQFjAD&url=http%3A%2F%2Fwww.ubuntuupdates.org%2Fpackages%2Fshow%2F268762&ei=s-QlT8vJFon1sgb54unBDg&usg=AFQjCNHCHC0bxTf6aXAfUwT6Erjta6WLaQ&sig2=ceCi1odtaB8Vcf6IWg2a3w" "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"
5
+ 78.54.172.146 - - [01/Jan/2012:16:30:51 -0800] "GET /package/core/oneiric/main/base/firefox HTTP/1.1" 200 6801 "http://www.google.com/url?sa=t&rct=j&q=firefox&esrc=s&source=web&cd=4&sqi=2&ved=0CDYQFjAD&url=http%3A%2F%2Fwww.ubuntuupdates.org%2Fpackages%2Fshow%2F268762&ei=s-QlT8vJFon1sgb54unBDg&usg=AFQjCNHCHC0bxTf6aXAfUwT6Erjta6WLaQ&sig2=ceCi1odtaB8Vcf6IWg2a3w" "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"
6
+ 78.54.172.146 - - [01/Jan/2012:16:30:51 -0800] "GET /package/show/2 HTTP/1.1" 302 6801 "http://www.google.com/url?sa=t&rct=j&q=abrowser 6.0&esrc=s&source=web&cd=4&sqi=2&ved=0CDYQFjAD&url=http%3A%2F%2Fwww.ubuntuupdates.org%2Fpackages%2Fshow%2F268762&ei=s-QlT8vJFon1sgb54unBDg&usg=AFQjCNHCHC0bxTf6aXAfUwT6Erjta6WLaQ&sig2=ceCi1odtaB8Vcf6IWg2a3w" "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"
7
+ 78.54.172.146 - - [01/Jan/2012:16:30:51 -0800] "GET /package/core/oneiric/main/base/abrowser-6.0 HTTP/1.1" 200 6801 "http://www.google.com/url?sa=t&rct=j&q=abrowser 6.0&esrc=s&source=web&cd=4&sqi=2&ved=0CDYQFjAD&url=http%3A%2F%2Fwww.ubuntuupdates.org%2Fpackages%2Fshow%2F268762&ei=s-QlT8vJFon1sgb54unBDg&usg=AFQjCNHCHC0bxTf6aXAfUwT6Erjta6WLaQ&sig2=ceCi1odtaB8Vcf6IWg2a3w" "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1"
@@ -0,0 +1,3 @@
1
+ 66.249.67.176 - - [23/Jun/2013:16:58:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
2
+ 12.24.48.96 - - [23/Jun/2013:16:59:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 4555 "-" "Bing)"
3
+ 66.249.67.176 - - [23/Jun/2013:17:00:01 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
@@ -0,0 +1,3 @@
1
+ 12.24.48.96 - - [23/Jun/2013:16:49:00 -0800] "GET /package/core/raring/universe/proposed/bash HTTP/1.1" 200 4555 "-" "Bing)"
2
+ 66.249.67.176 - - [23/Jun/2013:16:56:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
3
+