logfile_interval 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 304480b4bf3c9525f74bf9b835b3ec7fe72b64be
4
- data.tar.gz: ca5f34f5a987aca00b22ea838e083c744793c545
3
+ metadata.gz: e0f1a02f57605b2eb8eacf703a456dbe9038abcd
4
+ data.tar.gz: f1d68cd7e72dc15023f6c3f8ae72d7e2eb104102
5
5
  SHA512:
6
- metadata.gz: e36b8d0c42fbaafed596dcb5c2770450bf7e10ce68d92326f3f04eab197bbb4faa8ea8fa0f7706b55883fd2d65a876435ab689533222148151787c3c58861ba6
7
- data.tar.gz: 34bd857fd154d29703959693c7b33b0e15b4ab0824f97e84f4362aad1e009a332e8f0b1c96fdcde1321f3462666e3bd862c9f53cb15aab41807f6321346551b0
6
+ metadata.gz: 3bc11945565915aaa028f61f01b670340a2765640abf44aca2d8b1d36cb6a6b09c8626b7e3d964e3e3a7f0479893240defa0b600feec3f98e1f6c5176c5cda8a
7
+ data.tar.gz: cb91a1b9aad9bd67aa5fa1ccfa9a9f0d69cab5bbc177ecdfc4ac4f80d08c1edcf39cbf5df250c8251913ca50a2d4e9abf17e92335c41b9b10b17dc32bddf5cc5
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ script: bundle exec rspec spec
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- logfile_interval (0.0.1)
4
+ logfile_interval (1.1.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -14,6 +14,8 @@ GEM
14
14
  debugger-linecache (1.2.0)
15
15
  debugger-ruby_core_source (1.2.4)
16
16
  diff-lcs (1.2.5)
17
+ docile (1.1.1)
18
+ multi_json (1.8.2)
17
19
  rake (10.1.0)
18
20
  rspec (2.14.1)
19
21
  rspec-core (~> 2.14.0)
@@ -23,6 +25,11 @@ GEM
23
25
  rspec-expectations (2.14.4)
24
26
  diff-lcs (>= 1.1.3, < 2.0)
25
27
  rspec-mocks (2.14.4)
28
+ simplecov (0.8.2)
29
+ docile (~> 1.1.0)
30
+ multi_json
31
+ simplecov-html (~> 0.8.0)
32
+ simplecov-html (0.8.0)
26
33
 
27
34
  PLATFORMS
28
35
  ruby
@@ -33,3 +40,4 @@ DEPENDENCIES
33
40
  logfile_interval!
34
41
  rake
35
42
  rspec (~> 2.14.0)
43
+ simplecov
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
- # LogfileInterval
1
+ # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval)
2
2
 
3
- Logfile parser and aggregator
3
+ Logfile parser and aggregator.
4
+
5
+ It iterates over each line of logfiles, parses each line and aggregates all lines in a time interval into a single
6
+ record made up of the sum, the average, the number of occurrences per value or average of the deltas between lines.
4
7
 
5
8
  ## Installation
6
9
 
@@ -19,6 +22,7 @@ Or install it yourself as:
19
22
  ## Usage
20
23
 
21
24
  ### Write a LineParser class
25
+ #### Example
22
26
  ```ruby
23
27
  module LogfileInterval
24
28
  module LineParser
@@ -28,10 +32,12 @@ module LogfileInterval
28
32
 
29
33
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
30
34
 
31
- add_column :name => 'ip', :pos => 1, :agg_function => :group
32
- add_column :name => 'timestamp', :pos => 2, :agg_function => :timestamp
33
- add_column :name => 'code', :pos => 4, :agg_function => :group
34
- add_column :name => 'length', :pos => 5, :agg_function => :average, :conversion => :integer
35
+ add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
36
+ add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
37
+ add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
38
+ add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
39
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
40
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
35
41
 
36
42
  def time
37
43
  Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
@@ -40,6 +46,29 @@ module LogfileInterval
40
46
  end
41
47
  end
42
48
  ```
49
+ #### Writing a parser class
50
+ The parser must define:
51
+ * A regex that extracts the fields out of each line.
52
+ * A set of columns that will be parsed and aggregated in time intervals.
53
+ * A 'time' method that converts the mandatory timestamp field of a line into a Time object.
54
+
55
+ #### Attributes of a column:
56
+ * name: a parsed record will have a method with that name returning the value found at that position
57
+ * pos: the position of the captured field in the regex matched data
58
+ * aggregator: the aggregation mode for this field
59
+ * conversion: the parser will convert the field to an integer or a float when building the parsed record
60
+ * group_by: group_by value is the name of another field. The aggregator will apply the aggregator to this field for each distinct value found in the other field.
61
+
62
+ #### Aggregator types and options
63
+ * timestamp: the timestamp field will be used to determine to which interval the line belongs, each line MUST have a timestamp
64
+ * count: the aggregator will count the number of occurrences of this field
65
+ * without the group_by option, it will just count the total number of lines (probably useless)
66
+ * with a group_by option pointing to the same field as the current one, it will count the number of occurrences
67
+ per distinct value of this column
68
+ * with a group_by option pointing to another field, it will count the number of occurrences of (this field, other field) pairs.
69
+ * average: the aggregator will calculate the average value of this field
70
+ * sum: the aggregator will add up the values of this field
71
+ * delta: the aggregator will calculate the difference between each line and the next and will average all the deltas
43
72
 
44
73
  ### Iterate through lines of a single file
45
74
  And get a parsed record for each line.
@@ -58,8 +87,11 @@ log.each_parsed_line do |record|
58
87
  puts record.class # LineParser::AccessLog
59
88
  puts record.ip
60
89
  puts record.time
90
+ puts record.code
91
+ puts record.length
61
92
  end
62
93
  ```
94
+ **Note**: the Logfile iterators always start with the last line in the file and work their way backward.
63
95
 
64
96
  ### Iterate through lines of multiple files
65
97
  And get a parsed record for each line.
@@ -70,21 +102,25 @@ set.each_parsed_line do |record|
70
102
  puts record.class # LineParser::AccessLog
71
103
  end
72
104
  ```
105
+ **Note**: the LogfileSet iterators always start with the most recent file.
106
+
73
107
  ### Aggregate lines into intervals
74
108
  ```ruby
75
109
  length = 5.minutes
76
- interval_builder = LogfileInterval::IntervalBuilder.new(logfiles, length)
110
+ interval_builder = LogfileInterval::IntervalBuilder.new(set, length)
77
111
  interval_builder.each_interval do |interval|
78
112
  puts interval.class # LogfileInterval::Interval
79
113
  puts interval.start_time
80
- puts interval.length
114
+ puts interval[:length]
81
115
  interval[:ip].each do |ip, count|
82
116
  puts "#{ip}, #{count}"
83
117
  end
118
+ interval[:length_by_ip].each do |ip, avg_length|
119
+ puts "#{ip}, #{avg_length}"
120
+ end
84
121
  end
85
122
  ```
86
123
 
87
-
88
124
  ## Contributing
89
125
 
90
126
  1. Fork it
data/docs/design.rb CHANGED
@@ -40,7 +40,7 @@ module LogfileInterval
40
40
 
41
41
  class AccessLog < Base
42
42
  set_regex /blah/
43
- add_column :name => :foo, :pos => 1, :conversion => integer, :agg_function => :average
43
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
44
44
 
45
45
  end
46
46
  end
data/docs/design3.rb CHANGED
@@ -6,8 +6,8 @@ module LogfileInterval
6
6
  end
7
7
 
8
8
  def add_column(name, options)
9
- aggregator = Aggregators.klass(agg_function)
10
- @columns[name] = { :pos => pos, :agg_function => aggregator, :conversion => conversion }
9
+ agg = Aggregators.klass(aggregator)
10
+ @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
11
11
  define_method(name)
12
12
  end
13
13
 
@@ -26,7 +26,7 @@ module LogfileInterval
26
26
 
27
27
  class AccessLog < Base
28
28
  set_regex /blah/
29
- add_column :name => :foo, :pos => 1, :conversion => integer, :agg_function => :average
29
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
30
30
 
31
31
  def initialize(line)
32
32
  @data = self.class.parse(line)
@@ -34,8 +34,8 @@ module LogfileInterval
34
34
  end
35
35
 
36
36
  module Aggregator
37
- def self.klass(agg_function)
38
- case agg_function
37
+ def self.klass(aggregator)
38
+ case aggregator
39
39
  when :sum then Sum
40
40
  end
41
41
  end
@@ -54,7 +54,7 @@ module LogfileInterval
54
54
  end
55
55
  end
56
56
 
57
- class Group
57
+ class Count
58
58
  def initialize
59
59
  @val = Counter.new
60
60
  end
@@ -22,7 +22,7 @@ module LogfileInterval
22
22
  end
23
23
 
24
24
  def [](name)
25
- @data[name].value
25
+ @data[name].values
26
26
  end
27
27
 
28
28
  def add_record(record)
@@ -35,7 +35,8 @@ module LogfileInterval
35
35
 
36
36
  parser.columns.each do |name, options|
37
37
  next unless @data[name]
38
- @data[name].add(record[name])
38
+ group_by_value = record[options[:group_by]] if options[:group_by]
39
+ @data[name].add(record[name], group_by_value)
39
40
  end
40
41
  end
41
42
  end
@@ -1,59 +1,106 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  module Aggregator
4
- def self.klass(agg_function)
5
- case agg_function
6
- when :sum then Sum
7
- when :average then Average
8
- when :group then Group
4
+ def self.klass(aggregator)
5
+ case aggregator
6
+ when :sum then Sum
7
+ when :average then Average
8
+ when :count then Count
9
+ when :group_and_count then GroupAndCount
10
+ when :delta then Delta
9
11
  end
10
12
  end
11
13
 
12
- class Sum
14
+ class Base
15
+ include Enumerable
16
+
13
17
  def initialize
14
- @val = 0
18
+ @val = Counter.new
19
+ @size = Counter.new
15
20
  end
16
21
 
17
- def add(value)
18
- @val += value
22
+ def value(group = nil)
23
+ average(key(group))
19
24
  end
20
25
 
21
- def value
22
- @val
26
+ def values
27
+ if single_value?
28
+ value
29
+ else
30
+ self.inject({}) { |h, v| h[v[0]] = v[1]; h }
31
+ end
23
32
  end
24
- end
25
33
 
26
- class Average
27
- def initialize
28
- @val = 0
29
- @size = 0
34
+ def add(value, group_by = nil)
35
+ raise NotImplementedError
30
36
  end
31
37
 
32
- def add(value)
33
- @val += value
34
- @size += 1
38
+ private
39
+ def key(group_by = nil)
40
+ group_by ? group_by : :all
35
41
  end
36
42
 
37
- def value
38
- if @size > 0
39
- @val.to_f / @size.to_f
40
- else
41
- 0
43
+ def single_value?
44
+ return true if @val.empty?
45
+ @val.keys.count == 1 && @val.keys.first == :all
46
+ end
47
+
48
+ def each
49
+ @val.each_key do |k|
50
+ yield k, average(k)
42
51
  end
43
52
  end
53
+
54
+ def average(k)
55
+ @size[k] > 0 ? @val[k].to_f / @size[k].to_f : 0
56
+ end
44
57
  end
45
58
 
46
- class Group
47
- def initialize
48
- @val = Counter.new
59
+ class Sum < Base
60
+ def add(value, group_by = nil)
61
+ @val.add(key(group_by), value)
62
+ @size.set(key(group_by), 1)
49
63
  end
64
+ end
50
65
 
51
- def add(value)
52
- @val.increment(value)
66
+ class Average < Base
67
+ def add(value, group_by = nil)
68
+ @val.add(key(group_by), value)
69
+ @size.increment(key(group_by))
53
70
  end
71
+ end
72
+
73
+ class Count < Base
74
+ def add(value, group_by = nil)
75
+ @val.add(key(group_by), 1)
76
+ @size.set(key(group_by), 1)
77
+ end
78
+ end
54
79
 
55
- def value
56
- @val
80
+ class GroupAndCount < Base
81
+ def each
82
+ @val.each { |k, v| yield k, v }
83
+ end
84
+
85
+ def add(value, group_by)
86
+ raise ArgumentError, 'group_by argument is mandatory for GroupAndCount#add' unless group_by
87
+ @val.increment_subkey(value, key(group_by))
88
+ @size.set(key(group_by), 1)
89
+ end
90
+ end
91
+
92
+ class Delta < Base
93
+ def initialize
94
+ @previous = Counter.new
95
+ super
96
+ end
97
+
98
+ def add(value, group_by = nil)
99
+ if @previous.has_key?(key(group_by))
100
+ @val.add(key(group_by), @previous[key(group_by)] - value)
101
+ @size.increment(key(group_by))
102
+ end
103
+ @previous.set(key(group_by), value)
57
104
  end
58
105
  end
59
106
  end
@@ -1,11 +1,13 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
- AGGREGATION_FUNCTIONS = [ :sum, :average, :timestamp, :group ]
3
+ AGGREGATION_FUNCTIONS = [ :sum, :average, :timestamp, :count, :delta ]
4
4
 
5
5
  class InvalidLine < StandardError; end
6
6
  class ConfigurationError < StandardError; end
7
7
 
8
8
  class Base
9
+ attr_reader :data
10
+
9
11
  class << self
10
12
  attr_reader :regex
11
13
 
@@ -20,14 +22,19 @@ module LogfileInterval
20
22
  def add_column(options)
21
23
  name = options.fetch(:name)
22
24
  pos = options.fetch(:pos)
23
- agg_function = options.fetch(:agg_function)
25
+ aggregator = options.fetch(:aggregator)
24
26
  conversion = options.fetch(:conversion, :string)
25
- unless AGGREGATION_FUNCTIONS.include?(agg_function)
26
- raise ArgumentError, "agg_function must be one of #{AGGREGATION_FUNCTIONS.join(', ')}"
27
+ unless AGGREGATION_FUNCTIONS.include?(aggregator)
28
+ raise ArgumentError, "aggregator must be one of #{AGGREGATION_FUNCTIONS.join(', ')}"
29
+ end
30
+
31
+ if aggregator == :count && options[:group_by] && options[:group_by] != name
32
+ aggregator = :group_and_count
27
33
  end
28
34
 
29
- aggregator = Aggregator.klass(agg_function)
35
+ aggregator = Aggregator.klass(aggregator)
30
36
  columns[name] = { :pos => pos, :aggregator => aggregator, :conversion => conversion }
37
+ columns[name][:group_by] = options[:group_by]
31
38
 
32
39
  define_method(name) do
33
40
  @data[name]
@@ -1,22 +1,43 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  class Counter < Hash
4
- def increment(val)
5
- if self.has_key?(val)
6
- self[val] += 1
4
+ class ValueTypeError < StandardError; end
5
+
6
+ def increment(key)
7
+ if self.has_key?(key)
8
+ self[key] += 1
7
9
  else
8
- self[val] = 1
10
+ self[key] = 1
11
+ end
12
+ end
13
+
14
+ def increment_subkey(key, subkey)
15
+ if self.has_key?(key) && !self[key].is_a?(Counter)
16
+ raise "Value for #{key} is not a Counter"
17
+ end
18
+
19
+ unless self.has_key?(key)
20
+ self[key] = Counter.new
9
21
  end
22
+ self[key].increment(subkey)
10
23
  end
11
24
 
12
- def add(val, num)
13
- if self.has_key?(val)
14
- self[val] += num
25
+ def add(key, num)
26
+ if self.has_key?(key)
27
+ self[key] += num
15
28
  else
16
- self[val] = num
29
+ self[key] = num
17
30
  end
18
31
  end
19
32
 
33
+ def set(key, num)
34
+ self[key] = num
35
+ end
36
+
37
+ def [](key)
38
+ self.fetch(key, 0)
39
+ end
40
+
20
41
  def merge(c)
21
42
  c.keys.each do |k|
22
43
  self.add c[k]
@@ -10,8 +10,11 @@ module LogfileInterval
10
10
  def first_timestamp
11
11
  return nil unless File.exist?(@filename)
12
12
  File.open(@filename) do |f|
13
- line = parser.create_record(f.gets)
14
- line.time
13
+ while line = f.gets
14
+ if record = parser.create_record(line)
15
+ return record.time
16
+ end
17
+ end
15
18
  end
16
19
  end
17
20
 
@@ -1,3 +1,3 @@
1
1
  module LogfileInterval
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
8
8
  spec.version = LogfileInterval::VERSION
9
9
  spec.authors = ["Philippe Le Rohellec"]
10
10
  spec.email = ["philippe@lerohellec.com"]
11
- spec.description = %q{Logfile parser and aggregator}
12
- spec.summary = %q{Aggregate logfile data into intervals}
13
- spec.homepage = ""
11
+ spec.description = "Logfile parser and aggregator"
12
+ spec.summary = "Aggregate logfile data into intervals"
13
+ spec.homepage = "https://github.com/plerohellec/logfile_interval"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -19,7 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
- spec.add_development_dependency(%q<debugger>, [">= 0"])
23
- spec.add_development_dependency(%q<rspec>, ["~> 2.14.0"])
22
+ spec.add_development_dependency "debugger", [">= 0"]
23
+ spec.add_development_dependency "rspec", ["~> 2.14.0"]
24
24
  spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "simplecov"
25
26
  end
@@ -29,9 +29,9 @@ module LogfileInterval
29
29
  @intervals.first.size.should == 4
30
30
  @intervals.first[:total_time].should == 700.0/4
31
31
  @intervals.first[:num_bytes].should == 52000
32
- @intervals.first[:action]['posts#index'] = 1
33
- @intervals.first[:action]['posts#create'] = 1
34
- @intervals.first[:action]['posts#show'] = 2
32
+ @intervals.first[:rss].round(5).should == 0.60
33
+ @intervals.first[:ip].should == 4
34
+ @intervals.first[:action].should == 4
35
35
  end
36
36
  end
37
37
 
@@ -40,8 +40,9 @@ module LogfileInterval
40
40
  @intervals.last.size.should == 2
41
41
  @intervals.last[:total_time].should == 300
42
42
  @intervals.last[:num_bytes].should == 41000
43
- @intervals.last[:action]['posts#index'] = 1
44
- @intervals.last[:action]['posts#show'] = 1
43
+ @intervals.last[:rss].round(5).should == 0.20
44
+ @intervals.last[:ip].should == 2
45
+ @intervals.last[:action].should == 2
45
46
  end
46
47
  end
47
48
  end
@@ -11,43 +11,66 @@ module LogfileInterval
11
11
  interval.size.should == 0
12
12
  interval[:total_time].should == 0
13
13
  interval[:num_bytes].should == 0
14
- interval[:action].should be_a(Hash)
14
+ interval[:action].should == 0
15
+ interval[:ip].should == 0
15
16
  end
16
17
 
17
18
  context :add_record do
18
19
  before :each do
19
20
  @end_time = Time.new(2013, 12, 01, 16, 00, 00, '-08:00')
20
21
  @length = 300
21
- @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
22
22
  end
23
23
 
24
- it 'rejects record out of interval' do
25
- oor_record = LineParser::TimingLog.create_record('1385942450, posts#index, 100, 20000')
26
- lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
27
- end
24
+ context 'basics' do
25
+ before :each do
26
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
27
+ end
28
+
29
+ it 'rejects record out of interval' do
30
+ oor_record = LineParser::TimingLog.create_record('1385942450, 192.168.0.5, posts#index, 100, 20000, 50.0')
31
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
32
+ end
33
+
34
+ it 'rejects record at interval start_time' do
35
+ oor_record = LineParser::TimingLog.create_record('1385942100, 192.168.0.5, posts#index, 100, 20000, 50.0')
36
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
37
+ end
38
+
39
+ it 'adds 1 record to interval' do
40
+ record1 = LineParser::TimingLog.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 50.0')
41
+ @interval.add_record(record1)
28
42
 
29
- it 'rejects record at interval start_time' do
30
- oor_record = LineParser::TimingLog.create_record('1385942100, posts#index, 100, 20000')
31
- lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
43
+ @interval.size.should == 1
44
+ @interval[:total_time].should == 100
45
+ @interval[:num_bytes].should == 20000
46
+ @interval[:action].should == 1
47
+ @interval[:ip].should == 1
48
+ end
32
49
  end
33
50
 
34
- it 'adds 1 record to interval' do
35
- record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
36
- @interval.add_record(record1)
51
+ context 'with count and group by options' do
52
+ it 'creates an aggregator of type GroupAndCount' do
53
+ expect(LineParser::Aggregator::GroupAndCount).to receive(:new)
54
+ interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
55
+ end
37
56
 
38
- @interval.size.should == 1
39
- @interval[:total_time].should == 100
40
- @interval[:num_bytes].should == 20000
41
- @interval[:action]['posts#index'].should == 1
57
+ it 'add_record accepts key and subkey' do
58
+ interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
59
+ record1 = LineParser::TimingLogWithGrouping.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
60
+ interval.add_record(record1)
61
+ interval.size.should == 1
62
+ end
42
63
  end
43
64
 
44
- context '3 records' do
65
+ context 'with 3 records' do
45
66
  before :each do
46
- record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
67
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
68
+
69
+ record1 = LineParser::TimingLog.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
47
70
  @interval.add_record(record1)
48
- record2 = LineParser::TimingLog.create_record('1385942300, posts#show, 50, 10000')
71
+ record2 = LineParser::TimingLog.create_record('1385942300, 192.168.0.5, posts#show, 50, 10000, 51.0')
49
72
  @interval.add_record(record2)
50
- record3 = LineParser::TimingLog.create_record('1385942200, posts#show, 60, 12000')
73
+ record3 = LineParser::TimingLog.create_record('1385942200, 10.10.10.10, posts#show, 60, 12000, 50.0')
51
74
  @interval.add_record(record3)
52
75
  end
53
76
 
@@ -55,17 +78,72 @@ module LogfileInterval
55
78
  @interval.size.should == 3
56
79
  end
57
80
 
58
- it 'averages columns with average agg_function' do
81
+ it 'averages columns with average aggregator' do
59
82
  @interval[:total_time].should == 70
60
83
  end
61
84
 
62
- it 'sums up columns with sum agg_function' do
85
+ it 'sums up columns with sum aggregator' do
63
86
  @interval[:num_bytes].should == 42000
64
87
  end
65
88
 
66
- it 'groups and counts columns with group agg_function' do
67
- @interval[:action]['posts#index'].should == 1
68
- @interval[:action]['posts#show'].should == 2
89
+ it 'averages the delta columns with delta aggregator' do
90
+ @interval[:rss].should == 1.5
91
+ end
92
+
93
+ it 'counts columns with group aggregator' do
94
+ @interval[:ip].should == 3
95
+ @interval[:action].should == 3
96
+ end
97
+ end
98
+
99
+ context 'with group_by key' do
100
+ before :each do
101
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
102
+
103
+ record1 = LineParser::TimingLogWithGrouping.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
104
+ @interval.add_record(record1)
105
+ record2 = LineParser::TimingLogWithGrouping.create_record('1385942300, 192.168.0.5, posts#show, 50, 10000, 51.0')
106
+ @interval.add_record(record2)
107
+ record3 = LineParser::TimingLogWithGrouping.create_record('1385942200, 192.168.0.5, posts#show, 60, 12000, 50.0')
108
+ @interval.add_record(record3)
109
+ record4 = LineParser::TimingLogWithGrouping.create_record('1385942180, 10.10.10.10, posts#index, 100, 20000, 48.0')
110
+ @interval.add_record(record4)
111
+ end
112
+
113
+ it 'counts value column per group column' do
114
+ @interval[:action].should be_a(Hash)
115
+ @interval[:action].size.should == 2
116
+ @interval[:action]['posts#index'].should == 2
117
+ @interval[:action]['posts#show'].should == 2
118
+ end
119
+
120
+ it 'counts value and group_by pairs' do
121
+ @interval[:ip_by_action].should be_a(Hash)
122
+ @interval[:ip_by_action]['192.168.0.5'].should be_a(Hash)
123
+ @interval[:ip_by_action]['192.168.0.5']['posts#index'].should == 1
124
+ @interval[:ip_by_action]['192.168.0.5']['posts#show'].should == 2
125
+ @interval[:ip_by_action]['10.10.10.10']['posts#index'].should == 1
126
+ end
127
+
128
+ it 'averages value column per group column' do
129
+ @interval[:total_time].should be_a(Hash)
130
+ @interval[:total_time].size.should == 2
131
+ @interval[:total_time]['posts#index'].should == 100
132
+ @interval[:total_time]['posts#show'].should == 55
133
+ end
134
+
135
+ it 'sums up value column per group column' do
136
+ @interval[:num_bytes].should be_a(Hash)
137
+ @interval[:num_bytes].size.should == 2
138
+ @interval[:num_bytes]['posts#index'].should == 40000
139
+ @interval[:num_bytes]['posts#show'].should == 22000
140
+ end
141
+
142
+ it 'averages deltas on value column per group column' do
143
+ @interval[:rss].should be_a(Hash)
144
+ @interval[:rss].size.should == 2
145
+ @interval[:rss]['posts#index'].should == 5
146
+ @interval[:rss]['posts#show'].should == 1
69
147
  end
70
148
  end
71
149
  end
@@ -7,40 +7,204 @@ module LogfileInterval
7
7
  it 'finds the aggregator class' do
8
8
  Aggregator.klass(:sum).should == Sum
9
9
  Aggregator.klass(:average).should == Average
10
- Aggregator.klass(:group).should == Group
10
+ Aggregator.klass(:count).should == Count
11
+ Aggregator.klass(:group_and_count).should == GroupAndCount
12
+ Aggregator.klass(:delta).should == Delta
11
13
  end
12
14
  end
13
15
 
14
- describe Sum do
15
- it 'sums up values' do
16
- sum = Sum.new
17
- sum.add(3)
18
- sum.add(5)
19
- sum.value.should == 8
16
+ shared_examples 'an aggregator' do
17
+ let(:aggregator) { described_class.new }
18
+
19
+ [ :add, :value, :values ].each do |method|
20
+ it "responds to #{method}" do
21
+ aggregator.should respond_to(method)
22
+ end
23
+ end
24
+
25
+ context 'values' do
26
+ context 'with one group' do
27
+ before :each do
28
+ aggregator.add(5, :key1)
29
+ end
30
+
31
+ it 'returns a hash' do
32
+ aggregator.values.should be_a(Hash) unless aggregator.is_a?(Delta)
33
+ end
34
+ end
35
+
36
+ context 'with several groups' do
37
+ before :each do
38
+ aggregator.add(5, :key1)
39
+ aggregator.add(3, :key2)
40
+ aggregator.add(3, :key1)
41
+ end
42
+
43
+ it 'returns a hash' do
44
+ aggregator.values.should be_a(Hash)
45
+ end
46
+ end
47
+
48
+ context 'with no group' do
49
+ before :each do
50
+ aggregator.add(5)
51
+ aggregator.add(3)
52
+ end
53
+
54
+ it 'returns a numeric' do
55
+ aggregator.values.should be_a(Numeric) unless aggregator.is_a?(Count)
56
+ end
57
+ end
20
58
  end
21
59
  end
22
60
 
23
- describe Average do
24
- it 'averages values' do
25
- sum = Average.new
26
- sum.add(3)
27
- sum.add(5)
28
- sum.value.should == 4
61
+ [ Count, Sum, Average, Delta ]. each do |klass|
62
+ describe klass do
63
+ it_behaves_like 'an aggregator'
29
64
  end
30
65
  end
31
66
 
32
- describe Group do
33
- it 'groups values and increment counters' do
34
- g = Group.new
35
- g.add('200')
36
- g.add('500')
37
- g.add('301')
38
- g.add('200')
39
- g.value.should be_a(Hash)
40
- g.value.should include({'200' => 2})
41
- g.value.should include({'301' => 1})
42
- g.value.should include({'500' => 1})
67
+
68
+ describe 'without group_by key' do
69
+ describe Sum do
70
+ it 'sums up values' do
71
+ sum = Sum.new
72
+ sum.add(3)
73
+ sum.add(5)
74
+ sum.value.should == 8
75
+ end
43
76
  end
77
+
78
+ describe Average do
79
+ it 'averages values' do
80
+ avg = Average.new
81
+ avg.add(3)
82
+ avg.add(5)
83
+ avg.value.should == 4
84
+ end
85
+ end
86
+
87
+ describe Delta do
88
+ it 'averages delta values' do
89
+ d = Delta.new
90
+ d.add(1.4)
91
+ d.add(1.1)
92
+ d.add(1.0)
93
+ d.value.round(5).should == 0.2
94
+ end
95
+ end
96
+
97
+ describe Count do
98
+ it 'groups values and increment counters' do
99
+ g = Count.new
100
+ g.add('200')
101
+ g.add('500')
102
+ g.add('301')
103
+ g.add('200')
104
+ g.value.should == 4
105
+ end
106
+ end
107
+ end
108
+
109
+ describe 'with group_by key' do
110
+
111
+ describe Sum do
112
+ it 'sums up values by key' do
113
+ sum = Sum.new
114
+ sum.add(3, :key1)
115
+ sum.add(5, :key2)
116
+ sum.add(5, :key1)
117
+ sum.values.should be_a(Hash)
118
+ sum.values.size.should == 2
119
+ sum.value(:key1).should == 8
120
+ sum.values[:key1].should == 8
121
+ sum.value(:key2).should == 5
122
+ sum.values[:key2].should == 5
123
+ end
124
+ end
125
+
126
+
127
+ describe Average do
128
+ it 'averages values by key' do
129
+ avg = Average.new
130
+ avg.add(3, :key1)
131
+ avg.add(5, :key2)
132
+ avg.add(5, :key1)
133
+ avg.values.should be_a(Hash)
134
+ avg.values.size.should == 2
135
+ avg.value(:key1).should == 4
136
+ avg.values[:key1].should == 4
137
+ avg.value(:key2).should == 5
138
+ avg.values[:key2].should == 5
139
+ end
140
+ end
141
+
142
+ describe Count do
143
+ it 'groups values and increment counters' do
144
+ g = Count.new
145
+ g.add('200', '200')
146
+ g.add('500', '500')
147
+ g.add('301', '301')
148
+ g.add('200', '200')
149
+ g.values.should be_a(Hash)
150
+ g.values.should include({'200' => 2})
151
+ g.values.should include({'301' => 1})
152
+ g.values.should include({'500' => 1})
153
+ end
154
+ end
155
+
156
+ describe GroupAndCount do
157
+ it 'each yields a key and a hash' do
158
+ gac = GroupAndCount.new
159
+ gac.add :key1, :subkey1
160
+ gac.first.should be_an(Array)
161
+ gac.first.size.should == 2
162
+ gac.first[1].should be_a(Hash)
163
+ end
164
+
165
+ context :add do
166
+ before :each do
167
+ @gac = GroupAndCount.new
168
+ end
169
+
170
+ it 'requires a group_by argument' do
171
+ lambda { @gac.add('foo') }.should raise_error ArgumentError
172
+ end
173
+
174
+ it 'counts number of occurence of subkey for key' do
175
+ @gac.add :key1, :subkey1
176
+ @gac.add :key1, :subkey2
177
+ @gac.add :key2, :subkey1
178
+ @gac.add :key2, :subkey1
179
+ @gac.add :key2, :subkey3
180
+
181
+ @gac.values[:key1][:subkey1].should == 1
182
+ @gac.values[:key1][:subkey2].should == 1
183
+ @gac.values[:key2][:subkey1].should == 2
184
+ @gac.values[:key2][:subkey2].should == 0
185
+ @gac.values[:key2][:subkey3].should == 1
186
+ end
187
+ end
188
+ end
189
+
190
+ describe Delta do
191
+ it 'averages deltas by key' do
192
+ d = Delta.new
193
+ d.add(9, :key1)
194
+ d.add(10, :key2)
195
+ d.add(5, :key1)
196
+ d.add(8, :key2)
197
+ d.add(3, :key1)
198
+ d.add(5, :key2)
199
+ d.values.should be_a(Hash)
200
+ d.values.size.should == 2
201
+ d.value(:key1).should == 3
202
+ d.values[:key1].should == 3
203
+ d.value(:key2).should == 2.5
204
+ d.values[:key2].should == 2.5
205
+ end
206
+ end
207
+
44
208
  end
45
209
  end
46
210
  end
@@ -43,7 +43,7 @@ module LogfileInterval
43
43
 
44
44
  describe 'Broken parsers' do
45
45
  class NoRegexLog < Base
46
- add_column :name => 'ip', :pos => 1, :agg_function => :group
46
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
47
47
  end
48
48
 
49
49
  class NoColumnLog < Base
@@ -66,7 +66,7 @@ module LogfileInterval
66
66
  describe TimingLog do
67
67
  before :each do
68
68
  # 1385942400 = 2013/12/01 16:00:00
69
- @line = '1385942400, posts#index, 100, 20000'
69
+ @line = '1385942400, 192.168.0.5, posts#index, 100, 20000, 50.00'
70
70
  end
71
71
 
72
72
  it 'parses a timing line' do
@@ -76,6 +76,7 @@ module LogfileInterval
76
76
  record.action.should == 'posts#index'
77
77
  record.total_time.should == 100
78
78
  record.num_bytes.should == 20000
79
+ record.rss.should == 50.0
79
80
  end
80
81
  end
81
82
  end
@@ -32,8 +32,12 @@ module LogfileInterval
32
32
 
33
33
  records.first.time.should == Time.new(2012, 01, 01, 16, 30, 51, '-08:00')
34
34
  records.first.code.should == '200'
35
+ records.first.length.should == 6801
36
+ records.first.length_by_ip.should == 6801
35
37
  records.last.time.should == Time.new(2012, 01, 01, 00, 57, 47, '-08:00')
36
38
  records.last.code.should == '301'
39
+ records.last.length.should == 185
40
+ records.last.length_by_ip.should == 185
37
41
  end
38
42
  end
39
43
  end
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,13 @@
1
1
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
2
  $LOAD_PATH.unshift(File.dirname(__FILE__))
3
3
 
4
+ require 'simplecov'
5
+
6
+ SimpleCov.start do
7
+ add_filter '/spec/'
8
+ add_filter '/config/'
9
+ end
10
+
4
11
  require 'rspec'
5
12
  require 'logfile_interval'
6
13
 
@@ -9,10 +9,11 @@ module LogfileInterval
9
9
 
10
10
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
11
11
 
12
- add_column :name => 'ip', :pos => 1, :agg_function => :group
13
- add_column :name => 'timestamp', :pos => 2, :agg_function => :timestamp
14
- add_column :name => 'code', :pos => 4, :agg_function => :group
15
- add_column :name => 'length', :pos => 5, :agg_function => :average, :conversion => :integer
12
+ add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'id'
13
+ add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
14
+ add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'id'
15
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
16
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
16
17
 
17
18
  def time
18
19
  Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
@@ -1,15 +1,35 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  class TimingLog < Base
4
+ # Line format:
5
+ # timestamp, ip, controller#action, total_time, bytes, rss
6
+
7
+ set_regex /^(\d+),\s*([\d\.]+),\s*(\w+#\w+),\s*(\d+),\s*(\d+),\s*([\d\.]+)$/
8
+
9
+ add_column :name => :timestamp, :pos => 1, :aggregator => :timestamp
10
+ add_column :name => :ip, :pos => 2, :aggregator => :count
11
+ add_column :name => :action, :pos => 3, :aggregator => :count
12
+ add_column :name => :total_time, :pos => 4, :aggregator => :average, :conversion => :integer
13
+ add_column :name => :num_bytes, :pos => 5, :aggregator => :sum, :conversion => :integer
14
+ add_column :name => :rss, :pos => 6, :aggregator => :delta, :conversion => :float
15
+
16
+ def time
17
+ Time.at(self.timestamp.to_i)
18
+ end
19
+ end
20
+
21
+ class TimingLogWithGrouping < Base
4
22
  # Line format:
5
23
  # timestamp, controller#action, total_time, bytes
6
24
 
7
- set_regex /^(\d+),\s*(\w+#\w+),\s*(\d+),\s*(\d+)$/
25
+ set_regex /^(\d+),\s*([\d\.]+),\s*(\w+#\w+),\s*(\d+),\s*(\d+),\s*([\d\.]+)$/
8
26
 
9
- add_column :name => :timestamp, :pos => 1, :agg_function => :timestamp
10
- add_column :name => :action, :pos => 2, :agg_function => :group
11
- add_column :name => :total_time, :pos => 3, :agg_function => :average, :conversion => :integer
12
- add_column :name => :num_bytes, :pos => 4, :agg_function => :sum, :conversion => :integer
27
+ add_column :name => :timestamp, :pos => 1, :aggregator => :timestamp
28
+ add_column :name => :ip_by_action, :pos => 2, :aggregator => :count, :group_by => :action
29
+ add_column :name => :action, :pos => 3, :aggregator => :count, :group_by => :action
30
+ add_column :name => :total_time, :pos => 4, :aggregator => :average, :group_by => :action, :conversion => :integer
31
+ add_column :name => :num_bytes, :pos => 5, :aggregator => :sum, :group_by => :action, :conversion => :integer
32
+ add_column :name => :rss, :pos => 6, :aggregator => :delta, :group_by => :action, :conversion => :float
13
33
 
14
34
  def time
15
35
  Time.at(self.timestamp.to_i)
@@ -1,3 +1,4 @@
1
+ # invalid line
1
2
  66.249.67.176 - - [23/Jun/2013:16:58:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
2
3
  12.24.48.96 - - [23/Jun/2013:16:59:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 4555 "-" "Bing)"
3
4
  66.249.67.176 - - [23/Jun/2013:17:00:01 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
@@ -1,3 +1,3 @@
1
- 1385942280, posts#index, 100, 20000
2
- 1385942340, posts#create, 200, 5000
3
- 1385942400, posts#show, 100, 15000
1
+ 1385942280, 192.168.0.10, posts#index, 100, 20000, 50.20
2
+ 1385942340, 192.168.0.5, posts#create, 200, 5000, 50.20
3
+ 1385942400, 192.168.0.5, posts#show, 100, 15000, 51.00
@@ -1,3 +1,3 @@
1
- 1385941980, posts#show, 100, 16000
2
- 1385942040, posts#index, 500, 25000
3
- 1385942160, posts#show, 300, 12000
1
+ 1385941980, 192.168.0.5, posts#show, 100, 16000, 48.00
2
+ 1385942040, 192.168.0.10, posts#index, 500, 25000, 48.20
3
+ 1385942160, 192.168.0.5, posts#show, 300, 12000, 49.20
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logfile_interval
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philippe Le Rohellec
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-03 00:00:00.000000000 Z
11
+ date: 2013-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description: Logfile parser and aggregator
70
84
  email:
71
85
  - philippe@lerohellec.com
@@ -74,6 +88,7 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - .gitignore
91
+ - .travis.yml
77
92
  - Gemfile
78
93
  - Gemfile.lock
79
94
  - LICENSE.txt
@@ -108,7 +123,7 @@ files:
108
123
  - spec/support/logfiles/access.log.2
109
124
  - spec/support/logfiles/timing.log
110
125
  - spec/support/logfiles/timing.log.1
111
- homepage: ''
126
+ homepage: https://github.com/plerohellec/logfile_interval
112
127
  licenses:
113
128
  - MIT
114
129
  metadata: {}