logfile_interval 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 304480b4bf3c9525f74bf9b835b3ec7fe72b64be
4
- data.tar.gz: ca5f34f5a987aca00b22ea838e083c744793c545
3
+ metadata.gz: e0f1a02f57605b2eb8eacf703a456dbe9038abcd
4
+ data.tar.gz: f1d68cd7e72dc15023f6c3f8ae72d7e2eb104102
5
5
  SHA512:
6
- metadata.gz: e36b8d0c42fbaafed596dcb5c2770450bf7e10ce68d92326f3f04eab197bbb4faa8ea8fa0f7706b55883fd2d65a876435ab689533222148151787c3c58861ba6
7
- data.tar.gz: 34bd857fd154d29703959693c7b33b0e15b4ab0824f97e84f4362aad1e009a332e8f0b1c96fdcde1321f3462666e3bd862c9f53cb15aab41807f6321346551b0
6
+ metadata.gz: 3bc11945565915aaa028f61f01b670340a2765640abf44aca2d8b1d36cb6a6b09c8626b7e3d964e3e3a7f0479893240defa0b600feec3f98e1f6c5176c5cda8a
7
+ data.tar.gz: cb91a1b9aad9bd67aa5fa1ccfa9a9f0d69cab5bbc177ecdfc4ac4f80d08c1edcf39cbf5df250c8251913ca50a2d4e9abf17e92335c41b9b10b17dc32bddf5cc5
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ script: bundle exec rspec spec
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- logfile_interval (0.0.1)
4
+ logfile_interval (1.1.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -14,6 +14,8 @@ GEM
14
14
  debugger-linecache (1.2.0)
15
15
  debugger-ruby_core_source (1.2.4)
16
16
  diff-lcs (1.2.5)
17
+ docile (1.1.1)
18
+ multi_json (1.8.2)
17
19
  rake (10.1.0)
18
20
  rspec (2.14.1)
19
21
  rspec-core (~> 2.14.0)
@@ -23,6 +25,11 @@ GEM
23
25
  rspec-expectations (2.14.4)
24
26
  diff-lcs (>= 1.1.3, < 2.0)
25
27
  rspec-mocks (2.14.4)
28
+ simplecov (0.8.2)
29
+ docile (~> 1.1.0)
30
+ multi_json
31
+ simplecov-html (~> 0.8.0)
32
+ simplecov-html (0.8.0)
26
33
 
27
34
  PLATFORMS
28
35
  ruby
@@ -33,3 +40,4 @@ DEPENDENCIES
33
40
  logfile_interval!
34
41
  rake
35
42
  rspec (~> 2.14.0)
43
+ simplecov
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
- # LogfileInterval
1
+ # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval)
2
2
 
3
- Logfile parser and aggregator
3
+ Logfile parser and aggregator.
4
+
5
+ It iterates over each line of logfiles, parses each line and aggregates all lines in a time interval into a single
6
+ record made up of the sum, the average, the number of occurrences per value, or the average of the deltas between lines.
4
7
 
5
8
  ## Installation
6
9
 
@@ -19,6 +22,7 @@ Or install it yourself as:
19
22
  ## Usage
20
23
 
21
24
  ### Write a LineParser class
25
+ #### Example
22
26
  ```ruby
23
27
  module LogfileInterval
24
28
  module LineParser
@@ -28,10 +32,12 @@ module LogfileInterval
28
32
 
29
33
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
30
34
 
31
- add_column :name => 'ip', :pos => 1, :agg_function => :group
32
- add_column :name => 'timestamp', :pos => 2, :agg_function => :timestamp
33
- add_column :name => 'code', :pos => 4, :agg_function => :group
34
- add_column :name => 'length', :pos => 5, :agg_function => :average, :conversion => :integer
35
+ add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
36
+ add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
37
+ add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
38
+ add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
39
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
40
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
35
41
 
36
42
  def time
37
43
  Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
@@ -40,6 +46,29 @@ module LogfileInterval
40
46
  end
41
47
  end
42
48
  ```
49
+ #### Writing a parser class
50
+ The parser must define:
51
+ * A regex that extracts the fields out of each line.
52
+ * A set of columns to be parsed and aggregated into time intervals.
53
+ * A 'time' method that converts the mandatory timestamp field of a line into a Time object.
54
+
55
+ #### Attributes of a column:
56
+ * name: a parsed record will have a method with that name returning the value found at that position
57
+ * pos: the position of the captured field in the regex matched data
58
+ * aggregator: the aggregation mode for this field
59
+ * conversion: the parser will convert the field to an integer or a float when building the parsed record
60
+ * group_by: the name of another field. The aggregator will be applied to this field separately for each distinct value found in the other field.
61
+
62
+ #### Aggregator types and options
63
+ * timestamp: the timestamp field is used to determine which interval the line belongs to; each line MUST have a timestamp
64
+ * count: the aggregator will count the number of occurrences of this field
65
+ * without the group_by option, it will just count the total number of lines (probably useless)
66
+ * with a group_by option pointing to the same field as the current one, it will count the number of occurrences
67
+ per distinct value of this column
68
+ * with a group_by option pointing to another field, it will count the number of occurrences of (this field, other field) pairs.
69
+ * average: the aggregator will calculate the average value of this field
70
+ * sum: the aggregator will add up the values of this field
71
+ * delta: the aggregator will calculate the difference between each line and the next and will average all the deltas (see the sketch below)
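For illustration, a minimal parser sketch combining these options follows. The log format, column names and positions are made up for this example and are not part of the gem:

```ruby
module LogfileInterval
  module LineParser
    class TinyLog < Base
      # Hypothetical line format: "timestamp, code, bytes, rss"
      set_regex /^(\d+),\s*(\d+),\s*(\d+),\s*([\d\.]+)$/

      add_column :name => 'timestamp',     :pos => 1, :aggregator => :timestamp
      # count, per distinct value of the same column
      add_column :name => 'code',          :pos => 2, :aggregator => :count,   :group_by => 'code'
      # plain average over the whole interval
      add_column :name => 'bytes',         :pos => 3, :aggregator => :average, :conversion => :integer
      # average of bytes per distinct code
      add_column :name => 'bytes_by_code', :pos => 3, :aggregator => :average, :group_by => 'code', :conversion => :integer
      # average of the line-to-line deltas
      add_column :name => 'rss',           :pos => 4, :aggregator => :delta,   :conversion => :float

      def time
        Time.at(self.timestamp.to_i)
      end
    end
  end
end
```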
43
72
 
44
73
  ### Iterate through lines of a single file
45
74
  And get a parsed record for each line.
@@ -58,8 +87,11 @@ log.each_parsed_line do |record|
58
87
  puts record.class # LineParser::AccessLog
59
88
  puts record.ip
60
89
  puts record.time
90
+ puts record.code
91
+ puts record.length
61
92
  end
62
93
  ```
94
+ **Note**: the Logfile iterators always start with the last line in the file and work their way backward.
63
95
 
64
96
  ### Iterate through lines of multiples files
65
97
  And get a parsed record for each line.
@@ -70,21 +102,25 @@ set.each_parsed_line do |record|
70
102
  puts record.class # LineParser::AccessLog
71
103
  end
72
104
  ```
105
+ **Note**: the LogfileSet iterators always start with the most recent file.
106
+
73
107
  ### Aggregate lines into intervals
74
108
  ```ruby
75
109
  length = 5.minutes
76
- interval_builder = LogfileInterval::IntervalBuilder.new(logfiles, length)
110
+ interval_builder = LogfileInterval::IntervalBuilder.new(set, length)
77
111
  interval_builder.each_interval do |interval|
78
112
  puts interval.class # LogfileInterval::Interval
79
113
  puts interval.start_time
80
- puts interval.length
114
+ puts interval[:length]
81
115
  interval[:ip].each do |ip, count|
82
116
  puts "#{ip}, #{count}"
83
117
  end
118
+ interval[:length_by_ip].each do |ip, avg_length|
119
+ puts "#{ip}, #{avg_length}"
120
+ end
84
121
  end
85
122
  ```
86
123
 
87
-
88
124
  ## Contributing
89
125
 
90
126
  1. Fork it
data/docs/design.rb CHANGED
@@ -40,7 +40,7 @@ module LogfileInterval
40
40
 
41
41
  class AccessLog < Base
42
42
  set_regex /blah/
43
- add_column :name => :foo, :pos => 1, :conversion => integer, :agg_function => :average
43
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
44
44
 
45
45
  end
46
46
  end
data/docs/design3.rb CHANGED
@@ -6,8 +6,8 @@ module LogfileInterval
6
6
  end
7
7
 
8
8
  def add_column(name, options)
9
- aggregator = Aggregators.klass(agg_function)
10
- @columns[name] = { :pos => pos, :agg_function => aggregator, :conversion => conversion }
9
+ agg = Aggregators.klass(aggregator)
10
+ @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
11
11
  define_method(name)
12
12
  end
13
13
 
@@ -26,7 +26,7 @@ module LogfileInterval
26
26
 
27
27
  class AccessLog < Base
28
28
  set_regex /blah/
29
- add_column :name => :foo, :pos => 1, :conversion => integer, :agg_function => :average
29
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
30
30
 
31
31
  def initialize(line)
32
32
  @data = self.class.parse(line)
@@ -34,8 +34,8 @@ module LogfileInterval
34
34
  end
35
35
 
36
36
  module Aggregator
37
- def self.klass(agg_function)
38
- case agg_function
37
+ def self.klass(aggregator)
38
+ case aggregator
39
39
  when :sum then Sum
40
40
  end
41
41
  end
@@ -54,7 +54,7 @@ module LogfileInterval
54
54
  end
55
55
  end
56
56
 
57
- class Group
57
+ class Count
58
58
  def initialize
59
59
  @val = Counter.new
60
60
  end
@@ -22,7 +22,7 @@ module LogfileInterval
22
22
  end
23
23
 
24
24
  def [](name)
25
- @data[name].value
25
+ @data[name].values
26
26
  end
27
27
 
28
28
  def add_record(record)
@@ -35,7 +35,8 @@ module LogfileInterval
35
35
 
36
36
  parser.columns.each do |name, options|
37
37
  next unless @data[name]
38
- @data[name].add(record[name])
38
+ group_by_value = record[options[:group_by]] if options[:group_by]
39
+ @data[name].add(record[name], group_by_value)
39
40
  end
40
41
  end
41
42
  end
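For context, a small stand-alone sketch of the lookup performed here, with a plain Hash standing in for a parsed record (the column, keys and values are assumed, not taken from the gem):

```ruby
# Column defined as:
#   add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip'
record  = { 'length_by_ip' => 6801, 'ip' => '66.249.67.176' }  # stand-in for a parsed record
options = { :group_by => 'ip' }

group_by_value = record[options[:group_by]] if options[:group_by]
group_by_value  # => '66.249.67.176'
# the column's aggregator is then called as: add(6801, '66.249.67.176')
```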
@@ -1,59 +1,106 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  module Aggregator
4
- def self.klass(agg_function)
5
- case agg_function
6
- when :sum then Sum
7
- when :average then Average
8
- when :group then Group
4
+ def self.klass(aggregator)
5
+ case aggregator
6
+ when :sum then Sum
7
+ when :average then Average
8
+ when :count then Count
9
+ when :group_and_count then GroupAndCount
10
+ when :delta then Delta
9
11
  end
10
12
  end
11
13
 
12
- class Sum
14
+ class Base
15
+ include Enumerable
16
+
13
17
  def initialize
14
- @val = 0
18
+ @val = Counter.new
19
+ @size = Counter.new
15
20
  end
16
21
 
17
- def add(value)
18
- @val += value
22
+ def value(group = nil)
23
+ average(key(group))
19
24
  end
20
25
 
21
- def value
22
- @val
26
+ def values
27
+ if single_value?
28
+ value
29
+ else
30
+ self.inject({}) { |h, v| h[v[0]] = v[1]; h }
31
+ end
23
32
  end
24
- end
25
33
 
26
- class Average
27
- def initialize
28
- @val = 0
29
- @size = 0
34
+ def add(value, group_by = nil)
35
+ raise NotImplementedError
30
36
  end
31
37
 
32
- def add(value)
33
- @val += value
34
- @size += 1
38
+ private
39
+ def key(group_by = nil)
40
+ group_by ? group_by : :all
35
41
  end
36
42
 
37
- def value
38
- if @size > 0
39
- @val.to_f / @size.to_f
40
- else
41
- 0
43
+ def single_value?
44
+ return true if @val.empty?
45
+ @val.keys.count == 1 && @val.keys.first == :all
46
+ end
47
+
48
+ def each
49
+ @val.each_key do |k|
50
+ yield k, average(k)
42
51
  end
43
52
  end
53
+
54
+ def average(k)
55
+ @size[k] > 0 ? @val[k].to_f / @size[k].to_f : 0
56
+ end
44
57
  end
45
58
 
46
- class Group
47
- def initialize
48
- @val = Counter.new
59
+ class Sum < Base
60
+ def add(value, group_by = nil)
61
+ @val.add(key(group_by), value)
62
+ @size.set(key(group_by), 1)
49
63
  end
64
+ end
50
65
 
51
- def add(value)
52
- @val.increment(value)
66
+ class Average < Base
67
+ def add(value, group_by = nil)
68
+ @val.add(key(group_by), value)
69
+ @size.increment(key(group_by))
53
70
  end
71
+ end
72
+
73
+ class Count < Base
74
+ def add(value, group_by = nil)
75
+ @val.add(key(group_by), 1)
76
+ @size.set(key(group_by), 1)
77
+ end
78
+ end
54
79
 
55
- def value
56
- @val
80
+ class GroupAndCount < Base
81
+ def each
82
+ @val.each { |k, v| yield k, v }
83
+ end
84
+
85
+ def add(value, group_by)
86
+ raise ArgumentError, 'group_by argument is mandatory for GroupAndCount#add' unless group_by
87
+ @val.increment_subkey(value, key(group_by))
88
+ @size.set(key(group_by), 1)
89
+ end
90
+ end
91
+
92
+ class Delta < Base
93
+ def initialize
94
+ @previous = Counter.new
95
+ super
96
+ end
97
+
98
+ def add(value, group_by = nil)
99
+ if @previous.has_key?(key(group_by))
100
+ @val.add(key(group_by), @previous[key(group_by)] - value)
101
+ @size.increment(key(group_by))
102
+ end
103
+ @previous.set(key(group_by), value)
57
104
  end
58
105
  end
59
106
  end
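A usage sketch of the reworked aggregator API above; the numbers mirror the specs later in this diff, and it assumes these Aggregator classes are loaded:

```ruby
Agg = LogfileInterval::LineParser::Aggregator

avg = Agg::Average.new
avg.add(3, :key1)
avg.add(5, :key1)
avg.add(5, :key2)
avg.value(:key1)  # => 4.0  (per-group average)
avg.values        # => { :key1 => 4.0, :key2 => 5.0 }

d = Agg::Delta.new
d.add(9, :key1)   # first value only seeds the previous-value counter
d.add(5, :key1)   # delta: 9 - 5 = 4
d.add(3, :key1)   # delta: 5 - 3 = 2
d.value(:key1)    # => 3.0  (average of the deltas)
```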
@@ -1,11 +1,13 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
- AGGREGATION_FUNCTIONS = [ :sum, :average, :timestamp, :group ]
3
+ AGGREGATION_FUNCTIONS = [ :sum, :average, :timestamp, :count, :delta ]
4
4
 
5
5
  class InvalidLine < StandardError; end
6
6
  class ConfigurationError < StandardError; end
7
7
 
8
8
  class Base
9
+ attr_reader :data
10
+
9
11
  class << self
10
12
  attr_reader :regex
11
13
 
@@ -20,14 +22,19 @@ module LogfileInterval
20
22
  def add_column(options)
21
23
  name = options.fetch(:name)
22
24
  pos = options.fetch(:pos)
23
- agg_function = options.fetch(:agg_function)
25
+ aggregator = options.fetch(:aggregator)
24
26
  conversion = options.fetch(:conversion, :string)
25
- unless AGGREGATION_FUNCTIONS.include?(agg_function)
26
- raise ArgumentError, "agg_function must be one of #{AGGREGATION_FUNCTIONS.join(', ')}"
27
+ unless AGGREGATION_FUNCTIONS.include?(aggregator)
28
+ raise ArgumentError, "aggregator must be one of #{AGGREGATION_FUNCTIONS.join(', ')}"
29
+ end
30
+
31
+ if aggregator == :count && options[:group_by] && options[:group_by] != name
32
+ aggregator = :group_and_count
27
33
  end
28
34
 
29
- aggregator = Aggregator.klass(agg_function)
35
+ aggregator = Aggregator.klass(aggregator)
30
36
  columns[name] = { :pos => pos, :aggregator => aggregator, :conversion => conversion }
37
+ columns[name][:group_by] = options[:group_by]
31
38
 
32
39
  define_method(name) do
33
40
  @data[name]
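The selection rule above can be summed up in a small stand-alone sketch; the helper below is hypothetical and not part of the gem, while the class names come from the Aggregator module earlier in this diff:

```ruby
# For add_column :name => name, :aggregator => :count, :group_by => group_by,
# the aggregator is picked as follows:
def resolve_count_aggregator(name, group_by)
  if group_by && group_by != name
    :group_and_count  # Aggregator::GroupAndCount, counts (value, group_by value) pairs
  else
    :count            # Aggregator::Count, counts occurrences of the value itself
  end
end

resolve_count_aggregator('code', 'code')  # => :count
resolve_count_aggregator('code', 'ip')    # => :group_and_count
resolve_count_aggregator('code', nil)     # => :count
```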
@@ -1,22 +1,43 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  class Counter < Hash
4
- def increment(val)
5
- if self.has_key?(val)
6
- self[val] += 1
4
+ class ValueTypeError < StandardError; end
5
+
6
+ def increment(key)
7
+ if self.has_key?(key)
8
+ self[key] += 1
7
9
  else
8
- self[val] = 1
10
+ self[key] = 1
11
+ end
12
+ end
13
+
14
+ def increment_subkey(key, subkey)
15
+ if self.has_key?(key) && !self[key].is_a?(Counter)
16
+ raise "Value for #{key} is not a Counter"
17
+ end
18
+
19
+ unless self.has_key?(key)
20
+ self[key] = Counter.new
9
21
  end
22
+ self[key].increment(subkey)
10
23
  end
11
24
 
12
- def add(val, num)
13
- if self.has_key?(val)
14
- self[val] += num
25
+ def add(key, num)
26
+ if self.has_key?(key)
27
+ self[key] += num
15
28
  else
16
- self[val] = num
29
+ self[key] = num
17
30
  end
18
31
  end
19
32
 
33
+ def set(key, num)
34
+ self[key] = num
35
+ end
36
+
37
+ def [](key)
38
+ self.fetch(key, 0)
39
+ end
40
+
20
41
  def merge(c)
21
42
  c.keys.each do |k|
22
43
  self.add c[k]
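A usage sketch of the extended Counter; it assumes the class above is loaded, and the keys and values are made up:

```ruby
c = LogfileInterval::LineParser::Counter.new

c.increment('200')
c.increment('200')
c.add('bytes', 512)
c['200']      # => 2
c['bytes']    # => 512
c['missing']  # => 0  ([] now falls back to 0 via fetch)

c.increment_subkey('192.168.0.5', 'posts#index')
c.increment_subkey('192.168.0.5', 'posts#index')
c.increment_subkey('192.168.0.5', 'posts#show')
c['192.168.0.5']['posts#index']  # => 2  (nested Counter)
```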
@@ -10,8 +10,11 @@ module LogfileInterval
10
10
  def first_timestamp
11
11
  return nil unless File.exist?(@filename)
12
12
  File.open(@filename) do |f|
13
- line = parser.create_record(f.gets)
14
- line.time
13
+ while line = f.gets
14
+ if record = parser.create_record(line)
15
+ return record.time
16
+ end
17
+ end
15
18
  end
16
19
  end
17
20
 
@@ -1,3 +1,3 @@
1
1
  module LogfileInterval
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
8
8
  spec.version = LogfileInterval::VERSION
9
9
  spec.authors = ["Philippe Le Rohellec"]
10
10
  spec.email = ["philippe@lerohellec.com"]
11
- spec.description = %q{Logfile parser and aggregator}
12
- spec.summary = %q{Aggregate logfile data into intervals}
13
- spec.homepage = ""
11
+ spec.description = "Logfile parser and aggregator"
12
+ spec.summary = "Aggregate logfile data into intervals"
13
+ spec.homepage = "https://github.com/plerohellec/logfile_interval"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -19,7 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
- spec.add_development_dependency(%q<debugger>, [">= 0"])
23
- spec.add_development_dependency(%q<rspec>, ["~> 2.14.0"])
22
+ spec.add_development_dependency "debugger", [">= 0"]
23
+ spec.add_development_dependency "rspec", ["~> 2.14.0"]
24
24
  spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "simplecov"
25
26
  end
@@ -29,9 +29,9 @@ module LogfileInterval
29
29
  @intervals.first.size.should == 4
30
30
  @intervals.first[:total_time].should == 700.0/4
31
31
  @intervals.first[:num_bytes].should == 52000
32
- @intervals.first[:action]['posts#index'] = 1
33
- @intervals.first[:action]['posts#create'] = 1
34
- @intervals.first[:action]['posts#show'] = 2
32
+ @intervals.first[:rss].round(5).should == 0.60
33
+ @intervals.first[:ip].should == 4
34
+ @intervals.first[:action].should == 4
35
35
  end
36
36
  end
37
37
 
@@ -40,8 +40,9 @@ module LogfileInterval
40
40
  @intervals.last.size.should == 2
41
41
  @intervals.last[:total_time].should == 300
42
42
  @intervals.last[:num_bytes].should == 41000
43
- @intervals.last[:action]['posts#index'] = 1
44
- @intervals.last[:action]['posts#show'] = 1
43
+ @intervals.last[:rss].round(5).should == 0.20
44
+ @intervals.last[:ip].should == 2
45
+ @intervals.last[:action].should == 2
45
46
  end
46
47
  end
47
48
  end
@@ -11,43 +11,66 @@ module LogfileInterval
11
11
  interval.size.should == 0
12
12
  interval[:total_time].should == 0
13
13
  interval[:num_bytes].should == 0
14
- interval[:action].should be_a(Hash)
14
+ interval[:action].should == 0
15
+ interval[:ip].should == 0
15
16
  end
16
17
 
17
18
  context :add_record do
18
19
  before :each do
19
20
  @end_time = Time.new(2013, 12, 01, 16, 00, 00, '-08:00')
20
21
  @length = 300
21
- @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
22
22
  end
23
23
 
24
- it 'rejects record out of interval' do
25
- oor_record = LineParser::TimingLog.create_record('1385942450, posts#index, 100, 20000')
26
- lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
27
- end
24
+ context 'basics' do
25
+ before :each do
26
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
27
+ end
28
+
29
+ it 'rejects record out of interval' do
30
+ oor_record = LineParser::TimingLog.create_record('1385942450, 192.168.0.5, posts#index, 100, 20000, 50.0')
31
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
32
+ end
33
+
34
+ it 'rejects record at interval start_time' do
35
+ oor_record = LineParser::TimingLog.create_record('1385942100, 192.168.0.5, posts#index, 100, 20000, 50.0')
36
+ lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
37
+ end
38
+
39
+ it 'adds 1 record to interval' do
40
+ record1 = LineParser::TimingLog.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 50.0')
41
+ @interval.add_record(record1)
28
42
 
29
- it 'rejects record at interval start_time' do
30
- oor_record = LineParser::TimingLog.create_record('1385942100, posts#index, 100, 20000')
31
- lambda { @interval.add_record(oor_record) }.should raise_error(Interval::OutOfRange)
43
+ @interval.size.should == 1
44
+ @interval[:total_time].should == 100
45
+ @interval[:num_bytes].should == 20000
46
+ @interval[:action].should == 1
47
+ @interval[:ip].should == 1
48
+ end
32
49
  end
33
50
 
34
- it 'adds 1 record to interval' do
35
- record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
36
- @interval.add_record(record1)
51
+ context 'with count and group by options' do
52
+ it 'creates an aggregator of type GroupAndCount' do
53
+ expect(LineParser::Aggregator::GroupAndCount).to receive(:new)
54
+ interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
55
+ end
37
56
 
38
- @interval.size.should == 1
39
- @interval[:total_time].should == 100
40
- @interval[:num_bytes].should == 20000
41
- @interval[:action]['posts#index'].should == 1
57
+ it 'add_record accepts key and subkey' do
58
+ interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
59
+ record1 = LineParser::TimingLogWithGrouping.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
60
+ interval.add_record(record1)
61
+ interval.size.should == 1
62
+ end
42
63
  end
43
64
 
44
- context '3 records' do
65
+ context 'with 3 records' do
45
66
  before :each do
46
- record1 = LineParser::TimingLog.create_record('1385942400, posts#index, 100, 20000')
67
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLog)
68
+
69
+ record1 = LineParser::TimingLog.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
47
70
  @interval.add_record(record1)
48
- record2 = LineParser::TimingLog.create_record('1385942300, posts#show, 50, 10000')
71
+ record2 = LineParser::TimingLog.create_record('1385942300, 192.168.0.5, posts#show, 50, 10000, 51.0')
49
72
  @interval.add_record(record2)
50
- record3 = LineParser::TimingLog.create_record('1385942200, posts#show, 60, 12000')
73
+ record3 = LineParser::TimingLog.create_record('1385942200, 10.10.10.10, posts#show, 60, 12000, 50.0')
51
74
  @interval.add_record(record3)
52
75
  end
53
76
 
@@ -55,17 +78,72 @@ module LogfileInterval
55
78
  @interval.size.should == 3
56
79
  end
57
80
 
58
- it 'averages columns with average agg_function' do
81
+ it 'averages columns with average aggregator' do
59
82
  @interval[:total_time].should == 70
60
83
  end
61
84
 
62
- it 'sums up columns with sum agg_function' do
85
+ it 'sums up columns with sum aggregator' do
63
86
  @interval[:num_bytes].should == 42000
64
87
  end
65
88
 
66
- it 'groups and counts columns with group agg_function' do
67
- @interval[:action]['posts#index'].should == 1
68
- @interval[:action]['posts#show'].should == 2
89
+ it 'averages the delta columns with delta aggregator' do
90
+ @interval[:rss].should == 1.5
91
+ end
92
+
93
+ it 'counts columns with group aggregator' do
94
+ @interval[:ip].should == 3
95
+ @interval[:action].should == 3
96
+ end
97
+ end
98
+
99
+ context 'with group_by key' do
100
+ before :each do
101
+ @interval = Interval.new(@end_time, @length, LineParser::TimingLogWithGrouping)
102
+
103
+ record1 = LineParser::TimingLogWithGrouping.create_record('1385942400, 192.168.0.5, posts#index, 100, 20000, 53.0')
104
+ @interval.add_record(record1)
105
+ record2 = LineParser::TimingLogWithGrouping.create_record('1385942300, 192.168.0.5, posts#show, 50, 10000, 51.0')
106
+ @interval.add_record(record2)
107
+ record3 = LineParser::TimingLogWithGrouping.create_record('1385942200, 192.168.0.5, posts#show, 60, 12000, 50.0')
108
+ @interval.add_record(record3)
109
+ record4 = LineParser::TimingLogWithGrouping.create_record('1385942180, 10.10.10.10, posts#index, 100, 20000, 48.0')
110
+ @interval.add_record(record4)
111
+ end
112
+
113
+ it 'counts value column per group column' do
114
+ @interval[:action].should be_a(Hash)
115
+ @interval[:action].size.should == 2
116
+ @interval[:action]['posts#index'].should == 2
117
+ @interval[:action]['posts#show'].should == 2
118
+ end
119
+
120
+ it 'counts value and group_by pairs' do
121
+ @interval[:ip_by_action].should be_a(Hash)
122
+ @interval[:ip_by_action]['192.168.0.5'].should be_a(Hash)
123
+ @interval[:ip_by_action]['192.168.0.5']['posts#index'].should == 1
124
+ @interval[:ip_by_action]['192.168.0.5']['posts#show'].should == 2
125
+ @interval[:ip_by_action]['10.10.10.10']['posts#index'].should == 1
126
+ end
127
+
128
+ it 'averages value column per group column' do
129
+ @interval[:total_time].should be_a(Hash)
130
+ @interval[:total_time].size.should == 2
131
+ @interval[:total_time]['posts#index'].should == 100
132
+ @interval[:total_time]['posts#show'].should == 55
133
+ end
134
+
135
+ it 'sums up value column per group column' do
136
+ @interval[:num_bytes].should be_a(Hash)
137
+ @interval[:num_bytes].size.should == 2
138
+ @interval[:num_bytes]['posts#index'].should == 40000
139
+ @interval[:num_bytes]['posts#show'].should == 22000
140
+ end
141
+
142
+ it 'averages deltas on value column per group column' do
143
+ @interval[:rss].should be_a(Hash)
144
+ @interval[:rss].size.should == 2
145
+ @interval[:rss]['posts#index'].should == 5
146
+ @interval[:rss]['posts#show'].should == 1
69
147
  end
70
148
  end
71
149
  end
@@ -7,40 +7,204 @@ module LogfileInterval
7
7
  it 'finds the aggregator class' do
8
8
  Aggregator.klass(:sum).should == Sum
9
9
  Aggregator.klass(:average).should == Average
10
- Aggregator.klass(:group).should == Group
10
+ Aggregator.klass(:count).should == Count
11
+ Aggregator.klass(:group_and_count).should == GroupAndCount
12
+ Aggregator.klass(:delta).should == Delta
11
13
  end
12
14
  end
13
15
 
14
- describe Sum do
15
- it 'sums up values' do
16
- sum = Sum.new
17
- sum.add(3)
18
- sum.add(5)
19
- sum.value.should == 8
16
+ shared_examples 'an aggregator' do
17
+ let(:aggregator) { described_class.new }
18
+
19
+ [ :add, :value, :values ].each do |method|
20
+ it "responds to #{method}" do
21
+ aggregator.should respond_to(method)
22
+ end
23
+ end
24
+
25
+ context 'values' do
26
+ context 'with one group' do
27
+ before :each do
28
+ aggregator.add(5, :key1)
29
+ end
30
+
31
+ it 'returns a hash' do
32
+ aggregator.values.should be_a(Hash) unless aggregator.is_a?(Delta)
33
+ end
34
+ end
35
+
36
+ context 'with several groups' do
37
+ before :each do
38
+ aggregator.add(5, :key1)
39
+ aggregator.add(3, :key2)
40
+ aggregator.add(3, :key1)
41
+ end
42
+
43
+ it 'returns a hash' do
44
+ aggregator.values.should be_a(Hash)
45
+ end
46
+ end
47
+
48
+ context 'with no group' do
49
+ before :each do
50
+ aggregator.add(5)
51
+ aggregator.add(3)
52
+ end
53
+
54
+ it 'returns a numeric' do
55
+ aggregator.values.should be_a(Numeric) unless aggregator.is_a?(Count)
56
+ end
57
+ end
20
58
  end
21
59
  end
22
60
 
23
- describe Average do
24
- it 'averages values' do
25
- sum = Average.new
26
- sum.add(3)
27
- sum.add(5)
28
- sum.value.should == 4
61
+ [ Count, Sum, Average, Delta ]. each do |klass|
62
+ describe klass do
63
+ it_behaves_like 'an aggregator'
29
64
  end
30
65
  end
31
66
 
32
- describe Group do
33
- it 'groups values and increment counters' do
34
- g = Group.new
35
- g.add('200')
36
- g.add('500')
37
- g.add('301')
38
- g.add('200')
39
- g.value.should be_a(Hash)
40
- g.value.should include({'200' => 2})
41
- g.value.should include({'301' => 1})
42
- g.value.should include({'500' => 1})
67
+
68
+ describe 'without group_by key' do
69
+ describe Sum do
70
+ it 'sums up values' do
71
+ sum = Sum.new
72
+ sum.add(3)
73
+ sum.add(5)
74
+ sum.value.should == 8
75
+ end
43
76
  end
77
+
78
+ describe Average do
79
+ it 'averages values' do
80
+ avg = Average.new
81
+ avg.add(3)
82
+ avg.add(5)
83
+ avg.value.should == 4
84
+ end
85
+ end
86
+
87
+ describe Delta do
88
+ it 'averages delta values' do
89
+ d = Delta.new
90
+ d.add(1.4)
91
+ d.add(1.1)
92
+ d.add(1.0)
93
+ d.value.round(5).should == 0.2
94
+ end
95
+ end
96
+
97
+ describe Count do
98
+ it 'groups values and increment counters' do
99
+ g = Count.new
100
+ g.add('200')
101
+ g.add('500')
102
+ g.add('301')
103
+ g.add('200')
104
+ g.value.should == 4
105
+ end
106
+ end
107
+ end
108
+
109
+ describe 'with group_by key' do
110
+
111
+ describe Sum do
112
+ it 'sums up values by key' do
113
+ sum = Sum.new
114
+ sum.add(3, :key1)
115
+ sum.add(5, :key2)
116
+ sum.add(5, :key1)
117
+ sum.values.should be_a(Hash)
118
+ sum.values.size.should == 2
119
+ sum.value(:key1).should == 8
120
+ sum.values[:key1].should == 8
121
+ sum.value(:key2).should == 5
122
+ sum.values[:key2].should == 5
123
+ end
124
+ end
125
+
126
+
127
+ describe Average do
128
+ it 'averages values by key' do
129
+ avg = Average.new
130
+ avg.add(3, :key1)
131
+ avg.add(5, :key2)
132
+ avg.add(5, :key1)
133
+ avg.values.should be_a(Hash)
134
+ avg.values.size.should == 2
135
+ avg.value(:key1).should == 4
136
+ avg.values[:key1].should == 4
137
+ avg.value(:key2).should == 5
138
+ avg.values[:key2].should == 5
139
+ end
140
+ end
141
+
142
+ describe Count do
143
+ it 'groups values and increment counters' do
144
+ g = Count.new
145
+ g.add('200', '200')
146
+ g.add('500', '500')
147
+ g.add('301', '301')
148
+ g.add('200', '200')
149
+ g.values.should be_a(Hash)
150
+ g.values.should include({'200' => 2})
151
+ g.values.should include({'301' => 1})
152
+ g.values.should include({'500' => 1})
153
+ end
154
+ end
155
+
156
+ describe GroupAndCount do
157
+ it 'each yields a key and a hash' do
158
+ gac = GroupAndCount.new
159
+ gac.add :key1, :subkey1
160
+ gac.first.should be_an(Array)
161
+ gac.first.size.should == 2
162
+ gac.first[1].should be_a(Hash)
163
+ end
164
+
165
+ context :add do
166
+ before :each do
167
+ @gac = GroupAndCount.new
168
+ end
169
+
170
+ it 'requires a group_by argument' do
171
+ lambda { @gac.add('foo') }.should raise_error ArgumentError
172
+ end
173
+
174
+ it 'counts the number of occurrences of subkey for key' do
175
+ @gac.add :key1, :subkey1
176
+ @gac.add :key1, :subkey2
177
+ @gac.add :key2, :subkey1
178
+ @gac.add :key2, :subkey1
179
+ @gac.add :key2, :subkey3
180
+
181
+ @gac.values[:key1][:subkey1].should == 1
182
+ @gac.values[:key1][:subkey2].should == 1
183
+ @gac.values[:key2][:subkey1].should == 2
184
+ @gac.values[:key2][:subkey2].should == 0
185
+ @gac.values[:key2][:subkey3].should == 1
186
+ end
187
+ end
188
+ end
189
+
190
+ describe Delta do
191
+ it 'averages deltas by key' do
192
+ d = Delta.new
193
+ d.add(9, :key1)
194
+ d.add(10, :key2)
195
+ d.add(5, :key1)
196
+ d.add(8, :key2)
197
+ d.add(3, :key1)
198
+ d.add(5, :key2)
199
+ d.values.should be_a(Hash)
200
+ d.values.size.should == 2
201
+ d.value(:key1).should == 3
202
+ d.values[:key1].should == 3
203
+ d.value(:key2).should == 2.5
204
+ d.values[:key2].should == 2.5
205
+ end
206
+ end
207
+
44
208
  end
45
209
  end
46
210
  end
@@ -43,7 +43,7 @@ module LogfileInterval
43
43
 
44
44
  describe 'Broken parsers' do
45
45
  class NoRegexLog < Base
46
- add_column :name => 'ip', :pos => 1, :agg_function => :group
46
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
47
47
  end
48
48
 
49
49
  class NoColumnLog < Base
@@ -66,7 +66,7 @@ module LogfileInterval
66
66
  describe TimingLog do
67
67
  before :each do
68
68
  # 1385942400 = 2013/12/01 16:00:00
69
- @line = '1385942400, posts#index, 100, 20000'
69
+ @line = '1385942400, 192.168.0.5, posts#index, 100, 20000, 50.00'
70
70
  end
71
71
 
72
72
  it 'parses a timing line' do
@@ -76,6 +76,7 @@ module LogfileInterval
76
76
  record.action.should == 'posts#index'
77
77
  record.total_time.should == 100
78
78
  record.num_bytes.should == 20000
79
+ record.rss.should == 50.0
79
80
  end
80
81
  end
81
82
  end
@@ -32,8 +32,12 @@ module LogfileInterval
32
32
 
33
33
  records.first.time.should == Time.new(2012, 01, 01, 16, 30, 51, '-08:00')
34
34
  records.first.code.should == '200'
35
+ records.first.length.should == 6801
36
+ records.first.length_by_ip.should == 6801
35
37
  records.last.time.should == Time.new(2012, 01, 01, 00, 57, 47, '-08:00')
36
38
  records.last.code.should == '301'
39
+ records.last.length.should == 185
40
+ records.last.length_by_ip.should == 185
37
41
  end
38
42
  end
39
43
  end
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,13 @@
1
1
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
2
  $LOAD_PATH.unshift(File.dirname(__FILE__))
3
3
 
4
+ require 'simplecov'
5
+
6
+ SimpleCov.start do
7
+ add_filter '/spec/'
8
+ add_filter '/config/'
9
+ end
10
+
4
11
  require 'rspec'
5
12
  require 'logfile_interval'
6
13
 
@@ -9,10 +9,11 @@ module LogfileInterval
9
9
 
10
10
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
11
11
 
12
- add_column :name => 'ip', :pos => 1, :agg_function => :group
13
- add_column :name => 'timestamp', :pos => 2, :agg_function => :timestamp
14
- add_column :name => 'code', :pos => 4, :agg_function => :group
15
- add_column :name => 'length', :pos => 5, :agg_function => :average, :conversion => :integer
12
+ add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'id'
13
+ add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
14
+ add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'id'
15
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
16
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
16
17
 
17
18
  def time
18
19
  Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
@@ -1,15 +1,35 @@
1
1
  module LogfileInterval
2
2
  module LineParser
3
3
  class TimingLog < Base
4
+ # Line format:
5
+ # timestamp, ip, controller#action, total_time, bytes, rss
6
+
7
+ set_regex /^(\d+),\s*([\d\.]+),\s*(\w+#\w+),\s*(\d+),\s*(\d+),\s*([\d\.]+)$/
8
+
9
+ add_column :name => :timestamp, :pos => 1, :aggregator => :timestamp
10
+ add_column :name => :ip, :pos => 2, :aggregator => :count
11
+ add_column :name => :action, :pos => 3, :aggregator => :count
12
+ add_column :name => :total_time, :pos => 4, :aggregator => :average, :conversion => :integer
13
+ add_column :name => :num_bytes, :pos => 5, :aggregator => :sum, :conversion => :integer
14
+ add_column :name => :rss, :pos => 6, :aggregator => :delta, :conversion => :float
15
+
16
+ def time
17
+ Time.at(self.timestamp.to_i)
18
+ end
19
+ end
20
+
21
+ class TimingLogWithGrouping < Base
4
22
  # Line format:
5
23
  # timestamp, controller#action, total_time, bytes
6
24
 
7
- set_regex /^(\d+),\s*(\w+#\w+),\s*(\d+),\s*(\d+)$/
25
+ set_regex /^(\d+),\s*([\d\.]+),\s*(\w+#\w+),\s*(\d+),\s*(\d+),\s*([\d\.]+)$/
8
26
 
9
- add_column :name => :timestamp, :pos => 1, :agg_function => :timestamp
10
- add_column :name => :action, :pos => 2, :agg_function => :group
11
- add_column :name => :total_time, :pos => 3, :agg_function => :average, :conversion => :integer
12
- add_column :name => :num_bytes, :pos => 4, :agg_function => :sum, :conversion => :integer
27
+ add_column :name => :timestamp, :pos => 1, :aggregator => :timestamp
28
+ add_column :name => :ip_by_action, :pos => 2, :aggregator => :count, :group_by => :action
29
+ add_column :name => :action, :pos => 3, :aggregator => :count, :group_by => :action
30
+ add_column :name => :total_time, :pos => 4, :aggregator => :average, :group_by => :action, :conversion => :integer
31
+ add_column :name => :num_bytes, :pos => 5, :aggregator => :sum, :group_by => :action, :conversion => :integer
32
+ add_column :name => :rss, :pos => 6, :aggregator => :delta, :group_by => :action, :conversion => :float
13
33
 
14
34
  def time
15
35
  Time.at(self.timestamp.to_i)
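For reference, a short sketch of parsing one line in the new six-field format with the updated TimingLog parser; the line matches the fixtures further down, and it assumes these spec support classes are loaded:

```ruby
line   = '1385942400, 192.168.0.5, posts#index, 100, 20000, 50.00'
record = LogfileInterval::LineParser::TimingLog.create_record(line)

record.ip          # => '192.168.0.5'
record.action      # => 'posts#index'
record.total_time  # => 100    (:conversion => :integer)
record.num_bytes   # => 20000
record.rss         # => 50.0   (:conversion => :float)
record.time        # => Time.at(1385942400)
```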
@@ -1,3 +1,4 @@
1
+ # invalid line
1
2
  66.249.67.176 - - [23/Jun/2013:16:58:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
2
3
  12.24.48.96 - - [23/Jun/2013:16:59:00 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 4555 "-" "Bing)"
3
4
  66.249.67.176 - - [23/Jun/2013:17:00:01 -0800] "GET /package/core/raring/universe/proposed/openldap HTTP/1.1" 200 185 "-" "Google"
@@ -1,3 +1,3 @@
1
- 1385942280, posts#index, 100, 20000
2
- 1385942340, posts#create, 200, 5000
3
- 1385942400, posts#show, 100, 15000
1
+ 1385942280, 192.168.0.10, posts#index, 100, 20000, 50.20
2
+ 1385942340, 192.168.0.5, posts#create, 200, 5000, 50.20
3
+ 1385942400, 192.168.0.5, posts#show, 100, 15000, 51.00
@@ -1,3 +1,3 @@
1
- 1385941980, posts#show, 100, 16000
2
- 1385942040, posts#index, 500, 25000
3
- 1385942160, posts#show, 300, 12000
1
+ 1385941980, 192.168.0.5, posts#show, 100, 16000, 48.00
2
+ 1385942040, 192.168.0.10, posts#index, 500, 25000, 48.20
3
+ 1385942160, 192.168.0.5, posts#show, 300, 12000, 49.20
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logfile_interval
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philippe Le Rohellec
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-03 00:00:00.000000000 Z
11
+ date: 2013-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  description: Logfile parser and aggregator
70
84
  email:
71
85
  - philippe@lerohellec.com
@@ -74,6 +88,7 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - .gitignore
91
+ - .travis.yml
77
92
  - Gemfile
78
93
  - Gemfile.lock
79
94
  - LICENSE.txt
@@ -108,7 +123,7 @@ files:
108
123
  - spec/support/logfiles/access.log.2
109
124
  - spec/support/logfiles/timing.log
110
125
  - spec/support/logfiles/timing.log.1
111
- homepage: ''
126
+ homepage: https://github.com/plerohellec/logfile_interval
112
127
  licenses:
113
128
  - MIT
114
129
  metadata: {}