logfile_interval 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/README.md +15 -11
  4. data/bin/aggregate_access_log.rb +9 -9
  5. data/bin/readme.rb +6 -5
  6. data/docs/design.rb +88 -62
  7. data/lib/logfile_interval/aggregator/base.rb +15 -1
  8. data/lib/logfile_interval/aggregator/count.rb +10 -2
  9. data/lib/logfile_interval/aggregator/delta.rb +6 -6
  10. data/lib/logfile_interval/aggregator/num_lines.rb +13 -0
  11. data/lib/logfile_interval/aggregator/registrar.rb +32 -0
  12. data/lib/logfile_interval/aggregator_set.rb +34 -0
  13. data/lib/logfile_interval/interval.rb +5 -31
  14. data/lib/logfile_interval/interval_builder/ascending.rb +23 -0
  15. data/lib/logfile_interval/interval_builder/descending.rb +22 -0
  16. data/lib/logfile_interval/interval_builder.rb +62 -19
  17. data/lib/logfile_interval/logfile.rb +34 -5
  18. data/lib/logfile_interval/logfile_set.rb +38 -24
  19. data/lib/logfile_interval/parsed_line/base.rb +31 -0
  20. data/lib/logfile_interval/{line_parser/base.rb → parsed_line/parser.rb} +15 -40
  21. data/lib/logfile_interval/version.rb +1 -1
  22. data/lib/logfile_interval.rb +4 -2
  23. data/spec/lib/aggregator_set_spec.rb +15 -0
  24. data/spec/lib/aggregator_spec.rb +39 -29
  25. data/spec/lib/custom_aggregator_spec.rb +3 -3
  26. data/spec/lib/interval_builder_spec.rb +185 -38
  27. data/spec/lib/interval_spec.rb +41 -30
  28. data/spec/lib/line_parser/base_spec.rb +16 -5
  29. data/spec/lib/logfile_set_spec.rb +23 -1
  30. data/spec/lib/logfile_spec.rb +23 -1
  31. data/spec/support/lib/access_log.rb +1 -1
  32. data/spec/support/lib/custom_timing_log.rb +3 -3
  33. data/spec/support/lib/timing_log.rb +4 -2
  34. data/spec/support/logfiles/timing.log.2 +1 -0
  35. metadata +13 -5
  36. data/lib/logfile_interval/aggregator/group_and_count.rb +0 -14
  37. data/lib/logfile_interval/aggregator.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4b50ea1942cafe43439265b1d781190b4a60af69
4
- data.tar.gz: 35fe78e837a6f1377baf2c62b3b4df5d95b814f7
3
+ metadata.gz: 4eb614cbe0e43877376ccae43e38b666c3b58270
4
+ data.tar.gz: 723459738582e7da66b6d53e0f5ce43965576a6c
5
5
  SHA512:
6
- metadata.gz: 37ae14fa68272ef51c2127f65046e8068ad1937b00f2d66cbce37377bbde9582deb4a6312130dd630991a404d832d34a497bb60dc0fc8fe2bf303870241bed06
7
- data.tar.gz: d12a79f9e706e5e02fce96d2ccc8e8b301936245d5819f93376873f837bbe2554ed14c37729895b3f720b48306f71d07193ec65fd9eb4ec797391f6d7ccee736
6
+ metadata.gz: 4d799b181c1d65abaa35910b49c4efa85a31aefc44d009c0ee3c6966979206cec31acd09ffb1cbd97544ec74798a872d7906f43b57758a51e910e1f499935b8b
7
+ data.tar.gz: 21a2d6737ac8d3e92ed6b40dbba0f18d424ecddfac1678c9c067fafb7ea7db1e1fcde234a5fe66cc09c3d1da1077acc14a354ce0eef42bdaed2fbb6e1b16d3ce
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- logfile_interval (1.2.1)
4
+ logfile_interval (2.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval)
1
+ # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval) [![Code Climate](https://codeclimate.com/github/plerohellec/logfile_interval.png)](https://codeclimate.com/github/plerohellec/logfile_interval)
2
2
 
3
3
  Logfile parser and aggregator.
4
4
 
@@ -28,9 +28,9 @@ class AccessLog < LogfileInterval::LineParser::Base
28
28
 
29
29
  set_regex /^([\d\.]+)\s+.*\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+/
30
30
 
31
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
31
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
32
32
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
33
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
33
+ add_column :name => 'code', :pos => 4, :aggregator => :count
34
34
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
35
35
 
36
36
  def time
@@ -38,9 +38,11 @@ class AccessLog < LogfileInterval::LineParser::Base
38
38
  end
39
39
  end
40
40
 
41
- file = LogfileInterval::Logfile.new('path_to_logfile', AccessLog)
41
+ parser = AccessLog
42
+ file = LogfileInterval::Logfile.new('path_to_logfile', parser)
43
+ parsed_line_enum = file.each_parsed_line
42
44
 
43
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
45
+ builder = LogfileInterval::IntervalBuilder.new(parsed_line_enum, parser, 300)
44
46
  builder.each_interval do |interval|
45
47
  next unless interval.size > 0
46
48
 
@@ -92,9 +94,9 @@ module LogfileInterval
92
94
 
93
95
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
94
96
 
95
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
97
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
96
98
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
97
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
99
+ add_column :name => 'code', :pos => 4, :aggregator => :count
98
100
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
99
101
  add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
100
102
  add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
@@ -120,6 +122,7 @@ end
120
122
 
121
123
  #### Aggregator types and options
122
124
  * timestamp: the timestamp field will be used to determine to which interval the line belongs, each line MUST have a timestamp
125
+ * num_lines: the simplest aggregator; it just counts the number of lines
123
126
  * count: the aggregator will count the number of occurrences of this field
124
127
  * without the group_by option, it will just count the total number of lines (probably useless)
125
128
  * with a group_by option pointing to the same field as the current one, it will count the number of occurrences
@@ -127,7 +130,7 @@ end
127
130
  * with a group_by option pointing to another field, it will count the number of occurences of (this field, other field) pairs.
128
131
  * average: the aggregator will calculate the average value of this field
129
132
  * sum: the aggregator will add up the values of this field
130
- * delta: the aggregator will caclculate the difference between each line and the next and will average all the deltas
133
+ * delta: the aggregator will calculate the difference between each line and the previous one and will average all the deltas
131
134
 
132
135
  ### Iterate through lines of a single file
133
136
  And get a parsed record for each line.
@@ -150,7 +153,7 @@ log.each_parsed_line do |record|
150
153
  puts record.length
151
154
  end
152
155
  ```
153
- **Note**: the Logfile iterators always start with the last line in the file and works its way backward.
156
+ **Note**: by default, the Logfile iterators start with the last line in the file and work their way backward. To iterate in ascending order, pass :asc as the order argument in Logfile or LogfileSet#initialize.
154
157
 
155
158
  ### Iterate through lines of multiple files
156
159
  And get a parsed record for each line.
@@ -161,12 +164,13 @@ set.each_parsed_line do |record|
161
164
  puts record.class # LineParser::AccessLog
162
165
  end
163
166
  ```
164
- **Note**: the LogfileSet iterators always starts with the most recent file.
167
+ **Note**: the LogfileSet iterators advance in the order of the logfiles enumerator. If the enumerator is ascending, the builder will yield intervals in ascending order, and vice versa.
165
168
 
166
169
  ### Aggregate lines into intervals
167
170
  ```ruby
168
171
  length = 5.minutes
169
- interval_builder = LogfileInterval::IntervalBuilder.new(set, length)
172
+ parsed_lines_enum = set.each_parsed_line
173
+ interval_builder = LogfileInterval::IntervalBuilder.new(parsed_lines_enum, parser, length)
170
174
  interval_builder.each_interval do |interval|
171
175
  puts interval.class # LogfileInterval::Interval
172
176
  puts interval.start_time
@@ -10,27 +10,27 @@ unless File.exist?(String(logfile))
10
10
  exit 1
11
11
  end
12
12
 
13
- class AccessLogParser < LogfileInterval::LineParser::Base
13
+ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
14
14
  # Example line:
15
15
  # 74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"
16
16
 
17
17
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
18
18
 
19
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
19
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
20
20
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
21
- add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
22
- add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
23
- add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
24
- add_column :name => 'referer', :pos => 6, :aggregator => :count, :group_by => :referer
25
- add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
21
+ add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
22
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
23
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
24
+ add_column :name => 'referer', :pos => 6, :aggregator => :count
25
+ add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
26
26
 
27
27
  def time
28
28
  DateTime.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z').to_time
29
29
  end
30
30
  end
31
31
 
32
- file = LogfileInterval::Logfile.new(logfile, AccessLogParser)
33
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
32
+ file = LogfileInterval::Logfile.new(logfile, AccessLogParsedLine)
33
+ builder = LogfileInterval::IntervalBuilder.new(file.each_parsed_line, AccessLogParsedLine, 300)
34
34
  builder.each_interval do |interval|
35
35
  next unless interval.size > 0
36
36
 
data/bin/readme.rb CHANGED
@@ -4,15 +4,15 @@ require 'pp'
4
4
  require 'date'
5
5
  require File.join(File.expand_path('../../lib', __FILE__), 'logfile_interval')
6
6
 
7
- class AccessLog < LogfileInterval::LineParser::Base
7
+ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
8
8
  # Example line:
9
9
  # 74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 Chrome/25.0.1364.160"
10
10
 
11
11
  set_regex /^([\d\.]+)\s+.*\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+/
12
12
 
13
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
13
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
14
14
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
15
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
15
+ add_column :name => 'code', :pos => 4, :aggregator => :count
16
16
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
17
17
 
18
18
  def time
@@ -21,13 +21,14 @@ class AccessLog < LogfileInterval::LineParser::Base
21
21
  end
22
22
 
23
23
  path = ENV['ACCESS_LOG_PATH']
24
- file = LogfileInterval::Logfile.new(path, AccessLog)
24
+ file = LogfileInterval::Logfile.new(path, AccessLogParsedLine)
25
25
  unless file.exist?
26
26
  puts "#{path} is not found"
27
27
  exit 1
28
28
  end
29
+ parsed_line_enum = file.each_parsed_line
29
30
 
30
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
31
+ builder = LogfileInterval::IntervalBuilder.new(parsed_line_enum, AccessLogParsedLine, 300)
31
32
  builder.each_interval do |interval|
32
33
  next unless interval.size > 0
33
34
 
data/docs/design.rb CHANGED
@@ -1,31 +1,33 @@
1
1
  module LogfileInterval
2
- module LineParser
3
- class Base
4
- class << self
5
- def set_regex(regex)
6
- end
2
+ module ParsedLine
7
3
 
8
- def add_column(name, options)
9
- agg = Aggregators.klass(aggregator)
10
- @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
11
- define_method(name)
12
- end
4
+ module Parser
5
+ def columns
6
+ @columns ||= {}
7
+ end
13
8
 
14
- def parse(line)
15
- match_data = regex.match(line)
16
- data = f(match_data)
17
- end
9
+ def set_regex
10
+ end
18
11
 
19
- def create_record(line)
20
- record = new(line)
21
- return record.valid? ? record : nil
22
- end
12
+ def add_column
13
+ agg = Aggregator::Base.klass(aggregator)
14
+ @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
15
+ define_method(name)
16
+ end
17
+
18
+ def parse(line)
19
+ match_data = regex.match(line)
20
+ data = {}
21
+ data = f(match_data)
22
+ end
23
+
24
+ def each(&block)
25
+ columns.each(&block)
23
26
  end
24
27
  end
25
28
 
26
- class AccessLog < Base
27
- set_regex /blah/
28
- add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
29
+ class Base
30
+ extend Parser
29
31
 
30
32
  def initialize(line)
31
33
  @data = self.class.parse(line)
@@ -33,38 +35,72 @@ module LogfileInterval
33
35
  end
34
36
  end
35
37
 
38
+ class AggregatorSet
39
+ def initialize(parser_columns)
40
+ @aggregators = {}
41
+ parser_columns.each do |name, options|
42
+ @aggregators[name] = options[:aggregator].new(options)
43
+ end
44
+ end
36
45
 
37
- class Interval
38
- def initialize(end_time, length, parser)
39
- @data = {}
40
- parser.columns.each do |name, options|
41
- @data[name] = options[:aggregator].new
46
+ def add(record)
47
+ @aggregators.each do |name, agg|
48
+ agg.add_record(record)
42
49
  end
43
50
  end
44
51
 
45
- def [](name)
46
- @data[name].value
52
+ def each
53
+ @aggregators.each do |name, agg|
54
+ yield name, agg
55
+ end
47
56
  end
57
+ end
48
58
 
49
- def add_record(record)
50
- return unless record.valid?
51
- raise ParserMismatch unless record.class == parser
59
+ class Interval
60
+ def initialize(end_time, length, parser_columns)
61
+ @aggregators = AggregatorSet.new(parser_columns)
62
+ end
52
63
 
64
+ def [](name)
65
+ @aggregators[name].value
66
+ end
67
+
68
+ def add(record)
53
69
  @size += 1
54
- parser.columns.each do |name, options|
55
- @data[name].add(record[name])
70
+ @aggregators.add_record(record)
71
+ end
72
+ end
73
+
74
+ class IntervalBuilder
75
+ def initialize(parsed_lines_enum, parser_columns, length)
76
+ end
77
+
78
+ def each_interval
79
+ interval = Interval.new(now, length, parser_columns)
80
+ parsed_lines_enum.each do |record|
81
+ while record.time < interval.start_time do
82
+ yield interval
83
+ interval = Interval.new(interval.start_time, length, aggregators)
84
+ end
85
+ interval.add(record)
56
86
  end
57
87
  end
58
88
  end
59
89
 
60
90
  module Aggregator
61
- def self.klass(aggregator)
62
- case aggregator
63
- when :sum then Sum
91
+ class Base
92
+ def self.register(name, klass)
93
+ @aggregator_classes[name] = klass
94
+ end
95
+
96
+ def self.klass(name)
97
+ @aggregator_classes.fetch(name)
64
98
  end
65
99
  end
66
100
 
67
- class Sum
101
+ class Sum < Base
102
+ register :sum, self
103
+
68
104
  def initialize
69
105
  @val = 0
70
106
  end
@@ -74,7 +110,7 @@ module LogfileInterval
74
110
  end
75
111
  end
76
112
 
77
- class Count
113
+ class Count < Base
78
114
  def initialize
79
115
  @val = Counter.new
80
116
  end
@@ -90,6 +126,8 @@ module LogfileInterval
90
126
  end
91
127
 
92
128
  def each_line
129
+ return enum_for(:each_line) unless block_given?
130
+ ...
93
131
  end
94
132
 
95
133
  def each_parsed_line
@@ -114,23 +152,6 @@ module LogfileInterval
114
152
  end
115
153
  end
116
154
 
117
- class IntervalBuilder
118
- def initialize(logfile_set, length)
119
- parser = logfile_set.parser
120
- end
121
-
122
- def each_interval
123
- interval = Interval.new(now, length)
124
- set.each_parsed_line(parser) do |record|
125
- while record.time < interval.start_time do
126
- yield interval
127
- interval = Interval.new(interval.start_time, length)
128
- end
129
- interval.add(record)
130
- end
131
- end
132
- end
133
-
134
155
  class Counter < Hash
135
156
  def increment(key)
136
157
  self[key] = self[key] ? self[key] + 1 : 1
@@ -138,10 +159,16 @@ module LogfileInterval
138
159
  end
139
160
  end
140
161
 
162
+ class AccessLogParsedLine < LogfileInterval::Parse::Base
163
+ set_regex /blah/
164
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
165
+ end
166
+
167
+
141
168
  logfiles = [ 'access.log', 'access.log.1', 'access.log.2' ]
142
169
  logfile = logfiles.first
143
170
 
144
- parser = LineParser::AccessLog
171
+ parser = AccessLogParsedLine
145
172
 
146
173
  logfile_iterator = LogfileInterval::Logfile.new(logfile, parser)
147
174
  logfile_iterator.each_line do |line|
@@ -149,20 +176,19 @@ logfile_iterator.each_line do |line|
149
176
  puts line
150
177
  end
151
178
 
152
- parser = LineParser::AccessLog
153
179
  logfile_iterator.each_parsed_line do |record|
154
- puts record.class # LineParser::AccessLog
180
+ puts record.class # ParsedLine::AccessLog
155
181
  puts record.ip
156
182
  puts record.time
157
183
  end
158
184
 
159
- set_iterator = LogfileInterval::LogfileSet.new(logfiles, parser)
160
- set_iterator.each_parsed_line do |record|
161
- puts record.class # LineParser::AccessLog
185
+ set = LogfileInterval::LogfileSet.new(logfiles, parser)
186
+ set.each_parsed_line do |record|
187
+ puts record.class # ParsedLine::AccessLog
162
188
  end
163
189
 
164
190
  length = 5.minutes
165
- interval_builder = LogfileInterval::IntervalBuilder.new(logfiles, length)
191
+ interval_builder = LogfileInterval::IntervalBuilder.new(set.each_parsed_line, parser, length)
166
192
  interval_builder.each_interval do |interval|
167
193
  puts interval.class # LogfileInterval::Interval
168
194
  puts interval.start_time
@@ -1,9 +1,16 @@
1
+ require File.join(File.expand_path('..', __FILE__), '/registrar')
2
+
1
3
  module LogfileInterval
2
4
  module Aggregator
3
5
  class Base
6
+
7
+ extend Registrar
4
8
  include Enumerable
5
9
 
10
+ attr_reader :name
11
+
6
12
  def initialize(options = {})
13
+ @name = options[:name]
7
14
  @val = Util::Counter.new
8
15
  @size = Util::Counter.new
9
16
  @options = options
@@ -21,11 +28,12 @@ module LogfileInterval
21
28
  end
22
29
  end
23
30
 
24
- def add(value, group_by = nil)
31
+ def add(value, group_by_value = nil)
25
32
  raise NotImplementedError
26
33
  end
27
34
 
28
35
  private
36
+
29
37
  def key(group_by = nil)
30
38
  group_by ? group_by : :all
31
39
  end
@@ -51,3 +59,9 @@ module LogfileInterval
51
59
  end
52
60
  end
53
61
  end
62
+
63
+ current_dir = File.expand_path('..', __FILE__)
64
+ agg_files = Dir.glob("#{current_dir}/*.rb").reject { |file| file =~ /base\.rb/ || file =~ /registrar\.rb/ }
65
+ agg_files.each do |agg_file|
66
+ require agg_file
67
+ end
@@ -1,8 +1,16 @@
1
1
  module LogfileInterval
2
2
  module Aggregator
3
3
  class Count < Base
4
- def add(value, group_by = nil)
5
- @val.add(key(group_by), 1)
4
+ def each
5
+ @val.each { |k, v| yield k, v }
6
+ end
7
+
8
+ def add(value, group_by_value = nil)
9
+ if group_by_value
10
+ @val.increment_subkey(value, key(group_by_value))
11
+ else
12
+ @val.increment(value)
13
+ end
6
14
  end
7
15
  end
8
16
  end