logfile_interval 1.2.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/README.md +15 -11
  4. data/bin/aggregate_access_log.rb +9 -9
  5. data/bin/readme.rb +6 -5
  6. data/docs/design.rb +88 -62
  7. data/lib/logfile_interval/aggregator/base.rb +15 -1
  8. data/lib/logfile_interval/aggregator/count.rb +10 -2
  9. data/lib/logfile_interval/aggregator/delta.rb +6 -6
  10. data/lib/logfile_interval/aggregator/num_lines.rb +13 -0
  11. data/lib/logfile_interval/aggregator/registrar.rb +32 -0
  12. data/lib/logfile_interval/aggregator_set.rb +34 -0
  13. data/lib/logfile_interval/interval.rb +5 -31
  14. data/lib/logfile_interval/interval_builder/ascending.rb +23 -0
  15. data/lib/logfile_interval/interval_builder/descending.rb +22 -0
  16. data/lib/logfile_interval/interval_builder.rb +62 -19
  17. data/lib/logfile_interval/logfile.rb +34 -5
  18. data/lib/logfile_interval/logfile_set.rb +38 -24
  19. data/lib/logfile_interval/parsed_line/base.rb +31 -0
  20. data/lib/logfile_interval/{line_parser/base.rb → parsed_line/parser.rb} +15 -40
  21. data/lib/logfile_interval/version.rb +1 -1
  22. data/lib/logfile_interval.rb +4 -2
  23. data/spec/lib/aggregator_set_spec.rb +15 -0
  24. data/spec/lib/aggregator_spec.rb +39 -29
  25. data/spec/lib/custom_aggregator_spec.rb +3 -3
  26. data/spec/lib/interval_builder_spec.rb +185 -38
  27. data/spec/lib/interval_spec.rb +41 -30
  28. data/spec/lib/line_parser/base_spec.rb +16 -5
  29. data/spec/lib/logfile_set_spec.rb +23 -1
  30. data/spec/lib/logfile_spec.rb +23 -1
  31. data/spec/support/lib/access_log.rb +1 -1
  32. data/spec/support/lib/custom_timing_log.rb +3 -3
  33. data/spec/support/lib/timing_log.rb +4 -2
  34. data/spec/support/logfiles/timing.log.2 +1 -0
  35. metadata +13 -5
  36. data/lib/logfile_interval/aggregator/group_and_count.rb +0 -14
  37. data/lib/logfile_interval/aggregator.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4b50ea1942cafe43439265b1d781190b4a60af69
4
- data.tar.gz: 35fe78e837a6f1377baf2c62b3b4df5d95b814f7
3
+ metadata.gz: 4eb614cbe0e43877376ccae43e38b666c3b58270
4
+ data.tar.gz: 723459738582e7da66b6d53e0f5ce43965576a6c
5
5
  SHA512:
6
- metadata.gz: 37ae14fa68272ef51c2127f65046e8068ad1937b00f2d66cbce37377bbde9582deb4a6312130dd630991a404d832d34a497bb60dc0fc8fe2bf303870241bed06
7
- data.tar.gz: d12a79f9e706e5e02fce96d2ccc8e8b301936245d5819f93376873f837bbe2554ed14c37729895b3f720b48306f71d07193ec65fd9eb4ec797391f6d7ccee736
6
+ metadata.gz: 4d799b181c1d65abaa35910b49c4efa85a31aefc44d009c0ee3c6966979206cec31acd09ffb1cbd97544ec74798a872d7906f43b57758a51e910e1f499935b8b
7
+ data.tar.gz: 21a2d6737ac8d3e92ed6b40dbba0f18d424ecddfac1678c9c067fafb7ea7db1e1fcde234a5fe66cc09c3d1da1077acc14a354ce0eef42bdaed2fbb6e1b16d3ce
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- logfile_interval (1.2.1)
4
+ logfile_interval (2.0.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval)
1
+ # LogfileInterval [![Build Status](https://travis-ci.org/plerohellec/logfile_interval.png?branch=master)](https://travis-ci.org/plerohellec/logfile_interval) [![Code Climate](https://codeclimate.com/github/plerohellec/logfile_interval.png)](https://codeclimate.com/github/plerohellec/logfile_interval)
2
2
 
3
3
  Logfile parser and aggregator.
4
4
 
@@ -28,9 +28,9 @@ class AccessLog < LogfileInterval::LineParser::Base
28
28
 
29
29
  set_regex /^([\d\.]+)\s+.*\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+/
30
30
 
31
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
31
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
32
32
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
33
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
33
+ add_column :name => 'code', :pos => 4, :aggregator => :count
34
34
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
35
35
 
36
36
  def time
@@ -38,9 +38,11 @@ class AccessLog < LogfileInterval::LineParser::Base
38
38
  end
39
39
  end
40
40
 
41
- file = LogfileInterval::Logfile.new('path_to_logfile', AccessLog)
41
+ parser = AccessLog
42
+ file = LogfileInterval::Logfile.new('path_to_logfile', parser)
43
+ parsed_line_enum = file.each_parsed_line
42
44
 
43
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
45
+ builder = LogfileInterval::IntervalBuilder.new(parsed_line_enum, parser, 300)
44
46
  builder.each_interval do |interval|
45
47
  next unless interval.size > 0
46
48
 
@@ -92,9 +94,9 @@ module LogfileInterval
92
94
 
93
95
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
94
96
 
95
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
97
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
96
98
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
97
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
99
+ add_column :name => 'code', :pos => 4, :aggregator => :count
98
100
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
99
101
  add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
100
102
  add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
@@ -120,6 +122,7 @@ end
120
122
 
121
123
  #### Aggregator types and options
122
124
  * timestamp: the timestamp field will be used to determine to which interval the line belongs, each line MUST have a timestamp
125
+ * num_lines: the simplest aggregator; it just counts the number of lines
123
126
  * count: the aggregator will count the number of occurrences of this field
124
127
  * without the group_by option, it will just count the total number of lines (probably useless)
125
128
  * with a group_by option pointing to the same field as the current one, it will count the number of occurrences
@@ -127,7 +130,7 @@ end
127
130
  * with a group_by option pointing to another field, it will count the number of occurrences of (this field, other field) pairs.
128
131
  * average: the aggregator will calculate the average value of this field
129
132
  * sum: the aggregator will add up the values of this field
130
- * delta: the aggregator will caclculate the difference between each line and the next and will average all the deltas
133
+ * delta: the aggregator will calculate the difference between each line and the previous one and will average all the deltas
131
134
 
132
135
  ### Iterate through lines of a single file
133
136
  And get a parsed record for each line.
@@ -150,7 +153,7 @@ log.each_parsed_line do |record|
150
153
  puts record.length
151
154
  end
152
155
  ```
153
- **Note**: the Logfile iterators always start with the last line in the file and works its way backward.
156
+ **Note**: the Logfile iterators by default start with the last line in the file and work their way backward. To iterate in ascending order, pass :asc as the order argument to Logfile#initialize or LogfileSet#initialize.
154
157
 
155
158
  ### Iterate through lines of multiple files
156
159
  And get a parsed record for each line.
@@ -161,12 +164,13 @@ set.each_parsed_line do |record|
161
164
  puts record.class # LineParser::AccessLog
162
165
  end
163
166
  ```
164
- **Note**: the LogfileSet iterators always starts with the most recent file.
167
+ **Note**: the LogfileSet iterators advance in the order of the logfiles enumerator. If the enumerator is ascending, the builder will yield intervals in ascending order, and vice versa.
165
168
 
166
169
  ### Aggregate lines into intervals
167
170
  ```ruby
168
171
  length = 5.minutes
169
- interval_builder = LogfileInterval::IntervalBuilder.new(set, length)
172
+ parsed_lines_enum = set.each_parsed_line
173
+ interval_builder = LogfileInterval::IntervalBuilder.new(parsed_lines_enum, parser, length)
170
174
  interval_builder.each_interval do |interval|
171
175
  puts interval.class # LogfileInterval::Interval
172
176
  puts interval.start_time
@@ -10,27 +10,27 @@ unless File.exist?(String(logfile))
10
10
  exit 1
11
11
  end
12
12
 
13
- class AccessLogParser < LogfileInterval::LineParser::Base
13
+ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
14
14
  # Example line:
15
15
  # 74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Ubuntu Chromium/25.0.1364.160 Chrome/25.0.1364.160 Safari/537.22"
16
16
 
17
17
  set_regex /^([\d\.]+)\s+\S+\s+\S+\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+(\d+)\s+"([^"]*)"\s+"([^"]+)"$/
18
18
 
19
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
19
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
20
20
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
21
- add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
22
- add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
23
- add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
24
- add_column :name => 'referer', :pos => 6, :aggregator => :count, :group_by => :referer
25
- add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
21
+ add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
22
+ add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
23
+ add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
24
+ add_column :name => 'referer', :pos => 6, :aggregator => :count
25
+ add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
26
26
 
27
27
  def time
28
28
  DateTime.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z').to_time
29
29
  end
30
30
  end
31
31
 
32
- file = LogfileInterval::Logfile.new(logfile, AccessLogParser)
33
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
32
+ file = LogfileInterval::Logfile.new(logfile, AccessLogParsedLine)
33
+ builder = LogfileInterval::IntervalBuilder.new(file.each_parsed_line, AccessLogParsedLine, 300)
34
34
  builder.each_interval do |interval|
35
35
  next unless interval.size > 0
36
36
 
data/bin/readme.rb CHANGED
@@ -4,15 +4,15 @@ require 'pp'
4
4
  require 'date'
5
5
  require File.join(File.expand_path('../../lib', __FILE__), 'logfile_interval')
6
6
 
7
- class AccessLog < LogfileInterval::LineParser::Base
7
+ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
8
8
  # Example line:
9
9
  # 74.75.19.145 - - [31/Mar/2013:06:54:12 -0700] "GET /ppa/google_chrome HTTP/1.1" 200 7855 "https://www.google.com/" "Mozilla/5.0 Chrome/25.0.1364.160"
10
10
 
11
11
  set_regex /^([\d\.]+)\s+.*\s+\[(\d\d.*\d\d)\]\s+"(?:GET|POST|PUT|HEAD|DELETE)\s+(\S+)\s+HTTP\S+"\s+(\d+)\s+/
12
12
 
13
- add_column :name => 'ip', :pos => 1, :aggregator => :count, :group_by => 'ip'
13
+ add_column :name => 'ip', :pos => 1, :aggregator => :count
14
14
  add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
15
- add_column :name => 'code', :pos => 4, :aggregator => :count, :group_by => 'code'
15
+ add_column :name => 'code', :pos => 4, :aggregator => :count
16
16
  add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
17
17
 
18
18
  def time
@@ -21,13 +21,14 @@ class AccessLog < LogfileInterval::LineParser::Base
21
21
  end
22
22
 
23
23
  path = ENV['ACCESS_LOG_PATH']
24
- file = LogfileInterval::Logfile.new(path, AccessLog)
24
+ file = LogfileInterval::Logfile.new(path, AccessLogParsedLine)
25
25
  unless file.exist?
26
26
  puts "#{path} is not found"
27
27
  exit 1
28
28
  end
29
+ parsed_line_enum = file.each_parsed_line
29
30
 
30
- builder = LogfileInterval::IntervalBuilder.new(file, 300)
31
+ builder = LogfileInterval::IntervalBuilder.new(parsed_line_enum, AccessLogParsedLine, 300)
31
32
  builder.each_interval do |interval|
32
33
  next unless interval.size > 0
33
34
 
data/docs/design.rb CHANGED
@@ -1,31 +1,33 @@
1
1
  module LogfileInterval
2
- module LineParser
3
- class Base
4
- class << self
5
- def set_regex(regex)
6
- end
2
+ module ParsedLine
7
3
 
8
- def add_column(name, options)
9
- agg = Aggregators.klass(aggregator)
10
- @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
11
- define_method(name)
12
- end
4
+ module Parser
5
+ def columns
6
+ @columns ||= {}
7
+ end
13
8
 
14
- def parse(line)
15
- match_data = regex.match(line)
16
- data = f(match_data)
17
- end
9
+ def set_regex
10
+ end
18
11
 
19
- def create_record(line)
20
- record = new(line)
21
- return record.valid? ? record : nil
22
- end
12
+ def add_column
13
+ agg = Aggregator::Base.klass(aggregator)
14
+ @columns[name] = { :pos => pos, :aggregator => agg, :conversion => conversion }
15
+ define_method(name)
16
+ end
17
+
18
+ def parse(line)
19
+ match_data = regex.match(line)
20
+ data = {}
21
+ data = f(match_data)
22
+ end
23
+
24
+ def each(&block)
25
+ columns.each(&block)
23
26
  end
24
27
  end
25
28
 
26
- class AccessLog < Base
27
- set_regex /blah/
28
- add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
29
+ class Base
30
+ extend Parser
29
31
 
30
32
  def initialize(line)
31
33
  @data = self.class.parse(line)
@@ -33,38 +35,72 @@ module LogfileInterval
33
35
  end
34
36
  end
35
37
 
38
+ class AggregatorSet
39
+ def initialize(parser_columns)
40
+ @aggregators = {}
41
+ parser_columns.each do |name, options|
42
+ @aggregators[name] = options[:aggregator].new(options)
43
+ end
44
+ end
36
45
 
37
- class Interval
38
- def initialize(end_time, length, parser)
39
- @data = {}
40
- parser.columns.each do |name, options|
41
- @data[name] = options[:aggregator].new
46
+ def add(record)
47
+ @aggregators.each do |name, agg|
48
+ agg.add_record(record)
42
49
  end
43
50
  end
44
51
 
45
- def [](name)
46
- @data[name].value
52
+ def each
53
+ @aggregators.each do |name, agg|
54
+ yield name, agg
55
+ end
47
56
  end
57
+ end
48
58
 
49
- def add_record(record)
50
- return unless record.valid?
51
- raise ParserMismatch unless record.class == parser
59
+ class Interval
60
+ def initialize(end_time, length, parser_columns)
61
+ @aggregators = AggregatorSet.new(parser_columns)
62
+ end
52
63
 
64
+ def [](name)
65
+ @aggregators[name].value
66
+ end
67
+
68
+ def add(record)
53
69
  @size += 1
54
- parser.columns.each do |name, options|
55
- @data[name].add(record[name])
70
+ @aggregators.add_record(record)
71
+ end
72
+ end
73
+
74
+ class IntervalBuilder
75
+ def initialize(parsed_lines_enum, parser_columns, length)
76
+ end
77
+
78
+ def each_interval
79
+ interval = Interval.new(now, length, parser_columns)
80
+ parsed_lines_enum.each do |record|
81
+ while record.time < interval.start_time do
82
+ yield interval
83
+ interval = Interval.new(interval.start_time, length, aggregators)
84
+ end
85
+ interval.add(record)
56
86
  end
57
87
  end
58
88
  end
59
89
 
60
90
  module Aggregator
61
- def self.klass(aggregator)
62
- case aggregator
63
- when :sum then Sum
91
+ class Base
92
+ def self.register(name, klass)
93
+ @aggregator_classes[name] = klass
94
+ end
95
+
96
+ def self.klass(name)
97
+ @aggregator_classes.fetch(name)
64
98
  end
65
99
  end
66
100
 
67
- class Sum
101
+ class Sum < Base
102
+ register :sum, self
103
+
68
104
  def initialize
69
105
  @val = 0
70
106
  end
@@ -74,7 +110,7 @@ module LogfileInterval
74
110
  end
75
111
  end
76
112
 
77
- class Count
113
+ class Count < Base
78
114
  def initialize
79
115
  @val = Counter.new
80
116
  end
@@ -90,6 +126,8 @@ module LogfileInterval
90
126
  end
91
127
 
92
128
  def each_line
129
+ return enum_for(:each_line) unless block_given?
130
+ ...
93
131
  end
94
132
 
95
133
  def each_parsed_line
@@ -114,23 +152,6 @@ module LogfileInterval
114
152
  end
115
153
  end
116
154
 
117
- class IntervalBuilder
118
- def initialize(logfile_set, length)
119
- parser = logfile_set.parser
120
- end
121
-
122
- def each_interval
123
- interval = Interval.new(now, length)
124
- set.each_parsed_line(parser) do |record|
125
- while record.time < interval.start_time do
126
- yield interval
127
- interval = Interval.new(interval.start_time, length)
128
- end
129
- interval.add(record)
130
- end
131
- end
132
- end
133
-
134
155
  class Counter < Hash
135
156
  def increment(key)
136
157
  self[key] = self[key] ? self[key] + 1 : 1
@@ -138,10 +159,16 @@ module LogfileInterval
138
159
  end
139
160
  end
140
161
 
162
+ class AccessLogParsedLine < LogfileInterval::Parse::Base
163
+ set_regex /blah/
164
+ add_column :name => :foo, :pos => 1, :conversion => integer, :aggregator => :average
165
+ end
166
+
167
+
141
168
  logfiles = [ 'access.log', 'access.log.1', 'access.log.2' ]
142
169
  logfile = logfiles.first
143
170
 
144
- parser = LineParser::AccessLog
171
+ parser = AccessLogParsedLine
145
172
 
146
173
  logfile_iterator = LogfileInterval::Logfile.new(logfile, parser)
147
174
  logfile_iterator.each_line do |line|
@@ -149,20 +176,19 @@ logfile_iterator.each_line do |line|
149
176
  puts line
150
177
  end
151
178
 
152
- parser = LineParser::AccessLog
153
179
  logfile_iterator.each_parsed_line do |record|
154
- puts record.class # LineParser::AccessLog
180
+ puts record.class # ParsedLine::AccessLog
155
181
  puts record.ip
156
182
  puts record.time
157
183
  end
158
184
 
159
- set_iterator = LogfileInterval::LogfileSet.new(logfiles, parser)
160
- set_iterator.each_parsed_line do |record|
161
- puts record.class # LineParser::AccessLog
185
+ set = LogfileInterval::LogfileSet.new(logfiles, parser)
186
+ set.each_parsed_line do |record|
187
+ puts record.class # ParsedLine::AccessLog
162
188
  end
163
189
 
164
190
  length = 5.minutes
165
- interval_builder = LogfileInterval::IntervalBuilder.new(logfiles, length)
191
+ interval_builder = LogfileInterval::IntervalBuilder.new(set.each_parsed_line, parser, length)
166
192
  interval_builder.each_interval do |interval|
167
193
  puts interval.class # LogfileInterval::Interval
168
194
  puts interval.start_time
@@ -1,9 +1,16 @@
1
+ require File.join(File.expand_path('..', __FILE__), '/registrar')
2
+
1
3
  module LogfileInterval
2
4
  module Aggregator
3
5
  class Base
6
+
7
+ extend Registrar
4
8
  include Enumerable
5
9
 
10
+ attr_reader :name
11
+
6
12
  def initialize(options = {})
13
+ @name = options[:name]
7
14
  @val = Util::Counter.new
8
15
  @size = Util::Counter.new
9
16
  @options = options
@@ -21,11 +28,12 @@ module LogfileInterval
21
28
  end
22
29
  end
23
30
 
24
- def add(value, group_by = nil)
31
+ def add(value, group_by_value = nil)
25
32
  raise NotImplementedError
26
33
  end
27
34
 
28
35
  private
36
+
29
37
  def key(group_by = nil)
30
38
  group_by ? group_by : :all
31
39
  end
@@ -51,3 +59,9 @@ module LogfileInterval
51
59
  end
52
60
  end
53
61
  end
62
+
63
+ current_dir = File.expand_path('..', __FILE__)
64
+ agg_files = Dir.glob("#{current_dir}/*.rb").reject { |file| file =~ /base\.rb/ || file =~ /registrar\.rb/ }
65
+ agg_files.each do |agg_file|
66
+ require agg_file
67
+ end
@@ -1,8 +1,16 @@
1
1
  module LogfileInterval
2
2
  module Aggregator
3
3
  class Count < Base
4
- def add(value, group_by = nil)
5
- @val.add(key(group_by), 1)
4
+ def each
5
+ @val.each { |k, v| yield k, v }
6
+ end
7
+
8
+ def add(value, group_by_value = nil)
9
+ if group_by_value
10
+ @val.increment_subkey(value, key(group_by_value))
11
+ else
12
+ @val.increment(value)
13
+ end
6
14
  end
7
15
  end
8
16
  end