logfile_interval 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/README.md +15 -11
  4. data/bin/aggregate_access_log.rb +9 -9
  5. data/bin/readme.rb +6 -5
  6. data/docs/design.rb +88 -62
  7. data/lib/logfile_interval/aggregator/base.rb +15 -1
  8. data/lib/logfile_interval/aggregator/count.rb +10 -2
  9. data/lib/logfile_interval/aggregator/delta.rb +6 -6
  10. data/lib/logfile_interval/aggregator/num_lines.rb +13 -0
  11. data/lib/logfile_interval/aggregator/registrar.rb +32 -0
  12. data/lib/logfile_interval/aggregator_set.rb +34 -0
  13. data/lib/logfile_interval/interval.rb +5 -31
  14. data/lib/logfile_interval/interval_builder/ascending.rb +23 -0
  15. data/lib/logfile_interval/interval_builder/descending.rb +22 -0
  16. data/lib/logfile_interval/interval_builder.rb +62 -19
  17. data/lib/logfile_interval/logfile.rb +34 -5
  18. data/lib/logfile_interval/logfile_set.rb +38 -24
  19. data/lib/logfile_interval/parsed_line/base.rb +31 -0
  20. data/lib/logfile_interval/{line_parser/base.rb → parsed_line/parser.rb} +15 -40
  21. data/lib/logfile_interval/version.rb +1 -1
  22. data/lib/logfile_interval.rb +4 -2
  23. data/spec/lib/aggregator_set_spec.rb +15 -0
  24. data/spec/lib/aggregator_spec.rb +39 -29
  25. data/spec/lib/custom_aggregator_spec.rb +3 -3
  26. data/spec/lib/interval_builder_spec.rb +185 -38
  27. data/spec/lib/interval_spec.rb +41 -30
  28. data/spec/lib/line_parser/base_spec.rb +16 -5
  29. data/spec/lib/logfile_set_spec.rb +23 -1
  30. data/spec/lib/logfile_spec.rb +23 -1
  31. data/spec/support/lib/access_log.rb +1 -1
  32. data/spec/support/lib/custom_timing_log.rb +3 -3
  33. data/spec/support/lib/timing_log.rb +4 -2
  34. data/spec/support/logfiles/timing.log.2 +1 -0
  35. metadata +13 -5
  36. data/lib/logfile_interval/aggregator/group_and_count.rb +0 -14
  37. data/lib/logfile_interval/aggregator.rb +0 -27
@@ -1,17 +1,17 @@
1
1
  module LogfileInterval
2
2
  module Aggregator
3
3
  class Delta < Base
4
- def initialize
4
+ def initialize(options = {})
5
5
  @previous = Util::Counter.new
6
6
  super
7
7
  end
8
8
 
9
- def add(value, group_by = nil)
10
- if @previous.has_key?(key(group_by))
11
- @val.add(key(group_by), @previous[key(group_by)] - value)
12
- @size.increment(key(group_by))
9
+ def add(value, group_by_value = nil)
10
+ if @previous.has_key?(key(group_by_value))
11
+ @val.add(key(group_by_value), value - @previous[key(group_by_value)])
12
+ @size.increment(key(group_by_value))
13
13
  end
14
- @previous.set(key(group_by), value)
14
+ @previous.set(key(group_by_value), value)
15
15
  end
16
16
 
17
17
  def val(k)
@@ -0,0 +1,13 @@
1
+ module LogfileInterval
2
+ module Aggregator
3
+ class NumLines < Base
4
+ def add(value, group_by_value = nil)
5
+ if group_by_value
6
+ @val.increment_subkey(:all, key(group_by_value))
7
+ else
8
+ @val.increment(:all)
9
+ end
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,32 @@
1
+ module LogfileInterval
2
+ module Aggregator
3
+ module Registrar
4
+ def inherited(subclass)
5
+ name = subclass.to_s
6
+ name = $1 if name =~ /(\w+)$/
7
+ name = name.scan(/[A-Z][a-z]*/).join("_").downcase.to_sym
8
+ aggregator_classes[name] = subclass
9
+ end
10
+
11
+ def aggregator_classes
12
+ @@aggregator_classes ||= {}
13
+ end
14
+
15
+ def register_aggregator(name, klass)
16
+ aggregator_classes[name] = klass
17
+ end
18
+
19
+ def klass(name)
20
+ aggregator_classes[name]
21
+ end
22
+
23
+ def exist?(name)
24
+ aggregator_classes.include?(name)
25
+ end
26
+
27
+ def all
28
+ aggregator_classes.keys
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,34 @@
1
+ module LogfileInterval
2
+ class AggregatorSet
3
+ def initialize(parser_columns)
4
+ @parser_columns = parser_columns
5
+ @aggregators = {}
6
+ parser_columns.each do |name, options|
7
+ next unless klass = options[:aggregator_class]
8
+ @aggregators[name.to_sym] = klass.new(options.fetch(:custom_options, {}))
9
+ end
10
+ end
11
+
12
+ def add(record)
13
+ @parser_columns.each do |name, options|
14
+ next unless @aggregators[name]
15
+ group_by_value = record[options[:group_by]] if options[:group_by]
16
+ @aggregators[name].add(record[name], group_by_value)
17
+ end
18
+ end
19
+
20
+ def [](name)
21
+ raise ArgumentError, "#{name} field does not exist" unless @aggregators.has_key?(name)
22
+ @aggregators[name.to_sym].values
23
+ end
24
+
25
+ def to_hash
26
+ @aggregators.inject({}) do |h, pair|
27
+ k = pair[0]
28
+ v = pair[1]
29
+ h[k] = v.values
30
+ h
31
+ end
32
+ end
33
+ end
34
+ end
@@ -4,9 +4,8 @@ module LogfileInterval
4
4
  attr_reader :size
5
5
 
6
6
  class OutOfRange < StandardError; end
7
- class ParserMismatch < StandardError; end
8
7
 
9
- def initialize(end_time, length, parser)
8
+ def initialize(end_time, length, parser_columns)
10
9
  raise ArgumentError, 'end_time must be round' unless (end_time.to_i % length.to_i == 0)
11
10
  @end_time = end_time
12
11
  @start_time = end_time - length
@@ -14,48 +13,23 @@ module LogfileInterval
14
13
  @parser = parser
15
14
  @size = 0
16
15
 
17
- @data = {}
18
- parser.columns.each do |name, options|
19
- next unless agg = options[:aggregator_class]
20
- if custom_options = options[:custom_options]
21
- @data[name] = agg.new(custom_options)
22
- else
23
- @data[name] = agg.new
24
- end
25
- end
16
+ @aggregators = AggregatorSet.new(parser_columns)
26
17
  end
27
18
 
28
19
  def [](name)
29
- raise ArgumentError, "#{name} field does not exist" unless @data.has_key?(name)
30
- @data[name.to_sym].values
31
- end
32
-
33
- def each(&block)
34
- @data.each(&block)
20
+ @aggregators[name]
35
21
  end
36
22
 
37
23
  def to_hash
38
- @data.inject({}) do |h, pair|
39
- k = pair[0]
40
- v = pair[1]
41
- h[k] = v.values
42
- h
43
- end
24
+ @aggregators.to_hash
44
25
  end
45
26
 
46
27
  def add_record(record)
47
- return unless record.valid?
48
- raise ParserMismatch unless record.class == parser
49
28
  raise OutOfRange, 'too recent' if record.time>@end_time
50
29
  raise OutOfRange, 'too old' if record.time<=@start_time
51
30
 
52
31
  @size += 1
53
-
54
- parser.columns.each do |name, options|
55
- next unless @data[name]
56
- group_by_value = record[options[:group_by]] if options[:group_by]
57
- @data[name].add(record[name], group_by_value)
58
- end
32
+ @aggregators.add(record)
59
33
  end
60
34
  end
61
35
  end
@@ -0,0 +1,23 @@
1
+ module LogfileInterval
2
+ class IntervalBuilder
3
+ module Ascending
4
+ def create_first_interval
5
+ first_record = parsed_lines_enum.first
6
+ interval_end_time = upper_boundary_time(first_record.time)
7
+ Interval.new(interval_end_time, length, parser_columns)
8
+ end
9
+
10
+ def past_current_interval?(current_interval, record)
11
+ record.time > current_interval.end_time
12
+ end
13
+
14
+ def out_of_order_record?(current_interval, record)
15
+ record.time <= current_interval.start_time
16
+ end
17
+
18
+ def next_interval_end_time(current_interval)
19
+ current_interval.end_time + length
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,22 @@
1
+ module LogfileInterval
2
+ class IntervalBuilder
3
+ module Descending
4
+ def create_first_interval
5
+ interval_end_time = lower_boundary_time(Time.now)
6
+ Interval.new(interval_end_time, length, parser_columns)
7
+ end
8
+
9
+ def past_current_interval?(current_interval, record)
10
+ record.time <= current_interval.start_time
11
+ end
12
+
13
+ def out_of_order_record?(current_interval, record)
14
+ record.time > current_interval.end_time
15
+ end
16
+
17
+ def next_interval_end_time(current_interval)
18
+ current_interval.end_time - length
19
+ end
20
+ end
21
+ end
22
+ end
@@ -1,36 +1,79 @@
1
+ require File.join(File.expand_path('..', __FILE__), '/interval_builder/ascending')
2
+ require File.join(File.expand_path('..', __FILE__), '/interval_builder/descending')
3
+
1
4
  module LogfileInterval
2
5
  class IntervalBuilder
3
- attr_reader :logfile_set, :parser, :length
6
+ attr_reader :parsed_lines_enum, :parser_columns, :length
7
+
8
+ def initialize(parsed_lines_enum, parser_columns, length)
9
+ @parsed_lines_enum = parsed_lines_enum
10
+ @parser_columns = parser_columns
11
+ @length = length
4
12
 
5
- def initialize(logfile_set, length)
6
- @logfile_set = logfile_set
7
- @parser = logfile_set.parser
8
- @length = length
13
+ case order
14
+ when :asc then self.extend Ascending
15
+ when :desc then self.extend Descending
16
+ when :empty then nil
17
+ else raise ArgumentError, "Can't determine parsed_lines_enum sort order"
18
+ end
9
19
  end
10
20
 
11
- def each_interval
21
+ def each_interval(&block)
12
22
  return enum_for(:each_interval) unless block_given?
23
+ return if order == :empty
13
24
 
14
- secs = (Time.now.to_i / length.to_i) * length.to_i
15
- rounded_end_time = Time.at(secs)
16
- current_interval = Interval.new(rounded_end_time, length, parser)
25
+ current_interval = create_first_interval
17
26
 
18
- logfile_set.each_parsed_line do |record|
19
- next if record.time > current_interval.end_time
20
- while record.time <= current_interval.start_time
21
- yield current_interval
22
- current_interval = Interval.new(current_interval.start_time, length, parser)
23
- end
27
+ parsed_lines_enum.each do |record|
28
+ next if out_of_order_record?(current_interval, record)
29
+ current_interval = move_over_empty_intervals(current_interval, record) { |interval| yield interval }
24
30
  current_interval.add_record(record)
25
31
  end
26
32
 
27
- yield current_interval if current_interval.size>0
33
+ yield current_interval if current_interval.size > 0
34
+ end
35
+
36
+ def first_interval
37
+ each_interval.first
38
+ end
39
+
40
+ private
41
+
42
+ def lower_boundary_time(t)
43
+ secs = (t.to_i / length.to_i) * length.to_i
44
+ Time.at(secs)
45
+ end
46
+
47
+ def upper_boundary_time(t)
48
+ secs = (t.to_i / length.to_i + 1) * length.to_i
49
+ Time.at(secs)
50
+ end
51
+
52
+ def order
53
+ return @order if @order
54
+ num_lines = 0
55
+ previous = nil
56
+ parsed_lines_enum.each do |pl|
57
+ num_lines += 1
58
+ if previous
59
+ if pl.time > previous.time
60
+ return @order = :asc
61
+ elsif pl.time < previous.time
62
+ return @order = :desc
63
+ end
64
+ end
65
+ previous = pl
66
+ end
67
+ return @order = :empty if num_lines == 0
68
+ return @order = :unknown
28
69
  end
29
70
 
30
- def last_interval
31
- each_interval do |interval|
32
- return interval
71
+ def move_over_empty_intervals(current_interval, record)
72
+ while past_current_interval?(current_interval, record)
73
+ yield current_interval
74
+ current_interval = Interval.new(next_interval_end_time(current_interval), length, parser_columns)
33
75
  end
76
+ current_interval
34
77
  end
35
78
  end
36
79
  end
@@ -2,9 +2,14 @@ module LogfileInterval
2
2
  class Logfile
3
3
  attr_reader :filename, :parser
4
4
 
5
- def initialize(filename, parser)
5
+ ORDER_VALID_VALUES = [ :asc, :desc ]
6
+
7
+ def initialize(filename, parser, order = :desc)
6
8
  @filename = filename
7
9
  @parser = parser
10
+ @order = order
11
+
12
+ raise ArgumentError, "invalid order value: #{@order}" unless ORDER_VALID_VALUES.include?(@order.to_sym)
8
13
  end
9
14
 
10
15
  def exist?
@@ -26,11 +31,12 @@ module LogfileInterval
26
31
  return unless exist?
27
32
  return enum_for(:each_line) unless block_given?
28
33
 
29
- f = Util::FileBackward.new(@filename)
30
- while(line = f.gets)
31
- yield line.chomp
34
+ case @order
35
+ when :asc
36
+ each_line_ascending { |l| yield l }
37
+ when :desc
38
+ each_line_descending { |l| yield l }
32
39
  end
33
- f.close
34
40
  end
35
41
 
36
42
  def each_parsed_line
@@ -40,5 +46,28 @@ module LogfileInterval
40
46
  yield record if record
41
47
  end
42
48
  end
49
+ alias_method :each, :each_parsed_line
50
+
51
+ def first_parsed_line
52
+ each_parsed_line.first
53
+ end
54
+ alias_method :first, :first_parsed_line
55
+
56
+ private
57
+ def each_line_descending
58
+ f = Util::FileBackward.new(@filename)
59
+ while(line = f.gets)
60
+ yield line.chomp
61
+ end
62
+ f.close
63
+ end
64
+
65
+ def each_line_ascending
66
+ File.open(@filename) do |f|
67
+ f.each_line do |line|
68
+ yield line.chomp
69
+ end
70
+ end
71
+ end
43
72
  end
44
73
  end
@@ -2,49 +2,63 @@ module LogfileInterval
2
2
  class LogfileSet
3
3
  attr_reader :parser
4
4
 
5
- def initialize(filenames, parser)
5
+ ORDER_VALID_VALUES = [ :asc, :desc ]
6
+
7
+ def initialize(filenames, parser, order = :desc)
6
8
  @parser = parser
7
9
  @filenames = filenames
8
- end
10
+ @order = order
9
11
 
10
- def existing_filenames
11
- @existing_filenames ||= @filenames.select { |f| File.exist?(f) }
12
+ raise ArgumentError, "invalid order value: #{@order}" unless ORDER_VALID_VALUES.include?(@order.to_sym)
12
13
  end
13
14
 
14
15
  def ordered_filenames
15
- time_for_file = existing_filenames.inject({}) do |h, filename|
16
- file = Logfile.new(filename, parser)
17
- h[filename] = file.first_timestamp
18
- h
16
+ time_for_files = time_for_files(existing_filenames)
17
+ order_filenames_asc = time_for_files.to_a.sort_by { |arr| arr[1] }.map { |arr| arr[0] }
18
+ case @order
19
+ when :desc
20
+ order_filenames_asc.reverse
21
+ when :asc
22
+ order_filenames_asc
19
23
  end
20
- time_for_file.to_a.sort_by { |arr| arr[1] }.map { |arr| arr[0] }.reverse
21
24
  end
22
25
 
23
- def each_parsed_line
24
- return enum_for(:each_parsed_line) unless block_given?
26
+ def each_parsed_line(&block)
27
+ return enum_for(__method__) unless block_given?
28
+ each_by_method(__method__, &block)
29
+ end
30
+ alias_method :each, :each_parsed_line
25
31
 
26
- ordered_filenames.each do |filename|
27
- tfile = Logfile.new(filename, parser)
28
- tfile.each_parsed_line do |record|
29
- yield record
30
- end
31
- end
32
+ def each_line(&block)
33
+ return enum_for(__method__) unless block_given?
34
+ each_by_method(__method__, &block)
35
+ end
36
+
37
+ def first_parsed_line
38
+ each_parsed_line.first
32
39
  end
40
+ alias_method :first, :first_parsed_line
33
41
 
34
- def each_line
35
- return enum_for(:each_line) unless block_given?
42
+ private
43
+
44
+ def existing_filenames
45
+ @existing_filenames ||= @filenames.select { |f| File.exist?(f) }
46
+ end
36
47
 
48
+ def each_by_method(method, &block)
37
49
  ordered_filenames.each do |filename|
38
- tfile = Logfile.new(filename, parser)
39
- tfile.each_line do |line|
50
+ tfile = Logfile.new(filename, parser, @order)
51
+ tfile.send(method) do |line|
40
52
  yield line
41
53
  end
42
54
  end
43
55
  end
44
56
 
45
- def last_record
46
- each_parsed_line do |record|
47
- return record
57
+ def time_for_files(filenames)
58
+ filenames.inject({}) do |h, filename|
59
+ file = Logfile.new(filename, parser)
60
+ h[filename] = file.first_timestamp
61
+ h
48
62
  end
49
63
  end
50
64
  end