request-log-analyzer 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,8 +22,6 @@ module RequestLogAnalyzer::Filter
22
22
  # Returns nil otherwise
23
23
  # <tt>request</tt> Request object.
24
24
  def filter(request)
25
- return nil unless request
26
-
27
25
  if @after && @before && request.timestamp <= @before && @after <= request.timestamp
28
26
  return request
29
27
  elsif @after && @before.nil? && @after <= request.timestamp
@@ -1,22 +1,10 @@
1
1
  module RequestLogAnalyzer
2
2
 
3
- module Anonymizers
4
- def anonymizer_for_ip(value, capture_definition)
5
- '127.0.0.1'
6
- end
7
-
8
- def anonymizer_for_url(value, capture_definition)
9
- value.sub(/^https?\:\/\/[A-Za-z0-9\.-]+\//, "http://example.com/")
10
- end
11
- end
12
-
13
3
  # The line definition class is used to specify what lines should be parsed from the log file.
14
4
  # It contains functionality to match a line against the definition and parse the information
15
5
  # from this line. This is used by the LogParser class when parsing a log file.
16
6
  class LineDefinition
17
7
 
18
- include RequestLogAnalyzer::Anonymizers
19
-
20
8
  class Definer
21
9
 
22
10
  attr_accessor :line_definitions
@@ -52,20 +40,6 @@ module RequestLogAnalyzer
52
40
  return definition
53
41
  end
54
42
 
55
- # Converts a parsed value (String) to the desired value using some heuristics.
56
- def convert_value(value, type)
57
- case type
58
- when :integer; value.to_i
59
- when :float; value.to_f
60
- when :decimal; value.to_f
61
- when :symbol; value.to_sym
62
- when :sec; value.to_f
63
- when :msec; value.to_f / 1000
64
- when :timestamp; value.gsub(/[^0-9]/,'')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
65
- else value
66
- end
67
- end
68
-
69
43
  # Checks whether a given line matches this definition.
70
44
  # It will return false if a line does not match. If the line matches, a hash is returned
71
45
  # with all the fields parsed from that line as content.
@@ -74,17 +48,7 @@ module RequestLogAnalyzer
74
48
  def matches(line, lineno = nil, parser = nil)
75
49
  if @teaser.nil? || @teaser =~ line
76
50
  if match_data = line.match(@regexp)
77
- request_info = { :line_type => name, :lineno => lineno }
78
-
79
- captures.each_with_index do |capture, index|
80
- next if capture == :ignore
81
-
82
- if match_data.captures[index]
83
- request_info[capture[:name]] = convert_value(match_data.captures[index], capture[:type])
84
- end
85
-
86
- end
87
- return request_info
51
+ return { :line_definition => self, :lineno => lineno, :captures => match_data.captures}
88
52
  else
89
53
  if @teaser && parser
90
54
  parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
@@ -97,63 +61,23 @@ module RequestLogAnalyzer
97
61
  end
98
62
 
99
63
  alias :=~ :matches
100
-
101
- def anonymize_value(value, capture_definition)
102
- if capture_definition[:anonymize].respond_to?(:call)
103
- capture_definition[:anonymize].call(value, capture_definition)
64
+
65
+ def match_for(line, request, lineno = nil, parser = nil)
66
+ if match_info = matches(line, lineno, parser)
67
+ convert_captured_values(match_info[:captures], request)
104
68
  else
105
- case capture_definition[:anonymize]
106
- when nil; value
107
- when false; value
108
- when true; '***'
109
- when :slightly; anonymize_slightly(value, capture_definition)
110
- else
111
- method_name = "anonymizer_for_#{capture_definition[:anonymize]}".to_sym
112
- self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
113
- end
69
+ false
114
70
  end
115
71
  end
116
-
117
- def anonymize_slightly(value, capture_definition)
118
- case capture_definition[:type]
119
- when :integer
120
- (value.to_i * (0.8 + rand * 0.4)).to_i
121
- when :double
122
- (value.to_f * (0.8 + rand * 0.4)).to_f
123
- when :msec
124
- (value.to_i * (0.8 + rand * 0.4)).to_i
125
- when :sec
126
- (value.to_f * (0.8 + rand * 0.4)).to_f
127
- when :timestamp
128
- (DateTime.parse(value) + (rand(100) - 50)).to_s
129
- else
130
- puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
131
- '***'
132
- end
133
- end
134
72
 
135
- # Anonymize a log line
136
- def anonymize(line, options = {})
137
- if self.teaser.nil? || self.teaser =~ line
138
- if self.regexp =~ line
139
- pos_adjustment = 0
140
- captures.each_with_index do |capture, index|
141
- unless $~[index + 1].nil?
142
- anonymized_value = anonymize_value($~[index + 1], capture).to_s
143
- line[($~.begin(index + 1) + pos_adjustment)...($~.end(index + 1) + pos_adjustment)] = anonymized_value
144
- pos_adjustment += anonymized_value.length - $~[index + 1].length
145
- end
146
- end
147
- line
148
- elsif self.teaser.nil?
149
- nil
150
- else
151
- options[:discard_teaser_lines] ? "" : line
152
- end
153
- else
154
- nil
73
+ def convert_captured_values(values, request)
74
+ value_hash = {}
75
+ captures.each_with_index do |capture, index|
76
+ value_hash[capture[:name]] ||= request.convert_value(values[index], capture)
155
77
  end
78
+ return value_hash
156
79
  end
80
+
157
81
  end
158
82
 
159
83
  end
@@ -6,12 +6,9 @@ module RequestLogAnalyzer
6
6
  # contents of the line, remain it intact or remove it altogether, based on the current
7
7
  # file format
8
8
  #
9
- # Currently, two processors are supported, :strip and :anonymize.
9
+ # Currently, one processor is supported:
10
10
  # * :strip will remove all irrelevant lines (according to the file format) from the
11
11
  # sources. A compact, information-packed log will remain.
12
- # * :anonymize will anonymize sensitive information from the lines according to the
13
- # anonymization rules in the file format. The result can be passed to third parties
14
- # without privacy concerns.
15
12
  #
16
13
  class LogProcessor
17
14
 
@@ -21,8 +18,8 @@ module RequestLogAnalyzer
21
18
  attr_accessor :output_file
22
19
 
23
20
  # Builds a logprocessor instance from the arguments given on the command line
24
- # <tt>command</tt> The command hat was used to start the log processor. This can either be
25
- # :strip or :anonymize. This will set the processing mode.
21
+ # <tt>command</tt> The command that was used to start the log processor. This will set the
22
+ # processing mode. Currently, only :strip is supported.
26
23
  # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
27
24
  def self.build(command, arguments)
28
25
 
@@ -43,7 +40,7 @@ module RequestLogAnalyzer
43
40
 
44
41
  # Initializes a new LogProcessor instance.
45
42
  # <tt>format</tt> The file format to use (e.g. :rails).
46
- # <tt>mode</tt> The processing mode (:anonymize or :strip)
43
+ # <tt>mode</tt> The processing mode
47
44
  # <tt>options</tt> A hash with options to take into account
48
45
  def initialize(format, mode, options = {})
49
46
  @options = options
@@ -61,12 +58,11 @@ module RequestLogAnalyzer
61
58
  end
62
59
 
63
60
  # Processes an input stream by iteration over each line and processing it according to
64
- # the current operation mode (:strip, :anonymize)
61
+ # the current operation mode
65
62
  # <tt>io</tt> The IO instance to process.
66
63
  def process_io(io)
67
64
  case mode
68
65
  when :strip; io.each_line { |line| @output << strip_line(line) }
69
- when :anonymize; io.each_line { |line| @output << anonymize_line(line) }
70
66
  end
71
67
  end
72
68
 
@@ -77,22 +73,6 @@ module RequestLogAnalyzer
77
73
  file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
78
74
  end
79
75
 
80
- # Returns an anonymized version of the provided line. This can be a copy of the line it self,
81
- # an empty string or a string in which some substrings are substituted for anonymized values.
82
- # <tt>line</tt> The line to anonymize
83
- def anonymize_line(line)
84
- anonymized_line = nil
85
- file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }
86
-
87
- if anonymized_line
88
- return anonymized_line
89
- elsif options[:keep_junk_lines]
90
- return line
91
- else
92
- return ""
93
- end
94
- end
95
-
96
76
  # Runs the log processing by setting up the output stream and iterating over all the
97
77
  # input sources. Input sources can either be filenames (String instances) or IO streams
98
78
  # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
@@ -9,7 +9,45 @@ module RequestLogAnalyzer
9
9
  # Request#every(field_name) returns all values corresponding to the given field name as array.
10
10
  class Request
11
11
 
12
+ module Converters
13
+
14
+ def convert_value(value, capture_definition)
15
+ custom_converter_method = "convert_#{capture_definition[:type]}".to_sym
16
+ if respond_to?(custom_converter_method)
17
+ send(custom_converter_method, value, capture_definition)
18
+ elsif !value.nil?
19
+ case capture_definition[:type]
20
+ when :decimal; value.to_f
21
+ when :float; value.to_f
22
+ when :double; value.to_f
23
+ when :integer; value.to_i
24
+ when :int; value.to_i
25
+ when :symbol; value.to_sym
26
+ else; value.to_s
27
+ end
28
+ else
29
+ nil
30
+ end
31
+ end
32
+
33
+ # Slow default method to parse timestamps
34
+ def convert_timestamp(value, capture_definition)
35
+ DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i unless value.nil?
36
+ end
37
+
38
+ def convert_duration(value, capture_definition)
39
+ if value.nil?
40
+ nil
41
+ elsif capture_definition[:unit] == :msec
42
+ value.to_f / 1000.0
43
+ else
44
+ value.to_f
45
+ end
46
+ end
47
+ end
48
+
12
49
  include RequestLogAnalyzer::FileFormat::Awareness
50
+ include Converters
13
51
 
14
52
  attr_reader :lines
15
53
  attr_reader :attributes
@@ -32,12 +70,22 @@ module RequestLogAnalyzer
32
70
 
33
71
  # Adds another line to the request.
34
72
  # The line should be provided as a hash of the fields parsed from the line.
35
- def add_parsed_line (request_info_hash)
36
- @lines << request_info_hash
37
- @attributes = request_info_hash.merge(@attributes)
73
+ def add_parsed_line (parsed_line)
74
+ value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
75
+ value_hash[:line_type] = parsed_line[:line_definition].name
76
+ value_hash[:lineno] = parsed_line[:lineno]
77
+ add_line_hash(value_hash)
78
+ end
79
+
80
+ def add_line_hash(value_hash)
81
+ @lines << value_hash
82
+ @attributes = value_hash.merge(@attributes)
38
83
  end
39
84
 
40
- alias :<< :add_parsed_line
85
+
86
+ def <<(hash)
87
+ hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
88
+ end
41
89
 
42
90
  # Checks whether the given line type was parsed from the log file for this request
43
91
  def has_line_type?(line_type)
@@ -81,6 +129,10 @@ module RequestLogAnalyzer
81
129
  header_found && footer_found
82
130
  end
83
131
 
132
+ # This function is called before a Request is yielded.
133
+ def validate
134
+ end
135
+
84
136
  # Returns the first timestamp encountered in a request.
85
137
  def timestamp
86
138
  first(:timestamp)
@@ -131,7 +131,6 @@ module RequestLogAnalyzer::Source
131
131
  if header_line?(request_data)
132
132
  unless @current_request.nil?
133
133
  if options[:assume_correct_order]
134
- @parsed_requests += 1
135
134
  handle_request(@current_request, &block) #yield @current_request
136
135
  @current_request = @file_format.create_request(request_data)
137
136
  else
@@ -146,7 +145,6 @@ module RequestLogAnalyzer::Source
146
145
  unless @current_request.nil?
147
146
  @current_request << request_data
148
147
  if footer_line?(request_data)
149
- @parsed_requests += 1
150
148
  handle_request(@current_request, &block) # yield @current_request
151
149
  @current_request = nil
152
150
  end
@@ -161,18 +159,19 @@ module RequestLogAnalyzer::Source
161
159
  # The default controller will send the request to every running aggregator.
162
160
  def handle_request(request, &block)
163
161
  @parsed_requests += 1
162
+ request.validate
164
163
  accepted = block_given? ? yield(request) : true
165
164
  @skipped_requests += 1 if not accepted
166
165
  end
167
166
 
168
167
  # Checks whether a given line hash is a header line.
169
168
  def header_line?(hash)
170
- file_format.line_definitions[hash[:line_type]].header
169
+ hash[:line_definition].header
171
170
  end
172
171
 
173
172
  # Checks whether a given line hash is a footer line.
174
173
  def footer_line?(hash)
175
- file_format.line_definitions[hash[:line_type]].footer
174
+ hash[:line_definition].footer
176
175
  end
177
176
  end
178
177
 
@@ -1,6 +1,6 @@
1
1
  module RequestLogAnalyzer::Tracker
2
2
 
3
- # Catagorize requests.
3
+ # Categorize requests by frequency.
4
4
  # Count and analyze requests for a specific attribute
5
5
  #
6
6
  # Accepts the following options:
@@ -19,7 +19,7 @@ module RequestLogAnalyzer::Tracker
19
19
  # PUT | 13685 hits (28.4%) |░░░░░░░░░░░
20
20
  # POST | 11662 hits (24.2%) |░░░░░░░░░
21
21
  # DELETE | 512 hits (1.1%) |
22
- class Category < Base
22
+ class Frequency < Base
23
23
 
24
24
  attr_reader :categories
25
25
 
@@ -4,42 +4,61 @@ describe RequestLogAnalyzer::Controller do
4
4
 
5
5
  include RequestLogAnalyzerSpecHelper
6
6
 
7
- # it "should include the file format module" do
8
- # controller = RequestLogAnalyzer::Controller.new(:rails)
9
- # (class << controller; self; end).ancestors.include?(RequestLogAnalyzer::FileFormat::Rails)
10
- # end
11
-
12
- it "should call the aggregators when run" do
7
+ it "should use a custom output generator correctly" do
13
8
 
14
- mock_output = mock('output')
15
- mock_output.stub!(:io).and_return($stdout)
9
+ mock_output = mock('RequestLogAnalyzer::Output::Base')
10
+ mock_output.stub!(:io).and_return(mock_io)
16
11
  mock_output.should_receive(:header)
17
12
  mock_output.should_receive(:footer)
13
+
14
+ file_format = RequestLogAnalyzer::FileFormat.load(:rails)
15
+ source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
16
+ controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
17
+
18
+ controller.run!
19
+ end
20
+
21
+ it "should call aggregators correctly when run" do
18
22
 
19
23
  file_format = RequestLogAnalyzer::FileFormat.load(:rails)
20
24
  source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
21
25
  controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
22
26
 
23
- mock_aggregator = mock('aggregator')
27
+ mock_aggregator = mock('RequestLogAnalyzer::Aggregator::Base')
24
28
  mock_aggregator.should_receive(:prepare).once.ordered
25
29
  mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
26
30
  mock_aggregator.should_receive(:finalize).once.ordered
27
31
  mock_aggregator.should_receive(:report).once.ordered
32
+
33
+ controller.aggregators << mock_aggregator
34
+ controller.run!
35
+ end
36
+
37
+ it "should call filters when run" do
38
+ file_format = RequestLogAnalyzer::FileFormat.load(:rails)
39
+ source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
40
+ controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
28
41
 
29
- another_mock_aggregator = mock('another aggregator')
30
- another_mock_aggregator.should_receive(:prepare).once.ordered
31
- another_mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
32
- another_mock_aggregator.should_receive(:finalize).once.ordered
33
- another_mock_aggregator.should_receive(:report).once.ordered
34
-
35
- controller.aggregators << mock_aggregator << another_mock_aggregator
42
+ mock_filter = mock('RequestLogAnalyzer::Filter::Base')
43
+ mock_filter.should_receive(:prepare).once.ordered
44
+ mock_filter.should_receive(:filter).at_least(:twice)
45
+
46
+ controller.should_not_receive(:aggregate_request)
47
+
48
+ controller.filters << mock_filter
36
49
  controller.run!
37
50
  end
38
51
 
39
- it "should run well from the command line" do
40
- temp_file = "#{File.dirname(__FILE__)}/fixtures/temp.txt"
41
- system("#{File.dirname(__FILE__)}/../bin/request-log-analyzer #{log_fixture(:rails_1x)} > #{temp_file}").should be_true
52
+ it "should run well from the command line with the most important features" do
53
+
54
+ temp_file = "#{File.dirname(__FILE__)}/fixtures/report.txt"
55
+ temp_db = "#{File.dirname(__FILE__)}/fixtures/output.db"
56
+ binary = "#{File.dirname(__FILE__)}/../bin/request-log-analyzer"
57
+
58
+ system("#{binary} #{log_fixture(:rails_1x)} --database #{temp_db} --select Controller PeopleController --file #{temp_file} > /dev/null").should be_true
59
+
42
60
  File.unlink(temp_file)
61
+ File.unlink(temp_db)
43
62
  end
44
63
 
45
64
  end
@@ -50,13 +50,13 @@ describe RequestLogAnalyzer::FileFormat, :format_definition do
50
50
  line.first_test :regexp => /test/, :captures => []
51
51
  end
52
52
 
53
+
53
54
  @second_file_format.format_definition do |line|
54
55
  line.second_test :regexp => /test/, :captures => []
55
56
  end
56
57
 
58
+ @first_file_format.line_definer.should_not eql(@second_file_format.line_definer)
57
59
  @first_file_format.new.should have(1).line_definitions
58
- @first_file_format.new.line_definitions[:first_test].should_not be_nil
59
- @second_file_format.new.should have(1).line_definitions
60
60
  @second_file_format.new.line_definitions[:second_test].should_not be_nil
61
61
  end
62
62
  end