request-log-analyzer 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,8 +22,6 @@ module RequestLogAnalyzer::Filter
22
22
  # Returns nil otherwise
23
23
  # <tt>request</tt> Request object.
24
24
  def filter(request)
25
- return nil unless request
26
-
27
25
  if @after && @before && request.timestamp <= @before && @after <= request.timestamp
28
26
  return request
29
27
  elsif @after && @before.nil? && @after <= request.timestamp
@@ -1,22 +1,10 @@
1
1
  module RequestLogAnalyzer
2
2
 
3
- module Anonymizers
4
- def anonymizer_for_ip(value, capture_definition)
5
- '127.0.0.1'
6
- end
7
-
8
- def anonymizer_for_url(value, capture_definition)
9
- value.sub(/^https?\:\/\/[A-Za-z0-9\.-]+\//, "http://example.com/")
10
- end
11
- end
12
-
13
3
  # The line definition class is used to specify what lines should be parsed from the log file.
14
4
  # It contains functionality to match a line against the definition and parse the information
15
5
  # from this line. This is used by the LogParser class when parsing a log file.
16
6
  class LineDefinition
17
7
 
18
- include RequestLogAnalyzer::Anonymizers
19
-
20
8
  class Definer
21
9
 
22
10
  attr_accessor :line_definitions
@@ -52,20 +40,6 @@ module RequestLogAnalyzer
52
40
  return definition
53
41
  end
54
42
 
55
- # Converts a parsed value (String) to the desired value using some heuristics.
56
- def convert_value(value, type)
57
- case type
58
- when :integer; value.to_i
59
- when :float; value.to_f
60
- when :decimal; value.to_f
61
- when :symbol; value.to_sym
62
- when :sec; value.to_f
63
- when :msec; value.to_f / 1000
64
- when :timestamp; value.gsub(/[^0-9]/,'')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
65
- else value
66
- end
67
- end
68
-
69
43
  # Checks whether a given line matches this definition.
70
44
  # It will return false if a line does not match. If the line matches, a hash is returned
71
45
  # with all the fields parsed from that line as content.
@@ -74,17 +48,7 @@ module RequestLogAnalyzer
74
48
  def matches(line, lineno = nil, parser = nil)
75
49
  if @teaser.nil? || @teaser =~ line
76
50
  if match_data = line.match(@regexp)
77
- request_info = { :line_type => name, :lineno => lineno }
78
-
79
- captures.each_with_index do |capture, index|
80
- next if capture == :ignore
81
-
82
- if match_data.captures[index]
83
- request_info[capture[:name]] = convert_value(match_data.captures[index], capture[:type])
84
- end
85
-
86
- end
87
- return request_info
51
+ return { :line_definition => self, :lineno => lineno, :captures => match_data.captures}
88
52
  else
89
53
  if @teaser && parser
90
54
  parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
@@ -97,63 +61,23 @@ module RequestLogAnalyzer
97
61
  end
98
62
 
99
63
  alias :=~ :matches
100
-
101
- def anonymize_value(value, capture_definition)
102
- if capture_definition[:anonymize].respond_to?(:call)
103
- capture_definition[:anonymize].call(value, capture_definition)
64
+
65
+ def match_for(line, request, lineno = nil, parser = nil)
66
+ if match_info = matches(line, lineno, parser)
67
+ convert_captured_values(match_info[:captures], request)
104
68
  else
105
- case capture_definition[:anonymize]
106
- when nil; value
107
- when false; value
108
- when true; '***'
109
- when :slightly; anonymize_slightly(value, capture_definition)
110
- else
111
- method_name = "anonymizer_for_#{capture_definition[:anonymize]}".to_sym
112
- self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
113
- end
69
+ false
114
70
  end
115
71
  end
116
-
117
- def anonymize_slightly(value, capture_definition)
118
- case capture_definition[:type]
119
- when :integer
120
- (value.to_i * (0.8 + rand * 0.4)).to_i
121
- when :double
122
- (value.to_f * (0.8 + rand * 0.4)).to_f
123
- when :msec
124
- (value.to_i * (0.8 + rand * 0.4)).to_i
125
- when :sec
126
- (value.to_f * (0.8 + rand * 0.4)).to_f
127
- when :timestamp
128
- (DateTime.parse(value) + (rand(100) - 50)).to_s
129
- else
130
- puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
131
- '***'
132
- end
133
- end
134
72
 
135
- # Anonymize a log line
136
- def anonymize(line, options = {})
137
- if self.teaser.nil? || self.teaser =~ line
138
- if self.regexp =~ line
139
- pos_adjustment = 0
140
- captures.each_with_index do |capture, index|
141
- unless $~[index + 1].nil?
142
- anonymized_value = anonymize_value($~[index + 1], capture).to_s
143
- line[($~.begin(index + 1) + pos_adjustment)...($~.end(index + 1) + pos_adjustment)] = anonymized_value
144
- pos_adjustment += anonymized_value.length - $~[index + 1].length
145
- end
146
- end
147
- line
148
- elsif self.teaser.nil?
149
- nil
150
- else
151
- options[:discard_teaser_lines] ? "" : line
152
- end
153
- else
154
- nil
73
+ def convert_captured_values(values, request)
74
+ value_hash = {}
75
+ captures.each_with_index do |capture, index|
76
+ value_hash[capture[:name]] ||= request.convert_value(values[index], capture)
155
77
  end
78
+ return value_hash
156
79
  end
80
+
157
81
  end
158
82
 
159
83
  end
@@ -6,12 +6,9 @@ module RequestLogAnalyzer
6
6
  # contents of the line, remain it intact or remove it altogether, based on the current
7
7
  # file format
8
8
  #
9
- # Currently, two processors are supported, :strip and :anonymize.
9
+ # Currently, one processor is supported:
10
10
  # * :strip will remove all irrelevant lines (according to the file format) from the
11
11
  # sources. A compact, information-packed log will remain.
12
- # * :anonymize will anonymize sensitive information from the lines according to the
13
- # anonymization rules in the file format. The result can be passed to third parties
14
- # without privacy concerns.
15
12
  #
16
13
  class LogProcessor
17
14
 
@@ -21,8 +18,8 @@ module RequestLogAnalyzer
21
18
  attr_accessor :output_file
22
19
 
23
20
  # Builds a logprocessor instance from the arguments given on the command line
24
- # <tt>command</tt> The command that was used to start the log processor. This can either be
25
- # :strip or :anonymize. This will set the processing mode.
21
+ # <tt>command</tt> The command that was used to start the log processor. This will set the
22
+ # processing mode. Currently, only :strip is supported.
26
23
  # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
27
24
  def self.build(command, arguments)
28
25
 
@@ -43,7 +40,7 @@ module RequestLogAnalyzer
43
40
 
44
41
  # Initializes a new LogProcessor instance.
45
42
  # <tt>format</tt> The file format to use (e.g. :rails).
46
- # <tt>mode</tt> The processing mode (:anonymize or :strip)
43
+ # <tt>mode</tt> The processing mode
47
44
  # <tt>options</tt> A hash with options to take into account
48
45
  def initialize(format, mode, options = {})
49
46
  @options = options
@@ -61,12 +58,11 @@ module RequestLogAnalyzer
61
58
  end
62
59
 
63
60
  # Processes an input stream by iteration over each line and processing it according to
64
- # the current operation mode (:strip, :anonymize)
61
+ # the current operation mode
65
62
  # <tt>io</tt> The IO instance to process.
66
63
  def process_io(io)
67
64
  case mode
68
65
  when :strip; io.each_line { |line| @output << strip_line(line) }
69
- when :anonymize; io.each_line { |line| @output << anonymize_line(line) }
70
66
  end
71
67
  end
72
68
 
@@ -77,22 +73,6 @@ module RequestLogAnalyzer
77
73
  file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
78
74
  end
79
75
 
80
- # Returns an anonymized version of the provided line. This can be a copy of the line it self,
81
- # an empty string or a string in which some substrings are substituted for anonymized values.
82
- # <tt>line</tt> The line to anonymize
83
- def anonymize_line(line)
84
- anonymized_line = nil
85
- file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }
86
-
87
- if anonymized_line
88
- return anonymized_line
89
- elsif options[:keep_junk_lines]
90
- return line
91
- else
92
- return ""
93
- end
94
- end
95
-
96
76
  # Runs the log processing by setting up the output stream and iterating over all the
97
77
  # input sources. Input sources can either be filenames (String instances) or IO streams
98
78
  # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
@@ -9,7 +9,45 @@ module RequestLogAnalyzer
9
9
  # Request#every(field_name) returns all values corresponding to the given field name as array.
10
10
  class Request
11
11
 
12
+ module Converters
13
+
14
+ def convert_value(value, capture_definition)
15
+ custom_converter_method = "convert_#{capture_definition[:type]}".to_sym
16
+ if respond_to?(custom_converter_method)
17
+ send(custom_converter_method, value, capture_definition)
18
+ elsif !value.nil?
19
+ case capture_definition[:type]
20
+ when :decimal; value.to_f
21
+ when :float; value.to_f
22
+ when :double; value.to_f
23
+ when :integer; value.to_i
24
+ when :int; value.to_i
25
+ when :symbol; value.to_sym
26
+ else; value.to_s
27
+ end
28
+ else
29
+ nil
30
+ end
31
+ end
32
+
33
+ # Slow default method to parse timestamps
34
+ def convert_timestamp(value, capture_definition)
35
+ DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i unless value.nil?
36
+ end
37
+
38
+ def convert_duration(value, capture_definition)
39
+ if value.nil?
40
+ nil
41
+ elsif capture_definition[:unit] == :msec
42
+ value.to_f / 1000.0
43
+ else
44
+ value.to_f
45
+ end
46
+ end
47
+ end
48
+
12
49
  include RequestLogAnalyzer::FileFormat::Awareness
50
+ include Converters
13
51
 
14
52
  attr_reader :lines
15
53
  attr_reader :attributes
@@ -32,12 +70,22 @@ module RequestLogAnalyzer
32
70
 
33
71
  # Adds another line to the request.
34
72
  # The line should be provided as a hash of the fields parsed from the line.
35
- def add_parsed_line (request_info_hash)
36
- @lines << request_info_hash
37
- @attributes = request_info_hash.merge(@attributes)
73
+ def add_parsed_line (parsed_line)
74
+ value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
75
+ value_hash[:line_type] = parsed_line[:line_definition].name
76
+ value_hash[:lineno] = parsed_line[:lineno]
77
+ add_line_hash(value_hash)
78
+ end
79
+
80
+ def add_line_hash(value_hash)
81
+ @lines << value_hash
82
+ @attributes = value_hash.merge(@attributes)
38
83
  end
39
84
 
40
- alias :<< :add_parsed_line
85
+
86
+ def <<(hash)
87
+ hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
88
+ end
41
89
 
42
90
  # Checks whether the given line type was parsed from the log file for this request
43
91
  def has_line_type?(line_type)
@@ -81,6 +129,10 @@ module RequestLogAnalyzer
81
129
  header_found && footer_found
82
130
  end
83
131
 
132
+ # This function is called before a Request is yielded.
133
+ def validate
134
+ end
135
+
84
136
  # Returns the first timestamp encountered in a request.
85
137
  def timestamp
86
138
  first(:timestamp)
@@ -131,7 +131,6 @@ module RequestLogAnalyzer::Source
131
131
  if header_line?(request_data)
132
132
  unless @current_request.nil?
133
133
  if options[:assume_correct_order]
134
- @parsed_requests += 1
135
134
  handle_request(@current_request, &block) #yield @current_request
136
135
  @current_request = @file_format.create_request(request_data)
137
136
  else
@@ -146,7 +145,6 @@ module RequestLogAnalyzer::Source
146
145
  unless @current_request.nil?
147
146
  @current_request << request_data
148
147
  if footer_line?(request_data)
149
- @parsed_requests += 1
150
148
  handle_request(@current_request, &block) # yield @current_request
151
149
  @current_request = nil
152
150
  end
@@ -161,18 +159,19 @@ module RequestLogAnalyzer::Source
161
159
  # The default controller will send the request to every running aggregator.
162
160
  def handle_request(request, &block)
163
161
  @parsed_requests += 1
162
+ request.validate
164
163
  accepted = block_given? ? yield(request) : true
165
164
  @skipped_requests += 1 if not accepted
166
165
  end
167
166
 
168
167
  # Checks whether a given line hash is a header line.
169
168
  def header_line?(hash)
170
- file_format.line_definitions[hash[:line_type]].header
169
+ hash[:line_definition].header
171
170
  end
172
171
 
173
172
  # Checks whether a given line hash is a footer line.
174
173
  def footer_line?(hash)
175
- file_format.line_definitions[hash[:line_type]].footer
174
+ hash[:line_definition].footer
176
175
  end
177
176
  end
178
177
 
@@ -1,6 +1,6 @@
1
1
  module RequestLogAnalyzer::Tracker
2
2
 
3
- # Categorize requests.
3
+ # Categorize requests by frequency.
4
4
  # Count and analyze requests for a specific attribute
5
5
  #
6
6
  # Accepts the following options:
@@ -19,7 +19,7 @@ module RequestLogAnalyzer::Tracker
19
19
  # PUT | 13685 hits (28.4%) |░░░░░░░░░░░
20
20
  # POST | 11662 hits (24.2%) |░░░░░░░░░
21
21
  # DELETE | 512 hits (1.1%) |
22
- class Category < Base
22
+ class Frequency < Base
23
23
 
24
24
  attr_reader :categories
25
25
 
@@ -4,42 +4,61 @@ describe RequestLogAnalyzer::Controller do
4
4
 
5
5
  include RequestLogAnalyzerSpecHelper
6
6
 
7
- # it "should include the file format module" do
8
- # controller = RequestLogAnalyzer::Controller.new(:rails)
9
- # (class << controller; self; end).ancestors.include?(RequestLogAnalyzer::FileFormat::Rails)
10
- # end
11
-
12
- it "should call the aggregators when run" do
7
+ it "should use a custom output generator correctly" do
13
8
 
14
- mock_output = mock('output')
15
- mock_output.stub!(:io).and_return($stdout)
9
+ mock_output = mock('RequestLogAnalyzer::Output::Base')
10
+ mock_output.stub!(:io).and_return(mock_io)
16
11
  mock_output.should_receive(:header)
17
12
  mock_output.should_receive(:footer)
13
+
14
+ file_format = RequestLogAnalyzer::FileFormat.load(:rails)
15
+ source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
16
+ controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
17
+
18
+ controller.run!
19
+ end
20
+
21
+ it "should call aggregators correctly when run" do
18
22
 
19
23
  file_format = RequestLogAnalyzer::FileFormat.load(:rails)
20
24
  source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
21
25
  controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
22
26
 
23
- mock_aggregator = mock('aggregator')
27
+ mock_aggregator = mock('RequestLogAnalyzer::Aggregator::Base')
24
28
  mock_aggregator.should_receive(:prepare).once.ordered
25
29
  mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
26
30
  mock_aggregator.should_receive(:finalize).once.ordered
27
31
  mock_aggregator.should_receive(:report).once.ordered
32
+
33
+ controller.aggregators << mock_aggregator
34
+ controller.run!
35
+ end
36
+
37
+ it "should call filters when run" do
38
+ file_format = RequestLogAnalyzer::FileFormat.load(:rails)
39
+ source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
40
+ controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
28
41
 
29
- another_mock_aggregator = mock('another aggregator')
30
- another_mock_aggregator.should_receive(:prepare).once.ordered
31
- another_mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
32
- another_mock_aggregator.should_receive(:finalize).once.ordered
33
- another_mock_aggregator.should_receive(:report).once.ordered
34
-
35
- controller.aggregators << mock_aggregator << another_mock_aggregator
42
+ mock_filter = mock('RequestLogAnalyzer::Filter::Base')
43
+ mock_filter.should_receive(:prepare).once.ordered
44
+ mock_filter.should_receive(:filter).at_least(:twice)
45
+
46
+ controller.should_not_receive(:aggregate_request)
47
+
48
+ controller.filters << mock_filter
36
49
  controller.run!
37
50
  end
38
51
 
39
- it "should run well from the command line" do
40
- temp_file = "#{File.dirname(__FILE__)}/fixtures/temp.txt"
41
- system("#{File.dirname(__FILE__)}/../bin/request-log-analyzer #{log_fixture(:rails_1x)} > #{temp_file}").should be_true
52
+ it "should run well from the command line with the most important features" do
53
+
54
+ temp_file = "#{File.dirname(__FILE__)}/fixtures/report.txt"
55
+ temp_db = "#{File.dirname(__FILE__)}/fixtures/output.db"
56
+ binary = "#{File.dirname(__FILE__)}/../bin/request-log-analyzer"
57
+
58
+ system("#{binary} #{log_fixture(:rails_1x)} --database #{temp_db} --select Controller PeopleController --file #{temp_file} > /dev/null").should be_true
59
+
42
60
  File.unlink(temp_file)
61
+ File.unlink(temp_db)
43
62
  end
44
63
 
45
64
  end
@@ -50,13 +50,13 @@ describe RequestLogAnalyzer::FileFormat, :format_definition do
50
50
  line.first_test :regexp => /test/, :captures => []
51
51
  end
52
52
 
53
+
53
54
  @second_file_format.format_definition do |line|
54
55
  line.second_test :regexp => /test/, :captures => []
55
56
  end
56
57
 
58
+ @first_file_format.line_definer.should_not eql(@second_file_format.line_definer)
57
59
  @first_file_format.new.should have(1).line_definitions
58
- @first_file_format.new.line_definitions[:first_test].should_not be_nil
59
- @second_file_format.new.should have(1).line_definitions
60
60
  @second_file_format.new.line_definitions[:second_test].should_not be_nil
61
61
  end
62
62
  end