request-log-analyzer 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/request-log-analyzer +0 -11
- data/lib/request_log_analyzer/aggregator/summarizer.rb +7 -3
- data/lib/request_log_analyzer/controller.rb +15 -3
- data/lib/request_log_analyzer/file_format.rb +44 -16
- data/lib/request_log_analyzer/file_format/merb.rb +5 -5
- data/lib/request_log_analyzer/file_format/rails.rb +35 -21
- data/lib/request_log_analyzer/file_format/rails_development.rb +27 -20
- data/lib/request_log_analyzer/filter/{anonimize.rb → anonymize.rb} +3 -5
- data/lib/request_log_analyzer/filter/field.rb +1 -5
- data/lib/request_log_analyzer/filter/timespan.rb +0 -2
- data/lib/request_log_analyzer/line_definition.rb +12 -88
- data/lib/request_log_analyzer/log_processor.rb +5 -25
- data/lib/request_log_analyzer/request.rb +56 -4
- data/lib/request_log_analyzer/source/log_parser.rb +3 -4
- data/lib/request_log_analyzer/tracker/{category.rb → frequency.rb} +2 -2
- data/spec/controller_spec.rb +38 -19
- data/spec/file_format_spec.rb +2 -2
- data/spec/file_formats/spec_format.rb +12 -5
- data/spec/filter_spec.rb +3 -3
- data/spec/line_definition_spec.rb +18 -85
- data/spec/log_parser_spec.rb +2 -3
- data/spec/log_processor_spec.rb +0 -38
- data/spec/merb_format_spec.rb +1 -1
- data/spec/rails_format_spec.rb +6 -5
- data/spec/spec_helper.rb +16 -0
- metadata +4 -4
@@ -22,8 +22,6 @@ module RequestLogAnalyzer::Filter
|
|
22
22
|
# Returns nil otherwise
|
23
23
|
# <tt>request</tt> Request object.
|
24
24
|
def filter(request)
|
25
|
-
return nil unless request
|
26
|
-
|
27
25
|
if @after && @before && request.timestamp <= @before && @after <= request.timestamp
|
28
26
|
return request
|
29
27
|
elsif @after && @before.nil? && @after <= request.timestamp
|
@@ -1,22 +1,10 @@
|
|
1
1
|
module RequestLogAnalyzer
|
2
2
|
|
3
|
-
module Anonymizers
|
4
|
-
def anonymizer_for_ip(value, capture_definition)
|
5
|
-
'127.0.0.1'
|
6
|
-
end
|
7
|
-
|
8
|
-
def anonymizer_for_url(value, capture_definition)
|
9
|
-
value.sub(/^https?\:\/\/[A-Za-z0-9\.-]+\//, "http://example.com/")
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
3
|
# The line definition class is used to specify what lines should be parsed from the log file.
|
14
4
|
# It contains functionality to match a line against the definition and parse the information
|
15
5
|
# from this line. This is used by the LogParser class when parsing a log file.
|
16
6
|
class LineDefinition
|
17
7
|
|
18
|
-
include RequestLogAnalyzer::Anonymizers
|
19
|
-
|
20
8
|
class Definer
|
21
9
|
|
22
10
|
attr_accessor :line_definitions
|
@@ -52,20 +40,6 @@ module RequestLogAnalyzer
|
|
52
40
|
return definition
|
53
41
|
end
|
54
42
|
|
55
|
-
# Converts a parsed value (String) to the desired value using some heuristics.
|
56
|
-
def convert_value(value, type)
|
57
|
-
case type
|
58
|
-
when :integer; value.to_i
|
59
|
-
when :float; value.to_f
|
60
|
-
when :decimal; value.to_f
|
61
|
-
when :symbol; value.to_sym
|
62
|
-
when :sec; value.to_f
|
63
|
-
when :msec; value.to_f / 1000
|
64
|
-
when :timestamp; value.gsub(/[^0-9]/,'')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
|
65
|
-
else value
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
43
|
# Checks whether a given line matches this definition.
|
70
44
|
# It will return false if a line does not match. If the line matches, a hash is returned
|
71
45
|
# with all the fields parsed from that line as content.
|
@@ -74,17 +48,7 @@ module RequestLogAnalyzer
|
|
74
48
|
def matches(line, lineno = nil, parser = nil)
|
75
49
|
if @teaser.nil? || @teaser =~ line
|
76
50
|
if match_data = line.match(@regexp)
|
77
|
-
|
78
|
-
|
79
|
-
captures.each_with_index do |capture, index|
|
80
|
-
next if capture == :ignore
|
81
|
-
|
82
|
-
if match_data.captures[index]
|
83
|
-
request_info[capture[:name]] = convert_value(match_data.captures[index], capture[:type])
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
return request_info
|
51
|
+
return { :line_definition => self, :lineno => lineno, :captures => match_data.captures}
|
88
52
|
else
|
89
53
|
if @teaser && parser
|
90
54
|
parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
|
@@ -97,63 +61,23 @@ module RequestLogAnalyzer
|
|
97
61
|
end
|
98
62
|
|
99
63
|
alias :=~ :matches
|
100
|
-
|
101
|
-
def
|
102
|
-
if
|
103
|
-
|
64
|
+
|
65
|
+
def match_for(line, request, lineno = nil, parser = nil)
|
66
|
+
if match_info = matches(line, lineno, parser)
|
67
|
+
convert_captured_values(match_info[:captures], request)
|
104
68
|
else
|
105
|
-
|
106
|
-
when nil; value
|
107
|
-
when false; value
|
108
|
-
when true; '***'
|
109
|
-
when :slightly; anonymize_slightly(value, capture_definition)
|
110
|
-
else
|
111
|
-
method_name = "anonymizer_for_#{capture_definition[:anonymize]}".to_sym
|
112
|
-
self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
|
113
|
-
end
|
69
|
+
false
|
114
70
|
end
|
115
71
|
end
|
116
|
-
|
117
|
-
def anonymize_slightly(value, capture_definition)
|
118
|
-
case capture_definition[:type]
|
119
|
-
when :integer
|
120
|
-
(value.to_i * (0.8 + rand * 0.4)).to_i
|
121
|
-
when :double
|
122
|
-
(value.to_f * (0.8 + rand * 0.4)).to_f
|
123
|
-
when :msec
|
124
|
-
(value.to_i * (0.8 + rand * 0.4)).to_i
|
125
|
-
when :sec
|
126
|
-
(value.to_f * (0.8 + rand * 0.4)).to_f
|
127
|
-
when :timestamp
|
128
|
-
(DateTime.parse(value) + (rand(100) - 50)).to_s
|
129
|
-
else
|
130
|
-
puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
|
131
|
-
'***'
|
132
|
-
end
|
133
|
-
end
|
134
72
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
pos_adjustment = 0
|
140
|
-
captures.each_with_index do |capture, index|
|
141
|
-
unless $~[index + 1].nil?
|
142
|
-
anonymized_value = anonymize_value($~[index + 1], capture).to_s
|
143
|
-
line[($~.begin(index + 1) + pos_adjustment)...($~.end(index + 1) + pos_adjustment)] = anonymized_value
|
144
|
-
pos_adjustment += anonymized_value.length - $~[index + 1].length
|
145
|
-
end
|
146
|
-
end
|
147
|
-
line
|
148
|
-
elsif self.teaser.nil?
|
149
|
-
nil
|
150
|
-
else
|
151
|
-
options[:discard_teaser_lines] ? "" : line
|
152
|
-
end
|
153
|
-
else
|
154
|
-
nil
|
73
|
+
def convert_captured_values(values, request)
|
74
|
+
value_hash = {}
|
75
|
+
captures.each_with_index do |capture, index|
|
76
|
+
value_hash[capture[:name]] ||= request.convert_value(values[index], capture)
|
155
77
|
end
|
78
|
+
return value_hash
|
156
79
|
end
|
80
|
+
|
157
81
|
end
|
158
82
|
|
159
83
|
end
|
@@ -6,12 +6,9 @@ module RequestLogAnalyzer
|
|
6
6
|
# contents of the line, leave it intact or remove it altogether, based on the current
|
7
7
|
# file format
|
8
8
|
#
|
9
|
-
# Currently,
|
9
|
+
# Currently, one processor is supported:
|
10
10
|
# * :strip will remove all irrelevant lines (according to the file format) from the
|
11
11
|
# sources. A compact, information-packed log will remain.
|
12
|
-
# * :anonymize will anonymize sensitive information from the lines according to the
|
13
|
-
# anonymization rules in the file format. The result can be passed to third parties
|
14
|
-
# without privacy concerns.
|
15
12
|
#
|
16
13
|
class LogProcessor
|
17
14
|
|
@@ -21,8 +18,8 @@ module RequestLogAnalyzer
|
|
21
18
|
attr_accessor :output_file
|
22
19
|
|
23
20
|
# Builds a logprocessor instance from the arguments given on the command line
|
24
|
-
# <tt>command</tt> The command hat was used to start the log processor. This
|
25
|
-
#
|
21
|
+
# <tt>command</tt> The command that was used to start the log processor. This will set the
|
22
|
+
# processing mode. Currently, only :strip is supported.
|
26
23
|
# <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
|
27
24
|
def self.build(command, arguments)
|
28
25
|
|
@@ -43,7 +40,7 @@ module RequestLogAnalyzer
|
|
43
40
|
|
44
41
|
# Initializes a new LogProcessor instance.
|
45
42
|
# <tt>format</tt> The file format to use (e.g. :rails).
|
46
|
-
# <tt>mode</tt> The processing mode
|
43
|
+
# <tt>mode</tt> The processing mode
|
47
44
|
# <tt>options</tt> A hash with options to take into account
|
48
45
|
def initialize(format, mode, options = {})
|
49
46
|
@options = options
|
@@ -61,12 +58,11 @@ module RequestLogAnalyzer
|
|
61
58
|
end
|
62
59
|
|
63
60
|
# Processes an input stream by iteration over each line and processing it according to
|
64
|
-
# the current operation mode
|
61
|
+
# the current operation mode
|
65
62
|
# <tt>io</tt> The IO instance to process.
|
66
63
|
def process_io(io)
|
67
64
|
case mode
|
68
65
|
when :strip; io.each_line { |line| @output << strip_line(line) }
|
69
|
-
when :anonymize; io.each_line { |line| @output << anonymize_line(line) }
|
70
66
|
end
|
71
67
|
end
|
72
68
|
|
@@ -77,22 +73,6 @@ module RequestLogAnalyzer
|
|
77
73
|
file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
|
78
74
|
end
|
79
75
|
|
80
|
-
# Returns an anonymized version of the provided line. This can be a copy of the line it self,
|
81
|
-
# an empty string or a string in which some substrings are substituted for anonymized values.
|
82
|
-
# <tt>line</tt> The line to anonymize
|
83
|
-
def anonymize_line(line)
|
84
|
-
anonymized_line = nil
|
85
|
-
file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }
|
86
|
-
|
87
|
-
if anonymized_line
|
88
|
-
return anonymized_line
|
89
|
-
elsif options[:keep_junk_lines]
|
90
|
-
return line
|
91
|
-
else
|
92
|
-
return ""
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
76
|
# Runs the log processing by setting up the output stream and iterating over all the
|
97
77
|
# input sources. Input sources can either be filenames (String instances) or IO streams
|
98
78
|
# (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
|
@@ -9,7 +9,45 @@ module RequestLogAnalyzer
|
|
9
9
|
# Request#every(field_name) returns all values corresponding to the given field name as array.
|
10
10
|
class Request
|
11
11
|
|
12
|
+
module Converters
|
13
|
+
|
14
|
+
def convert_value(value, capture_definition)
|
15
|
+
custom_converter_method = "convert_#{capture_definition[:type]}".to_sym
|
16
|
+
if respond_to?(custom_converter_method)
|
17
|
+
send(custom_converter_method, value, capture_definition)
|
18
|
+
elsif !value.nil?
|
19
|
+
case capture_definition[:type]
|
20
|
+
when :decimal; value.to_f
|
21
|
+
when :float; value.to_f
|
22
|
+
when :double; value.to_f
|
23
|
+
when :integer; value.to_i
|
24
|
+
when :int; value.to_i
|
25
|
+
when :symbol; value.to_sym
|
26
|
+
else; value.to_s
|
27
|
+
end
|
28
|
+
else
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Slow default method to parse timestamps
|
34
|
+
def convert_timestamp(value, capture_definition)
|
35
|
+
DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i unless value.nil?
|
36
|
+
end
|
37
|
+
|
38
|
+
def convert_duration(value, capture_definition)
|
39
|
+
if value.nil?
|
40
|
+
nil
|
41
|
+
elsif capture_definition[:unit] == :msec
|
42
|
+
value.to_f / 1000.0
|
43
|
+
else
|
44
|
+
value.to_f
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
12
49
|
include RequestLogAnalyzer::FileFormat::Awareness
|
50
|
+
include Converters
|
13
51
|
|
14
52
|
attr_reader :lines
|
15
53
|
attr_reader :attributes
|
@@ -32,12 +70,22 @@ module RequestLogAnalyzer
|
|
32
70
|
|
33
71
|
# Adds another line to the request.
|
34
72
|
# The line should be provided as a hash of the fields parsed from the line.
|
35
|
-
def add_parsed_line (
|
36
|
-
|
37
|
-
|
73
|
+
def add_parsed_line (parsed_line)
|
74
|
+
value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
|
75
|
+
value_hash[:line_type] = parsed_line[:line_definition].name
|
76
|
+
value_hash[:lineno] = parsed_line[:lineno]
|
77
|
+
add_line_hash(value_hash)
|
78
|
+
end
|
79
|
+
|
80
|
+
def add_line_hash(value_hash)
|
81
|
+
@lines << value_hash
|
82
|
+
@attributes = value_hash.merge(@attributes)
|
38
83
|
end
|
39
84
|
|
40
|
-
|
85
|
+
|
86
|
+
def <<(hash)
|
87
|
+
hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
|
88
|
+
end
|
41
89
|
|
42
90
|
# Checks whether the given line type was parsed from the log file for this request
|
43
91
|
def has_line_type?(line_type)
|
@@ -81,6 +129,10 @@ module RequestLogAnalyzer
|
|
81
129
|
header_found && footer_found
|
82
130
|
end
|
83
131
|
|
132
|
+
# This function is called before a Request is yielded.
|
133
|
+
def validate
|
134
|
+
end
|
135
|
+
|
84
136
|
# Returns the first timestamp encountered in a request.
|
85
137
|
def timestamp
|
86
138
|
first(:timestamp)
|
@@ -131,7 +131,6 @@ module RequestLogAnalyzer::Source
|
|
131
131
|
if header_line?(request_data)
|
132
132
|
unless @current_request.nil?
|
133
133
|
if options[:assume_correct_order]
|
134
|
-
@parsed_requests += 1
|
135
134
|
handle_request(@current_request, &block) #yield @current_request
|
136
135
|
@current_request = @file_format.create_request(request_data)
|
137
136
|
else
|
@@ -146,7 +145,6 @@ module RequestLogAnalyzer::Source
|
|
146
145
|
unless @current_request.nil?
|
147
146
|
@current_request << request_data
|
148
147
|
if footer_line?(request_data)
|
149
|
-
@parsed_requests += 1
|
150
148
|
handle_request(@current_request, &block) # yield @current_request
|
151
149
|
@current_request = nil
|
152
150
|
end
|
@@ -161,18 +159,19 @@ module RequestLogAnalyzer::Source
|
|
161
159
|
# The default controller will send the request to every running aggregator.
|
162
160
|
def handle_request(request, &block)
|
163
161
|
@parsed_requests += 1
|
162
|
+
request.validate
|
164
163
|
accepted = block_given? ? yield(request) : true
|
165
164
|
@skipped_requests += 1 if not accepted
|
166
165
|
end
|
167
166
|
|
168
167
|
# Checks whether a given line hash is a header line.
|
169
168
|
def header_line?(hash)
|
170
|
-
|
169
|
+
hash[:line_definition].header
|
171
170
|
end
|
172
171
|
|
173
172
|
# Checks whether a given line hash is a footer line.
|
174
173
|
def footer_line?(hash)
|
175
|
-
|
174
|
+
hash[:line_definition].footer
|
176
175
|
end
|
177
176
|
end
|
178
177
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module RequestLogAnalyzer::Tracker
|
2
2
|
|
3
|
-
# Catagorize requests.
|
3
|
+
# Categorize requests by frequency.
|
4
4
|
# Count and analyze requests for a specific attribute
|
5
5
|
#
|
6
6
|
# Accepts the following options:
|
@@ -19,7 +19,7 @@ module RequestLogAnalyzer::Tracker
|
|
19
19
|
# PUT | 13685 hits (28.4%) |░░░░░░░░░░░
|
20
20
|
# POST | 11662 hits (24.2%) |░░░░░░░░░
|
21
21
|
# DELETE | 512 hits (1.1%) |
|
22
|
-
class
|
22
|
+
class Frequency < Base
|
23
23
|
|
24
24
|
attr_reader :categories
|
25
25
|
|
data/spec/controller_spec.rb
CHANGED
@@ -4,42 +4,61 @@ describe RequestLogAnalyzer::Controller do
|
|
4
4
|
|
5
5
|
include RequestLogAnalyzerSpecHelper
|
6
6
|
|
7
|
-
|
8
|
-
# controller = RequestLogAnalyzer::Controller.new(:rails)
|
9
|
-
# (class << controller; self; end).ancestors.include?(RequestLogAnalyzer::FileFormat::Rails)
|
10
|
-
# end
|
11
|
-
|
12
|
-
it "should call the aggregators when run" do
|
7
|
+
it "should use a custom output generator correctly" do
|
13
8
|
|
14
|
-
mock_output = mock('
|
15
|
-
mock_output.stub!(:io).and_return(
|
9
|
+
mock_output = mock('RequestLogAnalyzer::Output::Base')
|
10
|
+
mock_output.stub!(:io).and_return(mock_io)
|
16
11
|
mock_output.should_receive(:header)
|
17
12
|
mock_output.should_receive(:footer)
|
13
|
+
|
14
|
+
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
15
|
+
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
16
|
+
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
17
|
+
|
18
|
+
controller.run!
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should call aggregators correctly when run" do
|
18
22
|
|
19
23
|
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
20
24
|
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
21
25
|
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
22
26
|
|
23
|
-
mock_aggregator = mock('
|
27
|
+
mock_aggregator = mock('RequestLogAnalyzer::Aggregator::Base')
|
24
28
|
mock_aggregator.should_receive(:prepare).once.ordered
|
25
29
|
mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
|
26
30
|
mock_aggregator.should_receive(:finalize).once.ordered
|
27
31
|
mock_aggregator.should_receive(:report).once.ordered
|
32
|
+
|
33
|
+
controller.aggregators << mock_aggregator
|
34
|
+
controller.run!
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should call filters when run" do
|
38
|
+
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
39
|
+
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
40
|
+
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
28
41
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
controller.
|
42
|
+
mock_filter = mock('RequestLogAnalyzer::Filter::Base')
|
43
|
+
mock_filter.should_receive(:prepare).once.ordered
|
44
|
+
mock_filter.should_receive(:filter).at_least(:twice)
|
45
|
+
|
46
|
+
controller.should_not_receive(:aggregate_request)
|
47
|
+
|
48
|
+
controller.filters << mock_filter
|
36
49
|
controller.run!
|
37
50
|
end
|
38
51
|
|
39
|
-
it "should run well from the command line" do
|
40
|
-
|
41
|
-
|
52
|
+
it "should run well from the command line with the most important features" do
|
53
|
+
|
54
|
+
temp_file = "#{File.dirname(__FILE__)}/fixtures/report.txt"
|
55
|
+
temp_db = "#{File.dirname(__FILE__)}/fixtures/output.db"
|
56
|
+
binary = "#{File.dirname(__FILE__)}/../bin/request-log-analyzer"
|
57
|
+
|
58
|
+
system("#{binary} #{log_fixture(:rails_1x)} --database #{temp_db} --select Controller PeopleController --file #{temp_file} > /dev/null").should be_true
|
59
|
+
|
42
60
|
File.unlink(temp_file)
|
61
|
+
File.unlink(temp_db)
|
43
62
|
end
|
44
63
|
|
45
64
|
end
|
data/spec/file_format_spec.rb
CHANGED
@@ -50,13 +50,13 @@ describe RequestLogAnalyzer::FileFormat, :format_definition do
|
|
50
50
|
line.first_test :regexp => /test/, :captures => []
|
51
51
|
end
|
52
52
|
|
53
|
+
|
53
54
|
@second_file_format.format_definition do |line|
|
54
55
|
line.second_test :regexp => /test/, :captures => []
|
55
56
|
end
|
56
57
|
|
58
|
+
@first_file_format.line_definer.should_not eql(@second_file_format.line_definer)
|
57
59
|
@first_file_format.new.should have(1).line_definitions
|
58
|
-
@first_file_format.new.line_definitions[:first_test].should_not be_nil
|
59
|
-
@second_file_format.new.should have(1).line_definitions
|
60
60
|
@second_file_format.new.line_definitions[:second_test].should_not be_nil
|
61
61
|
end
|
62
62
|
end
|