request-log-analyzer 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/request-log-analyzer +0 -11
- data/lib/request_log_analyzer/aggregator/summarizer.rb +7 -3
- data/lib/request_log_analyzer/controller.rb +15 -3
- data/lib/request_log_analyzer/file_format.rb +44 -16
- data/lib/request_log_analyzer/file_format/merb.rb +5 -5
- data/lib/request_log_analyzer/file_format/rails.rb +35 -21
- data/lib/request_log_analyzer/file_format/rails_development.rb +27 -20
- data/lib/request_log_analyzer/filter/{anonimize.rb → anonymize.rb} +3 -5
- data/lib/request_log_analyzer/filter/field.rb +1 -5
- data/lib/request_log_analyzer/filter/timespan.rb +0 -2
- data/lib/request_log_analyzer/line_definition.rb +12 -88
- data/lib/request_log_analyzer/log_processor.rb +5 -25
- data/lib/request_log_analyzer/request.rb +56 -4
- data/lib/request_log_analyzer/source/log_parser.rb +3 -4
- data/lib/request_log_analyzer/tracker/{category.rb → frequency.rb} +2 -2
- data/spec/controller_spec.rb +38 -19
- data/spec/file_format_spec.rb +2 -2
- data/spec/file_formats/spec_format.rb +12 -5
- data/spec/filter_spec.rb +3 -3
- data/spec/line_definition_spec.rb +18 -85
- data/spec/log_parser_spec.rb +2 -3
- data/spec/log_processor_spec.rb +0 -38
- data/spec/merb_format_spec.rb +1 -1
- data/spec/rails_format_spec.rb +6 -5
- data/spec/spec_helper.rb +16 -0
- metadata +4 -4
@@ -22,8 +22,6 @@ module RequestLogAnalyzer::Filter
|
|
22
22
|
# Returns nil otherwise
|
23
23
|
# <tt>request</tt> Request object.
|
24
24
|
def filter(request)
|
25
|
-
return nil unless request
|
26
|
-
|
27
25
|
if @after && @before && request.timestamp <= @before && @after <= request.timestamp
|
28
26
|
return request
|
29
27
|
elsif @after && @before.nil? && @after <= request.timestamp
|
@@ -1,22 +1,10 @@
|
|
1
1
|
module RequestLogAnalyzer
|
2
2
|
|
3
|
-
module Anonymizers
|
4
|
-
def anonymizer_for_ip(value, capture_definition)
|
5
|
-
'127.0.0.1'
|
6
|
-
end
|
7
|
-
|
8
|
-
def anonymizer_for_url(value, capture_definition)
|
9
|
-
value.sub(/^https?\:\/\/[A-Za-z0-9\.-]+\//, "http://example.com/")
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
3
|
# The line definition class is used to specify what lines should be parsed from the log file.
|
14
4
|
# It contains functionality to match a line against the definition and parse the information
|
15
5
|
# from this line. This is used by the LogParser class when parsing a log file.
|
16
6
|
class LineDefinition
|
17
7
|
|
18
|
-
include RequestLogAnalyzer::Anonymizers
|
19
|
-
|
20
8
|
class Definer
|
21
9
|
|
22
10
|
attr_accessor :line_definitions
|
@@ -52,20 +40,6 @@ module RequestLogAnalyzer
|
|
52
40
|
return definition
|
53
41
|
end
|
54
42
|
|
55
|
-
# Converts a parsed value (String) to the desired value using some heuristics.
|
56
|
-
def convert_value(value, type)
|
57
|
-
case type
|
58
|
-
when :integer; value.to_i
|
59
|
-
when :float; value.to_f
|
60
|
-
when :decimal; value.to_f
|
61
|
-
when :symbol; value.to_sym
|
62
|
-
when :sec; value.to_f
|
63
|
-
when :msec; value.to_f / 1000
|
64
|
-
when :timestamp; value.gsub(/[^0-9]/,'')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
|
65
|
-
else value
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
43
|
# Checks whether a given line matches this definition.
|
70
44
|
# It will return false if a line does not match. If the line matches, a hash is returned
|
71
45
|
# with all the fields parsed from that line as content.
|
@@ -74,17 +48,7 @@ module RequestLogAnalyzer
|
|
74
48
|
def matches(line, lineno = nil, parser = nil)
|
75
49
|
if @teaser.nil? || @teaser =~ line
|
76
50
|
if match_data = line.match(@regexp)
|
77
|
-
|
78
|
-
|
79
|
-
captures.each_with_index do |capture, index|
|
80
|
-
next if capture == :ignore
|
81
|
-
|
82
|
-
if match_data.captures[index]
|
83
|
-
request_info[capture[:name]] = convert_value(match_data.captures[index], capture[:type])
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
return request_info
|
51
|
+
return { :line_definition => self, :lineno => lineno, :captures => match_data.captures}
|
88
52
|
else
|
89
53
|
if @teaser && parser
|
90
54
|
parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
|
@@ -97,63 +61,23 @@ module RequestLogAnalyzer
|
|
97
61
|
end
|
98
62
|
|
99
63
|
alias :=~ :matches
|
100
|
-
|
101
|
-
def
|
102
|
-
if
|
103
|
-
|
64
|
+
|
65
|
+
def match_for(line, request, lineno = nil, parser = nil)
|
66
|
+
if match_info = matches(line, lineno, parser)
|
67
|
+
convert_captured_values(match_info[:captures], request)
|
104
68
|
else
|
105
|
-
|
106
|
-
when nil; value
|
107
|
-
when false; value
|
108
|
-
when true; '***'
|
109
|
-
when :slightly; anonymize_slightly(value, capture_definition)
|
110
|
-
else
|
111
|
-
method_name = "anonymizer_for_#{capture_definition[:anonymize]}".to_sym
|
112
|
-
self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
|
113
|
-
end
|
69
|
+
false
|
114
70
|
end
|
115
71
|
end
|
116
|
-
|
117
|
-
def anonymize_slightly(value, capture_definition)
|
118
|
-
case capture_definition[:type]
|
119
|
-
when :integer
|
120
|
-
(value.to_i * (0.8 + rand * 0.4)).to_i
|
121
|
-
when :double
|
122
|
-
(value.to_f * (0.8 + rand * 0.4)).to_f
|
123
|
-
when :msec
|
124
|
-
(value.to_i * (0.8 + rand * 0.4)).to_i
|
125
|
-
when :sec
|
126
|
-
(value.to_f * (0.8 + rand * 0.4)).to_f
|
127
|
-
when :timestamp
|
128
|
-
(DateTime.parse(value) + (rand(100) - 50)).to_s
|
129
|
-
else
|
130
|
-
puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
|
131
|
-
'***'
|
132
|
-
end
|
133
|
-
end
|
134
72
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
pos_adjustment = 0
|
140
|
-
captures.each_with_index do |capture, index|
|
141
|
-
unless $~[index + 1].nil?
|
142
|
-
anonymized_value = anonymize_value($~[index + 1], capture).to_s
|
143
|
-
line[($~.begin(index + 1) + pos_adjustment)...($~.end(index + 1) + pos_adjustment)] = anonymized_value
|
144
|
-
pos_adjustment += anonymized_value.length - $~[index + 1].length
|
145
|
-
end
|
146
|
-
end
|
147
|
-
line
|
148
|
-
elsif self.teaser.nil?
|
149
|
-
nil
|
150
|
-
else
|
151
|
-
options[:discard_teaser_lines] ? "" : line
|
152
|
-
end
|
153
|
-
else
|
154
|
-
nil
|
73
|
+
def convert_captured_values(values, request)
|
74
|
+
value_hash = {}
|
75
|
+
captures.each_with_index do |capture, index|
|
76
|
+
value_hash[capture[:name]] ||= request.convert_value(values[index], capture)
|
155
77
|
end
|
78
|
+
return value_hash
|
156
79
|
end
|
80
|
+
|
157
81
|
end
|
158
82
|
|
159
83
|
end
|
@@ -6,12 +6,9 @@ module RequestLogAnalyzer
|
|
6
6
|
# contents of the line, leave it intact or remove it altogether, based on the current
|
7
7
|
# file format
|
8
8
|
#
|
9
|
-
# Currently,
|
9
|
+
# Currently, one processor is supported:
|
10
10
|
# * :strip will remove all irrelevant lines (according to the file format) from the
|
11
11
|
# sources. A compact, information packed log will remain.
|
12
|
-
# * :anonymize will anonymize sensitive information from the lines according to the
|
13
|
-
# anonymization rules in the file format. The result can be passed to third parties
|
14
|
-
# without privacy concerns.
|
15
12
|
#
|
16
13
|
class LogProcessor
|
17
14
|
|
@@ -21,8 +18,8 @@ module RequestLogAnalyzer
|
|
21
18
|
attr_accessor :output_file
|
22
19
|
|
23
20
|
# Builds a logprocessor instance from the arguments given on the command line
|
24
|
-
# <tt>command</tt> The command hat was used to start the log processor. This
|
25
|
-
#
|
21
|
+
# <tt>command</tt> The command that was used to start the log processor. This will set the
|
22
|
+
# processing mode. Currently, only :strip is supported.
|
26
23
|
# <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
|
27
24
|
def self.build(command, arguments)
|
28
25
|
|
@@ -43,7 +40,7 @@ module RequestLogAnalyzer
|
|
43
40
|
|
44
41
|
# Initializes a new LogProcessor instance.
|
45
42
|
# <tt>format</tt> The file format to use (e.g. :rails).
|
46
|
-
# <tt>mode</tt> The processing mode
|
43
|
+
# <tt>mode</tt> The processing mode
|
47
44
|
# <tt>options</tt> A hash with options to take into account
|
48
45
|
def initialize(format, mode, options = {})
|
49
46
|
@options = options
|
@@ -61,12 +58,11 @@ module RequestLogAnalyzer
|
|
61
58
|
end
|
62
59
|
|
63
60
|
# Processes an input stream by iteration over each line and processing it according to
|
64
|
-
# the current operation mode
|
61
|
+
# the current operation mode
|
65
62
|
# <tt>io</tt> The IO instance to process.
|
66
63
|
def process_io(io)
|
67
64
|
case mode
|
68
65
|
when :strip; io.each_line { |line| @output << strip_line(line) }
|
69
|
-
when :anonymize; io.each_line { |line| @output << anonymize_line(line) }
|
70
66
|
end
|
71
67
|
end
|
72
68
|
|
@@ -77,22 +73,6 @@ module RequestLogAnalyzer
|
|
77
73
|
file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
|
78
74
|
end
|
79
75
|
|
80
|
-
# Returns an anonymized version of the provided line. This can be a copy of the line it self,
|
81
|
-
# an empty string or a string in which some substrings are substituted for anonymized values.
|
82
|
-
# <tt>line</tt> The line to anonymize
|
83
|
-
def anonymize_line(line)
|
84
|
-
anonymized_line = nil
|
85
|
-
file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }
|
86
|
-
|
87
|
-
if anonymized_line
|
88
|
-
return anonymized_line
|
89
|
-
elsif options[:keep_junk_lines]
|
90
|
-
return line
|
91
|
-
else
|
92
|
-
return ""
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
76
|
# Runs the log processing by setting up the output stream and iterating over all the
|
97
77
|
# input sources. Input sources can either be filenames (String instances) or IO streams
|
98
78
|
# (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
|
@@ -9,7 +9,45 @@ module RequestLogAnalyzer
|
|
9
9
|
# Request#every(field_name) returns all values corresponding to the given field name as array.
|
10
10
|
class Request
|
11
11
|
|
12
|
+
module Converters
|
13
|
+
|
14
|
+
def convert_value(value, capture_definition)
|
15
|
+
custom_converter_method = "convert_#{capture_definition[:type]}".to_sym
|
16
|
+
if respond_to?(custom_converter_method)
|
17
|
+
send(custom_converter_method, value, capture_definition)
|
18
|
+
elsif !value.nil?
|
19
|
+
case capture_definition[:type]
|
20
|
+
when :decimal; value.to_f
|
21
|
+
when :float; value.to_f
|
22
|
+
when :double; value.to_f
|
23
|
+
when :integer; value.to_i
|
24
|
+
when :int; value.to_i
|
25
|
+
when :symbol; value.to_sym
|
26
|
+
else; value.to_s
|
27
|
+
end
|
28
|
+
else
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Slow default method to parse timestamps
|
34
|
+
def convert_timestamp(value, capture_definition)
|
35
|
+
DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i unless value.nil?
|
36
|
+
end
|
37
|
+
|
38
|
+
def convert_duration(value, capture_definition)
|
39
|
+
if value.nil?
|
40
|
+
nil
|
41
|
+
elsif capture_definition[:unit] == :msec
|
42
|
+
value.to_f / 1000.0
|
43
|
+
else
|
44
|
+
value.to_f
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
12
49
|
include RequestLogAnalyzer::FileFormat::Awareness
|
50
|
+
include Converters
|
13
51
|
|
14
52
|
attr_reader :lines
|
15
53
|
attr_reader :attributes
|
@@ -32,12 +70,22 @@ module RequestLogAnalyzer
|
|
32
70
|
|
33
71
|
# Adds another line to the request.
|
34
72
|
# The line should be provided as a hash of the fields parsed from the line.
|
35
|
-
def add_parsed_line (
|
36
|
-
|
37
|
-
|
73
|
+
def add_parsed_line (parsed_line)
|
74
|
+
value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
|
75
|
+
value_hash[:line_type] = parsed_line[:line_definition].name
|
76
|
+
value_hash[:lineno] = parsed_line[:lineno]
|
77
|
+
add_line_hash(value_hash)
|
78
|
+
end
|
79
|
+
|
80
|
+
def add_line_hash(value_hash)
|
81
|
+
@lines << value_hash
|
82
|
+
@attributes = value_hash.merge(@attributes)
|
38
83
|
end
|
39
84
|
|
40
|
-
|
85
|
+
|
86
|
+
def <<(hash)
|
87
|
+
hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
|
88
|
+
end
|
41
89
|
|
42
90
|
# Checks whether the given line type was parsed from the log file for this request
|
43
91
|
def has_line_type?(line_type)
|
@@ -81,6 +129,10 @@ module RequestLogAnalyzer
|
|
81
129
|
header_found && footer_found
|
82
130
|
end
|
83
131
|
|
132
|
+
# This function is called before a Request is yielded.
|
133
|
+
def validate
|
134
|
+
end
|
135
|
+
|
84
136
|
# Returns the first timestamp encountered in a request.
|
85
137
|
def timestamp
|
86
138
|
first(:timestamp)
|
@@ -131,7 +131,6 @@ module RequestLogAnalyzer::Source
|
|
131
131
|
if header_line?(request_data)
|
132
132
|
unless @current_request.nil?
|
133
133
|
if options[:assume_correct_order]
|
134
|
-
@parsed_requests += 1
|
135
134
|
handle_request(@current_request, &block) #yield @current_request
|
136
135
|
@current_request = @file_format.create_request(request_data)
|
137
136
|
else
|
@@ -146,7 +145,6 @@ module RequestLogAnalyzer::Source
|
|
146
145
|
unless @current_request.nil?
|
147
146
|
@current_request << request_data
|
148
147
|
if footer_line?(request_data)
|
149
|
-
@parsed_requests += 1
|
150
148
|
handle_request(@current_request, &block) # yield @current_request
|
151
149
|
@current_request = nil
|
152
150
|
end
|
@@ -161,18 +159,19 @@ module RequestLogAnalyzer::Source
|
|
161
159
|
# The default controller will send the request to every running aggregator.
|
162
160
|
def handle_request(request, &block)
|
163
161
|
@parsed_requests += 1
|
162
|
+
request.validate
|
164
163
|
accepted = block_given? ? yield(request) : true
|
165
164
|
@skipped_requests += 1 if not accepted
|
166
165
|
end
|
167
166
|
|
168
167
|
# Checks whether a given line hash is a header line.
|
169
168
|
def header_line?(hash)
|
170
|
-
|
169
|
+
hash[:line_definition].header
|
171
170
|
end
|
172
171
|
|
173
172
|
# Checks whether a given line hash is a footer line.
|
174
173
|
def footer_line?(hash)
|
175
|
-
|
174
|
+
hash[:line_definition].footer
|
176
175
|
end
|
177
176
|
end
|
178
177
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module RequestLogAnalyzer::Tracker
|
2
2
|
|
3
|
-
# Catagorize requests.
|
3
|
+
# Categorize requests by frequency.
|
4
4
|
# Count and analyze requests for a specific attribute
|
5
5
|
#
|
6
6
|
# Accepts the following options:
|
@@ -19,7 +19,7 @@ module RequestLogAnalyzer::Tracker
|
|
19
19
|
# PUT | 13685 hits (28.4%) |░░░░░░░░░░░
|
20
20
|
# POST | 11662 hits (24.2%) |░░░░░░░░░
|
21
21
|
# DELETE | 512 hits (1.1%) |
|
22
|
-
class
|
22
|
+
class Frequency < Base
|
23
23
|
|
24
24
|
attr_reader :categories
|
25
25
|
|
data/spec/controller_spec.rb
CHANGED
@@ -4,42 +4,61 @@ describe RequestLogAnalyzer::Controller do
|
|
4
4
|
|
5
5
|
include RequestLogAnalyzerSpecHelper
|
6
6
|
|
7
|
-
|
8
|
-
# controller = RequestLogAnalyzer::Controller.new(:rails)
|
9
|
-
# (class << controller; self; end).ancestors.include?(RequestLogAnalyzer::FileFormat::Rails)
|
10
|
-
# end
|
11
|
-
|
12
|
-
it "should call the aggregators when run" do
|
7
|
+
it "should use a custom output generator correctly" do
|
13
8
|
|
14
|
-
mock_output = mock('
|
15
|
-
mock_output.stub!(:io).and_return(
|
9
|
+
mock_output = mock('RequestLogAnalyzer::Output::Base')
|
10
|
+
mock_output.stub!(:io).and_return(mock_io)
|
16
11
|
mock_output.should_receive(:header)
|
17
12
|
mock_output.should_receive(:footer)
|
13
|
+
|
14
|
+
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
15
|
+
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
16
|
+
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
17
|
+
|
18
|
+
controller.run!
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should call aggregators correctly when run" do
|
18
22
|
|
19
23
|
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
20
24
|
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
21
25
|
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
22
26
|
|
23
|
-
mock_aggregator = mock('
|
27
|
+
mock_aggregator = mock('RequestLogAnalyzer::Aggregator::Base')
|
24
28
|
mock_aggregator.should_receive(:prepare).once.ordered
|
25
29
|
mock_aggregator.should_receive(:aggregate).with(an_instance_of(file_format.class::Request)).at_least(:twice).ordered
|
26
30
|
mock_aggregator.should_receive(:finalize).once.ordered
|
27
31
|
mock_aggregator.should_receive(:report).once.ordered
|
32
|
+
|
33
|
+
controller.aggregators << mock_aggregator
|
34
|
+
controller.run!
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should call filters when run" do
|
38
|
+
file_format = RequestLogAnalyzer::FileFormat.load(:rails)
|
39
|
+
source = RequestLogAnalyzer::Source::LogParser.new(file_format, :source_files => log_fixture(:rails_1x))
|
40
|
+
controller = RequestLogAnalyzer::Controller.new(source, :output => mock_output)
|
28
41
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
controller.
|
42
|
+
mock_filter = mock('RequestLogAnalyzer::Filter::Base')
|
43
|
+
mock_filter.should_receive(:prepare).once.ordered
|
44
|
+
mock_filter.should_receive(:filter).at_least(:twice)
|
45
|
+
|
46
|
+
controller.should_not_receive(:aggregate_request)
|
47
|
+
|
48
|
+
controller.filters << mock_filter
|
36
49
|
controller.run!
|
37
50
|
end
|
38
51
|
|
39
|
-
it "should run well from the command line" do
|
40
|
-
|
41
|
-
|
52
|
+
it "should run well from the command line with the most important features" do
|
53
|
+
|
54
|
+
temp_file = "#{File.dirname(__FILE__)}/fixtures/report.txt"
|
55
|
+
temp_db = "#{File.dirname(__FILE__)}/fixtures/output.db"
|
56
|
+
binary = "#{File.dirname(__FILE__)}/../bin/request-log-analyzer"
|
57
|
+
|
58
|
+
system("#{binary} #{log_fixture(:rails_1x)} --database #{temp_db} --select Controller PeopleController --file #{temp_file} > /dev/null").should be_true
|
59
|
+
|
42
60
|
File.unlink(temp_file)
|
61
|
+
File.unlink(temp_db)
|
43
62
|
end
|
44
63
|
|
45
64
|
end
|
data/spec/file_format_spec.rb
CHANGED
@@ -50,13 +50,13 @@ describe RequestLogAnalyzer::FileFormat, :format_definition do
|
|
50
50
|
line.first_test :regexp => /test/, :captures => []
|
51
51
|
end
|
52
52
|
|
53
|
+
|
53
54
|
@second_file_format.format_definition do |line|
|
54
55
|
line.second_test :regexp => /test/, :captures => []
|
55
56
|
end
|
56
57
|
|
58
|
+
@first_file_format.line_definer.should_not eql(@second_file_format.line_definer)
|
57
59
|
@first_file_format.new.should have(1).line_definitions
|
58
|
-
@first_file_format.new.line_definitions[:first_test].should_not be_nil
|
59
|
-
@second_file_format.new.should have(1).line_definitions
|
60
60
|
@second_file_format.new.line_definitions[:second_test].should_not be_nil
|
61
61
|
end
|
62
62
|
end
|