request-log-analyzer 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/DESIGN +14 -0
- data/HACKING +7 -0
- data/LICENSE +20 -0
- data/README.textile +36 -0
- data/Rakefile +5 -0
- data/bin/request-log-analyzer +123 -0
- data/lib/cli/bashcolorizer.rb +60 -0
- data/lib/cli/command_line_arguments.rb +301 -0
- data/lib/cli/progressbar.rb +236 -0
- data/lib/request_log_analyzer.rb +14 -0
- data/lib/request_log_analyzer/aggregator/base.rb +45 -0
- data/lib/request_log_analyzer/aggregator/database.rb +148 -0
- data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
- data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
- data/lib/request_log_analyzer/controller.rb +201 -0
- data/lib/request_log_analyzer/file_format.rb +81 -0
- data/lib/request_log_analyzer/file_format/merb.rb +33 -0
- data/lib/request_log_analyzer/file_format/rails.rb +90 -0
- data/lib/request_log_analyzer/filter/base.rb +29 -0
- data/lib/request_log_analyzer/filter/field.rb +36 -0
- data/lib/request_log_analyzer/filter/timespan.rb +32 -0
- data/lib/request_log_analyzer/line_definition.rb +159 -0
- data/lib/request_log_analyzer/log_parser.rb +173 -0
- data/lib/request_log_analyzer/log_processor.rb +121 -0
- data/lib/request_log_analyzer/request.rb +95 -0
- data/lib/request_log_analyzer/source/base.rb +42 -0
- data/lib/request_log_analyzer/source/log_file.rb +170 -0
- data/lib/request_log_analyzer/tracker/base.rb +54 -0
- data/lib/request_log_analyzer/tracker/category.rb +71 -0
- data/lib/request_log_analyzer/tracker/duration.rb +81 -0
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
- data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
- data/spec/controller_spec.rb +40 -0
- data/spec/database_inserter_spec.rb +101 -0
- data/spec/file_format_spec.rb +78 -0
- data/spec/file_formats/spec_format.rb +26 -0
- data/spec/filter_spec.rb +137 -0
- data/spec/fixtures/merb.log +84 -0
- data/spec/fixtures/multiple_files_1.log +5 -0
- data/spec/fixtures/multiple_files_2.log +2 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/fixtures/syslog_1x.log +5 -0
- data/spec/fixtures/test_file_format.log +13 -0
- data/spec/fixtures/test_language_combined.log +14 -0
- data/spec/fixtures/test_order.log +16 -0
- data/spec/line_definition_spec.rb +124 -0
- data/spec/log_parser_spec.rb +68 -0
- data/spec/log_processor_spec.rb +57 -0
- data/spec/merb_format_spec.rb +38 -0
- data/spec/rails_format_spec.rb +76 -0
- data/spec/request_spec.rb +72 -0
- data/spec/spec_helper.rb +67 -0
- data/spec/summarizer_spec.rb +9 -0
- data/tasks/github-gem.rake +177 -0
- data/tasks/request_log_analyzer.rake +10 -0
- data/tasks/rspec.rake +6 -0
- metadata +135 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
module RequestLogAnalyzer
  module Filter

    # Common ancestor for request filters. Subclasses are expected to inherit
    # from this class and override #prepare and #filter; the versions defined
    # here do no set-up and let every request through.
    class Base

      include RequestLogAnalyzer::FileFormat::Awareness

      # <tt>options</tt> is the hash handed to the constructor. Nothing in this
      # class assigns <tt>@log_parser</tt>, so that reader returns nil unless a
      # subclass or mixin sets it.
      attr_reader :log_parser, :options

      # Creates the filter.
      # <tt>format</tt> The file format; handed to register_file_format
      # (presumably supplied by FileFormat::Awareness -- confirm).
      # <tt>options</tt> Filter specific settings, stored as-is.
      def initialize(format, options = {})
        @options = options
        register_file_format(format)
      end

      # Set-up hook; intentionally a no-op in the base class.
      def prepare; end

      # Pass-through filter: returns the request itself, or nil when no
      # request (nil/false) was given.
      def filter(request)
        request ? request : nil
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module RequestLogAnalyzer::Filter

  # Filter that keeps or drops requests depending on whether one of their
  # fields matches a given value.
  #
  # Options
  # * <tt>:mode</tt> :accept (the default; :select is treated as a synonym)
  #   keeps only requests with a matching field, :reject drops those requests.
  # * <tt>:field</tt> Name of the field to inspect.
  # * <tt>:value</tt> Value the field is compared against using <tt>===</tt>.
  #   A String written as "/.../" is compiled into a Regexp.
  class Field < Base

    attr_reader :field, :value, :mode

    # Reads the options hash and stores the mode, field name and value in the
    # form that #filter uses.
    def prepare
      @mode  = (@options[:mode] || :accept).to_sym
      @field = @options[:field].to_sym

      raw_value = @options[:value]

      # A string wrapped in slashes denotes a regular expression. At least two
      # characters are required so that a lone "/" stays a literal value
      # instead of turning into an empty, match-everything pattern.
      if raw_value.kind_of?(String) && raw_value.length >= 2 &&
         raw_value[0, 1] == '/' && raw_value[-1, 1] == '/'
        @value = Regexp.new(raw_value[1..-2])
      else
        @value = raw_value # TODO: convert value?
      end
    end

    # Applies the filter to a single request.
    # <tt>request</tt> The request to check; it must respond to
    # <tt>every(field)</tt> with an enumerable of that field's values.
    # Returns the request when it passes the filter and nil when it is
    # rejected (or when no request was given).
    def filter(request)
      return nil unless request

      found_field = request.every(@field).any? { |value| @value === value }

      # :accept is the documented and default mode, :select the name that was
      # checked here before; both keep matching requests only.
      keep_matching_only = (@mode == :select || @mode == :accept)

      return nil if !found_field && keep_matching_only
      return nil if found_field && @mode == :reject

      return request
    end
  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RequestLogAnalyzer::Filter

  # Rejects all requests that fall outside the configured timespan.
  #
  # Options
  # * <tt>:after</tt> Only keep requests at or after this DateTime.
  # * <tt>:before</tt> Only keep requests at or before this DateTime.
  #
  # Both bounds are inclusive and optional. When neither is given, every
  # request passes the filter.
  class Timespan < Base

    attr_reader :before, :after

    # Converts the configured bounds into integers of the form YYYYMMDDHHMMSS
    # so that they can be compared numerically with request.timestamp
    # (presumably stored in the same format -- confirm against Request).
    def prepare
      @after  = @options[:after].strftime('%Y%m%d%H%M%S').to_i  if @options[:after]
      @before = @options[:before].strftime('%Y%m%d%H%M%S').to_i if @options[:before]
    end

    # Applies the filter to a single request.
    # <tt>request</tt> The request to check; must respond to <tt>timestamp</tt>.
    # Returns the request when it lies within the timespan, nil otherwise.
    # A request without a timestamp cannot be placed in a bounded timespan
    # and is therefore rejected instead of raising an error.
    def filter(request)
      return nil unless request

      # No bounds configured: there is nothing to reject on.
      return request if @after.nil? && @before.nil?

      timestamp = request.timestamp
      return nil if timestamp.nil?
      return nil if @after && timestamp < @after
      return nil if @before && timestamp > @before

      return request
    end
  end

end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
module RequestLogAnalyzer

  # Named anonymizers. LineDefinition#anonymize_value looks these up by
  # method name ("anonymizer_for_" followed by the capture's :anonymize
  # setting).
  module Anonymizers

    # Replaces any value with the fixed address 127.0.0.1.
    def anonymizer_for_ip(value, capture_definition)
      '127.0.0.1'
    end

    # Replaces the scheme, host and optional port of a URL that starts with
    # http:// or https:// by "http://example.com/"; the rest of the value is
    # kept. Values that do not match are returned unchanged.
    def anonymizer_for_url(value, capture_definition)
      # The host class lists A-Z and a-z separately: the range A-z would also
      # match the punctuation characters between "Z" and "a". The underscore
      # from that range is kept on purpose so such host names stay covered.
      value.sub(/\Ahttps?\:\/\/[A-Za-z0-9_\.-]+(?:\:\d+)?\//, "http://example.com/")
    end
  end

  # The line definition class is used to specify what lines should be parsed
  # from the log file. It contains functionality to match a line against the
  # definition and to parse or anonymize the information in that line.
  class LineDefinition

    include RequestLogAnalyzer::Anonymizers

    # Small DSL helper: every unknown method call on a Definer creates a line
    # definition named after that method and stores it in #line_definitions.
    class Definer

      attr_accessor :line_definitions

      def initialize
        @line_definitions = {}
      end

      # With a block, the new definition is yielded for configuration;
      # otherwise the first argument is used as the definition hash (an empty
      # hash when no argument was given).
      def method_missing(name, *args, &block)
        if block_given?
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.define(name, &block)
        else
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.new(name, args.first || {})
        end
      end
    end

    attr_reader :name
    attr_accessor :teaser, :regexp, :captures
    attr_accessor :header, :footer

    # Initializes the LineDefinition instance with a hash containing the
    # different elements of the definition.
    # <tt>name</tt> The name (line type) of this definition.
    # <tt>definition</tt> Hash whose keys name the attributes to set
    # (e.g. :teaser, :regexp, :captures, :header, :footer).
    def initialize(name, definition = {})
      @name     = name
      @captures = []
      definition.each { |key, value| self.send("#{key.to_s}=".to_sym, value) }
    end

    # Creates a definition with the given name, yields it to the block (if
    # any) for configuration and returns it.
    def self.define(name, &block)
      definition = self.new(name)
      yield(definition) if block_given?
      return definition
    end

    # Converts a parsed value (String) to the desired type.
    # <tt>value</tt> The captured string.
    # <tt>type</tt> One of :integer, :float, :decimal, :symbol, :sec, :msec
    # or :timestamp; any other type returns the value unchanged.
    def convert_value(value, type)
      case type
      when :integer   then value.to_i
      when :float     then value.to_f
      when :decimal   then value.to_f
      when :symbol    then value.to_sym
      when :sec       then value.to_f
      when :msec      then value.to_f / 1000
      # Keeps the first 14 digits only, i.e. an integer like YYYYMMDDHHMMSS.
      when :timestamp then value.gsub(/[^0-9]/,'')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
      else value
      end
    end

    # Checks whether a given line matches this definition.
    # It will return false if a line does not match. If the line matches, a
    # hash is returned with all the fields parsed from that line as content,
    # plus :line_type and :lineno.
    # If the line definition has a teaser-check, a :teaser_check_failed
    # warning will be sent to the parser (when one is given) if the teaser
    # matches but the full regular expression does not.
    # <tt>line</tt> The line to check.
    # <tt>lineno</tt> Line number to store in the result.
    # <tt>parser</tt> Object that receives warnings through
    # <tt>warn(type, message)</tt>.
    def matches(line, lineno = nil, parser = nil)
      return false unless @teaser.nil? || @teaser =~ line

      match_data = line.match(@regexp)
      if match_data.nil?
        if @teaser && parser
          parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
        end
        return false
      end

      request_info = { :line_type => name, :lineno => lineno }

      captures.each_with_index do |capture, index|
        next if capture == :ignore

        captured = match_data.captures[index]
        request_info[capture[:name]] = convert_value(captured, capture[:type]) if captured
      end

      return request_info
    end

    alias :=~ :matches

    # Anonymizes a single captured value according to the :anonymize setting
    # of its capture definition:
    # * an object responding to call: its result is used
    # * nil or false: the value is returned unchanged
    # * true: the value is replaced by '***'
    # * :slightly: see #anonymize_slightly
    # * anything else: the method "anonymizer_for_<setting>" is used when it
    #   exists, '***' otherwise.
    def anonymize_value(value, capture_definition)
      anonymizer = capture_definition[:anonymize]
      return anonymizer.call(value, capture_definition) if anonymizer.respond_to?(:call)

      case anonymizer
      when nil, false then value
      when true       then '***'
      when :slightly  then anonymize_slightly(value, capture_definition)
      else
        method_name = "anonymizer_for_#{anonymizer}".to_sym
        self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
      end
    end

    # Changes a value by a random amount, based on the capture's :type.
    # Numeric types are scaled by a random factor between 0.8 and 1.2;
    # timestamps are shifted by a random number of days between -50 and +49.
    # Types that cannot be handled are replaced by '***'.
    def anonymize_slightly(value, capture_definition)
      case capture_definition[:type]
      when :integer, :msec
        (value.to_i * (0.8 + rand * 0.4)).to_i
      # :float and :decimal are the floating point types known to
      # convert_value; :double is kept for existing definitions.
      when :float, :decimal, :double, :sec
        (value.to_f * (0.8 + rand * 0.4)).to_f
      when :timestamp
        (DateTime.parse(value) + (rand(100) - 50)).to_s
      else
        # Written to stderr so the message cannot end up in processed log
        # output that is written to stdout.
        $stderr.puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
        '***'
      end
    end

    # Anonymize a log line.
    # <tt>line</tt> The line to anonymize; it is not modified.
    # <tt>options</tt> :discard_teaser_lines decides what happens to lines
    # that match the teaser but not the full regular expression.
    #
    # Returns
    # * nil when the line does not belong to this definition (teaser does
    #   not match, or there is no teaser and the regexp does not match),
    # * "" or the line itself when only the teaser matches (depending on
    #   :discard_teaser_lines),
    # * otherwise a copy of the line with every captured value replaced by
    #   its anonymized counterpart.
    def anonymize(line, options = {})
      return nil unless self.teaser.nil? || self.teaser =~ line

      match_data = self.regexp && self.regexp.match(line)
      if match_data.nil?
        return nil if self.teaser.nil?
        return options[:discard_teaser_lines] ? "" : line
      end

      # Work on a copy so the caller's (possibly frozen) string stays intact.
      anonymized_line = line.dup

      # Replacements can change the length of the line, which shifts the
      # offsets of all later captures by the accumulated difference.
      pos_adjustment = 0

      captures.each_with_index do |capture, index|
        group          = index + 1
        original_value = match_data[group]

        # :ignore captures carry no definition hash, just like in #matches.
        next if capture == :ignore || original_value.nil?

        anonymized_value = anonymize_value(original_value, capture).to_s
        anonymized_line[(match_data.begin(group) + pos_adjustment)...(match_data.end(group) + pos_adjustment)] = anonymized_value
        pos_adjustment += anonymized_value.length - original_value.length
      end

      anonymized_line
    end
  end

end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines to a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser, but the requests that are mixed up cannot be parsed. It will emit warnings when this
  # occurs.
  class LogParser

    include RequestLogAnalyzer::FileFormat::Awareness

    # A hash of options
    attr_reader :options

    # The current Request object that is being parsed
    attr_reader :current_request

    # The total number of parsed lines
    attr_reader :parsed_lines

    # The total number of parsed requests.
    attr_reader :parsed_requests

    # The number of requests for which the block given to the parse methods returned a false
    # value (e.g. because of date constraints)
    attr_reader :skipped_requests

    # Initializes the parser instance.
    # <tt>format</tt> The file format; handed to register_file_format (presumably supplied by
    # FileFormat::Awareness -- confirm), which makes its line definitions available.
    # <tt>options</tt> A hash of options. :assume_correct_order is read while combining lines.
    def initialize(format, options = {})
      @line_definitions = {}
      @options          = options
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_requests = 0

      @current_io       = nil
      @current_request  = nil

      # No handlers until #progress= / #warning= install them.
      @progress_handler = nil
      @warning_handler  = nil

      # install the file format module (see RequestLogAnalyzer::FileFormat)
      # and register all the line definitions to the parser
      self.register_file_format(format)
    end

    # Parses a list of consecutive files of the same format
    def parse_files(files, options = {}, &block)
      files.each { |file| parse_file(file, options, &block) }
    end

    # Parses a file.
    # Creates an IO stream for the provided file, and sends it to parse_io for further handling.
    # The progress handler (if any) is called with :started before and :finished after parsing.
    def parse_file(file, options = {}, &block)
      @progress_handler.call(:started, file) if @progress_handler
      File.open(file, 'r') { |f| parse_io(f, options, &block) }
      @progress_handler.call(:finished, file) if @progress_handler
    end

    # Parses an already opened stream; see parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # Reads the IO line by line, matches every line against the line definitions and combines
    # the matched lines into requests.
    # <tt>io</tt> The IO object to read from; it must support each_line and lineno.
    # <tt>options</tt> :line_types restricts the line types to look for (defaults to all line
    # types of the file format). Note that this parameter shadows the options attribute within
    # this method.
    # Yields every completed request; a false result of the block counts the request as
    # skipped. Raises a RuntimeError when an unknown line type is requested. Returns nil.
    def parse_io(io, options = {}, &block)

      # parse every line type by default
      line_types = options[:line_types] || file_format.line_definitions.keys

      # check whether all provided line types are valid
      unknown = line_types.reject { |line_type| file_format.line_definitions.has_key?(line_type) }
      raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

      @current_io = io
      begin
        @current_io.each_line do |line|

          # Positions are only reported for real files.
          @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

          # The first line definition that matches wins.
          request_data = nil
          line_types.each do |line_type|
            line_type_definition = file_format.line_definitions[line_type]
            break if request_data = line_type_definition.matches(line, @current_io.lineno, self)
          end

          if request_data
            @parsed_lines += 1
            update_current_request(request_data, &block)
          end
        end

        warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      ensure
        # Never keep a reference to the stream, not even when parsing was aborted by an error.
        @current_io = nil
      end

      nil
    end

    # Installs a progress handler. It is called with (:started, file), (:progress, position)
    # and (:finished, file) while parsing.
    def progress=(proc)
      @progress_handler = proc
    end

    # Installs a warning handler. It is called with (type, message, lineno) for every warning.
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any problems.
    # It will call the warning handler, if one is installed, with the warning type, the message
    # and the current line number. The line number is nil when no stream is being parsed.
    def warn(type, message)
      return unless @warning_handler

      lineno = @current_io ? @current_io.lineno : nil
      @warning_handler.call(type, message, lineno)
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded, the next request is ignored as well and a :unclosed_request warning is
    #   emitted. With the :assume_correct_order option, the open request is emitted instead
    #   and the header line starts a new request.
    def update_current_request(request_data, &block)
      if header_line?(request_data)
        unless @current_request.nil?
          if options[:assume_correct_order]
            handle_request(@current_request, &block)
            @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
          else
            warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
            @current_request = nil # remove all data that was parsed, skip next request as well.
          end
        else
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        end
      else
        unless @current_request.nil?
          @current_request << request_data
          if footer_line?(request_data)
            handle_request(@current_request, &block)
            @current_request = nil
          end
        else
          warn(:no_current_request, "Parsebale line found outside of a request!")
        end
      end
    end

    # Handles the parsed request by yielding it to the block, if one was given.
    # Every request is counted as parsed; it is also counted as skipped when the block returns
    # a false value.
    def handle_request(request, &block)
      @parsed_requests += 1
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 if !accepted
    end

    # Checks whether a given line hash is a header line.
    def header_line?(hash)
      file_format.line_definitions[hash[:line_type]].header
    end

    # Checks whether a given line hash is a footer line.
    def footer_line?(hash)
      file_format.line_definitions[hash[:line_type]].footer
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The LogProcessor class is used to perform simple processing actions over log files.
  # It will go over the log file/stream line by line, pass the line to a processor and
  # write the result back to the output file or stream. The processor can alter the
  # contents of the line, leave it intact or remove it altogether, based on the current
  # file format
  #
  # Currently, two processors are supported, :strip and :anonymize.
  # * :strip will remove all irrelevant lines (according to the file format) from the
  #   sources. A compact, information packed log will remain.
  # * :anonymize will anonymize sensitive information from the lines according to the
  #   anonymization rules in the file format. The result can be passed to third parties
  #   without privacy concerns.
  #
  class LogProcessor

    include RequestLogAnalyzer::FileFormat::Awareness

    attr_reader :mode, :options, :sources
    attr_accessor :output_file

    # Builds a logprocessor instance from the arguments given on the command line
    # <tt>command</tt> The command that was used to start the log processor. This can either be
    # :strip or :anonymize. This will set the processing mode.
    # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
    def self.build(command, arguments)

      options = {
        :discard_teaser_lines => arguments[:discard_teaser_lines],
        :keep_junk_lines      => arguments[:keep_junk_lines],
      }

      log_processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
      log_processor.output_file = arguments[:output] if arguments[:output]

      arguments.parameters.each do |input|
        log_processor.sources << input
      end

      return log_processor
    end

    # Initializes a new LogProcessor instance.
    # <tt>format</tt> The file format to use (e.g. :rails).
    # <tt>mode</tt> The processing mode (:anonymize or :strip)
    # <tt>options</tt> A hash with options to take into account
    def initialize(format, mode, options = {})
      @options     = options
      @mode        = mode
      @sources     = []
      # Instance state, not a global: nil means "write to $stdout" in run!.
      @output_file = nil
      @output      = nil
      self.register_file_format(format)
    end

    # Processes an input file by opening it and sending the filestream to <code>process_io</code>,
    # in which the actual processing is performed.
    # <tt>file</tt> The file to process
    def process_file(file)
      File.open(file, 'r') { |io| process_io(io) }
    end

    # Processes an input stream by iterating over each line and processing it according to
    # the current operation mode (:strip, :anonymize). Every result is appended to the output
    # that was set up by run!. Any other mode leaves the stream untouched.
    # <tt>io</tt> The IO instance to process.
    def process_io(io)
      case mode
      when :strip     then io.each_line { |line| @output << strip_line(line) }
      when :anonymize then io.each_line { |line| @output << anonymize_line(line) }
      end
    end

    # Returns the line itself if the string matches any of the line definitions. If no match is
    # found, an empty string is returned, which will strip the line from the output.
    # <tt>line</tt> The line to strip
    def strip_line(line)
      file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
    end

    # Returns an anonymized version of the provided line. This can be a copy of the line itself,
    # an empty string or a string in which some substrings are substituted for anonymized values.
    # The result of the first line definition that handles the line is used. Lines that are not
    # handled by any definition are kept as they are with the :keep_junk_lines option, and are
    # replaced by an empty string otherwise.
    # <tt>line</tt> The line to anonymize
    def anonymize_line(line)
      anonymized_line = nil
      file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }

      if anonymized_line
        return anonymized_line
      elsif options[:keep_junk_lines]
        return line
      else
        return ""
      end
    end

    # Runs the log processing by setting up the output stream and iterating over all the
    # input sources. Input sources can either be filenames (String instances) or IO streams
    # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
    # Any other source, including the name of a file that does not exist, is skipped.
    #
    # Output is appended to output_file when it is set and written to $stdout otherwise. An
    # output file opened here is closed afterwards, also when processing fails.
    def run!
      if @output_file.nil?
        @output = $stdout
      else
        @output = File.new(@output_file, 'a')
      end

      @sources.each do |source|
        if source.kind_of?(String) && File.exist?(source)
          process_file(source)
        elsif source.kind_of?(IO)
          process_io(source)
        elsif ['-', 'STDIN'].include?(source)
          process_io($stdin)
        end
      end

    ensure
      # $stdout is an IO but not a File, so it is left open.
      @output.close if @output.kind_of?(File)
    end
  end

end
|