request-log-analyzer 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/DESIGN +14 -0
- data/HACKING +7 -0
- data/LICENSE +20 -0
- data/README.textile +36 -0
- data/Rakefile +5 -0
- data/bin/request-log-analyzer +123 -0
- data/lib/cli/bashcolorizer.rb +60 -0
- data/lib/cli/command_line_arguments.rb +301 -0
- data/lib/cli/progressbar.rb +236 -0
- data/lib/request_log_analyzer.rb +14 -0
- data/lib/request_log_analyzer/aggregator/base.rb +45 -0
- data/lib/request_log_analyzer/aggregator/database.rb +148 -0
- data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
- data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
- data/lib/request_log_analyzer/controller.rb +201 -0
- data/lib/request_log_analyzer/file_format.rb +81 -0
- data/lib/request_log_analyzer/file_format/merb.rb +33 -0
- data/lib/request_log_analyzer/file_format/rails.rb +90 -0
- data/lib/request_log_analyzer/filter/base.rb +29 -0
- data/lib/request_log_analyzer/filter/field.rb +36 -0
- data/lib/request_log_analyzer/filter/timespan.rb +32 -0
- data/lib/request_log_analyzer/line_definition.rb +159 -0
- data/lib/request_log_analyzer/log_parser.rb +173 -0
- data/lib/request_log_analyzer/log_processor.rb +121 -0
- data/lib/request_log_analyzer/request.rb +95 -0
- data/lib/request_log_analyzer/source/base.rb +42 -0
- data/lib/request_log_analyzer/source/log_file.rb +170 -0
- data/lib/request_log_analyzer/tracker/base.rb +54 -0
- data/lib/request_log_analyzer/tracker/category.rb +71 -0
- data/lib/request_log_analyzer/tracker/duration.rb +81 -0
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
- data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
- data/spec/controller_spec.rb +40 -0
- data/spec/database_inserter_spec.rb +101 -0
- data/spec/file_format_spec.rb +78 -0
- data/spec/file_formats/spec_format.rb +26 -0
- data/spec/filter_spec.rb +137 -0
- data/spec/fixtures/merb.log +84 -0
- data/spec/fixtures/multiple_files_1.log +5 -0
- data/spec/fixtures/multiple_files_2.log +2 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/fixtures/syslog_1x.log +5 -0
- data/spec/fixtures/test_file_format.log +13 -0
- data/spec/fixtures/test_language_combined.log +14 -0
- data/spec/fixtures/test_order.log +16 -0
- data/spec/line_definition_spec.rb +124 -0
- data/spec/log_parser_spec.rb +68 -0
- data/spec/log_processor_spec.rb +57 -0
- data/spec/merb_format_spec.rb +38 -0
- data/spec/rails_format_spec.rb +76 -0
- data/spec/request_spec.rb +72 -0
- data/spec/spec_helper.rb +67 -0
- data/spec/summarizer_spec.rb +9 -0
- data/tasks/github-gem.rake +177 -0
- data/tasks/request_log_analyzer.rake +10 -0
- data/tasks/rspec.rake +6 -0
- metadata +135 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
module RequestLogAnalyzer
|
2
|
+
module Filter
|
3
|
+
# Base filter class used to filter input requests.
|
4
|
+
# All filters should inherit from this base.
|
5
|
+
class Base
|
6
|
+
|
7
|
+
include RequestLogAnalyzer::FileFormat::Awareness
|
8
|
+
|
9
|
+
attr_reader :log_parser
|
10
|
+
attr_reader :options
|
11
|
+
|
12
|
+
# Initializer
|
13
|
+
# <tt>format</tt> The file format
|
14
|
+
# <tt>options</tt> Are passed to the filters.
|
15
|
+
def initialize(format, options = {})
  # Keep the raw options around; the filters in this file read them back in #prepare.
  @options = options

  # register_file_format is presumably supplied by the FileFormat::Awareness mixin.
  register_file_format(format)
end
|
19
|
+
|
20
|
+
# Setup hook. The base filter has nothing to prepare, so this is a no-op
# that returns nil; subclasses override it to derive state from the options.
def prepare
  nil
end
|
22
|
+
|
23
|
+
# Pass-through filter: a present request is returned untouched, a missing
# (nil/false) request yields nil.
def filter(request)
  request ? request : nil
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module RequestLogAnalyzer::Filter
|
2
|
+
|
3
|
+
# Filter to select or reject a specific field
|
4
|
+
# Options
|
5
|
+
# * <tt>:mode</tt> :reject or :accept.
|
6
|
+
# * <tt>:field</tt> Specific field to accept or reject.
|
7
|
+
# * <tt>:value</tt> Value that the field should match to be accepted or rejected.
|
8
|
+
class Field < Base
|
9
|
+
|
10
|
+
attr_reader :field, :value, :mode
|
11
|
+
|
12
|
+
def prepare
  @mode  = (@options[:mode] || :accept).to_sym
  @field = @options[:field].to_sym

  # A string wrapped in slashes (e.g. "/foo/") is compiled into a Regexp;
  # any other value is kept as-is and later compared with ===.
  raw_value = @options[:value]
  slash_delimited = raw_value.kind_of?(String) && raw_value[0, 1] == '/' && raw_value[-1, 1] == '/'
  @value = slash_delimited ? Regexp.new(raw_value[1..-2]) : raw_value # TODO: convert value?
end
|
23
|
+
|
24
|
+
# Keeps or drops a request depending on whether any value of the configured
# field matches the configured value (compared with ===, so Regexps work).
#
# In :accept mode (the documented default; :select is honoured as a synonym)
# only requests with a matching field are kept. In :reject mode requests
# with a matching field are dropped. Returns the request or nil.
def filter(request)
  return nil unless request

  found_field = request.every(@field).any? { |value| @value === value }

  # The original only tested for :select, so the default :accept mode never
  # dropped anything.
  selecting = (@mode == :accept || @mode == :select)
  return nil if selecting && !found_field
  return nil if @mode == :reject && found_field

  request
end
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RequestLogAnalyzer::Filter
|
2
|
+
|
3
|
+
# Reject all requests not in given timespan
|
4
|
+
# Options
|
5
|
+
# * <tt>:after</tt> Only keep requests after this DateTime.
|
6
|
+
# * <tt>:before</tt> Only keep requests before this DateTime.
|
7
|
+
class Timespan < Base
|
8
|
+
|
9
|
+
attr_reader :before, :after
|
10
|
+
|
11
|
+
def prepare
  # Timestamps are compared as YYYYMMDDHHMMSS integers, so convert the
  # boundaries once up front.
  to_stamp = lambda { |moment| moment.strftime('%Y%m%d%H%M%S').to_i }

  @after  = to_stamp.call(@options[:after])  if options[:after]
  @before = to_stamp.call(@options[:before]) if options[:before]
end
|
16
|
+
|
17
|
+
# Returns the request when its timestamp lies within the configured bounds
# (both inclusive), nil otherwise.
#
# A request without a timestamp is rejected instead of raising NoMethodError
# on the comparison. As before, a filter with neither :after nor :before
# configured rejects every request.
def filter(request)
  return nil unless request
  return nil if @after.nil? && @before.nil?

  timestamp = request.timestamp
  return nil if timestamp.nil?
  return nil if @after && timestamp < @after
  return nil if @before && timestamp > @before

  request
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
module RequestLogAnalyzer
|
2
|
+
|
3
|
+
# Named anonymizers. LineDefinition#anonymize_value looks these up as
# "anonymizer_for_<name>" for a capture that declares :anonymize => <name>.
module Anonymizers
  # Replaces any captured IP address by the loopback address.
  def anonymizer_for_ip(value, capture_definition)
    '127.0.0.1'
  end

  # Replaces the scheme and host of an absolute http(s) URL by
  # http://example.com/, keeping the path. Values that do not start with
  # such a URL are returned unchanged.
  def anonymizer_for_url(value, capture_definition)
    # The original class used the range A-z, which also spans the ASCII
    # punctuation between 'Z' and 'a' ([ \ ] ^ _ `). Spell out the ranges;
    # the underscore is kept because it does show up in host names.
    value.sub(/^https?\:\/\/[A-Za-z0-9_\.-]+\//, "http://example.com/")
  end
end
|
12
|
+
|
13
|
+
# The line definition class is used to specify what lines should be parsed from the log file.
|
14
|
+
# It contains functionality to match a line against the definition and parse the information
|
15
|
+
# from this line. This is used by the LogParser class when parsing a log file.
|
16
|
+
class LineDefinition
|
17
|
+
|
18
|
+
include RequestLogAnalyzer::Anonymizers
|
19
|
+
|
20
|
+
# Small DSL object that collects line definitions by name. Any method called
# on it registers a line definition under the method's name: with a block the
# definition is built via LineDefinition.define, otherwise the first argument
# is handed to LineDefinition.new.
class Definer

  attr_accessor :line_definitions

  def initialize
    @line_definitions = {}
  end

  def method_missing(name, *args, &block)
    @line_definitions[name] =
      if block_given?
        RequestLogAnalyzer::LineDefinition.define(name, &block)
      else
        RequestLogAnalyzer::LineDefinition.new(name, args.first)
      end
  end
end
|
36
|
+
|
37
|
+
attr_reader :name
|
38
|
+
attr_accessor :teaser, :regexp, :captures
|
39
|
+
attr_accessor :header, :footer
|
40
|
+
|
41
|
+
# Initializes the LineDefinition instance with a hash containing the different elements of
|
42
|
+
# the definition.
|
43
|
+
def initialize(name, definition = {})
  @name     = name
  @captures = []

  # Definer#method_missing passes args.first, which is nil when a line type
  # is declared without arguments; treat that as an empty definition
  # instead of failing on nil.each.
  (definition || {}).each do |key, value|
    # Every key is applied through its attribute writer (teaser=, regexp=, ...).
    send("#{key}=".to_sym, value)
  end
end
|
48
|
+
|
49
|
+
# Builds a fresh definition for +name+, lets the optional block configure it
# and returns the configured definition.
def self.define(name, &block)
  line_definition = new(name)
  block.call(line_definition) if block
  line_definition
end
|
54
|
+
|
55
|
+
# Converts a parsed value (String) to the desired value using some heuristics.
|
56
|
+
def convert_value(value, type)
  case type
  when :integer               then value.to_i
  when :float, :decimal, :sec then value.to_f
  when :msec                  then value.to_f / 1000 # milliseconds to seconds
  when :symbol                then value.to_sym
  when :timestamp
    # Strip every non-digit and keep the first 14 digits (YYYYMMDDHHMMSS).
    # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
    digits = value.gsub(/[^0-9]/, '')
    digits[0..13].to_i
  else
    value
  end
end
|
68
|
+
|
69
|
+
# Checks whether a given line matches this definition.
|
70
|
+
# It will return false if a line does not match. If the line matches, a hash is returned
|
71
|
+
# with all the fields parsed from that line as content.
|
72
|
+
# If the line definition has a teaser-check, a :teaser_check_failed warning will be emitted
|
73
|
+
# if this teaser-check is passed, but the full regular expression does not match.
|
74
|
+
def matches(line, lineno = nil, parser = nil)
  # Cheap pre-check: when a teaser is configured it must match first.
  return false unless @teaser.nil? || @teaser =~ line

  match_data = line.match(@regexp)
  unless match_data
    # The teaser promised a match but the full expression disagreed.
    if @teaser && parser
      parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
    end
    return false
  end

  request_info = { :line_type => name, :lineno => lineno }
  captures.each_with_index do |capture, index|
    next if capture == :ignore

    raw = match_data.captures[index]
    # Groups that did not participate in the match are left out of the hash.
    request_info[capture[:name]] = convert_value(raw, capture[:type]) if raw
  end
  request_info
end
|
98
|
+
|
99
|
+
alias :=~ :matches
|
100
|
+
|
101
|
+
# Anonymizes a single captured value according to the capture's :anonymize
# setting: a callable is invoked, nil/false keep the value, true masks it,
# :slightly perturbs it, and any other name is dispatched to an
# anonymizer_for_<name> method (masking the value when none exists).
def anonymize_value(value, capture_definition)
  rule = capture_definition[:anonymize]
  return rule.call(value, capture_definition) if rule.respond_to?(:call)

  case rule
  when nil, false then value
  when true       then '***'
  when :slightly  then anonymize_slightly(value, capture_definition)
  else
    anonymizer = "anonymizer_for_#{rule}".to_sym
    if respond_to?(anonymizer)
      send(anonymizer, value, capture_definition)
    else
      '***'
    end
  end
end
|
116
|
+
|
117
|
+
# Perturbs a value a little so the anonymized log keeps realistic figures.
# Numeric values are scaled by a random factor between 0.8 and 1.2;
# timestamps are shifted by up to 50 days either way. Types that cannot be
# perturbed are masked with '***'.
#
# :float and :decimal are accepted next to :double, matching the numeric
# types convert_value understands. The diagnostic for unsupported types goes
# to $stderr: LogProcessor writes the anonymized log to $stdout by default,
# so printing it there would corrupt the output.
def anonymize_slightly(value, capture_definition)
  case capture_definition[:type]
  when :integer, :msec
    (value.to_i * (0.8 + rand * 0.4)).to_i
  when :double, :float, :decimal, :sec
    (value.to_f * (0.8 + rand * 0.4)).to_f
  when :timestamp
    (DateTime.parse(value) + (rand(100) - 50)).to_s
  else
    $stderr.puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
    '***'
  end
end
|
134
|
+
|
135
|
+
# Anonymize a log line
|
136
|
+
def anonymize(line, options = {})
  # Not a line of this type at all when the teaser does not match.
  return nil unless teaser.nil? || teaser =~ line

  if regexp =~ line
    match = Regexp.last_match
    shift = 0 # running length difference caused by earlier replacements

    captures.each_with_index do |capture, index|
      group    = index + 1
      original = match[group]
      next if original.nil?

      replacement = anonymize_value(original, capture).to_s
      # The line is edited in place; offsets from the match are corrected
      # by the accumulated shift.
      line[(match.begin(group) + shift)...(match.end(group) + shift)] = replacement
      shift += replacement.length - original.length
    end
    line
  elsif teaser.nil?
    nil
  else
    # Teaser matched but the full expression did not.
    options[:discard_teaser_lines] ? "" : line
  end
end
|
157
|
+
end
|
158
|
+
|
159
|
+
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
module RequestLogAnalyzer
|
2
|
+
|
3
|
+
# The LogParser class reads log data from a given source and uses a file format definition
|
4
|
+
# to parse all relevent information about requests from the file. A FileFormat module should
|
5
|
+
# be provided that contains the definitions of the lines that occur in the log data.
|
6
|
+
#
|
7
|
+
# The order in which lines occur is used to combine lines to a single request. If these lines
|
8
|
+
# are mixed, requests cannot be combined properly. This can be the case if data is written to
|
9
|
+
# the log file simultaneously by different mongrel processes. This problem is detected by the
|
10
|
+
# parser, but the requests that are mixed up cannot be parsed. It will emit warnings when this
|
11
|
+
# occurs.
|
12
|
+
class LogParser
|
13
|
+
|
14
|
+
include RequestLogAnalyzer::FileFormat::Awareness
|
15
|
+
|
16
|
+
# A hash of options
|
17
|
+
attr_reader :options
|
18
|
+
|
19
|
+
# The current Request object that is being parsed
|
20
|
+
attr_reader :current_request
|
21
|
+
|
22
|
+
# The total number of parsed lines
|
23
|
+
attr_reader :parsed_lines
|
24
|
+
|
25
|
+
# The total number of parsed requests.
|
26
|
+
attr_reader :parsed_requests
|
27
|
+
|
28
|
+
# The number of skipped requests because of date constraints
|
29
|
+
attr_reader :skipped_requests
|
30
|
+
|
31
|
+
# Initializes the parser instance.
|
32
|
+
# It will apply the language specific FileFormat module to this instance. It will use the line
|
33
|
+
# definitions in this module to parse any input.
|
34
|
+
def initialize(format, options = {})
  @line_definitions = {}
  @options          = options
  @parsed_lines     = 0
  @parsed_requests  = 0
  @skipped_requests = 0

  # State that is read elsewhere in the parser before anything assigns it;
  # initialise it explicitly so the object starts out fully defined.
  @current_io       = nil
  @current_request  = nil
  @progress_handler = nil
  @warning_handler  = nil

  # install the file format module (see RequestLogAnalyzer::FileFormat)
  # and register all the line definitions to the parser
  register_file_format(format)
end
|
47
|
+
|
48
|
+
# Parses a list of consequent files of the same format
|
49
|
+
def parse_files(files, options = {}, &block)
  # Files are handled one after the other, in the order given.
  files.each do |path|
    parse_file(path, options, &block)
  end
end
|
52
|
+
|
53
|
+
# Parses a file.
|
54
|
+
# Creates an IO stream for the provided file, and sends it to parse_io for further handling
|
55
|
+
def parse_file(file, options = {}, &block)
  # The progress handler, when installed, is told when a file starts and finishes.
  @progress_handler.call(:started, file) if @progress_handler

  File.open(file, 'r') do |stream|
    parse_io(stream, options, &block)
  end

  @progress_handler.call(:finished, file) if @progress_handler
end
|
60
|
+
|
61
|
+
# Parses an already opened stream; a thin alias for parse_io.
def parse_stream(stream, options = {}, &block)
  parse_io(stream, options, &block)
end
|
64
|
+
|
65
|
+
# Finds a log line and then parses the information in the line.
|
66
|
+
# Yields a hash containing the information found.
|
67
|
+
# <tt>*line_types</tt> The log line types to look for (defaults to LOG_LINES.keys).
|
68
|
+
# Yields a Hash when it encounters a chunk of information.
|
69
|
+
def parse_io(io, options = {}, &block)
  definitions = file_format.line_definitions

  # parse every line type by default
  line_types = options[:line_types] || definitions.keys

  # check whether all provided line types are valid
  unknown = line_types.reject { |line_type| definitions.has_key?(line_type) }
  raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

  # Resolve the definitions once instead of once per input line.
  candidates = line_types.map { |line_type| definitions[line_type] }

  @current_io = io
  begin
    @current_io.each_line do |line|
      # Byte positions are only reported for real files.
      @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

      # The first definition that matches the line wins.
      request_data = nil
      candidates.each do |definition|
        request_data = definition.matches(line, @current_io.lineno, self)
        break if request_data
      end

      if request_data
        @parsed_lines += 1
        update_current_request(request_data, &block)
      end
    end

    warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
  ensure
    # Always release the stream reference, also when a handler raised,
    # so the parser never keeps pointing at a stale IO.
    @current_io = nil
  end
  nil
end
|
99
|
+
|
100
|
+
# Add a block to this method to install a progress handler while parsing
|
101
|
+
def progress=(handler)
  # Stored as-is; the parser calls it with an event symbol and a value.
  @progress_handler = handler
end
|
104
|
+
|
105
|
+
# Add a block to this method to install a warning handler while parsing
|
106
|
+
def warning=(handler)
  # Stored as-is; #warn calls it with the warning type, message and line number.
  @warning_handler = handler
end
|
109
|
+
|
110
|
+
# This method is called by the parser if it encounters any problems.
|
111
|
+
# It will call the warning handler. The default controller will pass all warnings to every
|
112
|
+
# aggregator that is registered and running
|
113
|
+
def warn(type, message)
  # Without a handler warnings are dropped.
  return nil unless @warning_handler

  # The line number of the stream currently being parsed is passed along.
  @warning_handler.call(type, message, @current_io.lineno)
end
|
116
|
+
|
117
|
+
protected
|
118
|
+
|
119
|
+
# Combines the different lines of a request into a single Request object. It will start a
|
120
|
+
# new request when a header line is encountered and will emit the request when a footer line
|
121
|
+
# is encountered.
|
122
|
+
#
|
123
|
+
# - Every line that is parsed before a header line is ignored as it cannot be included in
|
124
|
+
# any request. It will emit a :no_current_request warning.
|
125
|
+
# - A header line that is parsed before a request is closed by a footer line, is a sign of
|
126
|
+
# an improperly ordered file. All data that is gathered for the request until then is
|
127
|
+
# discarded, the next request is ignored as well and a :unclosed_request warning is
|
128
|
+
# emitted.
|
129
|
+
def update_current_request(request_data, &block)
  if header_line?(request_data)
    if @current_request.nil?
      # Regular case: a header opens a new request.
      @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
    elsif options[:assume_correct_order]
      # Trust the order: emit what was gathered and start over with this header.
      handle_request(@current_request, &block)
      @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
    else
      warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
      @current_request = nil # remove all data that was parsed, skip next request as well.
    end
  elsif @current_request.nil?
    warn(:no_current_request, "Parsebale line found outside of a request!")
  else
    @current_request << request_data
    if footer_line?(request_data)
      # A footer completes the request.
      handle_request(@current_request, &block)
      @current_request = nil
    end
  end
end
|
154
|
+
|
155
|
+
# Handles the parsed request by calling the request handler.
|
156
|
+
# The default controller will send the request to every running aggregator.
|
157
|
+
def handle_request(request, &block)
  @parsed_requests += 1

  # Without a block every request counts as accepted.
  accepted = block ? block.call(request) : true
  @skipped_requests += 1 unless accepted
end
|
162
|
+
|
163
|
+
# Checks whether a given line hash is a header line.
|
164
|
+
def header_line?(hash)
  # Look up the definition the line was parsed with and ask it.
  definition = file_format.line_definitions[hash[:line_type]]
  definition.header
end
|
167
|
+
|
168
|
+
# Checks whether a given line hash is a footer line.
|
169
|
+
def footer_line?(hash)
  # Look up the definition the line was parsed with and ask it.
  definition = file_format.line_definitions[hash[:line_type]]
  definition.footer
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module RequestLogAnalyzer
|
2
|
+
|
3
|
+
# The Logprocessor class is used to perform simple processing actions over log files.
|
4
|
+
# It will go over the log file/stream line by line, pass the line to a processor and
|
5
|
+
# write the result back to the output file or stream. The processor can alter the
|
6
|
+
# contents of the line, leave it intact or remove it altogether, based on the current
|
7
|
+
# file format
|
8
|
+
#
|
9
|
+
# Currently, two processors are supported, :strip and :anonymize.
|
10
|
+
# * :strip will remove all irrelevent lines (according to the file format) from the
|
11
|
+
# sources. A compact, information packed log will remain.
|
12
|
+
# * :anonymize will anonymize sensitive information from the lines according to the
|
13
|
+
# anonymization rules in the file format. The result can be passed to third parties
|
14
|
+
# without privacy concerns.
|
15
|
+
#
|
16
|
+
class LogProcessor
|
17
|
+
|
18
|
+
include RequestLogAnalyzer::FileFormat::Awareness
|
19
|
+
|
20
|
+
attr_reader :mode, :options, :sources
|
21
|
+
attr_accessor :output_file
|
22
|
+
|
23
|
+
# Builds a logprocessor instance from the arguments given on the command line
|
24
|
+
# <tt>command</tt> The command that was used to start the log processor. This can either be
|
25
|
+
# :strip or :anonymize. This will set the processing mode.
|
26
|
+
# <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
|
27
|
+
def self.build(command, arguments)
  # Only these two switches are forwarded to the processor as options.
  options = {}
  [:discard_teaser_lines, :keep_junk_lines].each do |flag|
    options[flag] = arguments[flag]
  end

  processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
  processor.output_file = arguments[:output] if arguments[:output]

  # Every remaining command line parameter is an input source.
  arguments.parameters.each { |input| processor.sources << input }

  processor
end
|
43
|
+
|
44
|
+
# Initializes a new LogProcessor instance.
|
45
|
+
# <tt>format</tt> The file format to use (e.g. :rails).
|
46
|
+
# <tt>mode</tt> The processing mode (:anonymize or :strip)
|
47
|
+
# <tt>options</tt> A hash with options to take into account
|
48
|
+
def initialize(format, mode, options = {})
  @options     = options
  @mode        = mode
  @sources     = []
  # The original assigned the global $output_file here, clobbering
  # process-wide state while leaving the attribute behind the output_file
  # accessor unset. The instance variable is what run! reads.
  @output_file = nil
  register_file_format(format)
end
|
55
|
+
|
56
|
+
# Processes input files by opening it and sending the filestream to <code>process_io</code>,
|
57
|
+
# in which the actual processing is performed.
|
58
|
+
# <tt>file</tt> The file to process
|
59
|
+
def process_file(file)
  # The block form closes the file again once processing is done.
  File.open(file, 'r') do |stream|
    process_io(stream)
  end
end
|
62
|
+
|
63
|
+
# Processes an input stream by iteration over each line and processing it according to
|
64
|
+
# the current operation mode (:strip, :anonymize)
|
65
|
+
# <tt>io</tt> The IO instance to process.
|
66
|
+
def process_io(io)
  # Pick the per-line transformation for the current mode; any other mode
  # leaves the stream untouched.
  transform =
    case mode
    when :strip     then :strip_line
    when :anonymize then :anonymize_line
    end
  return nil if transform.nil?

  io.each_line { |line| @output << send(transform, line) }
end
|
72
|
+
|
73
|
+
# Returns the line itself if the string matches any of the line definitions. If no match is
|
74
|
+
# found, an empty line is returned, which will strip the line from the output.
|
75
|
+
# <tt>line</tt> The line to strip
|
76
|
+
def strip_line(line)
  # A line is kept as soon as any line definition of the format matches it.
  recognized = file_format.line_definitions.any? { |_name, definition| definition =~ line }
  if recognized
    line
  else
    ""
  end
end
|
79
|
+
|
80
|
+
# Returns an anonymized version of the provided line. This can be a copy of the line it self,
|
81
|
+
# an empty string or a string in which some substrings are substituted for anonymized values.
|
82
|
+
# <tt>line</tt> The line to anonymize
|
83
|
+
def anonymize_line(line)
  # The first definition that returns something (even an empty string) decides.
  result = nil
  file_format.line_definitions.each do |_name, definition|
    result = definition.anonymize(line, options)
    break if result
  end

  return result if result

  # No definition recognised the line: keep it only when junk lines are wanted.
  options[:keep_junk_lines] ? line : ""
end
|
95
|
+
|
96
|
+
# Runs the log processing by setting up the output stream and iterating over all the
|
97
|
+
# input sources. Input sources can either be filenames (String instances) or IO streams
|
98
|
+
# (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
|
99
|
+
def run!
  # Output goes to $stdout unless an output file was configured; the file is
  # opened in append mode.
  @output = @output_file.nil? ? $stdout : File.new(@output_file, 'a')

  @sources.each do |source|
    if source.kind_of?(String) && File.exist?(source)
      process_file(source)
    elsif ['-', 'STDIN'].include?(source)
      process_io($stdin)
    elsif !source.kind_of?(String) && source.respond_to?(:each_line)
      # Any line-oriented stream is accepted, not just IO instances
      # (e.g. StringIO). Strings are excluded: they respond to each_line
      # too, but denote file names here.
      process_io(source)
    else
      # Report sources that cannot be resolved instead of silently
      # producing incomplete output. $stderr keeps the processed log clean.
      $stderr.puts "Skipping unknown source: #{source.inspect}"
    end
  end

ensure
  # Only close what was opened here; never close $stdout.
  @output.close if @output.kind_of?(File)
end
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|