wvanbergen-request-log-analyzer 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/DESIGN +14 -0
- data/HACKING +7 -0
- data/README.textile +9 -98
- data/Rakefile +2 -2
- data/bin/request-log-analyzer +1 -1
- data/lib/cli/bashcolorizer.rb +60 -0
- data/lib/cli/command_line_arguments.rb +301 -0
- data/lib/cli/progressbar.rb +236 -0
- data/lib/request_log_analyzer/aggregator/base.rb +51 -0
- data/lib/request_log_analyzer/aggregator/database.rb +97 -0
- data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
- data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
- data/lib/request_log_analyzer/controller.rb +206 -0
- data/lib/request_log_analyzer/file_format/merb.rb +33 -0
- data/lib/request_log_analyzer/file_format/rails.rb +119 -0
- data/lib/request_log_analyzer/file_format.rb +77 -0
- data/lib/request_log_analyzer/filter/base.rb +29 -0
- data/lib/request_log_analyzer/filter/field.rb +36 -0
- data/lib/request_log_analyzer/filter/timespan.rb +32 -0
- data/lib/request_log_analyzer/line_definition.rb +159 -0
- data/lib/request_log_analyzer/log_parser.rb +183 -0
- data/lib/request_log_analyzer/log_processor.rb +121 -0
- data/lib/request_log_analyzer/request.rb +115 -0
- data/lib/request_log_analyzer/source/base.rb +42 -0
- data/lib/request_log_analyzer/source/log_file.rb +180 -0
- data/lib/request_log_analyzer/tracker/base.rb +54 -0
- data/lib/request_log_analyzer/tracker/category.rb +71 -0
- data/lib/request_log_analyzer/tracker/duration.rb +81 -0
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
- data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
- data/spec/file_format_spec.rb +78 -0
- data/spec/file_formats/spec_format.rb +26 -0
- data/spec/filter_spec.rb +137 -0
- data/spec/log_processor_spec.rb +57 -0
- data/tasks/rspec.rake +6 -0
- metadata +53 -55
- data/TODO +0 -58
- data/bin/request-log-database +0 -81
- data/lib/base/log_parser.rb +0 -78
- data/lib/base/record_inserter.rb +0 -139
- data/lib/command_line/arguments.rb +0 -129
- data/lib/command_line/flag.rb +0 -51
- data/lib/merb_analyzer/log_parser.rb +0 -26
- data/lib/rails_analyzer/log_parser.rb +0 -35
- data/lib/rails_analyzer/record_inserter.rb +0 -39
- data/tasks/test.rake +0 -8
- data/test/log_fragments/fragment_1.log +0 -59
- data/test/log_fragments/fragment_2.log +0 -5
- data/test/log_fragments/fragment_3.log +0 -12
- data/test/log_fragments/fragment_4.log +0 -10
- data/test/log_fragments/fragment_5.log +0 -24
- data/test/log_fragments/merb_1.log +0 -84
- data/test/merb_log_parser_test.rb +0 -39
- data/test/rails_log_parser_test.rb +0 -94
- data/test/record_inserter_test.rb +0 -45
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'date'

module RequestLogAnalyzer

  # Named anonymizers. A capture definition selects one with :anonymize => :ip or
  # :anonymize => :url; LineDefinition#anonymize_value resolves these to the
  # anonymizer_for_<name> methods below.
  module Anonymizers

    # Replaces any IP address with the loopback address.
    def anonymizer_for_ip(value, capture_definition)
      '127.0.0.1'
    end

    # Replaces the scheme and authority part of a URL with http://example.com/.
    # Everything up to the first slash after the scheme is replaced, so ports and
    # user info are anonymized as well as the host name.
    def anonymizer_for_url(value, capture_definition)
      value.sub(%r{^https?://[^/]+/}, "http://example.com/")
    end
  end

  # The line definition class is used to specify what lines should be parsed from the log file.
  # It contains functionality to match a line against the definition and parse the information
  # from this line. This is used by the LogParser class when parsing a log file.
  class LineDefinition

    include RequestLogAnalyzer::Anonymizers

    # Small DSL helper: every unknown method call on a Definer registers a line
    # definition under the name of that method.
    class Definer

      attr_accessor :line_definitions

      def initialize
        @line_definitions = {}
      end

      # Registers a line definition called <tt>name</tt>. With a block, the new
      # definition is yielded for configuration; without a block, the first
      # argument is used as the definition hash (an empty hash if it is omitted).
      def method_missing(name, *args, &block)
        if block_given?
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.define(name, &block)
        else
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.new(name, args.first || {})
        end
      end
    end

    attr_reader :name
    attr_accessor :teaser, :regexp, :captures
    attr_accessor :header, :footer

    # Initializes the LineDefinition instance with a hash containing the different elements of
    # the definition. Every key is assigned through the attribute writer of the same name.
    def initialize(name, definition = {})
      @name     = name
      @captures = []
      definition.each { |key, value| self.send("#{key}=".to_sym, value) }
    end

    # Creates a definition called <tt>name</tt> and yields it to the block (if any)
    # so it can be configured. Returns the definition.
    def self.define(name, &block)
      definition = self.new(name)
      yield(definition) if block_given?
      definition
    end

    # Converts a parsed value (String) to the desired value using some heuristics.
    # Unknown types return the value unchanged.
    def convert_value(value, type)
      case type
      when :integer          then value.to_i
      when :float, :decimal  then value.to_f
      when :symbol           then value.to_sym
      when :sec              then value.to_f
      when :msec             then value.to_f / 1000
      when :timestamp        then value.gsub(/[^0-9]/, '')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
      else value
      end
    end

    # Checks whether a given line matches this definition.
    # It will return false if a line does not match. If the line matches, a hash is returned
    # with all the fields parsed from that line as content.
    # If the line definition has a teaser-check, a :teaser_check_failed warning will be emitted
    # (through <tt>parser.warn</tt>, when a parser is given) if this teaser-check is passed,
    # but the full regular expression does not match.
    def matches(line, lineno = nil, parser = nil)
      return false unless @teaser.nil? || @teaser =~ line

      match_data = line.match(@regexp)
      unless match_data
        if @teaser && parser
          parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
        end
        return false
      end

      request_info = { :line_type => name, :lineno => lineno }
      captures.each_with_index do |capture, index|
        next if capture == :ignore

        captured = match_data.captures[index]
        request_info[capture[:name]] = convert_value(captured, capture[:type]) if captured
      end
      request_info
    end

    alias :=~ :matches

    # Anonymizes a single captured value according to the :anonymize setting of
    # its capture definition:
    # - a callable is called with the value and the capture definition;
    # - nil or false leaves the value untouched;
    # - true replaces the value with '***';
    # - :slightly perturbs the value (see anonymize_slightly);
    # - any other value X dispatches to anonymizer_for_X when such a method
    #   exists, and falls back to '***' otherwise.
    def anonymize_value(value, capture_definition)
      anonymizer = capture_definition[:anonymize]
      return anonymizer.call(value, capture_definition) if anonymizer.respond_to?(:call)

      case anonymizer
      when nil, false then value
      when true       then '***'
      when :slightly  then anonymize_slightly(value, capture_definition)
      else
        method_name = "anonymizer_for_#{anonymizer}".to_sym
        respond_to?(method_name) ? send(method_name, value, capture_definition) : '***'
      end
    end

    # Perturbs a value by a random factor between 0.8 and 1.2 (numeric types) or by a
    # random offset of -50 to +49 days (timestamps). Types that cannot be perturbed are
    # replaced by '***'; the notice about that goes to $stderr so it cannot end up in
    # anonymized output written to $stdout.
    def anonymize_slightly(value, capture_definition)
      case capture_definition[:type]
      when :integer, :msec
        (value.to_i * (0.8 + rand * 0.4)).to_i
      when :double, :float, :decimal, :sec
        # :float and :decimal are the names convert_value uses for floating point captures.
        (value.to_f * (0.8 + rand * 0.4)).to_f
      when :timestamp
        (DateTime.parse(value) + (rand(100) - 50)).to_s
      else
        $stderr.puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
        '***'
      end
    end

    # Anonymizes a log line.
    #
    # Returns:
    # - a copy of the line with every captured value anonymized, if the line matches
    #   this definition (the line passed in is not modified);
    # - nil if the teaser (or, without a teaser, the regexp) does not match;
    # - the line itself, or "" when options[:discard_teaser_lines] is set, if the
    #   teaser matches but the full regexp does not.
    def anonymize(line, options = {})
      return nil unless teaser.nil? || teaser =~ line

      match_data = regexp && regexp.match(line)
      unless match_data
        return nil if teaser.nil?
        return options[:discard_teaser_lines] ? "" : line
      end

      anonymized_line = line.dup
      pos_adjustment  = 0 # length difference caused by the replacements made so far
      captures.each_with_index do |capture, index|
        next if capture == :ignore # ignored captures carry no anonymization settings

        original_value = match_data[index + 1]
        next if original_value.nil?

        anonymized_value = anonymize_value(original_value, capture).to_s
        from = match_data.begin(index + 1) + pos_adjustment
        to   = match_data.end(index + 1) + pos_adjustment
        anonymized_line[from...to] = anonymized_value
        pos_adjustment += anonymized_value.length - original_value.length
      end
      anonymized_line
    end
  end

end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file.
  #
  # A FileFormat module should be provided that contains the definitions of the lines that
  # occur in the log data. The log parser can run in two modes:
  # - In single line mode, it will emit every detected line as a separate request
  # - In combined requests mode, it will combine the different lines from the line definitions
  #   into one request, that will then be emitted.
  #
  # The combined requests mode gives better information, but can be problematic if the log
  # file is unordered. This can be the case if data is written to the log file simultaneously
  # by different mongrel processes. This problem is detected by the parser, but the requests
  # that are mixed up cannot be parsed. It will emit warnings when this occurs.
  #
  # The mode is selected with the :combined_requests option given to the constructor.
  class LogParser

    include RequestLogAnalyzer::FileFormat::Awareness

    # A hash of options
    attr_reader :options

    # The current Request object that is being parsed
    attr_reader :current_request

    # The total number of parsed lines
    attr_reader :parsed_lines

    # The total number of parsed requests.
    attr_reader :parsed_requests

    # The number of requests for which the handler block returned a false value
    attr_reader :skipped_requests

    # Initializes the parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input.
    def initialize(format, options = {})
      @line_definitions = {}
      @options          = options
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_requests = 0

      @current_io       = nil
      @current_request  = nil
      @progress_handler = nil
      @warning_handler  = nil

      # install the file format module (see RequestLogAnalyzer::FileFormat)
      # and register all the line definitions to the parser
      self.register_file_format(format)
    end

    # Parses a list of consecutive files of the same format
    def parse_files(files, options = {}, &block)
      files.each { |file| parse_file(file, options, &block) }
    end

    # Parses a file.
    # Creates an IO stream for the provided file, and sends it to parse_io for further handling.
    # The progress handler (if any) is notified with :started before and :finished after parsing.
    def parse_file(file, options = {}, &block)
      @progress_handler.call(:started, file) if @progress_handler
      File.open(file, 'r') { |f| parse_io(f, options, &block) }
      @progress_handler.call(:finished, file) if @progress_handler
    end

    # Parses an already opened stream; see parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # Reads the IO line by line, finds the first line definition that matches each line and
    # parses the information in the line. Parsed requests are yielded to the block.
    # <tt>options[:line_types]</tt> The log line types to look for (defaults to all line
    # definitions of the file format). Raises if an unknown line type is requested.
    # <tt>options[:debug]</tt> Prints the parsing mode before parsing starts.
    def parse_io(io, options = {}, &block)

      # parse every line type by default
      line_types = options[:line_types] || file_format.line_definitions.keys

      # check whether all provided line types are valid
      unknown = line_types.reject { |line_type| file_format.line_definitions.has_key?(line_type) }
      raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

      if options[:debug]
        # Report the mode that is actually used below, which is taken from the
        # parser's own options rather than from the per-call options.
        puts "Parsing mode: " + (@options[:combined_requests] ? 'combined requests' : 'single lines')
      end

      @current_io = io
      @current_io.each_line do |line|

        @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

        # the first line type whose definition matches wins
        request_data = nil
        line_types.each do |line_type|
          request_data = file_format.line_definitions[line_type].matches(line, @current_io.lineno, self)
          break if request_data
        end
        next unless request_data

        @parsed_lines += 1
        if @options[:combined_requests]
          update_current_request(request_data, &block)
        else
          handle_request(RequestLogAnalyzer::Request.create(@file_format, request_data), &block)
        end
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      nil
    ensure
      # never keep a reference to the stream around, even when parsing raised
      @current_io = nil
    end

    # Installs a progress handler. It is called with (:started, file) and (:finished, file)
    # by parse_file, and with (:progress, position) for every line read from a File.
    def progress=(proc)
      @progress_handler = proc
    end

    # Installs a warning handler. It is called with the warning type, the message and the
    # current line number (nil when no stream is being parsed).
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any problems.
    # It will call the warning handler. The default controller will pass all warnings to every
    # aggregator that is registered and running
    def warn(type, message)
      return unless @warning_handler
      lineno = @current_io && @current_io.lineno
      @warning_handler.call(type, message, lineno)
    end

    protected

    # Combines the different lines of a request into a single Request object.
    # This function is only called in combined requests mode. It will start a new request when
    # a header line is encountered and will emit the request when a footer line is encountered.
    #
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded, the next request is ignored as well and a :unclosed_request warning is
    #   emitted. With the :assume_correct_order option, the unclosed request is emitted
    #   instead and the header line starts a new request.
    def update_current_request(request_data, &block)
      if header_line?(request_data)
        if @current_request.nil?
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        elsif options[:assume_correct_order]
          handle_request(@current_request, &block)
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        else
          warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
          @current_request = nil # remove all data that was parsed, skip next request as well.
        end
      elsif @current_request.nil?
        warn(:no_current_request, "Parsebale line found outside of a request!")
      else
        @current_request << request_data
        if footer_line?(request_data)
          handle_request(@current_request, &block)
          @current_request = nil
        end
      end
    end

    # Handles the parsed request by yielding it to the request handler block.
    # The default controller will send the request to every running aggregator.
    # A request counts as skipped when the block returns a false value.
    def handle_request(request, &block)
      @parsed_requests += 1
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 if !accepted
    end

    # Checks whether a given line hash is a header line.
    def header_line?(hash)
      file_format.line_definitions[hash[:line_type]].header
    end

    # Checks whether a given line hash is a footer line.
    def footer_line?(hash)
      file_format.line_definitions[hash[:line_type]].footer
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The LogProcessor class is used to perform simple processing actions over log files.
  # It will go over the log file/stream line by line, pass the line to a processor and
  # write the result back to the output file or stream. The processor can alter the
  # contents of the line, leave it intact or remove it altogether, based on the current
  # file format.
  #
  # Currently, two processors are supported, :strip and :anonymize.
  # * :strip will remove all irrelevant lines (according to the file format) from the
  #   sources. A compact, information packed log will remain.
  # * :anonymize will anonymize sensitive information from the lines according to the
  #   anonymization rules in the file format. The result can be passed to third parties
  #   without privacy concerns.
  #
  class LogProcessor

    include RequestLogAnalyzer::FileFormat::Awareness

    attr_reader :mode, :options, :sources
    attr_accessor :output_file

    # Builds a log processor instance from the arguments given on the command line
    # <tt>command</tt> The command that was used to start the log processor. This can either be
    #   :strip or :anonymize. This will set the processing mode.
    # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
    def self.build(command, arguments)

      options = {
        :discard_teaser_lines => arguments[:discard_teaser_lines],
        :keep_junk_lines      => arguments[:keep_junk_lines]
      }

      log_processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
      log_processor.output_file = arguments[:output] if arguments[:output]

      arguments.parameters.each { |input| log_processor.sources << input }

      log_processor
    end

    # Initializes a new LogProcessor instance.
    # <tt>format</tt> The file format to use (e.g. :rails).
    # <tt>mode</tt> The processing mode (:anonymize or :strip)
    # <tt>options</tt> A hash with options to take into account
    def initialize(format, mode, options = {})
      @options     = options
      @mode        = mode
      @sources     = []
      @output_file = nil # instance state backing the output_file accessor, not a global
      self.register_file_format(format)
    end

    # Processes an input file by opening it and sending the file stream to <code>process_io</code>,
    # in which the actual processing is performed.
    # <tt>file</tt> The file to process
    def process_file(file)
      File.open(file, 'r') { |io| process_io(io) }
    end

    # Processes an input stream by iterating over each line and processing it according to
    # the current operation mode (:strip, :anonymize). The result of every line is appended
    # to the output stream that run! has set up. Any other mode leaves the stream unread.
    # <tt>io</tt> The IO instance to process.
    def process_io(io)
      case mode
      when :strip     then io.each_line { |line| @output << strip_line(line) }
      when :anonymize then io.each_line { |line| @output << anonymize_line(line) }
      end
    end

    # Returns the line itself if the string matches any of the line definitions. If no match is
    # found, an empty line is returned, which will strip the line from the output.
    # <tt>line</tt> The line to strip
    def strip_line(line)
      file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
    end

    # Returns an anonymized version of the provided line. This can be a copy of the line itself,
    # an empty string or a string in which some substrings are substituted for anonymized values.
    # The result of the first line definition that returns a non-nil value is used. If no
    # definition does, the line is kept when the :keep_junk_lines option is set and replaced
    # by an empty string otherwise.
    # <tt>line</tt> The line to anonymize
    def anonymize_line(line)
      anonymized_line = nil
      file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }

      return anonymized_line if anonymized_line
      options[:keep_junk_lines] ? line : ""
    end

    # Runs the log processing by setting up the output stream and iterating over all the
    # input sources. Input sources can either be filenames (String instances) or IO streams
    # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
    # Any other source, including the name of a file that does not exist, is skipped.
    # Output is appended to the output file when one is set and written to $stdout otherwise.
    def run!
      @output = @output_file.nil? ? $stdout : File.new(@output_file, 'a')

      @sources.each do |source|
        if source.kind_of?(String) && File.exist?(source)
          process_file(source)
        elsif source.kind_of?(IO)
          process_io(source)
        elsif ['-', 'STDIN'].include?(source)
          process_io($stdin)
        end
      end

    ensure
      # only close what was opened here; $stdout is an IO, not a File
      @output.close if @output.kind_of?(File)
    end
  end

end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The Request class represents a parsed request from the log file.
  # Instances are created by the LogParser and are passed to the different aggregators, so they
  # can do their aggregating work.
  #
  # Note that RequestLogAnalyzer can run in two modes:
  # - Single line mode: every parsed line is regarded as a request. Request#single_line? will
  #   return true in this case
  # - Combined requests mode: lines that belong together are grouped into one request.
  #   Request#combined? will return true in this case.
  #
  # This class provides several methods to access the data that was parsed from the log files.
  # Request#first(field_name) returns the first (only) value corresponding to the given field
  # Request#every(field_name) returns all values corresponding to the given field name as array.
  class Request

    include RequestLogAnalyzer::FileFormat::Awareness

    attr_reader :lines
    attr_reader :attributes

    # Initializes a new Request object.
    # It will apply the provided FileFormat module to this instance.
    def initialize(file_format)
      @lines      = []
      @attributes = {}
      register_file_format(file_format)
    end

    # Creates a new request that was parsed from the log with the given FileFormat. The hashes
    # that are passed to this function are added as lines to this request.
    def self.create(file_format, *hashes)
      request = self.new(file_format)
      hashes.flatten.each { |hash| request << hash }
      request
    end

    # Adds another line to the request.
    # The line should be provided as a hash of the fields parsed from the line.
    # Fields that were already captured by an earlier line keep their first value in
    # the attributes hash.
    def << (request_info_hash)
      @lines << request_info_hash
      @attributes = request_info_hash.merge(@attributes)
    end

    # Checks whether the given line type was parsed from the log file for this request.
    # Returns a true value (true or the matching line hash) if so, nil otherwise.
    def has_line_type?(line_type)
      wanted = line_type.to_sym
      return true if @lines.length == 1 && @lines[0][:line_type] == wanted

      @lines.detect { |l| l[:line_type] == wanted }
    end

    alias :=~ :has_line_type?

    # Returns the value that was captured for the "field" of this request.
    # This function will return the first value that was captured if the field
    # was captured in multiple lines for a combined request.
    def first(field)
      @attributes[field]
    end

    alias :[] :first

    # Returns an array of all the "field" values that were captured for this request
    def every(field)
      @lines.select { |fields| fields.has_key?(field) }.map { |fields| fields[field] }
    end

    # Returns true if this request does not yet contain any parsed lines. This should only occur
    # during parsing. An empty request should never be sent to the aggregators
    def empty?
      @lines.length == 0
    end

    # Checks whether this request contains exactly one line. This means that RequestLogAnalyzer
    # is running in single_line mode.
    def single_line?
      @lines.length == 1
    end

    # Checks whether this request contains more than one line. This means that RequestLogAnalyzer
    # is running in combined requests mode.
    def combined?
      @lines.length > 1
    end

    # Checks whether this request is completed. A completed request contains both a parsed
    # header line and a parsed footer line, according to the line definitions of the file
    # format. This method has no side effects; in particular it prints nothing.
    def completed?
      header_found, footer_found = false, false
      @lines.each do |line|
        line_def = file_format.line_definitions[line[:line_type]]
        header_found = true if line_def.header
        footer_found = true if line_def.footer
      end
      header_found && footer_found
    end

    # Returns the line type of the parsed line of this request.
    # This function can only be called in single line mode; it raises otherwise.
    def line_type
      raise "Not a single line request!" unless single_line?
      lines.first[:line_type]
    end

    # Returns the first timestamp encountered in a request.
    def timestamp
      first(:timestamp)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module RequestLogAnalyzer::Source

  # Base source class. It only stores the options and registers the file format;
  # prepare, requests and finalize are no-op hooks.
  class Base

    include RequestLogAnalyzer::FileFormat::Awareness

    # options:          a hash of options
    # current_request:  the current Request object that is being parsed
    # parsed_lines:     the total number of parsed lines
    # parsed_requests:  the total number of parsed requests
    # skipped_requests: the number of skipped requests because of date constraints
    #
    # Only options is assigned by this class; the other readers return nil
    # unless something else sets the corresponding instance variable.
    attr_reader :options, :current_request, :parsed_lines, :parsed_requests, :skipped_requests

    # Initializer
    # <tt>format</tt> The file format
    # <tt>options</tt> Are passed to the filters.
    def initialize(format, options = {})
      @options = options
      register_file_format(format)
    end

    # Hook without behaviour in the base class.
    def prepare; end

    # Hook without behaviour in the base class; the block is ignored.
    # Always returns true.
    def requests(&block)
      true
    end

    # Hook without behaviour in the base class.
    def finalize; end

  end
end
|