request-log-analyzer 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. data/DESIGN +14 -0
  2. data/HACKING +7 -0
  3. data/LICENSE +20 -0
  4. data/README.textile +36 -0
  5. data/Rakefile +5 -0
  6. data/bin/request-log-analyzer +123 -0
  7. data/lib/cli/bashcolorizer.rb +60 -0
  8. data/lib/cli/command_line_arguments.rb +301 -0
  9. data/lib/cli/progressbar.rb +236 -0
  10. data/lib/request_log_analyzer.rb +14 -0
  11. data/lib/request_log_analyzer/aggregator/base.rb +45 -0
  12. data/lib/request_log_analyzer/aggregator/database.rb +148 -0
  13. data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
  14. data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
  15. data/lib/request_log_analyzer/controller.rb +201 -0
  16. data/lib/request_log_analyzer/file_format.rb +81 -0
  17. data/lib/request_log_analyzer/file_format/merb.rb +33 -0
  18. data/lib/request_log_analyzer/file_format/rails.rb +90 -0
  19. data/lib/request_log_analyzer/filter/base.rb +29 -0
  20. data/lib/request_log_analyzer/filter/field.rb +36 -0
  21. data/lib/request_log_analyzer/filter/timespan.rb +32 -0
  22. data/lib/request_log_analyzer/line_definition.rb +159 -0
  23. data/lib/request_log_analyzer/log_parser.rb +173 -0
  24. data/lib/request_log_analyzer/log_processor.rb +121 -0
  25. data/lib/request_log_analyzer/request.rb +95 -0
  26. data/lib/request_log_analyzer/source/base.rb +42 -0
  27. data/lib/request_log_analyzer/source/log_file.rb +170 -0
  28. data/lib/request_log_analyzer/tracker/base.rb +54 -0
  29. data/lib/request_log_analyzer/tracker/category.rb +71 -0
  30. data/lib/request_log_analyzer/tracker/duration.rb +81 -0
  31. data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
  32. data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
  33. data/spec/controller_spec.rb +40 -0
  34. data/spec/database_inserter_spec.rb +101 -0
  35. data/spec/file_format_spec.rb +78 -0
  36. data/spec/file_formats/spec_format.rb +26 -0
  37. data/spec/filter_spec.rb +137 -0
  38. data/spec/fixtures/merb.log +84 -0
  39. data/spec/fixtures/multiple_files_1.log +5 -0
  40. data/spec/fixtures/multiple_files_2.log +2 -0
  41. data/spec/fixtures/rails_1x.log +59 -0
  42. data/spec/fixtures/rails_22.log +12 -0
  43. data/spec/fixtures/rails_22_cached.log +10 -0
  44. data/spec/fixtures/rails_unordered.log +24 -0
  45. data/spec/fixtures/syslog_1x.log +5 -0
  46. data/spec/fixtures/test_file_format.log +13 -0
  47. data/spec/fixtures/test_language_combined.log +14 -0
  48. data/spec/fixtures/test_order.log +16 -0
  49. data/spec/line_definition_spec.rb +124 -0
  50. data/spec/log_parser_spec.rb +68 -0
  51. data/spec/log_processor_spec.rb +57 -0
  52. data/spec/merb_format_spec.rb +38 -0
  53. data/spec/rails_format_spec.rb +76 -0
  54. data/spec/request_spec.rb +72 -0
  55. data/spec/spec_helper.rb +67 -0
  56. data/spec/summarizer_spec.rb +9 -0
  57. data/tasks/github-gem.rake +177 -0
  58. data/tasks/request_log_analyzer.rake +10 -0
  59. data/tasks/rspec.rake +6 -0
  60. metadata +135 -0
@@ -0,0 +1,29 @@
module RequestLogAnalyzer
  module Filter
    # Common parent class for request filters; every concrete filter should
    # inherit from it.
    #
    # A filter is handed requests through #filter and answers with either the
    # request itself (keep it) or nil (drop it).
    class Base
      include RequestLogAnalyzer::FileFormat::Awareness

      attr_reader :log_parser, :options

      # Builds the filter.
      # <tt>format</tt> The file format; it is passed on to register_file_format.
      # <tt>options</tt> Filter specific settings, readable through #options.
      def initialize(format, options = {})
        @options = options
        register_file_format(format)
      end

      # Hook that subclasses override to pre-process their options.
      # The base implementation does nothing.
      def prepare; end

      # Pass-through filter: answers the request unchanged, or nil when no
      # request was given.
      def filter(request)
        request || nil
      end
    end
  end
end
@@ -0,0 +1,36 @@
module RequestLogAnalyzer::Filter
  # Filter that keeps or drops requests based on the value of a single field.
  # Options
  # * <tt>:mode</tt> :accept (the default; :select is treated as an alias) or :reject.
  # * <tt>:field</tt> Name of the field to inspect.
  # * <tt>:value</tt> Value the field should match to be accepted or rejected. A String
  #   wrapped in slashes (e.g. "/foo/") is converted to a Regexp; any other value is
  #   used as-is. Matching is done with <tt>===</tt>.
  class Field < Base
    # Modes in which only requests with a matching field value are kept.
    # :accept is the documented (and default) name; :select is kept so that
    # callers which already pass it keep working.
    SELECTING_MODES = [:accept, :select].freeze

    attr_reader :field, :value, :mode

    # Normalizes the options into the @mode, @field and @value instance variables.
    def prepare
      @mode  = (@options[:mode] || :accept).to_sym
      @field = @options[:field].to_sym

      # A "/.../" string is shorthand for a regular expression; strip the slashes.
      raw_value = @options[:value]
      @value = regexp_literal?(raw_value) ? Regexp.new(raw_value[1..-2]) : raw_value
    end

    # Returns the request when it passes the filter, nil otherwise.
    # <tt>request</tt> The request to check; nil is passed through as nil.
    def filter(request)
      return nil unless request

      found_field = request.every(@field).any? { |field_value| @value === field_value }

      return nil if !found_field && SELECTING_MODES.include?(@mode)
      return nil if found_field && @mode == :reject

      request
    end

    private

    # True when the candidate is a String that starts and ends with a slash.
    def regexp_literal?(candidate)
      candidate.kind_of?(String) && candidate[0, 1] == '/' && candidate[-1, 1] == '/'
    end
  end
end
@@ -0,0 +1,32 @@
module RequestLogAnalyzer::Filter
  # Rejects all requests that are not in the given timespan.
  # Options
  # * <tt>:after</tt> Only keep requests at or after this DateTime.
  # * <tt>:before</tt> Only keep requests at or before this DateTime.
  #
  # When neither option is given, every request is rejected.
  class Timespan < Base
    # Both bounds are stored as YYYYMMDDHHMMSS integers so that they can be
    # compared numerically against request.timestamp.
    # NOTE(review): assumes request.timestamp uses the same integer layout - confirm.
    TIMESTAMP_FORMAT = '%Y%m%d%H%M%S'

    attr_reader :before, :after

    # Converts the configured bounds to integers for quick timestamp comparisons.
    def prepare
      @after  = @options[:after].strftime(TIMESTAMP_FORMAT).to_i if @options[:after]
      @before = @options[:before].strftime(TIMESTAMP_FORMAT).to_i if @options[:before]
    end

    # Returns the request when its timestamp lies within the configured bounds
    # (inclusive), nil otherwise.
    # <tt>request</tt> The request to check; nil is passed through as nil.
    def filter(request)
      return nil unless request
      return nil unless @after || @before

      # A request without a timestamp cannot be placed inside the timespan;
      # reject it instead of raising NoMethodError on the comparison below.
      timestamp = request.timestamp
      return nil if timestamp.nil?

      return nil if @after && timestamp < @after
      return nil if @before && timestamp > @before

      request
    end
  end
end
@@ -0,0 +1,159 @@
module RequestLogAnalyzer

  # Named anonymizers. A capture definition with <tt>:anonymize => :ip</tt> is
  # anonymized by <tt>anonymizer_for_ip</tt>, and so on
  # (see LineDefinition#anonymize_value).
  module Anonymizers
    # Replaces every IP address with the loopback address.
    def anonymizer_for_ip(value, capture_definition)
      '127.0.0.1'
    end

    # Replaces the scheme and host of an absolute URL with http://example.com/,
    # leaving the path untouched. Values that do not start with an http(s)
    # host are returned unchanged.
    def anonymizer_for_url(value, capture_definition)
      # The host class is spelled out explicitly: an "A-z" range would also
      # match the punctuation characters between "Z" and "a".
      value.sub(%r{^https?://[A-Za-z0-9_.-]+/}, "http://example.com/")
    end
  end

  # The line definition class is used to specify what lines should be parsed from the log file.
  # It contains functionality to match a line against the definition and parse the information
  # from this line. This is used by the LogParser class when parsing a log file.
  class LineDefinition

    include RequestLogAnalyzer::Anonymizers

    # Small DSL helper: every unknown method called on a Definer registers a
    # line definition under that method's name.
    class Definer

      attr_accessor :line_definitions

      def initialize
        @line_definitions = {}
      end

      # <tt>definer.name { |line| ... }</tt> builds the definition through
      # LineDefinition.define; <tt>definer.name(hash)</tt> builds it from the hash.
      def method_missing(name, *args, &block)
        if block_given?
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.define(name, &block)
        else
          @line_definitions[name] = RequestLogAnalyzer::LineDefinition.new(name, args.first)
        end
      end
    end

    attr_reader :name
    attr_accessor :teaser, :regexp, :captures
    attr_accessor :header, :footer

    # Initializes the LineDefinition instance with a hash containing the different elements of
    # the definition. Every key is assigned through the writer of the same name.
    def initialize(name, definition = {})
      @name     = name
      @captures = []
      definition.each { |key, value| self.send("#{key}=".to_sym, value) }
    end

    # Creates a definition and yields it to the block for further configuration.
    def self.define(name, &block)
      definition = self.new(name)
      yield(definition) if block_given?
      definition
    end

    # Converts a parsed value (String) to the desired value using some heuristics.
    # Unknown types return the value unchanged.
    def convert_value(value, type)
      case type
      when :integer   then value.to_i
      when :float     then value.to_f
      when :decimal   then value.to_f
      when :symbol    then value.to_sym
      when :sec       then value.to_f
      when :msec      then value.to_f / 1000
      when :timestamp then value.gsub(/[^0-9]/, '')[0..13].to_i # Retrieve with: DateTime.parse(value, '%Y%m%d%H%M%S')
      else value
      end
    end

    # Checks whether a given line matches this definition.
    # It will return false if a line does not match. If the line matches, a hash is returned
    # with all the fields parsed from that line as content.
    # If the line definition has a teaser-check, a :teaser_check_failed warning will be emitted
    # if this teaser-check is passed, but the full regular expression does not match.
    def matches(line, lineno = nil, parser = nil)
      return false unless @teaser.nil? || @teaser =~ line

      match_data = line.match(@regexp)
      unless match_data
        if @teaser && parser
          parser.warn(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
        end
        return false
      end

      request_info = { :line_type => name, :lineno => lineno }
      captures.each_with_index do |capture, index|
        next if capture == :ignore

        raw_value = match_data.captures[index]
        request_info[capture[:name]] = convert_value(raw_value, capture[:type]) if raw_value
      end
      request_info
    end

    alias :=~ :matches

    # Anonymizes a single captured value according to the :anonymize setting
    # of its capture definition:
    # * a callable is called with the value and the capture definition;
    # * nil or false leaves the value untouched;
    # * true replaces it with '***';
    # * :slightly delegates to anonymize_slightly;
    # * any other name is dispatched to <tt>anonymizer_for_<name></tt>, falling
    #   back to '***' when no such method exists.
    def anonymize_value(value, capture_definition)
      if capture_definition[:anonymize].respond_to?(:call)
        capture_definition[:anonymize].call(value, capture_definition)
      else
        case capture_definition[:anonymize]
        when nil       then value
        when false     then value
        when true      then '***'
        when :slightly then anonymize_slightly(value, capture_definition)
        else
          method_name = "anonymizer_for_#{capture_definition[:anonymize]}".to_sym
          self.respond_to?(method_name) ? self.send(method_name, value, capture_definition) : '***'
        end
      end
    end

    # Randomly perturbs a value while keeping it in the same ballpark: numbers
    # are scaled by a random factor between 0.8 and 1.2, timestamps are shifted
    # by a random number of days between -50 and +49. Types that cannot be
    # perturbed are replaced by '***'.
    def anonymize_slightly(value, capture_definition)
      case capture_definition[:type]
      when :integer
        (value.to_i * (0.8 + rand * 0.4)).to_i
      when :float, :decimal, :double
        # :float and :decimal are the floating point types known to
        # convert_value; :double is kept for backward compatibility.
        (value.to_f * (0.8 + rand * 0.4)).to_f
      when :msec
        (value.to_i * (0.8 + rand * 0.4)).to_i
      when :sec
        (value.to_f * (0.8 + rand * 0.4)).to_f
      when :timestamp
        (DateTime.parse(value) + (rand(100) - 50)).to_s
      else
        puts "Cannot anonymize #{capture_definition[:type].inspect} slightly, using ***"
        '***'
      end
    end

    # Anonymizes a log line. The line is modified in place and returned.
    #
    # Returns nil when the line is not described by this definition, i.e. when
    # the teaser does not match, or when there is no teaser and the full regular
    # expression does not match. When only the teaser matches, the untouched
    # line is returned, or an empty string if <tt>options[:discard_teaser_lines]</tt>
    # is set.
    def anonymize(line, options = {})
      return nil unless teaser.nil? || teaser =~ line

      match_data = regexp && regexp.match(line)
      unless match_data
        return nil if teaser.nil?
        return options[:discard_teaser_lines] ? "" : line
      end

      # Replacing a capture can change the length of the line, so the offsets
      # of all later captures have to be shifted by the accumulated difference.
      pos_adjustment = 0
      captures.each_with_index do |capture, index|
        next if capture == :ignore # ignored captures carry no definition to anonymize with

        group          = index + 1
        original_value = match_data[group]
        next if original_value.nil?

        anonymized_value = anonymize_value(original_value, capture).to_s
        line[(match_data.begin(group) + pos_adjustment)...(match_data.end(group) + pos_adjustment)] = anonymized_value
        pos_adjustment += anonymized_value.length - original_value.length
      end
      line
    end
  end

end
@@ -0,0 +1,173 @@
module RequestLogAnalyzer

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines to a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser, but the requests that are mixed up cannot be parsed. It will emit warnings when this
  # occurs.
  class LogParser

    include RequestLogAnalyzer::FileFormat::Awareness

    # A hash of options
    attr_reader :options

    # The current Request object that is being parsed
    attr_reader :current_request

    # The total number of parsed lines
    attr_reader :parsed_lines

    # The total number of parsed requests.
    attr_reader :parsed_requests

    # The number of requests for which the handler block returned a false value
    attr_reader :skipped_requests

    # Initializes the parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input.
    def initialize(format, options = {})
      @line_definitions = {}
      @options          = options
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_requests = 0

      @current_io       = nil
      @current_request  = nil
      @progress_handler = nil
      @warning_handler  = nil

      # install the file format module (see RequestLogAnalyzer::FileFormat)
      # and register all the line definitions to the parser
      self.register_file_format(format)
    end

    # Parses a list of consecutive files of the same format
    def parse_files(files, options = {}, &block)
      files.each { |file| parse_file(file, options, &block) }
    end

    # Parses a file.
    # Creates an IO stream for the provided file, and sends it to parse_io for further handling.
    # The progress handler, if any, is notified when the file is started and finished.
    def parse_file(file, options = {}, &block)
      @progress_handler.call(:started, file) if @progress_handler
      File.open(file, 'r') { |f| parse_io(f, options, &block) }
      @progress_handler.call(:finished, file) if @progress_handler
    end

    # Parses an already opened stream; see parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # Reads the IO line by line and matches every line against the line definitions.
    # The first definition that matches a line wins; its parsed data is combined with
    # the data of the surrounding lines into a request (see update_current_request).
    # <tt>options[:line_types]</tt> The line types to look for (defaults to all the line
    # definitions of the file format).
    # Yields every completed request to the block.
    # Raises a RuntimeError if an unknown line type is requested.
    def parse_io(io, options = {}, &block)

      # parse every line type by default
      line_types = options[:line_types] || file_format.line_definitions.keys

      # check whether all provided line types are valid
      unknown = line_types.reject { |line_type| file_format.line_definitions.has_key?(line_type) }
      raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

      @current_io = io
      begin
        @current_io.each_line do |line|

          @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

          request_data = nil
          line_types.each do |line_type|
            request_data = file_format.line_definitions[line_type].matches(line, @current_io.lineno, self)
            break if request_data
          end

          if request_data
            @parsed_lines += 1
            update_current_request(request_data, &block)
          end
        end

        warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      ensure
        # Never keep a reference to the stream around, not even when a handler raised.
        @current_io = nil
      end
    end

    # Installs a progress handler (a callable) that is notified while parsing
    def progress=(proc)
      @progress_handler = proc
    end

    # Installs a warning handler (a callable) that is notified while parsing
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any problems.
    # It will call the warning handler with the warning type, the message and the current
    # line number (nil when no stream is being parsed at that moment). The default controller
    # will pass all warnings to every aggregator that is registered and running
    def warn(type, message)
      return unless @warning_handler

      lineno = @current_io ? @current_io.lineno : nil
      @warning_handler.call(type, message, lineno)
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded, the next request is ignored as well and a :unclosed_request warning is
    #   emitted. With <tt>options[:assume_correct_order]</tt> the open request is handled as
    #   if it was complete instead, and a new request is started.
    def update_current_request(request_data, &block)
      if header_line?(request_data)
        if @current_request.nil?
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        elsif options[:assume_correct_order]
          handle_request(@current_request, &block)
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        else
          warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
          @current_request = nil # remove all data that was parsed, skip next request as well.
        end
      elsif @current_request.nil?
        warn(:no_current_request, "Parsebale line found outside of a request!")
      else
        @current_request << request_data
        if footer_line?(request_data)
          handle_request(@current_request, &block)
          @current_request = nil
        end
      end
    end

    # Handles the parsed request by yielding it to the request handler block.
    # A request counts as skipped when the block returns a false value; without
    # a block every request is accepted.
    # The default controller will send the request to every running aggregator.
    def handle_request(request, &block)
      @parsed_requests += 1
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 unless accepted
    end

    # Checks whether a given line hash is a header line.
    def header_line?(hash)
      file_format.line_definitions[hash[:line_type]].header
    end

    # Checks whether a given line hash is a footer line.
    def footer_line?(hash)
      file_format.line_definitions[hash[:line_type]].footer
    end
  end
end
@@ -0,0 +1,121 @@
module RequestLogAnalyzer

  # The LogProcessor class is used to perform simple processing actions over log files.
  # It will go over the log file/stream line by line, pass the line to a processor and
  # write the result back to the output file or stream. The processor can alter the
  # contents of the line, leave it intact or remove it altogether, based on the current
  # file format
  #
  # Currently, two processors are supported, :strip and :anonymize.
  # * :strip will remove all irrelevant lines (according to the file format) from the
  #   sources. A compact, information packed log will remain.
  # * :anonymize will anonymize sensitive information from the lines according to the
  #   anonymization rules in the file format. The result can be passed to third parties
  #   without privacy concerns.
  #
  class LogProcessor

    include RequestLogAnalyzer::FileFormat::Awareness

    attr_reader :mode, :options, :sources
    attr_accessor :output_file

    # Builds a logprocessor instance from the arguments given on the command line
    # <tt>command</tt> The command that was used to start the log processor. This can either be
    #   :strip or :anonymize. This will set the processing mode.
    # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
    def self.build(command, arguments)

      options = {
        :discard_teaser_lines => arguments[:discard_teaser_lines],
        :keep_junk_lines      => arguments[:keep_junk_lines],
      }

      log_processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
      log_processor.output_file = arguments[:output] if arguments[:output]

      arguments.parameters.each { |input| log_processor.sources << input }

      log_processor
    end

    # Initializes a new LogProcessor instance.
    # <tt>format</tt> The file format to use (e.g. :rails).
    # <tt>mode</tt> The processing mode (:anonymize or :strip)
    # <tt>options</tt> A hash with options to take into account
    def initialize(format, mode, options = {})
      @options     = options
      @mode        = mode
      @sources     = []
      # Instance state, not a global: this is the value behind the output_file accessor.
      @output_file = nil
      self.register_file_format(format)
    end

    # Processes an input file by opening it and sending the filestream to <code>process_io</code>,
    # in which the actual processing is performed.
    # <tt>file</tt> The file to process
    def process_file(file)
      File.open(file, 'r') { |io| process_io(io) }
    end

    # Processes an input stream by iterating over each line and processing it according to
    # the current operation mode (:strip, :anonymize). The result of every line is appended
    # to the output that was set up by <code>run!</code>. Any other mode leaves the stream unread.
    # <tt>io</tt> The IO instance to process.
    def process_io(io)
      case mode
      when :strip     then io.each_line { |line| @output << strip_line(line) }
      when :anonymize then io.each_line { |line| @output << anonymize_line(line) }
      end
    end

    # Returns the line itself if the string matches any of the line definitions. If no match is
    # found, an empty line is returned, which will strip the line from the output.
    # <tt>line</tt> The line to strip
    def strip_line(line)
      file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
    end

    # Returns an anonymized version of the provided line. This can be a copy of the line itself,
    # an empty string or a string in which some substrings are substituted for anonymized values.
    # The result of the first line definition that recognizes the line is used; a line that no
    # definition recognizes is kept only if the :keep_junk_lines option is set.
    # <tt>line</tt> The line to anonymize
    def anonymize_line(line)
      anonymized_line = nil
      file_format.line_definitions.detect { |name, definition| anonymized_line = definition.anonymize(line, options) }

      if anonymized_line
        anonymized_line
      elsif options[:keep_junk_lines]
        line
      else
        ""
      end
    end

    # Runs the log processing by setting up the output stream and iterating over all the
    # input sources. Input sources can either be filenames (String instances) or IO streams
    # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
    # Any other source, including the name of a file that does not exist, is skipped.
    # Output is appended to <code>output_file</code>, or written to $stdout if none is set.
    def run!
      @output = @output_file.nil? ? $stdout : File.new(@output_file, 'a')

      @sources.each do |source|
        if source.kind_of?(String) && File.exist?(source)
          process_file(source)
        elsif source.kind_of?(IO)
          process_io(source)
        elsif ['-', 'STDIN'].include?(source)
          process_io($stdin)
        end
      end

    ensure
      # Only close what was opened here; $stdout must stay open.
      @output.close if @output.kind_of?(File)
    end
  end

end