request-log-analyzer 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. data/DESIGN +14 -0
  2. data/HACKING +7 -0
  3. data/LICENSE +20 -0
  4. data/README.textile +36 -0
  5. data/Rakefile +5 -0
  6. data/bin/request-log-analyzer +123 -0
  7. data/lib/cli/bashcolorizer.rb +60 -0
  8. data/lib/cli/command_line_arguments.rb +301 -0
  9. data/lib/cli/progressbar.rb +236 -0
  10. data/lib/request_log_analyzer.rb +14 -0
  11. data/lib/request_log_analyzer/aggregator/base.rb +45 -0
  12. data/lib/request_log_analyzer/aggregator/database.rb +148 -0
  13. data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
  14. data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
  15. data/lib/request_log_analyzer/controller.rb +201 -0
  16. data/lib/request_log_analyzer/file_format.rb +81 -0
  17. data/lib/request_log_analyzer/file_format/merb.rb +33 -0
  18. data/lib/request_log_analyzer/file_format/rails.rb +90 -0
  19. data/lib/request_log_analyzer/filter/base.rb +29 -0
  20. data/lib/request_log_analyzer/filter/field.rb +36 -0
  21. data/lib/request_log_analyzer/filter/timespan.rb +32 -0
  22. data/lib/request_log_analyzer/line_definition.rb +159 -0
  23. data/lib/request_log_analyzer/log_parser.rb +173 -0
  24. data/lib/request_log_analyzer/log_processor.rb +121 -0
  25. data/lib/request_log_analyzer/request.rb +95 -0
  26. data/lib/request_log_analyzer/source/base.rb +42 -0
  27. data/lib/request_log_analyzer/source/log_file.rb +170 -0
  28. data/lib/request_log_analyzer/tracker/base.rb +54 -0
  29. data/lib/request_log_analyzer/tracker/category.rb +71 -0
  30. data/lib/request_log_analyzer/tracker/duration.rb +81 -0
  31. data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
  32. data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
  33. data/spec/controller_spec.rb +40 -0
  34. data/spec/database_inserter_spec.rb +101 -0
  35. data/spec/file_format_spec.rb +78 -0
  36. data/spec/file_formats/spec_format.rb +26 -0
  37. data/spec/filter_spec.rb +137 -0
  38. data/spec/fixtures/merb.log +84 -0
  39. data/spec/fixtures/multiple_files_1.log +5 -0
  40. data/spec/fixtures/multiple_files_2.log +2 -0
  41. data/spec/fixtures/rails_1x.log +59 -0
  42. data/spec/fixtures/rails_22.log +12 -0
  43. data/spec/fixtures/rails_22_cached.log +10 -0
  44. data/spec/fixtures/rails_unordered.log +24 -0
  45. data/spec/fixtures/syslog_1x.log +5 -0
  46. data/spec/fixtures/test_file_format.log +13 -0
  47. data/spec/fixtures/test_language_combined.log +14 -0
  48. data/spec/fixtures/test_order.log +16 -0
  49. data/spec/line_definition_spec.rb +124 -0
  50. data/spec/log_parser_spec.rb +68 -0
  51. data/spec/log_processor_spec.rb +57 -0
  52. data/spec/merb_format_spec.rb +38 -0
  53. data/spec/rails_format_spec.rb +76 -0
  54. data/spec/request_spec.rb +72 -0
  55. data/spec/spec_helper.rb +67 -0
  56. data/spec/summarizer_spec.rb +9 -0
  57. data/tasks/github-gem.rake +177 -0
  58. data/tasks/request_log_analyzer.rake +10 -0
  59. data/tasks/rspec.rake +6 -0
  60. metadata +135 -0
@@ -0,0 +1,95 @@
module RequestLogAnalyzer

  # A Request is the collection of parsed log lines that belong to one request.
  # Lines are stored as hashes of captured fields, in the order in which they were added.
  #
  # Two views on the captured data are kept:
  # * <tt>lines</tt>      every line hash that was added, in order.
  # * <tt>attributes</tt> all fields merged into one hash; when a field occurs in more than
  #   one line, the value of the line that was added first is kept.
  #
  # Request#first(field_name) returns the first value that was captured for the field.
  # Request#every(field_name) returns all values captured for the field as an array.
  class Request

    include RequestLogAnalyzer::FileFormat::Awareness

    attr_reader :lines
    attr_reader :attributes

    # Builds an empty request and registers the given file format on it.
    # <tt>file_format</tt> The file format, handed to register_file_format.
    def initialize(file_format)
      @lines      = []
      @attributes = {}
      register_file_format(file_format)
    end

    # Builds a request for the given file format and adds every given hash to it as a line.
    # The hashes may be passed separately or as (nested) arrays; they are flattened first.
    # Returns the new request.
    def self.create(file_format, *hashes)
      hashes.flatten.inject(new(file_format)) do |request, line_hash|
        request << line_hash
        request
      end
    end

    # Adds another line to the request.
    # <tt>request_info_hash</tt> A hash of the fields that were parsed from the line.
    #
    # Fields that are already present in the attributes keep their existing value, so the
    # first captured value of a field always wins.
    def <<(request_info_hash)
      @lines.push(request_info_hash)
      @attributes = request_info_hash.merge(@attributes)
    end

    # Checks whether a line of the given type was added to this request.
    # Returns true when the only line of the request has the type; otherwise it returns the
    # first line hash with that type, or nil if there is none.
    def has_line_type?(line_type)
      if @lines.length == 1 && @lines[0][:line_type] == line_type.to_sym
        true
      else
        @lines.find { |parsed_line| parsed_line[:line_type] == line_type.to_sym }
      end
    end

    alias_method :=~, :has_line_type?

    # Returns the value that was captured for the field. If the field was captured in
    # multiple lines, the value of the line that was added first is returned.
    def first(field)
      @attributes[field]
    end

    alias_method :[], :first

    # Returns an array with the value of the field for every line that contains the field.
    def every(field)
      @lines.select { |fields| fields.has_key?(field) }.map { |fields| fields[field] }
    end

    # Returns true if no line has been added to this request yet.
    def empty?
      @lines.empty?
    end

    # Checks whether this request is completed: at least one of its lines has a line
    # definition that is flagged as header, and at least one is flagged as footer.
    def completed?
      # Read both flags for every line before combining them.
      flags = @lines.map do |line|
        definition = file_format.line_definitions[line[:line_type]]
        [definition.header, definition.footer]
      end
      flags.any? { |header, _| header } && flags.any? { |_, footer| footer }
    end

    # Returns the first timestamp that was captured for this request.
    def timestamp
      first(:timestamp)
    end

    # Returns the :lineno field of the first line of this request.
    def first_lineno
      @lines[0][:lineno]
    end

    # Returns the :lineno field of the last line of this request.
    def last_lineno
      @lines[-1][:lineno]
    end
  end
end
@@ -0,0 +1,42 @@
module RequestLogAnalyzer::Source

  # Base source class. It defines the interface of a request source: prepare, requests and
  # finalize, plus readers for the parsing statistics. All three interface methods are
  # no-ops here and are meant to be overridden by subclasses.
  class Base

    include RequestLogAnalyzer::FileFormat::Awareness

    # A hash of options
    attr_reader :options

    # The current Request object that is being parsed
    attr_reader :current_request

    # The total number of parsed lines
    attr_reader :parsed_lines

    # The total number of parsed requests.
    attr_reader :parsed_requests

    # The number of skipped lines because of warnings
    attr_reader :skipped_lines

    # Initializer
    # <tt>format</tt> The file format, handed to register_file_format.
    # <tt>options</tt> A hash of options; stored as-is and exposed through #options.
    #
    # The counters start at zero so that the statistics readers return a number even
    # before (or without) any parsing.
    def initialize(format, options = {})
      @options         = options
      @parsed_lines    = 0
      @parsed_requests = 0
      @skipped_lines   = 0
      register_file_format(format)
    end

    # Hook that subclasses can override to set up the source. Does nothing here.
    def prepare
    end

    # Hook that subclasses override to hand requests to the given block.
    # <tt>options</tt> Optional hash, accepted so that the signature matches
    # LogFile#requests(options = {}, &block); it is ignored here.
    #
    # The base implementation never calls the block and returns true.
    def requests(options = {}, &block)
      true
    end

    # Hook that subclasses can override to clean up the source. Does nothing here.
    def finalize
    end

  end
end
@@ -0,0 +1,170 @@
module RequestLogAnalyzer::Source

  # The LogFile class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from it. A file format should be
  # provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines into a single request. If these
  # lines are mixed, requests cannot be combined properly. This can be the case if data is
  # written to the log file simultaneously by different processes. This problem is detected
  # by the parser, but the requests that are mixed up cannot be parsed. It will emit
  # warnings when this occurs.
  class LogFile < RequestLogAnalyzer::Source::Base

    attr_reader :source_files

    # Initializes the parser instance.
    # <tt>format</tt> The file format, handed to register_file_format.
    # <tt>options</tt> A hash of options. <tt>:source_files</tt> is the input to parse: an
    # IO, a file name, or an array of file names (see #requests). <tt>:assume_correct_order</tt>
    # changes how unclosed requests are handled (see update_current_request).
    def initialize(format, options = {})
      @line_definitions = {}
      @options          = options
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_lines    = 0
      @current_io       = nil
      @source_files     = options[:source_files]

      # install the file format module (see RequestLogAnalyzer::FileFormat)
      # and register all the line definitions to the parser
      self.register_file_format(format)
    end

    # Parses the configured source and yields every combined request to the block.
    # <tt>options</tt> Parse options; <tt>:line_types</tt> limits the line types to look for.
    #
    # Raises a RuntimeError if the source is not an IO, a String or an Array.
    def requests(options = {}, &block)
      case @source_files
      when IO
        puts "Parsing from the standard input. Press CTRL+C to finish."
        parse_stream(@source_files, options, &block)
      when String
        parse_file(@source_files, options, &block)
      when Array
        parse_files(@source_files, options, &block)
      else
        raise "Unknown source provided"
      end
    end

    # Parses a list of consecutive files of the same format, one after the other.
    def parse_files(files, options = {}, &block)
      files.each { |file| parse_file(file, options, &block) }
    end

    # Parses a file.
    # Opens the file for reading and sends the IO to parse_io for further handling. The
    # progress handler, if any, is called with (:started, file) before and
    # (:finished, file) after parsing.
    def parse_file(file, options = {}, &block)
      @progress_handler.call(:started, file) if @progress_handler
      File.open(file, 'r') { |f| parse_io(f, options, &block) }
      @progress_handler.call(:finished, file) if @progress_handler
    end

    # Parses an already opened stream by sending it to parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # Reads the IO line by line, tries the line definitions on every line, and combines
    # the lines that match into requests. Completed requests are yielded to the block.
    # <tt>io</tt> The IO object to read from.
    # <tt>options</tt> <tt>:line_types</tt> is the list of line types to look for; it
    # defaults to every line type of the file format.
    #
    # Raises a RuntimeError if an unknown line type is requested. Always returns nil.
    def parse_io(io, options = {}, &block)

      # parse every line type by default
      line_types = options[:line_types] || file_format.line_definitions.keys

      # check whether all provided line types are valid
      unknown = line_types.reject { |line_type| file_format.line_definitions.has_key?(line_type) }
      raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

      @current_io = io
      begin
        @current_io.each_line do |line|

          # the position is only reported when reading from a File
          @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

          # the first line definition that matches wins
          request_data = nil
          line_types.each do |line_type|
            line_type_definition = file_format.line_definitions[line_type]
            request_data = line_type_definition.matches(line, @current_io.lineno, self)
            break if request_data
          end

          if request_data
            @parsed_lines += 1
            update_current_request(request_data, &block)
          end
        end

        # must run while @current_io is still set, so the warning carries a line number
        warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
        nil
      ensure
        # never keep a reference to the IO around, also not when parsing raised
        @current_io = nil
      end
    end

    # Installs a progress handler. The handler is called with (:started, file),
    # (:progress, position) and (:finished, file) while parsing.
    def progress=(proc)
      @progress_handler = proc
    end

    # Installs a warning handler. The handler is called with the warning type, the
    # message and the current line number.
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any problems; it forwards the
    # warning to the warning handler, if one is installed.
    # <tt>type</tt> A symbol that identifies the kind of warning.
    # <tt>message</tt> A description of the problem.
    #
    # The line number that is passed to the handler is nil when no IO is being parsed.
    def warn(type, message)
      return unless @warning_handler
      lineno = @current_io ? @current_io.lineno : nil
      @warning_handler.call(type, message, lineno)
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start
    # a new request when a header line is encountered and will emit the request when a
    # footer line is encountered.
    #
    # - Every line that is parsed before a header line is skipped as it cannot be included
    #   in any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before the current request is closed by a footer line
    #   is a sign of an improperly ordered file. With the :assume_correct_order option, the
    #   current request is emitted as it is and a new request is started. Otherwise all
    #   data that was gathered for the current request is discarded, the request that this
    #   header line starts is dropped as well, and an :unclosed_request warning is emitted.
    def update_current_request(request_data, &block)
      if header_line?(request_data)
        if @current_request.nil?
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        elsif options[:assume_correct_order]
          @parsed_requests += 1
          yield @current_request
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        else
          @skipped_lines += 1
          warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
          @current_request = nil # remove all data that was parsed, skip next request as well.
        end
      elsif @current_request.nil?
        @skipped_lines += 1
        warn(:no_current_request, "Parsebale line found outside of a request!")
      else
        @current_request << request_data
        if footer_line?(request_data)
          @parsed_requests += 1
          yield @current_request
          @current_request = nil
        end
      end
    end

    # Checks whether a given line hash is a header line.
    def header_line?(hash)
      file_format.line_definitions[hash[:line_type]].header
    end

    # Checks whether a given line hash is a footer line.
    def footer_line?(hash)
      file_format.line_definitions[hash[:line_type]].footer
    end
  end

end
@@ -0,0 +1,54 @@
module RequestLogAnalyzer
  module Tracker

    # Base tracker. It stores the tracker options, decides whether a request should be
    # handed to the tracker, and provides no-op prepare, update and finalize hooks.
    #
    # Accepts the following options:
    # * <tt>:line_type</tt> Only accept requests that contain a line of this type.
    # * <tt>:if</tt> A field name (Symbol) or an object that responds to call. The request
    #   is only accepted if the field value, or the result of the call, is neither nil
    #   nor false.
    # * <tt>:unless</tt> The opposite of <tt>:if</tt>: the request is rejected if the field
    #   value, or the result of the call, is neither nil nor false.
    #
    # For example :if => lambda { |request| request[:duration] && request[:duration] > 1.0 }
    class Base

      attr_reader :options

      # <tt>options</tt> The options hash of this tracker; see the class documentation.
      def initialize(options = {})
        @options = options
      end

      # Called before requests are sent to the tracker. Does nothing by default.
      def prepare
      end

      # Called for every request that should be tracked. Does nothing by default.
      def update(request)
      end

      # Called after the last request was sent to the tracker. Does nothing by default.
      def finalize
      end

      # Returns true if the request passes the :line_type, :if and :unless options of this
      # tracker, and false otherwise. The options are checked in that order and checking
      # stops at the first one that rejects the request.
      def should_update?(request)
        required_type = options[:line_type]
        return false if required_type && !request.has_line_type?(required_type)
        return false unless condition_met?(options[:if], request, true)
        return false if condition_met?(options[:unless], request, false)

        true
      end

      # Writes the inspect string of this tracker, followed by a newline, to the output.
      # <tt>output</tt> Any object that supports <<. Defaults to STDOUT.
      # <tt>report_width</tt>, <tt>color</tt> Not used by this implementation.
      def report(output = STDOUT, report_width = 80, color = false)
        output << self.inspect
        output << "\n"
      end

      private

      # Evaluates an :if / :unless option against the request: a Symbol is looked up as a
      # field of the request, an object that responds to call is called with the request,
      # and anything else (including a missing option) yields the given fallback.
      def condition_met?(condition, request, fallback)
        if condition.kind_of?(Symbol)
          request[condition]
        elsif condition.respond_to?(:call)
          condition.call(request)
        else
          fallback
        end
      end

    end
  end
end
@@ -0,0 +1,71 @@
module RequestLogAnalyzer::Tracker

  # Categorize requests.
  # Counts the number of requests per category and reports the categories by hit count.
  #
  # Accepts the following options:
  # * <tt>:line_type</tt> Only track requests that contain a line of this type.
  # * <tt>:if</tt> Condition a request has to meet to be passed to the tracker.
  # * <tt>:title</tt> Title to be displayed above the report.
  # * <tt>:category</tt> The categorizer: either an object that responds to call, which
  #   receives the request and returns its category, or the name of the request field
  #   that holds the category. This option is required.
  # * <tt>:all_categories</tt> An Enumerable of categories that should be in the report
  #   with a count of zero even if no request falls into them.
  # * <tt>:nils</tt> Also count requests for which the category is nil.
  # * <tt>:amount</tt> The maximum number of lines in the report.
  #
  # Example output:
  #  HTTP methods
  #  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  #  GET    ┃  22248 hits (46.2%) ┃░░░░░░░░░░░░░░░░░
  #  PUT    ┃  13685 hits (28.4%) ┃░░░░░░░░░░░
  #  POST   ┃  11662 hits (24.2%) ┃░░░░░░░░░
  #  DELETE ┃    512 hits (1.1%)  ┃
  class Category < RequestLogAnalyzer::Tracker::Base

    attr_reader :categories

    # Resets the counts and seeds every category of :all_categories with zero.
    # Raises a RuntimeError if no :category option was provided.
    def prepare
      raise "No categorizer set up for category tracker #{self.inspect}" unless options[:category]
      @categories = {}
      preset = options[:all_categories]
      preset.each { |cat| @categories[cat] = 0 } if preset.kind_of?(Enumerable)
    end

    # Determines the category of the request and increases its count by one.
    # Requests with a nil category are ignored, unless the :nils option is set.
    def update(request)
      categorizer = options[:category]
      cat = categorizer.respond_to?(:call) ? categorizer.call(request) : request[categorizer]
      return if cat.nil? && !options[:nils]

      @categories[cat] = (@categories[cat] || 0) + 1
    end

    # Writes the report to the output: one line per category with its hit count, its
    # share of all hits and, if the width allows it, a bar that visualizes the share.
    # <tt>output</tt> Any object that supports <<. Defaults to STDOUT.
    # <tt>report_width</tt> The width of the report in characters.
    # <tt>color</tt> Passed on to the green helper.
    #
    # NOTE(review): green is not defined in this class; presumably it is provided by a
    # colorizing helper elsewhere in the project - confirm.
    def report(output = STDOUT, report_width = 80, color = false)
      if options[:title]
        output << "\n#{options[:title]}\n"
        output << green(('━' * report_width), color) + "\n"
      end

      return output << "None found.\n" if @categories.empty?

      # The percentages are relative to all hits, not only to the lines that are shown,
      # so the total is taken before the list is cut down to :amount lines.
      ranked     = @categories.sort { |left, right| right[1] <=> left[1] }
      total_hits = ranked.inject(0) { |sum, (_, hits)| sum + hits }
      ranked     = ranked.slice(0...options[:amount]) if options[:amount]

      adjuster       = color ? 33 : 24 # the justification differs slightly when color codes are inserted
      longest_name   = ranked.map { |name, _| name.to_s.length }.max
      max_cat_length = [longest_name, report_width - adjuster].min
      space_left     = report_width - (max_cat_length + adjuster + 3)

      ranked.each do |cat, count|
        share      = count.to_f / total_hits
        percentage = green("(%0.01f%%)", color) % [share * 100]
        text       = "%-#{max_cat_length+1}s┃%7d hits %s" % [cat.to_s[0..max_cat_length], count, percentage]

        if space_left > 3
          bar_chars = (space_left * share).round
          output << "%-#{max_cat_length + adjuster}s %s%s" % [text, '┃', '░' * bar_chars] + "\n"
        else
          # not enough room for a bar
          output << text + "\n"
        end
      end
    end

  end
end
@@ -0,0 +1,81 @@
module RequestLogAnalyzer::Tracker

  # Analyze the duration of requests per category.
  #
  # Accepts the following options:
  # * <tt>:line_type</tt> Only track requests that contain a line of this type.
  # * <tt>:if</tt> Condition a request has to meet to be passed to the tracker.
  # * <tt>:title</tt> Title to be displayed above the report. Defaults to 'Request duration'.
  # * <tt>:category</tt> The categorizer: either an object that responds to call, which
  #   receives the request and returns its category, or the name of the request field
  #   that holds the category. This option is required.
  # * <tt>:duration</tt> The duration: either an object that responds to call, which
  #   receives the request and returns the duration, or the name of the request field
  #   that holds the duration. This option is required.
  # * <tt>:report</tt> The tables to report: a list with any of :total, :average and
  #   :hits. Defaults to [:total, :average].
  # * <tt>:top</tt> The number of lines per table. Defaults to 20.
  #
  # Example output:
  #  Request duration - top 20 by cumulative time   ┃    Hits ┃      Sum. |      Avg.
  #  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  #  EmployeeController#show.html [GET]             ┃    4742 ┃  4922.56s ┃     1.04s
  #  EmployeeController#update.html [POST]          ┃    4647 ┃  2731.23s ┃     0.59s
  #  EmployeeController#index.html [GET]            ┃    5802 ┃  1477.32s ┃     0.25s
  #  .............
  class Duration < RequestLogAnalyzer::Tracker::Base
    attr_reader :categories

    # Resets the gathered data.
    # Raises a RuntimeError if the :duration or the :category option is missing.
    def prepare
      raise "No duration field set up for duration tracker #{self.inspect}" unless options[:duration]
      raise "No categorizer set up for duration tracker #{self.inspect}" unless options[:category]

      @categories = {}
    end

    # Determines the category and the duration of the request, and adds the duration to
    # the hit count and the cumulative duration of the category. The request is ignored
    # if either the category or the duration is nil.
    def update(request)
      categorizer = options[:category]
      timer       = options[:duration]
      category = categorizer.respond_to?(:call) ? categorizer.call(request) : request[categorizer]
      duration = timer.respond_to?(:call) ? timer.call(request) : request[timer]

      if !duration.nil? && !category.nil?
        @categories[category] ||= {:count => 0, :total_duration => 0.0}
        @categories[category][:count] += 1
        @categories[category][:total_duration] += duration
      end
    end

    # Writes one table to the output. The categories are sorted, highest first, by the
    # value that the given block returns for the info hash of the category; this hash has
    # the keys :count and :total_duration.
    # <tt>output</tt> Any object that supports <<. Defaults to STDOUT.
    # <tt>amount</tt> The maximum number of lines in the table.
    # <tt>options</tt> <tt>:title</tt> is the table header (defaults to 'Request
    # duration'), <tt>:report_width</tt> the width in characters (defaults to 80) and
    # <tt>:color</tt> is passed on to the green helper.
    #
    # Note that the options argument shadows the tracker options inside this method.
    #
    # NOTE(review): green is not defined in this class; presumably it is provided by a
    # colorizing helper elsewhere in the project - confirm.
    def report_table(output = STDOUT, amount = 10, options = {}, &block)
      # fall back to usable values, so the advertised default of options = {} works
      report_width = options[:report_width] || 80
      title        = options[:title] || 'Request duration'

      top_categories = @categories.sort { |a, b| yield(b[1]) <=> yield(a[1]) }.slice(0...amount)
      # categories can be any object, so they are converted to strings before measuring
      max_cat_length = top_categories.map { |a| a[0].to_s.length }.max || 0
      space_left     = [report_width - 33, [max_cat_length + 1, title.length].max].min

      output << "\n"
      output << "%-#{space_left+1}s┃    Hits ┃      Sum. |      Avg." % [title[0...space_left]] + "\n"
      output << green('━' * report_width, options[:color]) + "\n"

      top_categories.each do |(cat, info)|
        hits  = info[:count]
        total = "%0.02f" % info[:total_duration]
        avg   = "%0.02f" % (info[:total_duration] / info[:count])
        output << "%-#{space_left+1}s┃%8d ┃%9ss ┃%9ss" % [cat.to_s[0...space_left], hits, total, avg] + "\n"
      end
    end

    # Writes every table that is listed in the :report option to the output.
    # <tt>output</tt> Any object that supports <<. Defaults to STDOUT.
    # <tt>report_width</tt> The width of the report in characters.
    # <tt>color</tt> Passed on to the green helper.
    #
    # The defaults for :title, :report and :top are stored in the tracker options the
    # first time this method is called.
    def report(output = STDOUT, report_width = 80, color = false)

      options[:title]  ||= 'Request duration'
      options[:report] ||= [:total, :average]
      options[:top]    ||= 20

      options[:report].each do |report|
        case report
        when :average
          report_table(output, options[:top], :title => "#{options[:title]} - top #{options[:top]} by average time", :color => color, :report_width => report_width) { |request| request[:total_duration] / request[:count] }
        when :total
          report_table(output, options[:top], :title => "#{options[:title]} - top #{options[:top]} by cumulative time", :color => color, :report_width => report_width) { |request| request[:total_duration] }
        when :hits
          report_table(output, options[:top], :title => "#{options[:title]} - top #{options[:top]} by hits", :color => color, :report_width => report_width) { |request| request[:count] }
        else
          output << "Unknown duration report specified\n"
        end
      end
    end
  end
end
+ end