request-log-analyzer 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. data/DESIGN +14 -0
  2. data/HACKING +7 -0
  3. data/LICENSE +20 -0
  4. data/README.textile +36 -0
  5. data/Rakefile +5 -0
  6. data/bin/request-log-analyzer +123 -0
  7. data/lib/cli/bashcolorizer.rb +60 -0
  8. data/lib/cli/command_line_arguments.rb +301 -0
  9. data/lib/cli/progressbar.rb +236 -0
  10. data/lib/request_log_analyzer.rb +14 -0
  11. data/lib/request_log_analyzer/aggregator/base.rb +45 -0
  12. data/lib/request_log_analyzer/aggregator/database.rb +148 -0
  13. data/lib/request_log_analyzer/aggregator/echo.rb +25 -0
  14. data/lib/request_log_analyzer/aggregator/summarizer.rb +116 -0
  15. data/lib/request_log_analyzer/controller.rb +201 -0
  16. data/lib/request_log_analyzer/file_format.rb +81 -0
  17. data/lib/request_log_analyzer/file_format/merb.rb +33 -0
  18. data/lib/request_log_analyzer/file_format/rails.rb +90 -0
  19. data/lib/request_log_analyzer/filter/base.rb +29 -0
  20. data/lib/request_log_analyzer/filter/field.rb +36 -0
  21. data/lib/request_log_analyzer/filter/timespan.rb +32 -0
  22. data/lib/request_log_analyzer/line_definition.rb +159 -0
  23. data/lib/request_log_analyzer/log_parser.rb +173 -0
  24. data/lib/request_log_analyzer/log_processor.rb +121 -0
  25. data/lib/request_log_analyzer/request.rb +95 -0
  26. data/lib/request_log_analyzer/source/base.rb +42 -0
  27. data/lib/request_log_analyzer/source/log_file.rb +170 -0
  28. data/lib/request_log_analyzer/tracker/base.rb +54 -0
  29. data/lib/request_log_analyzer/tracker/category.rb +71 -0
  30. data/lib/request_log_analyzer/tracker/duration.rb +81 -0
  31. data/lib/request_log_analyzer/tracker/hourly_spread.rb +80 -0
  32. data/lib/request_log_analyzer/tracker/timespan.rb +54 -0
  33. data/spec/controller_spec.rb +40 -0
  34. data/spec/database_inserter_spec.rb +101 -0
  35. data/spec/file_format_spec.rb +78 -0
  36. data/spec/file_formats/spec_format.rb +26 -0
  37. data/spec/filter_spec.rb +137 -0
  38. data/spec/fixtures/merb.log +84 -0
  39. data/spec/fixtures/multiple_files_1.log +5 -0
  40. data/spec/fixtures/multiple_files_2.log +2 -0
  41. data/spec/fixtures/rails_1x.log +59 -0
  42. data/spec/fixtures/rails_22.log +12 -0
  43. data/spec/fixtures/rails_22_cached.log +10 -0
  44. data/spec/fixtures/rails_unordered.log +24 -0
  45. data/spec/fixtures/syslog_1x.log +5 -0
  46. data/spec/fixtures/test_file_format.log +13 -0
  47. data/spec/fixtures/test_language_combined.log +14 -0
  48. data/spec/fixtures/test_order.log +16 -0
  49. data/spec/line_definition_spec.rb +124 -0
  50. data/spec/log_parser_spec.rb +68 -0
  51. data/spec/log_processor_spec.rb +57 -0
  52. data/spec/merb_format_spec.rb +38 -0
  53. data/spec/rails_format_spec.rb +76 -0
  54. data/spec/request_spec.rb +72 -0
  55. data/spec/spec_helper.rb +67 -0
  56. data/spec/summarizer_spec.rb +9 -0
  57. data/tasks/github-gem.rake +177 -0
  58. data/tasks/request_log_analyzer.rake +10 -0
  59. data/tasks/rspec.rake +6 -0
  60. metadata +135 -0
@@ -0,0 +1,95 @@
1
+ module RequestLogAnalyzer
2
+
3
# The Request class represents a single request that was parsed from the log data.
# A request is built up from one or more parsed lines; every line is a Hash of the
# fields that were captured from it. This class itself reads the :line_type key
# (see has_line_type? and completed?) and the :lineno key (see first_lineno and
# last_lineno) of those hashes.
#
# This class provides several methods to access the data that was parsed from the log files.
# Request#first(field_name) returns the first (only) value corresponding to the given field
# Request#every(field_name) returns all values corresponding to the given field name as array.
class Request

  include RequestLogAnalyzer::FileFormat::Awareness

  # The parsed lines (hashes) of this request, in the order in which they were added.
  attr_reader :lines

  # The merged fields of all lines. For a field that occurs in multiple lines, the
  # value of the line that was added first is kept.
  attr_reader :attributes

  # Initializes a new, empty Request object.
  # It will apply the provided FileFormat module to this instance.
  def initialize(file_format)
    @lines      = []
    @attributes = {}
    register_file_format(file_format)
  end

  # Creates a new request that was parsed from the log with the given FileFormat. The hashes
  # that are passed to this function are added as lines to this request. Nested arrays of
  # hashes are flattened first.
  def self.create(file_format, *hashes)
    request = new(file_format)
    hashes.flatten.each { |hash| request << hash }
    request
  end

  # Adds another line to the request.
  # The line should be provided as a hash of the fields parsed from the line.
  def <<(request_info_hash)
    @lines << request_info_hash
    # Merge in this direction so that values captured by earlier lines take precedence.
    @attributes = request_info_hash.merge(@attributes)
  end

  # Checks whether the given line type was parsed from the log file for this request.
  # Returns true or false.
  def has_line_type?(line_type)
    @lines.any? { |line| line[:line_type] == line_type.to_sym }
  end

  alias :=~ :has_line_type?

  # Returns the value that was captured for the "field" of this request.
  # This function will return the first value that was captured if the field
  # was captured in multiple lines
  def first(field)
    @attributes[field]
  end

  alias :[] :first

  # Returns an array of all the "field" values that were captured for this request.
  # Lines that do not contain the field are skipped.
  def every(field)
    @lines.select { |fields| fields.has_key?(field) }.map { |fields| fields[field] }
  end

  # Returns true if this request does not yet contain any parsed lines. This should only occur
  # during parsing. An empty request should never be sent to the aggregators
  def empty?
    @lines.length == 0
  end

  # Checks whether this request is completed. A completed request contains both a parsed header
  # line and a parsed footer line. Note that calling this function in single line mode will always
  # return false.
  def completed?
    header_found, footer_found = false, false
    @lines.each do |line|
      line_def = file_format.line_definitions[line[:line_type]]
      header_found = true if line_def.header
      footer_found = true if line_def.footer
    end
    header_found && footer_found
  end

  # Returns the first timestamp encountered in a request.
  def timestamp
    first(:timestamp)
  end

  # Returns the :lineno value of the first line of this request, or nil if the
  # request does not contain any lines yet.
  def first_lineno
    line = @lines.first
    line && line[:lineno]
  end

  # Returns the :lineno value of the last line of this request, or nil if the
  # request does not contain any lines yet.
  def last_lineno
    line = @lines.last
    line && line[:lineno]
  end
end
95
+ end
@@ -0,0 +1,42 @@
1
+ module RequestLogAnalyzer::Source
2
# Base class for request sources. It stores the options, registers the file format
# and defines the prepare / requests / finalize hooks as no-ops, so that subclasses
# only have to override what they need.
class Base

  include RequestLogAnalyzer::FileFormat::Awareness

  # A hash of options
  attr_reader :options

  # The current Request object that is being parsed
  attr_reader :current_request

  # The total number of parsed lines
  attr_reader :parsed_lines

  # The total number of parsed requests.
  attr_reader :parsed_requests

  # The number of skipped lines because of warnings
  attr_reader :skipped_lines

  # Initializer
  # <tt>format</tt> The file format, which is registered on this instance.
  # <tt>options</tt> A hash of options, available afterwards through #options.
  def initialize(format, options = {})
    @options         = options
    # Start the counters at zero so the readers above never return nil for
    # a source that did not parse anything (yet).
    @current_request = nil
    @parsed_lines    = 0
    @parsed_requests = 0
    @skipped_lines   = 0
    register_file_format(format)
  end

  # Hook that subclasses can override to set things up; does nothing by default.
  def prepare
  end

  # Hook that subclasses override to produce the parsed requests. The base
  # implementation yields nothing and returns true.
  # <tt>options</tt> Accepted (and ignored here) so that the signature is
  # compatible with subclasses that take parsing options.
  def requests(options = {}, &block)
    return true
  end

  # Hook that subclasses can override to clean things up; does nothing by default.
  def finalize
  end

end
42
+ end
@@ -0,0 +1,170 @@
1
+ module RequestLogAnalyzer::Source
2
+
3
# The LogFile source reads log data from a given source and uses a file format definition
# to parse all relevant information about requests from the file. A FileFormat module should
# be provided that contains the definitions of the lines that occur in the log data.
#
# The order in which lines occur is used to combine lines to a single request. If these lines
# are mixed, requests cannot be combined properly. This can be the case if data is written to
# the log file simultaneously by different mongrel processes. This problem is detected by the
# parser, but the requests that are mixed up cannot be parsed. It will emit warnings when this
# occurs.
class LogFile < RequestLogAnalyzer::Source::Base

  # The source to parse, as given in options[:source_files]: an IO, a file name
  # (String) or an Array of file names (see #requests).
  attr_reader :source_files

  # Initializes the parser instance.
  # It will apply the language specific FileFormat module to this instance. It will use the line
  # definitions in this module to parse any input.
  def initialize(format, options = {})
    @line_definitions = {}
    @options          = options
    @parsed_lines     = 0
    @parsed_requests  = 0
    @skipped_lines    = 0
    @current_io       = nil
    @source_files     = options[:source_files]

    # install the file format module (see RequestLogAnalyzer::FileFormat)
    # and register all the line definitions to the parser
    self.register_file_format(format)
  end

  # Parses the configured source and yields every request that is found to the block.
  # <tt>options</tt> Parsing options; see #parse_io for the :line_types option.
  # Raises a RuntimeError if the source is not an IO, a String or an Array.
  def requests(options = {}, &block)
    case @source_files
    when IO
      puts "Parsing from the standard input. Press CTRL+C to finish."
      parse_stream(@source_files, options, &block)
    when String
      parse_file(@source_files, options, &block)
    when Array
      parse_files(@source_files, options, &block)
    else
      raise "Unknown source provided"
    end
  end

  # Parses a list of consecutive files of the same format
  def parse_files(files, options = {}, &block)
    files.each { |file| parse_file(file, options, &block) }
  end

  # Parses a file.
  # Creates an IO stream for the provided file, and sends it to parse_io for further handling.
  # The progress handler (if any) is notified when parsing the file starts and finishes.
  def parse_file(file, options = {}, &block)
    @progress_handler.call(:started, file) if @progress_handler
    File.open(file, 'r') { |f| parse_io(f, options, &block) }
    @progress_handler.call(:finished, file) if @progress_handler
  end

  # Parses an already opened stream by handing it to parse_io.
  def parse_stream(stream, options = {}, &block)
    parse_io(stream, options, &block)
  end

  # Reads the IO line by line, finds the first line definition that matches each line and
  # combines the parsed lines into requests.
  # <tt>options[:line_types]</tt> The line types to look for (defaults to all the line types
  # that are defined by the file format).
  # Yields a Request when a request has been completed.
  # Raises a RuntimeError if an unknown line type is requested.
  def parse_io(io, options = {}, &block)

    # parse every line type by default
    line_types = options[:line_types] || file_format.line_definitions.keys

    # check whether all provided line types are valid
    unknown = line_types.reject { |line_type| file_format.line_definitions.has_key?(line_type) }
    raise "Unknown line types: #{unknown.join(', ')}" unless unknown.empty?

    @current_io = io
    begin
      @current_io.each_line do |line|

        # Only File objects report their position to the progress handler.
        @progress_handler.call(:progress, @current_io.pos) if @progress_handler && @current_io.kind_of?(File)

        # The first line definition that matches wins.
        request_data = nil
        line_types.each do |line_type|
          line_type_definition = file_format.line_definitions[line_type]
          break if request_data = line_type_definition.matches(line, @current_io.lineno, self)
        end

        if request_data
          @parsed_lines += 1
          update_current_request(request_data, &block)
        end
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
    ensure
      # Always drop the reference, also when parsing is aborted by an exception, so
      # that no stale (possibly closed) IO is kept around.
      @current_io = nil
    end

    nil
  end

  # Assign a proc to install a progress handler while parsing. It is called with
  # (:started, file), (:progress, position) and (:finished, file).
  def progress=(proc)
    @progress_handler = proc
  end

  # Assign a proc to install a warning handler while parsing. It is called with
  # the warning type, the message and the current line number.
  def warning=(proc)
    @warning_handler = proc
  end

  # This method is called by the parser if it encounters any problems.
  # It will call the warning handler, if one is installed. The line number that is
  # passed to the handler is nil when no IO is being parsed at the moment.
  def warn(type, message)
    return unless @warning_handler
    lineno = @current_io ? @current_io.lineno : nil
    @warning_handler.call(type, message, lineno)
  end

  protected

  # Combines the different lines of a request into a single Request object. It will start a
  # new request when a header line is encountered and will emit the request when a footer line
  # is encountered.
  #
  # - Every line that is parsed before a header line is ignored as it cannot be included in
  #   any request. It will emit a :no_current_request warning.
  # - A header line that is parsed before a request is closed by a footer line, is a sign of
  #   an improperly ordered file. All data that is gathered for the request until then is
  #   discarded, the next request is ignored as well and a :unclosed_request warning is
  #   emitted. If options[:assume_correct_order] is set, the unclosed request is emitted
  #   instead and a new request is started with the header line.
  def update_current_request(request_data, &block)
    if header_line?(request_data)
      unless @current_request.nil?
        if options[:assume_correct_order]
          @parsed_requests += 1
          yield @current_request
          @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
        else
          @skipped_lines += 1
          warn(:unclosed_request, "Encountered header line, but previous request was not closed!")
          @current_request = nil # remove all data that was parsed, skip next request as well.
        end
      else
        @current_request = RequestLogAnalyzer::Request.create(@file_format, request_data)
      end
    else
      unless @current_request.nil?
        @current_request << request_data
        if footer_line?(request_data)
          @parsed_requests += 1
          yield @current_request
          @current_request = nil
        end
      else
        @skipped_lines += 1
        warn(:no_current_request, "Parsebale line found outside of a request!")
      end
    end
  end

  # Checks whether a given line hash is a header line.
  def header_line?(hash)
    file_format.line_definitions[hash[:line_type]].header
  end

  # Checks whether a given line hash is a footer line.
  def footer_line?(hash)
    file_format.line_definitions[hash[:line_type]].footer
  end
end
169
+
170
+ end
@@ -0,0 +1,54 @@
1
+ module RequestLogAnalyzer
2
+ module Tracker
3
+
4
# Base tracker. All other trackers inherit from this class
#
# The following options are used by should_update? to decide whether a request
# is passed to the tracker:
# * <tt>:line_type</tt> The request must contain a line of this type.
# * <tt>:if</tt> A field name (Symbol) that must have a value in the request, or an
#   object responding to call that must return a value other than nil / false.
# * <tt>:unless</tt> The opposite of :if; the request is rejected if the field has a
#   value or the call returns a value other than nil / false.
#
# For example :if => lambda { |request| request[:duration] && request[:duration] > 1.0 }
class Base

  attr_reader :options

  # Stores the options hash; it is available afterwards through #options.
  def initialize(options ={})
    @options = options
  end

  # Hook for subclasses; does nothing by default.
  def prepare
  end

  # Hook for subclasses to process a request; does nothing by default.
  def update(request)
  end

  # Hook for subclasses; does nothing by default.
  def finalize
  end

  # Returns true if the request passes the :line_type, :if and :unless options
  # (see the class description), false otherwise.
  def should_update?(request)
    required_type = options[:line_type]
    return false if required_type && !request.has_line_type?(required_type)

    include_when = options[:if]
    case include_when
    when Symbol
      return false unless request[include_when]
    else
      return false if include_when.respond_to?(:call) && !include_when.call(request)
    end

    exclude_when = options[:unless]
    case exclude_when
    when Symbol
      return false if request[exclude_when]
    else
      return false if exclude_when.respond_to?(:call) && exclude_when.call(request)
    end

    true
  end

  # Appends the inspect string of this tracker, followed by a newline, to output.
  # report_width and color are not used by this default implementation.
  def report(output=STDOUT, report_width = 80, color = false)
    output << inspect
    output << "\n"
  end

end
53
+ end
54
+ end
@@ -0,0 +1,71 @@
1
+ module RequestLogAnalyzer::Tracker
2
+
3
# Categorize requests.
# Count and analyze requests for a specific attribute
#
# Accepts the following options:
# * <tt>:line_type</tt> The line type that a request must contain to be tracked.
# * <tt>:if</tt> Condition that has to hold for a request to be passed to the tracker.
# * <tt>:title</tt> Title to be displayed above the report.
# * <tt>:category</tt> Proc that returns the category of a request, or the name of the
#   request field that holds the category.
# * <tt>:amount</tt> The maximum amount of lines in the report
# * <tt>:all_categories</tt> Enumerable of categories that are listed with a count of zero
#   until they are encountered.
# * <tt>:nils</tt> If set, requests for which the category is nil are counted as well.
#
# Example output:
# HTTP methods
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# GET ┃ 22248 hits (46.2%) ┃░░░░░░░░░░░░░░░░░
# PUT ┃ 13685 hits (28.4%) ┃░░░░░░░░░░░
# POST ┃ 11662 hits (24.2%) ┃░░░░░░░░░
# DELETE ┃ 512 hits (1.1%) ┃
class Category < RequestLogAnalyzer::Tracker::Base

  # Hash that maps every category to the number of requests counted for it.
  attr_reader :categories

  # Resets the counters. Raises a RuntimeError if the :category option is missing.
  def prepare
    raise "No categorizer set up for category tracker #{self.inspect}" unless options[:category]
    @categories = {}
    if options[:all_categories].kind_of?(Enumerable)
      options[:all_categories].each { |cat| @categories[cat] = 0 }
    end
  end

  # Determines the category of the request and increases its counter. Requests
  # without a category are ignored, unless the :nils option is set.
  def update(request)
    cat = options[:category].respond_to?(:call) ? options[:category].call(request) : request[options[:category]]
    if !cat.nil? || options[:nils]
      @categories[cat] ||= 0
      @categories[cat] += 1
    end
  end

  # Writes the categories to output, sorted by hit count (highest first). Every line
  # shows the hit count and the share of the total hits and, if the report is wide
  # enough, a bar that is proportional to that share.
  # <tt>output</tt> Object to append the report to using <<.
  # <tt>report_width</tt> The width of the report in characters.
  # <tt>color</tt> Passed on to the green helper.
  def report(output = STDOUT, report_width = 80, color = false)
    if options[:title]
      output << "\n#{options[:title]}\n"
      output << green(('━' * report_width), color) + "\n"
    end

    if @categories.empty?
      output << "None found.\n"
    else
      sorted_categories = @categories.sort { |a, b| b[1] <=> a[1] }
      # The total is calculated before slicing, so percentages relate to all hits.
      total_hits        = sorted_categories.inject(0) { |carry, item| carry + item[1] }
      sorted_categories = sorted_categories.slice(0...options[:amount]) if options[:amount]

      adjuster       = color ? 33 : 24 # justification calculation is slightly different when color codes are inserted
      # "|| 0" covers the case in which :amount leaves no categories at all.
      longest        = sorted_categories.map { |c| c[0].to_s.length }.max || 0
      max_cat_length = [[longest, report_width - adjuster].min, 0].max
      space_left     = report_width - (max_cat_length + adjuster + 3)

      sorted_categories.each do |(cat, count)|
        # Without any hits (only preset categories) the share would be NaN, which
        # cannot be rounded to a bar length.
        share = total_hits.zero? ? 0.0 : count.to_f / total_hits
        # Truncate to max_cat_length, so the column (one wider) keeps a separating space.
        text = "%-#{max_cat_length+1}s┃%7d hits %s" % [cat.to_s[0...max_cat_length], count, (green("(%0.01f%%)", color) % [share * 100])]
        if space_left > 3
          bar_chars = (space_left * share).round
          output << "%-#{max_cat_length + adjuster}s %s%s" % [text, '┃', '░' * bar_chars] + "\n"
        else
          output << text + "\n"
        end
      end
    end
  end

end
71
+ end
@@ -0,0 +1,81 @@
1
+ module RequestLogAnalyzer::Tracker
2
+
3
# Analyze the duration of a specific attribute
#
# Accepts the following options:
# * <tt>:line_type</tt> The line type that a request must contain to be tracked.
# * <tt>:if</tt> Condition that has to hold for a request to be passed to the tracker.
# * <tt>:title</tt> Title to be displayed above the report (defaults to 'Request duration')
# * <tt>:category</tt> Proc that returns the category of a request, or the name of the
#   request field that holds the category.
# * <tt>:duration</tt> Proc that returns the duration of a request, or the name of the
#   request field that holds the duration.
# * <tt>:report</tt> The tables to include in the report, any of :total, :average and
#   :hits (defaults to [:total, :average])
# * <tt>:top</tt> The amount of lines in every table (defaults to 20)
#
# Example output:
# Request duration - top 20 by cumulative time ┃ Hits ┃ Sum. | Avg.
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# EmployeeController#show.html [GET] ┃ 4742 ┃ 4922.56s ┃ 1.04s
# EmployeeController#update.html [POST] ┃ 4647 ┃ 2731.23s ┃ 0.59s
# EmployeeController#index.html [GET] ┃ 5802 ┃ 1477.32s ┃ 0.25s
# .............
class Duration < RequestLogAnalyzer::Tracker::Base

  # Hash that maps every category to a hash with its :count and :total_duration.
  attr_reader :categories

  # Resets the gathered data. Raises a RuntimeError if the :duration or the
  # :category option is missing.
  def prepare
    raise "No duration field set up for category tracker #{self.inspect}" unless options[:duration]
    raise "No categorizer set up for duration tracker #{self.inspect}" unless options[:category]

    @categories = {}
  end

  # Adds the duration of the request to the totals of its category. Requests
  # without a category or without a duration are ignored.
  def update(request)
    category = options[:category].respond_to?(:call) ? options[:category].call(request) : request[options[:category]]
    duration = options[:duration].respond_to?(:call) ? options[:duration].call(request) : request[options[:duration]]

    if !duration.nil? && !category.nil?
      @categories[category] ||= {:count => 0, :total_duration => 0.0}
      @categories[category][:count] += 1
      @categories[category][:total_duration] += duration
    end
  end

  # Writes a table with the top categories to output.
  # <tt>output</tt> Object to append the table to using <<.
  # <tt>amount</tt> The maximum number of categories in the table.
  # <tt>options</tt> Hash with :title (defaults to an empty title), :report_width
  # (defaults to 80) and :color (passed on to the green helper).
  # The block receives the info hash of a category (:count and :total_duration) and
  # returns the value to sort on; the categories with the highest values come first.
  def report_table(output = STDOUT, amount = 10, options = {}, &block)
    title        = (options[:title] || '').to_s
    report_width = options[:report_width] || 80

    top_categories = @categories.sort { |a, b| yield(b[1]) <=> yield(a[1]) }.slice(0...amount)
    # Categories are not necessarily strings, so convert them before measuring / slicing.
    max_cat_length = top_categories.map { |a| a[0].to_s.length }.max || 0
    space_left     = [report_width - 33, [max_cat_length + 1, title.length].max].min

    output << "\n"
    output << "%-#{space_left+1}s┃ Hits ┃ Sum. | Avg." % [title[0...space_left]] + "\n"
    output << green('━' * report_width, options[:color]) + "\n"

    top_categories.each do |(cat, info)|
      hits  = info[:count]
      total = "%0.02f" % info[:total_duration]
      avg   = "%0.02f" % (info[:total_duration] / info[:count])
      output << "%-#{space_left+1}s┃%8d ┃%9ss ┃%9ss" % [cat.to_s[0...space_left], hits, total, avg] + "\n"
    end
  end

  # Writes every table that is requested by the :report option to output.
  # <tt>output</tt> Object to append the report to using <<.
  # <tt>report_width</tt> The width of the report in characters.
  # <tt>color</tt> Passed on to the green helper.
  def report(output = STDOUT, report_width = 80, color = false)
    # Resolve the defaults into locals, so the options hash that was passed to
    # the tracker is not modified by reporting.
    title   = options[:title]  || 'Request duration'
    reports = options[:report] || [:total, :average]
    top     = options[:top]    || 20

    reports.each do |report|
      case report
      when :average
        report_table(output, top, :title => "#{title} - top #{top} by average time", :color => color, :report_width => report_width) { |request| request[:total_duration] / request[:count] }
      when :total
        report_table(output, top, :title => "#{title} - top #{top} by cumulative time", :color => color, :report_width => report_width) { |request| request[:total_duration] }
      when :hits
        report_table(output, top, :title => "#{title} - top #{top} by hits", :color => color, :report_width => report_width) { |request| request[:count] }
      else
        output << "Unknown duration report specified\n"
      end
    end
  end
end
81
+ end