ngmoco-request-log-analyzer 1.4.2

Files changed (112)
  1. data/.gitignore +10 -0
  2. data/DESIGN.rdoc +41 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +8 -0
  6. data/bin/request-log-analyzer +114 -0
  7. data/lib/cli/command_line_arguments.rb +301 -0
  8. data/lib/cli/database_console.rb +26 -0
  9. data/lib/cli/database_console_init.rb +43 -0
  10. data/lib/cli/progressbar.rb +213 -0
  11. data/lib/cli/tools.rb +46 -0
  12. data/lib/request_log_analyzer.rb +44 -0
  13. data/lib/request_log_analyzer/aggregator.rb +49 -0
  14. data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
  15. data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
  16. data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
  17. data/lib/request_log_analyzer/controller.rb +332 -0
  18. data/lib/request_log_analyzer/database.rb +102 -0
  19. data/lib/request_log_analyzer/database/base.rb +115 -0
  20. data/lib/request_log_analyzer/database/connection.rb +38 -0
  21. data/lib/request_log_analyzer/database/request.rb +22 -0
  22. data/lib/request_log_analyzer/database/source.rb +13 -0
  23. data/lib/request_log_analyzer/database/warning.rb +14 -0
  24. data/lib/request_log_analyzer/file_format.rb +160 -0
  25. data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
  26. data/lib/request_log_analyzer/file_format/apache.rb +141 -0
  27. data/lib/request_log_analyzer/file_format/merb.rb +67 -0
  28. data/lib/request_log_analyzer/file_format/rack.rb +11 -0
  29. data/lib/request_log_analyzer/file_format/rails.rb +176 -0
  30. data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
  31. data/lib/request_log_analyzer/filter.rb +30 -0
  32. data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
  33. data/lib/request_log_analyzer/filter/field.rb +42 -0
  34. data/lib/request_log_analyzer/filter/timespan.rb +45 -0
  35. data/lib/request_log_analyzer/line_definition.rb +111 -0
  36. data/lib/request_log_analyzer/log_processor.rb +99 -0
  37. data/lib/request_log_analyzer/mailer.rb +62 -0
  38. data/lib/request_log_analyzer/output.rb +113 -0
  39. data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
  40. data/lib/request_log_analyzer/output/html.rb +184 -0
  41. data/lib/request_log_analyzer/request.rb +175 -0
  42. data/lib/request_log_analyzer/source.rb +72 -0
  43. data/lib/request_log_analyzer/source/database_loader.rb +87 -0
  44. data/lib/request_log_analyzer/source/log_parser.rb +274 -0
  45. data/lib/request_log_analyzer/tracker.rb +206 -0
  46. data/lib/request_log_analyzer/tracker/duration.rb +104 -0
  47. data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
  48. data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
  49. data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
  50. data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
  51. data/request-log-analyzer.gemspec +40 -0
  52. data/spec/database.yml +23 -0
  53. data/spec/fixtures/apache_combined.log +5 -0
  54. data/spec/fixtures/apache_common.log +10 -0
  55. data/spec/fixtures/decompression.log +12 -0
  56. data/spec/fixtures/decompression.log.bz2 +0 -0
  57. data/spec/fixtures/decompression.log.gz +0 -0
  58. data/spec/fixtures/decompression.log.zip +0 -0
  59. data/spec/fixtures/decompression.tar.gz +0 -0
  60. data/spec/fixtures/decompression.tgz +0 -0
  61. data/spec/fixtures/header_and_footer.log +6 -0
  62. data/spec/fixtures/merb.log +84 -0
  63. data/spec/fixtures/merb_prefixed.log +9 -0
  64. data/spec/fixtures/multiple_files_1.log +5 -0
  65. data/spec/fixtures/multiple_files_2.log +2 -0
  66. data/spec/fixtures/rails.db +0 -0
  67. data/spec/fixtures/rails_1x.log +59 -0
  68. data/spec/fixtures/rails_22.log +12 -0
  69. data/spec/fixtures/rails_22_cached.log +10 -0
  70. data/spec/fixtures/rails_unordered.log +24 -0
  71. data/spec/fixtures/syslog_1x.log +5 -0
  72. data/spec/fixtures/test_file_format.log +13 -0
  73. data/spec/fixtures/test_language_combined.log +14 -0
  74. data/spec/fixtures/test_order.log +16 -0
  75. data/spec/integration/command_line_usage_spec.rb +84 -0
  76. data/spec/integration/munin_plugins_rails_spec.rb +58 -0
  77. data/spec/integration/scout_spec.rb +151 -0
  78. data/spec/lib/helpers.rb +52 -0
  79. data/spec/lib/macros.rb +18 -0
  80. data/spec/lib/matchers.rb +77 -0
  81. data/spec/lib/mocks.rb +76 -0
  82. data/spec/lib/testing_format.rb +46 -0
  83. data/spec/spec_helper.rb +24 -0
  84. data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
  85. data/spec/unit/aggregator/summarizer_spec.rb +26 -0
  86. data/spec/unit/controller/controller_spec.rb +41 -0
  87. data/spec/unit/controller/log_processor_spec.rb +18 -0
  88. data/spec/unit/database/base_class_spec.rb +183 -0
  89. data/spec/unit/database/connection_spec.rb +34 -0
  90. data/spec/unit/database/database_spec.rb +133 -0
  91. data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
  92. data/spec/unit/file_format/apache_format_spec.rb +203 -0
  93. data/spec/unit/file_format/file_format_api_spec.rb +69 -0
  94. data/spec/unit/file_format/line_definition_spec.rb +75 -0
  95. data/spec/unit/file_format/merb_format_spec.rb +52 -0
  96. data/spec/unit/file_format/rails_format_spec.rb +164 -0
  97. data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
  98. data/spec/unit/filter/field_filter_spec.rb +66 -0
  99. data/spec/unit/filter/filter_spec.rb +17 -0
  100. data/spec/unit/filter/timespan_filter_spec.rb +58 -0
  101. data/spec/unit/mailer_spec.rb +30 -0
  102. data/spec/unit/request_spec.rb +111 -0
  103. data/spec/unit/source/log_parser_spec.rb +119 -0
  104. data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
  105. data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
  106. data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
  107. data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
  108. data/spec/unit/tracker/tracker_api_spec.rb +124 -0
  109. data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
  110. data/tasks/github-gem.rake +323 -0
  111. data/tasks/request_log_analyzer.rake +26 -0
  112. metadata +220 -0
data/lib/request_log_analyzer/request.rb
@@ -0,0 +1,175 @@
module RequestLogAnalyzer

  # The Request class represents a parsed request from the log file.
  # Instances are created by the LogParser and are passed to the different aggregators, so they
  # can do their aggregating work.
  #
  # This class provides several methods to access the data that was parsed from the log files.
  # Request#first(field_name) returns the first (only) value corresponding to the given field.
  # Request#every(field_name) returns all values corresponding to the given field name as an array.
  class Request

    module Converters

      # Default converter function, which converts the parsed strings to a native Ruby type
      # using the type indication in the line definition. It will use a custom converter
      # method if one is available.
      def convert_value(value, capture_definition)
        return capture_definition[:default] if value.nil?
        custom_converter_method = :"convert_#{capture_definition[:type]}"
        send(custom_converter_method, value, capture_definition)
      end

      def convert_string(value, capture_definition);  value; end
      def convert_float(value, capture_definition);   value.to_f; end
      def convert_decimal(value, capture_definition); value.to_f; end
      def convert_int(value, capture_definition);     value.to_i; end
      def convert_integer(value, capture_definition); value.to_i; end
      def convert_sym(value, capture_definition);     value.to_sym; end
      def convert_symbol(value, capture_definition);  value.to_sym; end

      # Converts an :eval field, which should evaluate to a hash.
      def convert_eval(value, capture_definition)
        eval(value).inject({}) { |h, (k, v)| h[k.to_sym] = v; h }
      rescue SyntaxError
        nil
      end

      # Slow default method to parse timestamps.
      # Reimplement this function in a file format specific Request class
      # to improve the timestamp parsing speed.
      def convert_timestamp(value, capture_definition)
        DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i
      end

      # Converts traffic fields to (whole) bytes based on the given unit.
      def convert_traffic(value, capture_definition)
        case capture_definition[:unit]
        when nil, :b, :B, :byte      then value.to_i
        when :GB, :G, :gigabyte      then (value.to_f * 1_000_000_000).round
        when :GiB, :gibibyte         then (value.to_f * (2 ** 30)).round
        when :MB, :M, :megabyte      then (value.to_f * 1_000_000).round
        when :MiB, :mebibyte         then (value.to_f * (2 ** 20)).round
        when :KB, :K, :kilobyte, :kB then (value.to_f * 1000).round
        when :KiB, :kibibyte         then (value.to_f * (2 ** 10)).round
        else raise "Unknown traffic unit"
        end
      end

      # Converts duration fields to float, and makes sure the values are in seconds.
      def convert_duration(value, capture_definition)
        case capture_definition[:unit]
        when nil, :sec, :s     then value.to_f
        when :microsec, :musec then value.to_f / 1_000_000.0
        when :msec, :millisec  then value.to_f / 1000.0
        else raise "Unknown duration unit"
        end
      end
    end

    # Install the default converter methods
    include Converters

    attr_reader :lines, :attributes, :file_format

    # Initializes a new Request object.
    # It will apply the provided FileFormat module to this instance.
    def initialize(file_format, attributes = {})
      @lines       = []
      @attributes  = attributes
      @file_format = file_format
    end

    # Creates a new request that was parsed from the log with the given FileFormat. The hashes
    # that are passed to this function are added as lines to this request.
    def self.create(file_format, *hashes)
      request = self.new(file_format)
      hashes.flatten.each { |hash| request << hash }
      return request
    end

    # Adds another line to the request when it is parsed in the LogParser.
    #
    # The line should be provided as a hash with the :line_definition, :captures,
    # :lineno and :source keys set. This function is called from LogParser.
    def add_parsed_line(parsed_line)
      value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
      value_hash[:line_type] = parsed_line[:line_definition].name
      value_hash[:lineno]    = parsed_line[:lineno]
      value_hash[:source]    = parsed_line[:source]
      add_line_hash(value_hash)
    end

    # Adds another line to the request using a plain hash.
    #
    # The line should be provided as a hash of the fields parsed from the line.
    def add_line_hash(value_hash)
      @lines << value_hash
      @attributes = value_hash.merge(@attributes)
    end

    # Adds another line to the request. This method switches automatically between
    # add_line_hash and add_parsed_line based on the keys of the provided hash.
    def <<(hash)
      hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
    end

    # Checks whether the given line type was parsed from the log file for this request.
    def has_line_type?(line_type)
      return true if @lines.length == 1 && @lines[0][:line_type] == line_type.to_sym
      @lines.detect { |l| l[:line_type] == line_type.to_sym }
    end

    alias :=~ :has_line_type?

    # Returns the value that was captured for the "field" of this request.
    # This function will return the first value that was captured if the field
    # was captured in multiple lines.
    def first(field)
      @attributes[field]
    end

    alias :[] :first

    # Returns an array of all the "field" values that were captured for this request.
    def every(field)
      @lines.inject([]) { |result, fields| result << fields[field] if fields.has_key?(field); result }
    end

    # Returns true if this request does not yet contain any parsed lines. This should only occur
    # during parsing. An empty request should never be sent to the aggregators.
    def empty?
      @lines.length == 0
    end

    # Checks whether this request is completed. A completed request contains both a parsed header
    # line and a parsed footer line. Note that calling this function in single line mode will always
    # return false.
    def completed?
      header_found, footer_found = false, false
      @lines.each do |line|
        line_def = file_format.line_definitions[line[:line_type]]
        header_found = true if line_def.header
        footer_found = true if line_def.footer
      end
      header_found && footer_found
    end

    # This function is called before a Request is yielded.
    def validate
    end

    # Returns the first timestamp encountered in a request.
    def timestamp
      first(:timestamp)
    end

    def first_lineno
      @lines.map { |line| line[:lineno] }.reject { |v| v.nil? }.min
    end

    def last_lineno
      @lines.map { |line| line[:lineno] }.reject { |v| v.nil? }.max
    end
  end
end
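As a quick illustration of the Request API above, the following sketch builds a request from two plain line hashes and queries it with the accessors defined in this class. The field names and values are made up for this example; only add_line_hash is exercised, so no FileFormat instance is needed. Note also that convert_traffic distinguishes decimal units (:KB is 1000 bytes) from binary units (:KiB is 1024 bytes), while convert_duration normalizes every value to seconds.

require 'request_log_analyzer'

# Hypothetical line hashes, in the shape that add_line_hash expects.
request = RequestLogAnalyzer::Request.create(nil,
  { :line_type => :processing, :lineno => 1, :path => '/orders', :timestamp => 20090720120000 },
  { :line_type => :completed,  :lineno => 2, :duration => 0.135 })

request.has_line_type?(:completed)  # => truthy
request =~ :processing              # => truthy (alias of has_line_type?)
request.first(:path)                # => "/orders" (request[:path] is an alias)
request.every(:lineno)              # => [1, 2]
request.timestamp                   # => 20090720120000
request.first_lineno                # => 1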
data/lib/request_log_analyzer/source.rb
@@ -0,0 +1,72 @@
# The RequestLogAnalyzer::Source module contains all functionality that loads requests from a given source
# and feeds them to the pipeline for further processing. The requests (see RequestLogAnalyzer::Request) that
# are parsed from a source are piped through filters (see RequestLogAnalyzer::Filter) and are then
# fed to an aggregator (see RequestLogAnalyzer::Aggregator). The source instance is thus the beginning of
# the RequestLogAnalyzer chain.
#
# - The base class for all sources is RequestLogAnalyzer::Source::Base. All source classes should inherit from this class.
# - Currently, RequestLogAnalyzer::Source::LogParser is the only implemented source.
module RequestLogAnalyzer::Source

  # Loads constants that reside in the RequestLogAnalyzer::Source namespace. This function uses
  # RequestLogAnalyzer::load_default_class_file to load the file in which the constant is declared.
  # <tt>const</tt>:: The constant to load in the RequestLogAnalyzer::Source namespace.
  def self.const_missing(const)
    RequestLogAnalyzer::load_default_class_file(self, const)
  end

  # The base Source class. All other sources should inherit from this class.
  #
  # A source implementation should at least implement the each_request method, which should yield
  # RequestLogAnalyzer::Request instances that will be fed through the pipeline.
  class Base

    # A hash of options
    attr_reader :options

    # The current Request object that is being parsed
    attr_reader :current_request

    # The total number of parsed lines
    attr_reader :parsed_lines

    # The number of lines skipped because of warnings
    attr_reader :skipped_lines

    # The total number of parsed requests
    attr_reader :parsed_requests

    # The total number of requests skipped because of filters
    attr_reader :skipped_requests

    # The FileFormat instance that describes the format of this source.
    attr_reader :file_format

    # Initializer, which will register the file format and save any options given as a hash.
    # <tt>format</tt>:: The file format instance
    # <tt>options</tt>:: A hash of options that can be used by a specific Source implementation
    def initialize(format, options = {})
      @options     = options
      @file_format = format
    end

    # The prepare method is called before the RequestLogAnalyzer::Source::Base#each_request method is called.
    # Use this method to implement any initialization that should occur before this source can produce Request
    # instances.
    def prepare
    end

    # This function is called to actually produce the requests that will be sent into the pipeline.
    # The implementation should yield instances of RequestLogAnalyzer::Request.
    # <tt>options</tt>:: A Hash of options that can be used in the implementation.
    def each_request(options = {}, &block) # :yields: request
      return true
    end

    # This function is called after RequestLogAnalyzer::Source::Base#each_request has finished. Any code to
    # wrap up, free resources, etc. can be put in this method.
    def finalize
    end

  end
end
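To show what the Base contract amounts to, here is a minimal sketch of a custom source, assuming only the methods documented above: prepare/finalize for setup and teardown, and each_request yielding RequestLogAnalyzer::Request instances. The ArraySource name and its :line_hashes option are invented for this example.

# Hypothetical source that replays a list of pre-parsed line hashes.
class ArraySource < RequestLogAnalyzer::Source::Base
  def initialize(format, options = {})
    super(format, options)
    @line_hashes = options[:line_hashes] || []  # invented option for this sketch
  end

  def each_request(options = {}, &block) # :yields: request
    @parsed_requests = 0
    @line_hashes.each do |hash|
      @parsed_requests += 1
      yield RequestLogAnalyzer::Request.create(file_format, hash)
    end
  end
end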
data/lib/request_log_analyzer/source/database_loader.rb
@@ -0,0 +1,87 @@
require 'rubygems'
require 'activerecord'

module RequestLogAnalyzer::Source

  # ActiveRecord hook
  class Request < ActiveRecord::Base
    has_many :completed_lines
    has_many :processing_lines

    def convert(file_format)
      send_attributes = self.attributes
      send_attributes.merge!(self.completed_lines.first.attributes)  if self.completed_lines.first
      send_attributes.merge!(self.processing_lines.first.attributes) if self.processing_lines.first
      return RequestLogAnalyzer::Request.new(file_format, send_attributes)
    end
  end

  class CompletedLine < ActiveRecord::Base
    belongs_to :request
  end

  class ProcessingLine < ActiveRecord::Base
    belongs_to :request
  end

  # The DatabaseLoader class gets log data from the database.
  class DatabaseLoader < Base

    attr_reader :source_files, :file_format, :requests

    # Initializes the database loader instance and registers the file format that will be
    # used to convert the stored records back into Request instances.
    #
    # <tt>format</tt>:: The current file format instance
    # <tt>options</tt>:: A hash of options that are used by the loader
    def initialize(format, options = {})
      super(format, options)
      @source_files    = options[:source_files]
      @parsed_requests = 0
      @requests        = []
    end

    # Reads the requests from the database and yields them one by one. Each database record
    # is converted back into a RequestLogAnalyzer::Request instance before it is yielded.
    # <tt>options</tt>:: A Hash of options (currently unused by this implementation).
    def each_request(options = {}, &block) # :yields: request
      ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @source_files)

      @progress_handler.call(:started, @source_files) if @progress_handler
      RequestLogAnalyzer::Source::Request.find(:all).each do |request|
        @parsed_requests += 1
        @progress_handler.call(:progress, @parsed_requests) if @progress_handler

        yield request.convert(self.file_format)
      end

      @progress_handler.call(:finished, @source_files) if @progress_handler
    end

    # Assign a proc to this attribute to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @progress_handler = proc
    end

    # Assign a proc to this attribute to install a warning handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler, if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with them.
    #
    # <tt>type</tt>:: The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, @current_io.lineno) if @warning_handler
    end
  end
end
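A rough usage sketch for the database loader, assuming a SQLite dump produced earlier by the database inserter and the Rails file format; the database file name is an assumption, and each_request establishes the connection itself.

# Hypothetical database file written by RequestLogAnalyzer::Aggregator::DatabaseInserter.
format = RequestLogAnalyzer::FileFormat.load(:rails)
loader = RequestLogAnalyzer::Source::DatabaseLoader.new(format, :source_files => 'requests.db')

loader.progress = lambda { |phase, value| puts "#{phase}: #{value}" }
loader.each_request { |request| puts request.timestamp }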
data/lib/request_log_analyzer/source/log_parser.rb
@@ -0,0 +1,274 @@
module RequestLogAnalyzer::Source

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines into a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser, which will emit warnings when it occurs. LogParser supports multiple parse strategies
  # that deal differently with this problem.
  class LogParser < Base

    include Enumerable

    # The default parse strategy that will be used to parse the input.
    DEFAULT_PARSE_STRATEGY = 'assume-correct'

    # All available parse strategies.
    PARSE_STRATEGIES = ['cautious', 'assume-correct']

    attr_reader :source_files, :current_file, :current_lineno

    # Initializes the log file parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input that it is given (see parse_io).
    #
    # <tt>format</tt>:: The current file format instance
    # <tt>options</tt>:: A hash of options that are used by the parser
    def initialize(format, options = {})
      super(format, options)
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_lines    = 0
      @skipped_requests = 0
      @current_request  = nil
      @current_source   = nil
      @current_file     = nil
      @current_lineno   = nil
      @source_files     = options[:source_files]
      @progress_handler = nil

      @options[:parse_strategy] ||= DEFAULT_PARSE_STRATEGY
      raise "Unknown parse strategy" unless PARSE_STRATEGIES.include?(@options[:parse_strategy])
    end

    # Reads the input, which can be a file, a sequence of files, or STDIN, and parses the
    # lines specified in the FileFormat. These lines will be combined into Request instances
    # that will be yielded. The actual parsing occurs in the parse_io method.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def each_request(options = {}, &block) # :yields: :request, request

      case @source_files
      when IO
        if @source_files == $stdin
          puts "Parsing from the standard input. Press CTRL+C to finish." # FIXME: not here
        end
        parse_stream(@source_files, options, &block)
      when String
        parse_file(@source_files, options, &block)
      when Array
        parse_files(@source_files, options, &block)
      else
        raise "Unknown source provided"
      end
    end

    # Make sure the Enumerable methods work as expected
    alias_method :each, :each_request

    # Parses a list of subsequent files of the same format, by calling parse_file for every
    # file in the array.
    # <tt>files</tt>:: The Array of files that should be parsed
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_files(files, options = {}, &block) # :yields: request
      files.each { |file| parse_file(file, options, &block) }
    end

    # Checks if a file has a compressed extension in the filename.
    # If recognized, returns the command string used to decompress the file;
    # otherwise returns an empty string.
    def decompress_file?(filename)
      nice_command = "nice -n 5"

      return "#{nice_command} gunzip -c -d #{filename}"  if filename.match(/\.tar.gz$/) || filename.match(/\.tgz$/) || filename.match(/\.gz$/)
      return "#{nice_command} bunzip2 -c -d #{filename}" if filename.match(/\.bz2$/)
      return "#{nice_command} unzip -p #{filename}"      if filename.match(/\.zip$/)

      return ""
    end

    # Parses a log file. Creates an IO stream for the provided file, and sends it to parse_io for
    # further handling. This method supports progress updates that can be used to display a progress bar.
    #
    # If the log file is compressed, it is uncompressed to stdout and read.
    # TODO: Check if IO.popen encounters problems with the given command line.
    # TODO: Fix progress bar that is broken for IO.popen, as it returns a single string.
    #
    # <tt>file</tt>:: The file that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_file(file, options = {}, &block)

      @current_source = File.expand_path(file)
      @source_changes_handler.call(:started, @current_source) if @source_changes_handler

      if decompress_file?(file).empty?

        @progress_handler = @dormant_progress_handler
        @progress_handler.call(:started, file) if @progress_handler

        File.open(file, 'r') { |f| parse_io(f, options, &block) }

        @progress_handler.call(:finished, file) if @progress_handler
        @progress_handler = nil
      else
        IO.popen(decompress_file?(file), 'r') { |f| parse_io(f, options, &block) }
      end

      @source_changes_handler.call(:finished, @current_source) if @source_changes_handler

      @current_source = nil

    end

    # Parses an IO stream. It will simply call parse_io. This function does not support progress updates
    # because the length of a stream is not known.
    # <tt>stream</tt>:: The IO stream that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # This method loops over each line of the input stream. It will try to parse this line as any of
    # the lines that are defined by the current file format (see RequestLogAnalyzer::FileFormat).
    # It will then combine these parsed lines into requests using heuristics. These requests (see
    # RequestLogAnalyzer::Request) will then be yielded for further processing in the pipeline.
    #
    # - RequestLogAnalyzer::LineDefinition#matches is called to test if a line matches a line definition of the file format.
    # - update_current_request is used to combine parsed lines into requests using heuristics.
    # - The method will yield progress updates if a progress handler is installed using progress=
    # - The method will yield parse warnings if a warning handler is installed using warning=
    #
    # <tt>io</tt>:: The IO instance to use as source
    # <tt>options</tt>:: A hash of options that can be used by the parser.
    def parse_io(io, options = {}, &block) # :yields: request
      @current_lineno = 1
      while line = io.gets
        @progress_handler.call(:progress, io.pos) if @progress_handler && @current_lineno % 255 == 0

        if request_data = file_format.parse_line(line) { |wt, message| warn(wt, message) }
          @parsed_lines += 1
          update_current_request(request_data.merge(:source => @current_source, :lineno => @current_lineno), &block)
        end

        @current_lineno += 1
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      @current_lineno = nil
    end

    # Assign a proc to this attribute to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @dormant_progress_handler = proc
    end

    # Assign a proc to this attribute to install a warning handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # Assign a proc to this attribute to install a source change handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle source changes
    def source_changes=(proc)
      @source_changes_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler, if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with them.
    #
    # <tt>type</tt>:: The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, @current_lineno) if @warning_handler
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # Combining the lines is done using heuristics. Problems can occur in this process. The
    # current parse strategy defines how these cases are handled.
    #
    # When using the 'assume-correct' parse strategy (default):
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - If a header line is found before the previous request was closed, the previous request
    #   will be yielded and a new request will be started.
    #
    # When using the 'cautious' parse strategy:
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line is a sign of
    #   an improperly ordered file. All data that was gathered for the request until then is
    #   discarded and the next request is ignored as well. An :unclosed_request warning is
    #   emitted.
    #
    # <tt>request_data</tt>:: A hash of data that was parsed from the last line.
    def update_current_request(request_data, &block) # :yields: request
      if header_line?(request_data)
        if @current_request
          case options[:parse_strategy]
          when 'assume-correct'
            handle_request(@current_request, &block)
            @current_request = @file_format.request(request_data)
          when 'cautious'
            @skipped_lines += 1
            warn(:unclosed_request, "Encountered header line (#{request_data[:line_definition].name.inspect}), but previous request was not closed!")
            @current_request = nil # remove all data that was parsed, skip the next request as well.
          end
        elsif footer_line?(request_data)
          handle_request(@file_format.request(request_data), &block)
        else
          @current_request = @file_format.request(request_data)
        end
      else
        if @current_request
          @current_request << request_data
          if footer_line?(request_data)
            handle_request(@current_request, &block) # yield @current_request
            @current_request = nil
          end
        else
          @skipped_lines += 1
          warn(:no_current_request, "Parseable line (#{request_data[:line_definition].name.inspect}) found outside of a request!")
        end
      end
    end

    # Handles the parsed request by sending it into the pipeline.
    #
    # - It will call RequestLogAnalyzer::Request#validate on the request instance
    # - It will send the request into the pipeline, checking whether it was accepted by all the filters.
    # - It will update the parsed_requests and skipped_requests variables accordingly
    #
    # <tt>request</tt>:: The parsed request instance (RequestLogAnalyzer::Request)
    def handle_request(request, &block) # :yields: :request, request
      @parsed_requests += 1
      request.validate
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 unless accepted
    end

    # Checks whether a given line hash is a header line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def header_line?(hash)
      hash[:line_definition].header
    end

    # Checks whether a given line hash is a footer line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def footer_line?(hash)
      hash[:line_definition].footer
    end
  end

end
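Putting the parser together, the sketch below reads a Rails log with the 'cautious' strategy and prints a short summary. The log path is an assumption for this example; note that the block passed to each_request should return a truthy value, otherwise handle_request counts the request as skipped.

format = RequestLogAnalyzer::FileFormat.load(:rails)
parser = RequestLogAnalyzer::Source::LogParser.new(format,
           :source_files => 'log/production.log', :parse_strategy => 'cautious')

parser.warning = lambda { |type, message, lineno| $stderr.puts "#{type} at line #{lineno}: #{message}" }

parser.each_request do |request|
  puts "#{request.timestamp} took #{request[:duration]} seconds"
  true  # mark the request as accepted so it is not counted as skipped
end

puts "#{parser.parsed_requests} requests parsed, #{parser.skipped_lines} lines skipped"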