ngmoco-request-log-analyzer 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/DESIGN.rdoc +41 -0
- data/LICENSE +20 -0
- data/README.rdoc +39 -0
- data/Rakefile +8 -0
- data/bin/request-log-analyzer +114 -0
- data/lib/cli/command_line_arguments.rb +301 -0
- data/lib/cli/database_console.rb +26 -0
- data/lib/cli/database_console_init.rb +43 -0
- data/lib/cli/progressbar.rb +213 -0
- data/lib/cli/tools.rb +46 -0
- data/lib/request_log_analyzer.rb +44 -0
- data/lib/request_log_analyzer/aggregator.rb +49 -0
- data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
- data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
- data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
- data/lib/request_log_analyzer/controller.rb +332 -0
- data/lib/request_log_analyzer/database.rb +102 -0
- data/lib/request_log_analyzer/database/base.rb +115 -0
- data/lib/request_log_analyzer/database/connection.rb +38 -0
- data/lib/request_log_analyzer/database/request.rb +22 -0
- data/lib/request_log_analyzer/database/source.rb +13 -0
- data/lib/request_log_analyzer/database/warning.rb +14 -0
- data/lib/request_log_analyzer/file_format.rb +160 -0
- data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
- data/lib/request_log_analyzer/file_format/apache.rb +141 -0
- data/lib/request_log_analyzer/file_format/merb.rb +67 -0
- data/lib/request_log_analyzer/file_format/rack.rb +11 -0
- data/lib/request_log_analyzer/file_format/rails.rb +176 -0
- data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
- data/lib/request_log_analyzer/filter.rb +30 -0
- data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
- data/lib/request_log_analyzer/filter/field.rb +42 -0
- data/lib/request_log_analyzer/filter/timespan.rb +45 -0
- data/lib/request_log_analyzer/line_definition.rb +111 -0
- data/lib/request_log_analyzer/log_processor.rb +99 -0
- data/lib/request_log_analyzer/mailer.rb +62 -0
- data/lib/request_log_analyzer/output.rb +113 -0
- data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
- data/lib/request_log_analyzer/output/html.rb +184 -0
- data/lib/request_log_analyzer/request.rb +175 -0
- data/lib/request_log_analyzer/source.rb +72 -0
- data/lib/request_log_analyzer/source/database_loader.rb +87 -0
- data/lib/request_log_analyzer/source/log_parser.rb +274 -0
- data/lib/request_log_analyzer/tracker.rb +206 -0
- data/lib/request_log_analyzer/tracker/duration.rb +104 -0
- data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
- data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
- data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
- data/request-log-analyzer.gemspec +40 -0
- data/spec/database.yml +23 -0
- data/spec/fixtures/apache_combined.log +5 -0
- data/spec/fixtures/apache_common.log +10 -0
- data/spec/fixtures/decompression.log +12 -0
- data/spec/fixtures/decompression.log.bz2 +0 -0
- data/spec/fixtures/decompression.log.gz +0 -0
- data/spec/fixtures/decompression.log.zip +0 -0
- data/spec/fixtures/decompression.tar.gz +0 -0
- data/spec/fixtures/decompression.tgz +0 -0
- data/spec/fixtures/header_and_footer.log +6 -0
- data/spec/fixtures/merb.log +84 -0
- data/spec/fixtures/merb_prefixed.log +9 -0
- data/spec/fixtures/multiple_files_1.log +5 -0
- data/spec/fixtures/multiple_files_2.log +2 -0
- data/spec/fixtures/rails.db +0 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/fixtures/syslog_1x.log +5 -0
- data/spec/fixtures/test_file_format.log +13 -0
- data/spec/fixtures/test_language_combined.log +14 -0
- data/spec/fixtures/test_order.log +16 -0
- data/spec/integration/command_line_usage_spec.rb +84 -0
- data/spec/integration/munin_plugins_rails_spec.rb +58 -0
- data/spec/integration/scout_spec.rb +151 -0
- data/spec/lib/helpers.rb +52 -0
- data/spec/lib/macros.rb +18 -0
- data/spec/lib/matchers.rb +77 -0
- data/spec/lib/mocks.rb +76 -0
- data/spec/lib/testing_format.rb +46 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
- data/spec/unit/aggregator/summarizer_spec.rb +26 -0
- data/spec/unit/controller/controller_spec.rb +41 -0
- data/spec/unit/controller/log_processor_spec.rb +18 -0
- data/spec/unit/database/base_class_spec.rb +183 -0
- data/spec/unit/database/connection_spec.rb +34 -0
- data/spec/unit/database/database_spec.rb +133 -0
- data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
- data/spec/unit/file_format/apache_format_spec.rb +203 -0
- data/spec/unit/file_format/file_format_api_spec.rb +69 -0
- data/spec/unit/file_format/line_definition_spec.rb +75 -0
- data/spec/unit/file_format/merb_format_spec.rb +52 -0
- data/spec/unit/file_format/rails_format_spec.rb +164 -0
- data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
- data/spec/unit/filter/field_filter_spec.rb +66 -0
- data/spec/unit/filter/filter_spec.rb +17 -0
- data/spec/unit/filter/timespan_filter_spec.rb +58 -0
- data/spec/unit/mailer_spec.rb +30 -0
- data/spec/unit/request_spec.rb +111 -0
- data/spec/unit/source/log_parser_spec.rb +119 -0
- data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
- data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
- data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
- data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
- data/spec/unit/tracker/tracker_api_spec.rb +124 -0
- data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
- data/tasks/github-gem.rake +323 -0
- data/tasks/request_log_analyzer.rake +26 -0
- metadata +220 -0
@@ -0,0 +1,175 @@
|
|
1
|
+
module RequestLogAnalyzer

  # The Request class represents a parsed request from the log file.
  # Instances are created by the LogParser and are passed to the different aggregators, so they
  # can do their aggregating work.
  #
  # This class provides several methods to access the data that was parsed from the log files.
  # Request#first(field_name) returns the first (only) value corresponding to the given field
  # Request#every(field_name) returns all values corresponding to the given field name as array.
  class Request

    # Converter functions that transform raw captured strings into native Ruby values,
    # driven by the :type (and possibly :unit / :default) keys of a capture definition.
    module Converters

      # Default converter function, which converts the parsed strings to a native Ruby type
      # using the type indication in the line definition. It will use a custom converter
      # method if one is available.
      #
      # <tt>value</tt>::              The captured value (a String), or nil when nothing was captured.
      # <tt>capture_definition</tt>:: Hash describing the capture; :default is returned for nil values.
      def convert_value(value, capture_definition)
        return capture_definition[:default] if value.nil?
        custom_converter_method = :"convert_#{capture_definition[:type]}"
        send(custom_converter_method, value, capture_definition)
      end

      # Trivial converters for the basic field types.
      def convert_string(value, capture_definition);  value;        end
      def convert_float(value, capture_definition);   value.to_f;   end
      def convert_decimal(value, capture_definition); value.to_f;   end
      def convert_int(value, capture_definition);     value.to_i;   end
      def convert_integer(value, capture_definition); value.to_i;   end
      def convert_sym(value, capture_definition);     value.to_sym; end
      def convert_symbol(value, capture_definition);  value.to_sym; end

      # Converts :eval field, which should evaluate to a hash. Returns nil when the
      # captured value is not syntactically valid Ruby.
      # SECURITY NOTE: eval executes log file content as Ruby code -- only use the
      # :eval capture type on log files from a trusted source.
      def convert_eval(value, capture_definition)
        eval(value).inject({}) { |h, (k, v)| h[k.to_sym] = v; h }
      rescue SyntaxError
        nil
      end

      # Slow default method to parse timestamps into an integer of the form YYYYMMDDHHMMSS.
      # Reimplement this function in a file format specific Request class
      # to improve the timestamp parsing speed.
      def convert_timestamp(value, capture_definition)
        DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i
      end

      # Converts traffic fields to (whole) bytes based on the given unit.
      # Raises when the unit is not recognized.
      def convert_traffic(value, capture_definition)
        case capture_definition[:unit]
        when nil, :b, :B, :byte      then value.to_i
        when :GB, :G, :gigabyte      then (value.to_f * 1_000_000_000).round
        when :GiB, :gibibyte         then (value.to_f * (2 ** 30)).round
        when :MB, :M, :megabyte      then (value.to_f * 1_000_000).round
        when :MiB, :mebibyte         then (value.to_f * (2 ** 20)).round
        when :KB, :K, :kilobyte, :kB then (value.to_f * 1000).round
        when :KiB, :kibibyte         then (value.to_f * (2 ** 10)).round
        else raise "Unknown traffic unit"
        end
      end

      # Convert duration fields to float, and make sure the values are in seconds.
      # Raises when the unit is not recognized.
      def convert_duration(value, capture_definition)
        case capture_definition[:unit]
        when nil, :sec, :s     then value.to_f
        when :microsec, :musec then value.to_f / 1000000.0
        when :msec, :millisec  then value.to_f / 1000.0
        else raise "Unknown duration unit"
        end
      end
    end

    # Install the default converter methods
    include Converters

    attr_reader :lines, :attributes, :file_format

    # Initializes a new Request object.
    # It will apply the provided FileFormat module to this instance.
    def initialize(file_format, attributes = {})
      @lines       = []
      @attributes  = attributes
      @file_format = file_format
    end

    # Creates a new request that was parsed from the log with the given FileFormat. The hashes
    # that are passed to this function are added as lines to this request.
    def self.create(file_format, *hashes)
      request = self.new(file_format)
      hashes.flatten.each { |hash| request << hash }
      return request
    end

    # Adds another line to the request when it is parsed in the LogParser.
    #
    # The line should be provided as a hash with the attributes line_definition, :captures,
    # :lineno and :source set. This function is called from LogParser.
    def add_parsed_line(parsed_line)
      value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
      value_hash[:line_type] = parsed_line[:line_definition].name
      value_hash[:lineno] = parsed_line[:lineno]
      value_hash[:source] = parsed_line[:source]
      add_line_hash(value_hash)
    end

    # Adds another line to the request using a plain hash.
    #
    # The line should be provided as a hash of the fields parsed from the line.
    # Note: merging keeps already-present attributes, so earlier lines take precedence.
    def add_line_hash(value_hash)
      @lines << value_hash
      @attributes = value_hash.merge(@attributes)
    end

    # Adds another line to the request. This method switches automatically between
    # the add_line_hash and add_parsed_line based on the keys of the provided hash.
    def <<(hash)
      hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
    end

    # Checks whether the given line type was parsed from the log file for this request.
    # Returns a truthy value (true, or the matching line hash) when the type is present.
    def has_line_type?(line_type)
      # Fast path for the common single-line request case.
      return true if @lines.length == 1 && @lines[0][:line_type] == line_type.to_sym
      @lines.detect { |l| l[:line_type] == line_type.to_sym }
    end

    alias :=~ :has_line_type?

    # Returns the value that was captured for the "field" of this request.
    # This function will return the first value that was captured if the field
    # was captured in multiple lines
    def first(field)
      @attributes[field]
    end

    alias :[] :first

    # Returns an array of all the "field" values that were captured for this request
    def every(field)
      @lines.inject([]) { |result, fields| result << fields[field] if fields.has_key?(field); result }
    end

    # Returns true if this request does not yet contain any parsed lines. This should only occur
    # during parsing. An empty request should never be sent to the aggregators
    def empty?
      @lines.empty?
    end

    # Checks whether this request is completed. A completed request contains both a parsed header
    # line and a parsed footer line. Note that calling this function in single line mode will
    # always return false.
    def completed?
      header_found, footer_found = false, false
      @lines.each do |line|
        line_def = file_format.line_definitions[line[:line_type]]
        header_found = true if line_def.header
        footer_found = true if line_def.footer
      end
      header_found && footer_found
    end

    # This function is called before a Request is yielded. Subclasses can override it
    # to check or normalize the parsed data.
    def validate
    end

    # Returns the first timestamp encountered in a request.
    def timestamp
      first(:timestamp)
    end

    # The lowest line number among all parsed lines (nil entries are ignored).
    def first_lineno
      @lines.map { |line| line[:lineno] }.compact.min
    end

    # The highest line number among all parsed lines (nil entries are ignored).
    def last_lineno
      @lines.map { |line| line[:lineno] }.compact.max
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# The RequestLogAnalyzer::Source module bundles everything that reads requests from some
# source and feeds them into the pipeline for further processing. Requests (see
# RequestLogAnalyzer::Request) produced by a source are piped through filters (see
# RequestLogAnalyzer::Filter) before being handed to an aggregator (see
# RequestLogAnalyzer::Aggregator), so a source instance forms the start of the
# RequestLogAnalyzer chain.
#
# - RequestLogAnalyzer::Source::Base is the common superclass; every source class inherits from it.
# - RequestLogAnalyzer::Source::LogParser is currently the only implemented source.
module RequestLogAnalyzer::Source

  # Resolves constants that are missing from the RequestLogAnalyzer::Source namespace by
  # loading the default class file for the constant through
  # RequestLogAnalyzer::load_default_class_file.
  # <tt>const</tt>:: The constant to load in the RequestLogAnalyzer::Source namespace.
  def self.const_missing(const)
    RequestLogAnalyzer::load_default_class_file(self, const)
  end

  # Common superclass for every source implementation.
  #
  # A concrete source should at least override each_request so that it yields
  # RequestLogAnalyzer::Request instances into the pipeline.
  class Base

    # The options hash, and the FileFormat instance that describes this source's format.
    attr_reader :options, :file_format

    # The Request object that is currently being assembled.
    attr_reader :current_request

    # Line counters: the total number of parsed lines, and the number of lines
    # skipped because of warnings.
    attr_reader :parsed_lines, :skipped_lines

    # Request counters: the total number of parsed requests, and the number of
    # requests skipped because of filters.
    attr_reader :parsed_requests, :skipped_requests

    # Registers the file format and stores the provided options hash.
    # <tt>format</tt>::  The file format instance.
    # <tt>options</tt>:: A hash of options for the concrete Source implementation.
    def initialize(format, options = {})
      @file_format = format
      @options     = options
    end

    # Hook that runs before each_request is called. Override it to perform any
    # initialization this source needs before it can produce Request instances.
    def prepare
    end

    # Produces the requests that are sent into the pipeline. Implementations should
    # yield RequestLogAnalyzer::Request instances; this base version does nothing.
    # <tt>options</tt>:: A Hash of options that the implementation may use.
    def each_request(options = {}, &block) # :yields: request
      true
    end

    # Hook that runs after each_request has finished. Override it to wrap up,
    # free resources, etc.
    def finalize
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'rubygems'
require 'activerecord'

module RequestLogAnalyzer::Source

  # Active Resource hook
  # ActiveRecord model for the requests table of a previously created analysis database.
  class Request < ActiveRecord::Base
    has_many :completed_lines
    has_many :processing_lines

    # Converts this database record back into a RequestLogAnalyzer::Request instance for
    # the given file format, merging the attributes of the first completed line and the
    # first processing line (when present) into the request's own attributes.
    def convert(file_format)
      send_attributes = self.attributes
      send_attributes.merge!(self.completed_lines.first.attributes) if self.completed_lines.first
      send_attributes.merge!(self.processing_lines.first.attributes) if self.processing_lines.first
      return RequestLogAnalyzer::Request.new(file_format, send_attributes)
    end
  end

  # ActiveRecord model for the completed_lines table.
  class CompletedLine < ActiveRecord::Base
    belongs_to :request
  end

  # ActiveRecord model for the processing_lines table.
  class ProcessingLine < ActiveRecord::Base
    belongs_to :request
  end

  # The DatabaseLoader class gets log data from a database that was filled by an
  # earlier request-log-analyzer run, instead of parsing a log file.
  class DatabaseLoader < Base

    attr_reader :source_files, :file_format, :requests

    # Initializes the database loader instance.
    #
    # <tt>format</tt>::  The current file format instance
    # <tt>options</tt>:: A hash of options; :source_files should hold the SQLite database file.
    def initialize(format, options = {})
      super(format, options)
      @source_files    = options[:source_files]
      @parsed_requests = 0
      @requests        = []
    end

    # Connects to the SQLite database referenced by @source_files, loads every stored
    # request record, converts each one back into a RequestLogAnalyzer::Request and
    # yields it into the pipeline. Progress updates are emitted through the installed
    # progress handler, if any.
    # <tt>options</tt>:: A Hash of options (not used by this implementation).
    def each_request(options = {}, &block) # :yields: request
      ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @source_files)

      @progress_handler.call(:started, @source_files) if @progress_handler
      RequestLogAnalyzer::Source::Request.find(:all).each do |request|
        @parsed_requests += 1
        @progress_handler.call(:progress, @parsed_requests) if @progress_handler

        yield request.convert(self.file_format)
      end

      @progress_handler.call(:finished, @source_files) if @progress_handler
    end

    # Add a block to this method to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @progress_handler = proc
    end

    # Add a block to this method to install a warning handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with it.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      # BUGFIX: this class never assigns @current_io (that instance variable belongs to
      # LogParser), so the original call to @current_io.lineno raised NoMethodError on nil
      # whenever a warning handler was installed. Database records have no meaningful
      # line number, so nil is passed to the handler instead.
      @warning_handler.call(type, message, nil) if @warning_handler
    end
  end
end
|
@@ -0,0 +1,274 @@
|
|
1
|
+
module RequestLogAnalyzer::Source

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines to a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser. It will emit warnings when this occurs. LogParser supports multiple parse strategies
  # that deal differently with this problem.
  class LogParser < Base

    include Enumerable

    # The default parse strategy that will be used to parse the input.
    DEFAULT_PARSE_STRATEGY = 'assume-correct'

    # All available parse strategies.
    PARSE_STRATEGIES = ['cautious', 'assume-correct']

    attr_reader :source_files, :current_file, :current_lineno

    # Initializes the log file parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input that it is given (see parse_io).
    #
    # <tt>format</tt>::  The current file format instance
    # <tt>options</tt>:: A hash of options that are used by the parser
    def initialize(format, options = {})
      super(format, options)
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_lines    = 0
      @skipped_requests = 0
      @current_request  = nil
      @current_source   = nil
      @current_file     = nil
      @current_lineno   = nil
      @source_files     = options[:source_files]
      @progress_handler = nil

      @options[:parse_strategy] ||= DEFAULT_PARSE_STRATEGY
      raise "Unknown parse strategy" unless PARSE_STRATEGIES.include?(@options[:parse_strategy])
    end

    # Reads the input, which can either be a file, sequence of files or STDIN to parse
    # lines specified in the FileFormat. These lines will be combined into Request instances,
    # that will be yielded. The actual parsing occurs in the parse_io method.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def each_request(options = {}, &block) # :yields: :request, request

      case @source_files
      when IO
        if @source_files == $stdin
          puts "Parsing from the standard input. Press CTRL+C to finish." # FIXME: not here
        end
        parse_stream(@source_files, options, &block)
      when String
        parse_file(@source_files, options, &block)
      when Array
        parse_files(@source_files, options, &block)
      else
        raise "Unknown source provided"
      end
    end

    # Make sure the Enumerable methods work as expected
    alias_method :each, :each_request

    # Parses a list of subsequent files of the same format, by calling parse_file for every
    # file in the array.
    # <tt>files</tt>::   The Array of files that should be parsed
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_files(files, options = {}, &block) # :yields: request
      files.each { |file| parse_file(file, options, &block) }
    end

    # Check if a file has a compressed extension in the filename.
    # If recognized, return the command string used to decompress the file;
    # an empty string means the file is not compressed.
    # SECURITY NOTE: the filename is interpolated into a shell command that is later run
    # through IO.popen (see parse_file) -- do not pass untrusted filenames.
    def decompress_file?(filename)
      nice_command = "nice -n 5"

      # BUGFIX: interpolate the actual filename into the decompression commands; the
      # previous command strings contained a literal placeholder instead of the file name.
      return "#{nice_command} gunzip -c -d #{filename}" if filename.match(/\.tar.gz$/) || filename.match(/\.tgz$/) || filename.match(/\.gz$/)
      return "#{nice_command} bunzip2 -c -d #{filename}" if filename.match(/\.bz2$/)
      return "#{nice_command} unzip -p #{filename}" if filename.match(/\.zip$/)

      return ""
    end

    # Parses a log file. Creates an IO stream for the provided file, and sends it to parse_io for
    # further handling. This method supports progress updates that can be used to display a progressbar
    #
    # If the logfile is compressed, it is uncompressed to stdout and read.
    # TODO: Check if IO.popen encounters problems with the given command line.
    # TODO: Fix progress bar that is broken for IO.popen, as it returns a single string.
    #
    # <tt>file</tt>::    The file that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_file(file, options = {}, &block)

      @current_source = File.expand_path(file)
      @source_changes_handler.call(:started, @current_source) if @source_changes_handler

      if decompress_file?(file).empty?

        # Plain file: enable the progress handler for the duration of this file.
        @progress_handler = @dormant_progress_handler
        @progress_handler.call(:started, file) if @progress_handler

        File.open(file, 'r') { |f| parse_io(f, options, &block) }

        @progress_handler.call(:finished, file) if @progress_handler
        @progress_handler = nil
      else
        # Compressed file: read the decompression command's output (no progress updates).
        IO.popen(decompress_file?(file), 'r') { |f| parse_io(f, options, &block) }
      end

      @source_changes_handler.call(:finished, @current_source) if @source_changes_handler

      @current_source = nil

    end

    # Parses an IO stream. It will simply call parse_io. This function does not support progress updates
    # because the length of a stream is not known.
    # <tt>stream</tt>::  The IO stream that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # This method loops over each line of the input stream. It will try to parse this line as any of
    # the lines that are defined by the current file format (see RequestLogAnalyzer::FileFormat).
    # It will then combine these parsed lines into requests using heuristics. These requests (see
    # RequestLogAnalyzer::Request) will then be yielded for further processing in the pipeline.
    #
    # - RequestLogAnalyzer::LineDefinition#matches is called to test if a line matches a line definition of the file format.
    # - update_current_request is used to combine parsed lines into requests using heuristics.
    # - The method will yield progress updates if a progress handler is installed using progress=
    # - The method will yield parse warnings if a warning handler is installed using warning=
    #
    # <tt>io</tt>::      The IO instance to use as source
    # <tt>options</tt>:: A hash of options that can be used by the parser.
    def parse_io(io, options = {}, &block) # :yields: request
      @current_lineno = 1
      while line = io.gets
        # Only emit a progress update every 255 lines to keep the overhead low.
        @progress_handler.call(:progress, io.pos) if @progress_handler && @current_lineno % 255 == 0

        if request_data = file_format.parse_line(line) { |wt, message| warn(wt, message) }
          @parsed_lines += 1
          update_current_request(request_data.merge(:source => @current_source, :lineno => @current_lineno), &block)
        end

        @current_lineno += 1
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      @current_lineno = nil
    end

    # Add a block to this method to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @dormant_progress_handler = proc
    end

    # Add a block to this method to install a warning handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # Add a block to this method to install a source change handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle source changes
    def source_changes=(proc)
      @source_changes_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with it.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, @current_lineno) if @warning_handler
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # Combining the lines is done using heuristics. Problems can occur in this process. The
    # current parse strategy defines how these cases are handled.
    #
    # When using the 'assume-correct' parse strategy (default):
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - If a header line is found before the previous request was closed, the previous request
    #   will be yielded and a new request will be started.
    #
    # When using the 'cautious' parse strategy:
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded and the next request is ignored as well. An :unclosed_request warning is
    #   emitted.
    #
    # <tt>request_data</tt>:: A hash of data that was parsed from the last line.
    def update_current_request(request_data, &block) # :yields: request
      if header_line?(request_data)
        if @current_request
          case options[:parse_strategy]
          when 'assume-correct'
            handle_request(@current_request, &block)
            @current_request = @file_format.request(request_data)
          when 'cautious'
            @skipped_lines += 1
            warn(:unclosed_request, "Encountered header line (#{request_data[:line_definition].name.inspect}), but previous request was not closed!")
            @current_request = nil # remove all data that was parsed, skip next request as well.
          end
        elsif footer_line?(request_data)
          # Single-line request: the header is also a footer, emit it immediately.
          handle_request(@file_format.request(request_data), &block)
        else
          @current_request = @file_format.request(request_data)
        end
      else
        if @current_request
          @current_request << request_data
          if footer_line?(request_data)
            handle_request(@current_request, &block) # yield @current_request
            @current_request = nil
          end
        else
          @skipped_lines += 1
          warn(:no_current_request, "Parsebale line (#{request_data[:line_definition].name.inspect}) found outside of a request!")
        end
      end
    end

    # Handles the parsed request by sending it into the pipeline.
    #
    # - It will call RequestLogAnalyzer::Request#validate on the request instance
    # - It will send the request into the pipeline, checking whether it was accepted by all the filters.
    # - It will update the parsed_requests and skipped_requests variables accordingly
    #
    # <tt>request</tt>:: The parsed request instance (RequestLogAnalyzer::Request)
    def handle_request(request, &block) # :yields: :request, request
      @parsed_requests += 1
      request.validate
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 unless accepted
    end

    # Checks whether a given line hash is a header line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def header_line?(hash)
      hash[:line_definition].header
    end

    # Checks whether a given line hash is a footer line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def footer_line?(hash)
      hash[:line_definition].footer
    end
  end

end
|