ngmoco-request-log-analyzer 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/DESIGN.rdoc +41 -0
- data/LICENSE +20 -0
- data/README.rdoc +39 -0
- data/Rakefile +8 -0
- data/bin/request-log-analyzer +114 -0
- data/lib/cli/command_line_arguments.rb +301 -0
- data/lib/cli/database_console.rb +26 -0
- data/lib/cli/database_console_init.rb +43 -0
- data/lib/cli/progressbar.rb +213 -0
- data/lib/cli/tools.rb +46 -0
- data/lib/request_log_analyzer.rb +44 -0
- data/lib/request_log_analyzer/aggregator.rb +49 -0
- data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
- data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
- data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
- data/lib/request_log_analyzer/controller.rb +332 -0
- data/lib/request_log_analyzer/database.rb +102 -0
- data/lib/request_log_analyzer/database/base.rb +115 -0
- data/lib/request_log_analyzer/database/connection.rb +38 -0
- data/lib/request_log_analyzer/database/request.rb +22 -0
- data/lib/request_log_analyzer/database/source.rb +13 -0
- data/lib/request_log_analyzer/database/warning.rb +14 -0
- data/lib/request_log_analyzer/file_format.rb +160 -0
- data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
- data/lib/request_log_analyzer/file_format/apache.rb +141 -0
- data/lib/request_log_analyzer/file_format/merb.rb +67 -0
- data/lib/request_log_analyzer/file_format/rack.rb +11 -0
- data/lib/request_log_analyzer/file_format/rails.rb +176 -0
- data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
- data/lib/request_log_analyzer/filter.rb +30 -0
- data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
- data/lib/request_log_analyzer/filter/field.rb +42 -0
- data/lib/request_log_analyzer/filter/timespan.rb +45 -0
- data/lib/request_log_analyzer/line_definition.rb +111 -0
- data/lib/request_log_analyzer/log_processor.rb +99 -0
- data/lib/request_log_analyzer/mailer.rb +62 -0
- data/lib/request_log_analyzer/output.rb +113 -0
- data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
- data/lib/request_log_analyzer/output/html.rb +184 -0
- data/lib/request_log_analyzer/request.rb +175 -0
- data/lib/request_log_analyzer/source.rb +72 -0
- data/lib/request_log_analyzer/source/database_loader.rb +87 -0
- data/lib/request_log_analyzer/source/log_parser.rb +274 -0
- data/lib/request_log_analyzer/tracker.rb +206 -0
- data/lib/request_log_analyzer/tracker/duration.rb +104 -0
- data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
- data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
- data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
- data/request-log-analyzer.gemspec +40 -0
- data/spec/database.yml +23 -0
- data/spec/fixtures/apache_combined.log +5 -0
- data/spec/fixtures/apache_common.log +10 -0
- data/spec/fixtures/decompression.log +12 -0
- data/spec/fixtures/decompression.log.bz2 +0 -0
- data/spec/fixtures/decompression.log.gz +0 -0
- data/spec/fixtures/decompression.log.zip +0 -0
- data/spec/fixtures/decompression.tar.gz +0 -0
- data/spec/fixtures/decompression.tgz +0 -0
- data/spec/fixtures/header_and_footer.log +6 -0
- data/spec/fixtures/merb.log +84 -0
- data/spec/fixtures/merb_prefixed.log +9 -0
- data/spec/fixtures/multiple_files_1.log +5 -0
- data/spec/fixtures/multiple_files_2.log +2 -0
- data/spec/fixtures/rails.db +0 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/fixtures/syslog_1x.log +5 -0
- data/spec/fixtures/test_file_format.log +13 -0
- data/spec/fixtures/test_language_combined.log +14 -0
- data/spec/fixtures/test_order.log +16 -0
- data/spec/integration/command_line_usage_spec.rb +84 -0
- data/spec/integration/munin_plugins_rails_spec.rb +58 -0
- data/spec/integration/scout_spec.rb +151 -0
- data/spec/lib/helpers.rb +52 -0
- data/spec/lib/macros.rb +18 -0
- data/spec/lib/matchers.rb +77 -0
- data/spec/lib/mocks.rb +76 -0
- data/spec/lib/testing_format.rb +46 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
- data/spec/unit/aggregator/summarizer_spec.rb +26 -0
- data/spec/unit/controller/controller_spec.rb +41 -0
- data/spec/unit/controller/log_processor_spec.rb +18 -0
- data/spec/unit/database/base_class_spec.rb +183 -0
- data/spec/unit/database/connection_spec.rb +34 -0
- data/spec/unit/database/database_spec.rb +133 -0
- data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
- data/spec/unit/file_format/apache_format_spec.rb +203 -0
- data/spec/unit/file_format/file_format_api_spec.rb +69 -0
- data/spec/unit/file_format/line_definition_spec.rb +75 -0
- data/spec/unit/file_format/merb_format_spec.rb +52 -0
- data/spec/unit/file_format/rails_format_spec.rb +164 -0
- data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
- data/spec/unit/filter/field_filter_spec.rb +66 -0
- data/spec/unit/filter/filter_spec.rb +17 -0
- data/spec/unit/filter/timespan_filter_spec.rb +58 -0
- data/spec/unit/mailer_spec.rb +30 -0
- data/spec/unit/request_spec.rb +111 -0
- data/spec/unit/source/log_parser_spec.rb +119 -0
- data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
- data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
- data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
- data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
- data/spec/unit/tracker/tracker_api_spec.rb +124 -0
- data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
- data/tasks/github-gem.rake +323 -0
- data/tasks/request_log_analyzer.rake +26 -0
- metadata +220 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
module RequestLogAnalyzer

  # The Request class represents a parsed request from the log file.
  # Instances are created by the LogParser and are passed to the different aggregators, so they
  # can do their aggregating work.
  #
  # This class provides several methods to access the data that was parsed from the log files.
  # Request#first(field_name) returns the first (only) value corresponding to the given field
  # Request#every(field_name) returns all values corresponding to the given field name as array.
  class Request

    module Converters

      # Default converter function, which converts the parsed strings to a native Ruby type
      # using the type indication in the line definition. It will use a custom converter
      # method if one is available.
      # <tt>value</tt>::              the captured string, or nil if nothing was captured
      # <tt>capture_definition</tt>:: hash describing the capture (:type, :default, :unit, ...)
      def convert_value(value, capture_definition)
        return capture_definition[:default] if value.nil?
        custom_converter_method = :"convert_#{capture_definition[:type]}"
        send(custom_converter_method, value, capture_definition)
      end

      def convert_string(value, capture_definition);  value;        end
      def convert_float(value, capture_definition);   value.to_f;   end
      def convert_decimal(value, capture_definition); value.to_f;   end
      def convert_int(value, capture_definition);     value.to_i;   end
      def convert_integer(value, capture_definition); value.to_i;   end
      def convert_sym(value, capture_definition);     value.to_sym; end
      def convert_symbol(value, capture_definition);  value.to_sym; end

      # Converts an :eval field, which should evaluate to a hash. The keys of the
      # resulting hash are converted to symbols. Returns nil on a syntax error.
      #
      # SECURITY NOTE: this calls Kernel#eval on text read straight from the log
      # file, so a maliciously crafted log line can execute arbitrary Ruby code.
      # Only parse log files from trusted sources.
      def convert_eval(value, capture_definition)
        eval(value).inject({}) { |h, (k, v)| h[k.to_sym] = v; h}
      rescue SyntaxError
        nil
      end

      # Slow default method to parse timestamps; returns an integer in
      # YYYYMMDDHHMMSS form (e.g. 20090102030405).
      # Reimplement this function in a file format specific Request class
      # to improve the timestamp parsing speed.
      def convert_timestamp(value, capture_definition)
        DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i
      end

      # Converts traffic fields to (whole) bytes based on the given unit.
      # Raises for an unrecognized :unit in the capture definition.
      def convert_traffic(value, capture_definition)
        case capture_definition[:unit]
        when nil, :b, :B, :byte      then value.to_i
        when :GB, :G, :gigabyte      then (value.to_f * 1000_000_000).round
        when :GiB, :gibibyte         then (value.to_f * (2 ** 30)).round
        when :MB, :M, :megabyte      then (value.to_f * 1000_000).round
        when :MiB, :mebibyte         then (value.to_f * (2 ** 20)).round
        when :KB, :K, :kilobyte, :kB then (value.to_f * 1000).round
        when :KiB, :kibibyte         then (value.to_f * (2 ** 10)).round
        else raise "Unknown traffic unit"
        end
      end

      # Convert duration fields to float, and make sure the values are in seconds.
      # Raises for an unrecognized :unit in the capture definition.
      def convert_duration(value, capture_definition)
        case capture_definition[:unit]
        when nil, :sec, :s     then value.to_f
        when :microsec, :musec then value.to_f / 1000000.0
        when :msec, :millisec  then value.to_f / 1000.0
        else raise "Unknown duration unit"
        end
      end
    end

    # Install the default converter methods
    include Converters

    attr_reader :lines, :attributes, :file_format

    # Initializes a new Request object.
    # It will apply the provided FileFormat module to this instance.
    def initialize(file_format, attributes = {})
      @lines       = []
      @attributes  = attributes
      @file_format = file_format
    end

    # Creates a new request that was parsed from the log with the given FileFormat. The hashes
    # that are passed to this function are added as lines to this request.
    def self.create(file_format, *hashes)
      request = self.new(file_format)
      hashes.flatten.each { |hash| request << hash }
      return request
    end

    # Adds another line to the request when it is parsed in the LogParser.
    #
    # The line should be provided as a hash with the attributes :line_definition, :captures,
    # :lineno and :source set. This function is called from LogParser.
    def add_parsed_line(parsed_line)
      value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
      value_hash[:line_type] = parsed_line[:line_definition].name
      value_hash[:lineno] = parsed_line[:lineno]
      value_hash[:source] = parsed_line[:source]
      add_line_hash(value_hash)
    end

    # Adds another line to the request using a plain hash.
    #
    # The line should be provided as a hash of the fields parsed from the line.
    def add_line_hash(value_hash)
      @lines << value_hash
      # Merge order keeps existing attributes: the first captured value for a field wins.
      @attributes = value_hash.merge(@attributes)
    end

    # Adds another line to the request. This method switches automatically between
    # the add_line_hash and add_parsed_line based on the keys of the provided hash.
    def <<(hash)
      hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
    end

    # Checks whether the given line type was parsed from the log file for this request.
    # Returns a truthy value (true, or the matching line hash) when found; nil otherwise.
    def has_line_type?(line_type)
      # Fast path for the common single-line request case.
      return true if @lines.length == 1 && @lines[0][:line_type] == line_type.to_sym
      @lines.detect { |l| l[:line_type] == line_type.to_sym }
    end

    alias :=~ :has_line_type?

    # Returns the value that was captured for the "field" of this request.
    # This function will return the first value that was captured if the field
    # was captured in multiple lines
    def first(field)
      @attributes[field]
    end

    alias :[] :first

    # Returns an array of all the "field" values that were captured for this request
    def every(field)
      @lines.inject([]) { |result, fields| result << fields[field] if fields.has_key?(field); result }
    end

    # Returns true if this request does not yet contain any parsed lines. This should only occur
    # during parsing. An empty request should never be sent to the aggregators
    def empty?
      @lines.empty?
    end

    # Checks whether this request is completed. A completed request contains both a parsed header
    # line and a parsed footer line. Note that calling this function in single line mode will always
    # return false.
    def completed?
      header_found, footer_found = false, false
      @lines.each do |line|
        line_def = file_format.line_definitions[line[:line_type]]
        header_found = true if line_def.header
        footer_found = true if line_def.footer
      end
      header_found && footer_found
    end

    # This function is called before a Request is yielded. Override in a file format
    # specific Request class to perform consistency checks.
    def validate
    end

    # Returns the first timestamp encountered in a request.
    def timestamp
      first(:timestamp)
    end

    # Returns the lowest line number among the parsed lines, or nil if none were recorded.
    def first_lineno
      @lines.map { |line| line[:lineno] }.reject { |v| v.nil? }.min
    end

    # Returns the highest line number among the parsed lines, or nil if none were recorded.
    def last_lineno
      @lines.map { |line| line[:lineno] }.reject { |v| v.nil? }.max
    end
  end
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# The RequestLogAnalyzer::Source module bundles all functionality that reads requests
# from some input and feeds them into the pipeline for further processing. Requests
# (see RequestLogAnalyzer::Request) produced by a source are piped through filters
# (see RequestLogAnalyzer::Filter) and then handed to an aggregator
# (see RequestLogAnalyzer::Aggregator); a source is therefore the start of the chain.
#
# - RequestLogAnalyzer::Source::Base is the base class; every source inherits from it.
# - RequestLogAnalyzer::Source::LogParser is currently the only implemented source.
module RequestLogAnalyzer
  module Source

    # Resolves constants that live in the RequestLogAnalyzer::Source namespace by
    # loading the file in which the constant is expected to be declared, using
    # RequestLogAnalyzer::load_default_class_file.
    # <tt>const</tt>:: The constant to load in the RequestLogAnalyzer::Source namespace.
    def self.const_missing(const)
      RequestLogAnalyzer::load_default_class_file(self, const)
    end

    # Abstract superclass for all sources.
    #
    # A concrete source must at least implement each_request, which should yield
    # RequestLogAnalyzer::Request instances into the pipeline.
    class Base

      # A hash of options
      attr_reader :options

      # The current Request object that is being parsed
      attr_reader :current_request

      # The total number of parsed lines
      attr_reader :parsed_lines

      # The number of skipped lines because of warnings
      attr_reader :skipped_lines

      # The total number of parsed requests.
      attr_reader :parsed_requests

      # The total number of skipped requests because of filters.
      attr_reader :skipped_requests

      # The FileFormat instance that describes the format of this source.
      attr_reader :file_format

      # Registers the file format and stores the given options.
      # <tt>format</tt>::  The file format instance
      # <tt>options</tt>:: A hash of options that can be used by a specific Source implementation
      def initialize(format, options = {})
        @file_format = format
        @options     = options
      end

      # Hook called before each_request; override it to set up anything the source
      # needs before it can produce Request instances.
      def prepare
      end

      # Produces the requests that will be sent into the pipeline. Implementations
      # should yield RequestLogAnalyzer::Request instances.
      # <tt>options</tt>:: A Hash of options that can be used in the implementation.
      def each_request(options = {}, &block) # :yields: request
        return true
      end

      # Hook called after each_request has finished; override it to wrap up,
      # free resources, etc.
      def finalize
      end

    end
  end
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
require 'rubygems'
require 'activerecord'

module RequestLogAnalyzer::Source

  # ActiveRecord model for the requests table that was filled by the database inserter.
  class Request < ActiveRecord::Base
    has_many :completed_lines
    has_many :processing_lines

    # Builds a RequestLogAnalyzer::Request from this database record by merging the
    # record's own attributes with those of its first completed and processing line.
    # <tt>file_format</tt>:: the FileFormat instance to attach to the new request.
    def convert(file_format)
      send_attributes = self.attributes
      send_attributes.merge!(self.completed_lines.first.attributes) if self.completed_lines.first
      send_attributes.merge!(self.processing_lines.first.attributes) if self.processing_lines.first
      return RequestLogAnalyzer::Request.new(file_format, send_attributes)
    end
  end

  # ActiveRecord model for the completed_lines table.
  class CompletedLine < ActiveRecord::Base
    belongs_to :request
  end

  # ActiveRecord model for the processing_lines table.
  class ProcessingLine < ActiveRecord::Base
    belongs_to :request
  end

  # The DatabaseLoader class gets log data from a database that was previously
  # filled by the database inserter aggregator.
  class DatabaseLoader < Base

    attr_reader :source_files, :file_format, :requests

    # Initializes the database loader instance.
    # It will apply the language specific FileFormat module to this instance.
    #
    # <tt>format</tt>::  The current file format instance
    # <tt>options</tt>:: A hash of options; :source_files should contain the path
    #                    of the SQLite database file to read from.
    def initialize(format, options = {})
      super(format, options)
      @source_files    = options[:source_files]
      @parsed_requests = 0
      @requests        = []
    end

    # Connects to the SQLite database given by :source_files, loads every stored
    # request, converts each one to a RequestLogAnalyzer::Request and yields it.
    # Progress updates are emitted through the installed progress handler, if any.
    # <tt>options</tt>:: A Hash of options (currently unused by this source).
    def each_request(options = {}, &block) # :yields: request
      ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @source_files)

      @progress_handler.call(:started, @source_files) if @progress_handler
      RequestLogAnalyzer::Source::Request.find(:all).each do |request|
        @parsed_requests += 1
        @progress_handler.call(:progress, @parsed_requests) if @progress_handler

        yield request.convert(self.file_format)
      end

      @progress_handler.call(:finished, @source_files) if @progress_handler
    end

    # Add a block to this method to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @progress_handler = proc
    end

    # Add a block to this method to install a warning handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with it.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      # BUGFIX: the previous implementation passed @current_io.lineno here, but
      # @current_io is never assigned in this class, so emitting a warning raised
      # NoMethodError on nil. This source has no line numbers, so pass nil instead.
      @warning_handler.call(type, message, nil) if @warning_handler
    end
  end
end
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
module RequestLogAnalyzer::Source

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines to a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser. It will emit warnings when this occurs. LogParser supports multiple parse strategies
  # that deal differently with this problem.
  class LogParser < Base

    include Enumerable

    # The default parse strategy that will be used to parse the input.
    DEFAULT_PARSE_STRATEGY = 'assume-correct'

    # All available parse strategies.
    PARSE_STRATEGIES = ['cautious', 'assume-correct']

    attr_reader :source_files, :current_file, :current_lineno

    # Initializes the log file parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input that it is given (see parse_io).
    #
    # <tt>format</tt>::  The current file format instance
    # <tt>options</tt>:: A hash of options that are used by the parser
    def initialize(format, options = {})
      super(format, options)
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_lines    = 0
      @skipped_requests = 0
      @current_request  = nil
      @current_source   = nil
      @current_file     = nil
      @current_lineno   = nil
      @source_files     = options[:source_files]
      @progress_handler = nil

      @options[:parse_strategy] ||= DEFAULT_PARSE_STRATEGY
      raise "Unknown parse strategy" unless PARSE_STRATEGIES.include?(@options[:parse_strategy])
    end

    # Reads the input, which can either be a file, sequence of files or STDIN to parse
    # lines specified in the FileFormat. These lines will be combined into Request instances,
    # that will be yielded. The actual parsing occurs in the parse_io method.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def each_request(options = {}, &block) # :yields: :request, request

      case @source_files
      when IO
        if @source_files == $stdin
          puts "Parsing from the standard input. Press CTRL+C to finish." # FIXME: not here
        end
        parse_stream(@source_files, options, &block)
      when String
        parse_file(@source_files, options, &block)
      when Array
        parse_files(@source_files, options, &block)
      else
        raise "Unknown source provided"
      end
    end

    # Make sure the Enumerable methods work as expected
    alias_method :each, :each_request

    # Parses a list of subsequent files of the same format, by calling parse_file for every
    # file in the array.
    # <tt>files</tt>::   The Array of files that should be parsed
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_files(files, options = {}, &block) # :yields: request
      files.each { |file| parse_file(file, options, &block) }
    end

    # Check if a file has a compressed extension in the filename.
    # If recognized, return the command string used to decompress the file;
    # returns an empty string for an uncompressed file.
    #
    # NOTE(review): the filename is interpolated into a shell command that is later
    # executed through IO.popen; a filename containing shell metacharacters could run
    # arbitrary commands. Only pass trusted paths to this parser.
    def decompress_file?(filename)
      nice_command = "nice -n 5"

      return "#{nice_command} gunzip -c -d #{filename}" if filename.match(/\.tar.gz$/) || filename.match(/\.tgz$/) || filename.match(/\.gz$/)
      return "#{nice_command} bunzip2 -c -d #{filename}" if filename.match(/\.bz2$/)
      return "#{nice_command} unzip -p #{filename}" if filename.match(/\.zip$/)

      return ""
    end

    # Parses a log file. Creates an IO stream for the provided file, and sends it to parse_io for
    # further handling. This method supports progress updates that can be used to display a progressbar
    #
    # If the logfile is compressed, it is uncompressed to stdout and read.
    # TODO: Check if IO.popen encounters problems with the given command line.
    # TODO: Fix progress bar that is broken for IO.popen, as it returns a single string.
    #
    # <tt>file</tt>::    The file that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_file(file, options = {}, &block)

      @current_source = File.expand_path(file)
      @source_changes_handler.call(:started, @current_source) if @source_changes_handler

      # Determine the decompression command once instead of twice per file.
      decompress_command = decompress_file?(file)
      if decompress_command.empty?

        @progress_handler = @dormant_progress_handler
        @progress_handler.call(:started, file) if @progress_handler

        File.open(file, 'r') { |f| parse_io(f, options, &block) }

        @progress_handler.call(:finished, file) if @progress_handler
        @progress_handler = nil
      else
        IO.popen(decompress_command, 'r') { |f| parse_io(f, options, &block) }
      end

      @source_changes_handler.call(:finished, @current_source) if @source_changes_handler

      @current_source = nil
    end

    # Parses an IO stream. It will simply call parse_io. This function does not support progress updates
    # because the length of a stream is not known.
    # <tt>stream</tt>::  The IO stream that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # This method loops over each line of the input stream. It will try to parse this line as any of
    # the lines that are defined by the current file format (see RequestLogAnalyzer::FileFormat).
    # It will then combine these parsed lines into requests using heuristics. These requests (see
    # RequestLogAnalyzer::Request) will then be yielded for further processing in the pipeline.
    #
    # - RequestLogAnalyzer::LineDefinition#matches is called to test if a line matches a line definition of the file format.
    # - update_current_request is used to combine parsed lines into requests using heuristics.
    # - The method will yield progress updates if a progress handler is installed using progress=
    # - The method will yield parse warnings if a warning handler is installed using warning=
    #
    # <tt>io</tt>::      The IO instance to use as source
    # <tt>options</tt>:: A hash of options that can be used by the parser.
    def parse_io(io, options = {}, &block) # :yields: request
      @current_lineno = 1
      while line = io.gets
        # Only report progress every 255 lines to keep the overhead low.
        @progress_handler.call(:progress, io.pos) if @progress_handler && @current_lineno % 255 == 0

        if request_data = file_format.parse_line(line) { |wt, message| warn(wt, message) }
          @parsed_lines += 1
          update_current_request(request_data.merge(:source => @current_source, :lineno => @current_lineno), &block)
        end

        @current_lineno += 1
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      @current_lineno = nil
    end

    # Add a block to this method to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @dormant_progress_handler = proc
    end

    # Add a block to this method to install a warning handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # Add a block to this method to install a source change handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle source changes
    def source_changes=(proc)
      @source_changes_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with it.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, @current_lineno) if @warning_handler
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # Combining the lines is done using heuristics. Problems can occur in this process. The
    # current parse strategy defines how these cases are handled.
    #
    # When using the 'assume-correct' parse strategy (default):
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - If a header line is found before the previous request was closed, the previous request
    #   will be yielded and a new request will be started.
    #
    # When using the 'cautious' parse strategy:
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded and the next request is ignored as well. An :unclosed_request warning is
    #   emitted.
    #
    # <tt>request_data</tt>:: A hash of data that was parsed from the last line.
    def update_current_request(request_data, &block) # :yields: request
      if header_line?(request_data)
        if @current_request
          case options[:parse_strategy]
          when 'assume-correct'
            handle_request(@current_request, &block)
            @current_request = @file_format.request(request_data)
          when 'cautious'
            @skipped_lines += 1
            warn(:unclosed_request, "Encountered header line (#{request_data[:line_definition].name.inspect}), but previous request was not closed!")
            @current_request = nil # remove all data that was parsed, skip next request as well.
          end
        elsif footer_line?(request_data)
          # Single-line request: the line is both header and footer.
          handle_request(@file_format.request(request_data), &block)
        else
          @current_request = @file_format.request(request_data)
        end
      else
        if @current_request
          @current_request << request_data
          if footer_line?(request_data)
            handle_request(@current_request, &block) # yield @current_request
            @current_request = nil
          end
        else
          @skipped_lines += 1
          warn(:no_current_request, "Parsebale line (#{request_data[:line_definition].name.inspect}) found outside of a request!")
        end
      end
    end

    # Handles the parsed request by sending it into the pipeline.
    #
    # - It will call RequestLogAnalyzer::Request#validate on the request instance
    # - It will send the request into the pipeline, checking whether it was accepted by all the filters.
    # - It will update the parsed_requests and skipped_requests variables accordingly
    #
    # <tt>request</tt>:: The parsed request instance (RequestLogAnalyzer::Request)
    def handle_request(request, &block) # :yields: :request, request
      @parsed_requests += 1
      request.validate
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 unless accepted
    end

    # Checks whether a given line hash is a header line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def header_line?(hash)
      hash[:line_definition].header
    end

    # Checks whether a given line hash is a footer line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def footer_line?(hash)
      hash[:line_definition].footer
    end
  end

end
|