request-log-analyzer 1.3.7 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +3 -3
- data/README.rdoc +1 -1
- data/bin/request-log-analyzer +17 -14
- data/lib/cli/command_line_arguments.rb +51 -51
- data/lib/cli/database_console.rb +3 -3
- data/lib/cli/database_console_init.rb +2 -2
- data/lib/cli/progressbar.rb +10 -10
- data/lib/cli/tools.rb +3 -3
- data/lib/request_log_analyzer.rb +4 -4
- data/lib/request_log_analyzer/aggregator.rb +10 -10
- data/lib/request_log_analyzer/aggregator/database_inserter.rb +9 -9
- data/lib/request_log_analyzer/aggregator/echo.rb +14 -9
- data/lib/request_log_analyzer/aggregator/summarizer.rb +26 -26
- data/lib/request_log_analyzer/controller.rb +153 -69
- data/lib/request_log_analyzer/database.rb +13 -13
- data/lib/request_log_analyzer/database/base.rb +17 -17
- data/lib/request_log_analyzer/database/connection.rb +3 -3
- data/lib/request_log_analyzer/database/request.rb +2 -2
- data/lib/request_log_analyzer/database/source.rb +1 -1
- data/lib/request_log_analyzer/file_format.rb +15 -15
- data/lib/request_log_analyzer/file_format/amazon_s3.rb +16 -16
- data/lib/request_log_analyzer/file_format/apache.rb +20 -19
- data/lib/request_log_analyzer/file_format/merb.rb +12 -12
- data/lib/request_log_analyzer/file_format/rack.rb +4 -4
- data/lib/request_log_analyzer/file_format/rails.rb +146 -70
- data/lib/request_log_analyzer/file_format/rails_development.rb +4 -49
- data/lib/request_log_analyzer/filter.rb +6 -6
- data/lib/request_log_analyzer/filter/anonymize.rb +6 -6
- data/lib/request_log_analyzer/filter/field.rb +9 -9
- data/lib/request_log_analyzer/filter/timespan.rb +12 -10
- data/lib/request_log_analyzer/line_definition.rb +15 -14
- data/lib/request_log_analyzer/log_processor.rb +22 -22
- data/lib/request_log_analyzer/mailer.rb +15 -9
- data/lib/request_log_analyzer/output.rb +53 -12
- data/lib/request_log_analyzer/output/fixed_width.rb +40 -41
- data/lib/request_log_analyzer/output/html.rb +20 -20
- data/lib/request_log_analyzer/request.rb +35 -36
- data/lib/request_log_analyzer/source.rb +7 -7
- data/lib/request_log_analyzer/source/database_loader.rb +7 -7
- data/lib/request_log_analyzer/source/log_parser.rb +48 -43
- data/lib/request_log_analyzer/tracker.rb +128 -14
- data/lib/request_log_analyzer/tracker/duration.rb +39 -132
- data/lib/request_log_analyzer/tracker/frequency.rb +31 -32
- data/lib/request_log_analyzer/tracker/hourly_spread.rb +20 -19
- data/lib/request_log_analyzer/tracker/timespan.rb +17 -17
- data/lib/request_log_analyzer/tracker/traffic.rb +36 -116
- data/request-log-analyzer.gemspec +19 -15
- data/spec/fixtures/rails_22.log +1 -1
- data/spec/integration/command_line_usage_spec.rb +1 -1
- data/spec/lib/helpers.rb +7 -7
- data/spec/lib/macros.rb +3 -3
- data/spec/lib/matchers.rb +41 -27
- data/spec/lib/mocks.rb +15 -14
- data/spec/lib/testing_format.rb +9 -9
- data/spec/spec_helper.rb +6 -6
- data/spec/unit/aggregator/database_inserter_spec.rb +13 -13
- data/spec/unit/aggregator/summarizer_spec.rb +4 -4
- data/spec/unit/controller/controller_spec.rb +2 -2
- data/spec/unit/controller/log_processor_spec.rb +1 -1
- data/spec/unit/database/base_class_spec.rb +19 -19
- data/spec/unit/database/connection_spec.rb +3 -3
- data/spec/unit/database/database_spec.rb +25 -25
- data/spec/unit/file_format/amazon_s3_format_spec.rb +5 -5
- data/spec/unit/file_format/apache_format_spec.rb +13 -13
- data/spec/unit/file_format/file_format_api_spec.rb +13 -13
- data/spec/unit/file_format/line_definition_spec.rb +24 -17
- data/spec/unit/file_format/merb_format_spec.rb +41 -45
- data/spec/unit/file_format/rails_format_spec.rb +157 -117
- data/spec/unit/filter/anonymize_filter_spec.rb +2 -2
- data/spec/unit/filter/field_filter_spec.rb +13 -13
- data/spec/unit/filter/filter_spec.rb +1 -1
- data/spec/unit/filter/timespan_filter_spec.rb +15 -15
- data/spec/unit/mailer_spec.rb +30 -0
- data/spec/unit/{source/request_spec.rb → request_spec.rb} +30 -30
- data/spec/unit/source/log_parser_spec.rb +27 -27
- data/spec/unit/tracker/duration_tracker_spec.rb +115 -78
- data/spec/unit/tracker/frequency_tracker_spec.rb +74 -63
- data/spec/unit/tracker/hourly_spread_spec.rb +28 -20
- data/spec/unit/tracker/timespan_tracker_spec.rb +25 -13
- data/spec/unit/tracker/tracker_api_spec.rb +13 -13
- data/spec/unit/tracker/traffic_tracker_spec.rb +81 -79
- data/tasks/github-gem.rake +125 -75
- data/tasks/request_log_analyzer.rake +2 -2
- metadata +8 -6
@@ -5,13 +5,13 @@ class RequestLogAnalyzer::Database
|
|
5
5
|
|
6
6
|
def self.const_missing(const) # :nodoc:
|
7
7
|
RequestLogAnalyzer::load_default_class_file(self, const)
|
8
|
-
end
|
8
|
+
end
|
9
9
|
|
10
10
|
include RequestLogAnalyzer::Database::Connection
|
11
11
|
|
12
12
|
attr_accessor :file_format
|
13
13
|
attr_reader :line_classes
|
14
|
-
|
14
|
+
|
15
15
|
def initialize(connection_identifier = nil)
|
16
16
|
@line_classes = []
|
17
17
|
RequestLogAnalyzer::Database::Base.database = self
|
@@ -23,11 +23,11 @@ class RequestLogAnalyzer::Database
|
|
23
23
|
line_type = line_type.name if line_type.respond_to?(:name)
|
24
24
|
Object.const_get("#{line_type}_line".camelize)
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
27
|
def default_classes
|
28
28
|
[RequestLogAnalyzer::Database::Request, RequestLogAnalyzer::Database::Source, RequestLogAnalyzer::Database::Warning]
|
29
29
|
end
|
30
|
-
|
30
|
+
|
31
31
|
# Loads the ORM classes by inspecting the tables in the current database
|
32
32
|
def load_database_schema!
|
33
33
|
connection.tables.map do |table|
|
@@ -39,16 +39,16 @@ class RequestLogAnalyzer::Database
|
|
39
39
|
end
|
40
40
|
end
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
# Returns an array of all the ActiveRecord-based ORM classes for this database
|
44
44
|
def orm_classes
|
45
45
|
default_classes + line_classes
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
# Loads an ActiveRecord-based class that corresponds to the given parameter, which can either be
|
49
49
|
# a table name or a LineDefinition instance.
|
50
50
|
def load_activerecord_class(linedefinition_or_table)
|
51
|
-
|
51
|
+
|
52
52
|
case linedefinition_or_table
|
53
53
|
when String, Symbol
|
54
54
|
klass_name = linedefinition_or_table.to_s.singularize.camelize
|
@@ -57,12 +57,12 @@ class RequestLogAnalyzer::Database
|
|
57
57
|
klass_name = "#{linedefinition_or_table.name}_line".camelize
|
58
58
|
klass = RequestLogAnalyzer::Database::Base.subclass_from_line_definition(linedefinition_or_table)
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
Object.const_set(klass_name, klass)
|
62
62
|
klass = Object.const_get(klass_name)
|
63
63
|
@line_classes << klass
|
64
64
|
return klass
|
65
|
-
end
|
65
|
+
end
|
66
66
|
|
67
67
|
def fileformat_classes
|
68
68
|
raise "No file_format provided!" unless file_format
|
@@ -70,25 +70,25 @@ class RequestLogAnalyzer::Database
|
|
70
70
|
return default_classes + line_classes
|
71
71
|
end
|
72
72
|
|
73
|
-
# Creates the database schema and related ActiveRecord::Base subclasses that correspond to the
|
73
|
+
# Creates the database schema and related ActiveRecord::Base subclasses that correspond to the
|
74
74
|
# file format definition. These ORM classes will later be used to create records in the database.
|
75
75
|
def create_database_schema!
|
76
76
|
fileformat_classes.each { |klass| klass.create_table! }
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
# Drops the table of all the ORM classes, and unregisters the classes
|
80
80
|
def drop_database_schema!
|
81
81
|
file_format ? fileformat_classes.map(&:drop_table!) : orm_classes.map(&:drop_table!)
|
82
82
|
remove_orm_classes!
|
83
83
|
end
|
84
|
-
|
84
|
+
|
85
85
|
# Registers the default ORM classes in the default namespace
|
86
86
|
def register_default_orm_classes!
|
87
87
|
Object.const_set('Request', RequestLogAnalyzer::Database::Request)
|
88
88
|
Object.const_set('Source', RequestLogAnalyzer::Database::Source)
|
89
89
|
Object.const_set('Warning', RequestLogAnalyzer::Database::Warning)
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
# Unregisters every ORM class constant
|
93
93
|
def remove_orm_classes!
|
94
94
|
orm_classes.each do |klass|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
class RequestLogAnalyzer::Database::Base < ActiveRecord::Base
|
2
|
-
|
2
|
+
|
3
3
|
self.abstract_class = true
|
4
4
|
|
5
5
|
def <=>(other)
|
@@ -13,20 +13,20 @@ class RequestLogAnalyzer::Database::Base < ActiveRecord::Base
|
|
13
13
|
def line_type
|
14
14
|
self.class.name.underscore.gsub(/_line$/, '').to_sym
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
class_inheritable_accessor :line_definition
|
18
18
|
cattr_accessor :database
|
19
19
|
|
20
20
|
def self.subclass_from_line_definition(definition)
|
21
21
|
klass = Class.new(RequestLogAnalyzer::Database::Base)
|
22
22
|
klass.set_table_name("#{definition.name}_lines")
|
23
|
-
|
23
|
+
|
24
24
|
klass.line_definition = definition
|
25
|
-
|
25
|
+
|
26
26
|
# Set relations with requests and sources table
|
27
27
|
klass.belongs_to :request
|
28
28
|
klass.belongs_to :source
|
29
|
-
|
29
|
+
|
30
30
|
# Serialize complex fields into the database
|
31
31
|
definition.captures.select { |c| c.has_key?(:provides) }.each do |capture|
|
32
32
|
klass.send(:serialize, capture[:name], Hash)
|
@@ -48,32 +48,32 @@ class RequestLogAnalyzer::Database::Base < ActiveRecord::Base
|
|
48
48
|
klass.belongs_to :request
|
49
49
|
RequestLogAnalyzer::Database::Request.has_many table.to_sym
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
if klass.column_names.include?('source_id')
|
53
53
|
klass.belongs_to :source
|
54
54
|
RequestLogAnalyzer::Database::Source.has_many table.to_sym
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
return klass
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
def self.drop_table!
|
61
61
|
database.connection.remove_index(self.table_name, [:source_id]) rescue nil
|
62
62
|
database.connection.remove_index(self.table_name, [:request_id]) rescue nil
|
63
63
|
database.connection.drop_table(self.table_name) if database.connection.table_exists?(self.table_name)
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
def self.create_table!
|
67
67
|
raise "No line_definition available to base table schema on!" unless self.line_definition
|
68
|
-
|
68
|
+
|
69
69
|
unless table_exists?
|
70
70
|
database.connection.create_table(table_name.to_sym) do |t|
|
71
|
-
|
71
|
+
|
72
72
|
# Default fields
|
73
73
|
t.column :request_id, :integer
|
74
74
|
t.column :source_id, :integer
|
75
75
|
t.column :lineno, :integer
|
76
|
-
|
76
|
+
|
77
77
|
line_definition.captures.each do |capture|
|
78
78
|
# Add a field for every capture
|
79
79
|
t.column(capture[:name], column_type(capture[:type]))
|
@@ -83,13 +83,13 @@ class RequestLogAnalyzer::Database::Base < ActiveRecord::Base
|
|
83
83
|
end
|
84
84
|
end
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
# Add indices to table for more speedy querying
|
88
88
|
database.connection.add_index(self.table_name.to_sym, [:request_id]) # rescue
|
89
89
|
database.connection.add_index(self.table_name.to_sym, [:source_id]) # rescue
|
90
90
|
end
|
91
|
-
|
92
|
-
|
91
|
+
|
92
|
+
|
93
93
|
# Function to determine the column type for a field
|
94
94
|
# TODO: make more robust / include in file-format definition
|
95
95
|
def self.column_type(type_indicator)
|
@@ -110,6 +110,6 @@ class RequestLogAnalyzer::Database::Base < ActiveRecord::Base
|
|
110
110
|
when :date; :date
|
111
111
|
else :string
|
112
112
|
end
|
113
|
-
end
|
114
|
-
|
113
|
+
end
|
114
|
+
|
115
115
|
end
|
@@ -26,13 +26,13 @@ module RequestLogAnalyzer::Database::Connection
|
|
26
26
|
raise "Cannot connect with this connection_identifier: #{connection_identifier.inspect}"
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
def disconnect
|
31
31
|
RequestLogAnalyzer::Database::Base.remove_connection
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
def connection
|
35
35
|
RequestLogAnalyzer::Database::Base.connection
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
class RequestLogAnalyzer::Database::Request < RequestLogAnalyzer::Database::Base
|
2
|
-
|
2
|
+
|
3
3
|
# Returns an array of all the Line objects of this request in the correct order.
|
4
4
|
def lines
|
5
5
|
@lines ||= begin
|
@@ -8,7 +8,7 @@ class RequestLogAnalyzer::Database::Request < RequestLogAnalyzer::Database::Base
|
|
8
8
|
lines.sort
|
9
9
|
end
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
# Creates the table to store requests in.
|
13
13
|
def self.create_table!
|
14
14
|
unless database.connection.table_exists?(:requests)
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module RequestLogAnalyzer::FileFormat
|
2
|
-
|
2
|
+
|
3
3
|
def self.const_missing(const)
|
4
4
|
RequestLogAnalyzer::load_default_class_file(self, const)
|
5
|
-
end
|
6
|
-
|
5
|
+
end
|
6
|
+
|
7
7
|
# Loads a FileFormat::Base subclass instance.
|
8
8
|
# You can provide:
|
9
9
|
# * A FileFormat instance (which will return itself)
|
@@ -34,46 +34,46 @@ module RequestLogAnalyzer::FileFormat
|
|
34
34
|
|
35
35
|
else
|
36
36
|
# load a provided file format
|
37
|
-
klass = RequestLogAnalyzer::FileFormat.const_get(RequestLogAnalyzer::to_camelcase(file_format))
|
37
|
+
klass = RequestLogAnalyzer::FileFormat.const_get(RequestLogAnalyzer::to_camelcase(file_format))
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
# check the returned klass to see if it can be used
|
41
41
|
raise "Could not load a file format from #{file_format.inspect}" if klass.nil?
|
42
42
|
raise "Invalid FileFormat class" unless klass.kind_of?(Class) && klass.ancestors.include?(RequestLogAnalyzer::FileFormat::Base)
|
43
|
-
|
43
|
+
|
44
44
|
@current_file_format = klass.create(*args) # return an instance of the class
|
45
45
|
end
|
46
46
|
|
47
|
-
# Base class for all log file format definitions. This class provides functions for subclasses to
|
47
|
+
# Base class for all log file format definitions. This class provides functions for subclasses to
|
48
48
|
# define their LineDefinitions and to define a summary report.
|
49
49
|
#
|
50
50
|
# A subclass of this class is instantiated when request-log-analyzer is started and this instance
|
51
51
|
# is shared with all components of the application so they can act on the specifics of the format
|
52
52
|
class Base
|
53
|
-
|
53
|
+
|
54
54
|
attr_reader :line_definitions, :report_trackers
|
55
|
-
|
55
|
+
|
56
56
|
####################################################################################
|
57
57
|
# CLASS METHODS for format definition
|
58
58
|
####################################################################################
|
59
|
-
|
59
|
+
|
60
60
|
# Registers the line definer instance for a subclass.
|
61
61
|
def self.inherited(subclass)
|
62
62
|
if subclass.superclass == RequestLogAnalyzer::FileFormat::Base
|
63
63
|
|
64
64
|
# Create a line and report definer for this class
|
65
|
-
subclass.class_eval do
|
65
|
+
subclass.class_eval do
|
66
66
|
instance_variable_set(:@line_definer, RequestLogAnalyzer::LineDefinition::Definer.new)
|
67
67
|
instance_variable_set(:@report_definer, RequestLogAnalyzer::Aggregator::Summarizer::Definer.new)
|
68
68
|
class << self; attr_accessor :line_definer, :report_definer; end
|
69
|
-
end
|
69
|
+
end
|
70
70
|
|
71
71
|
# Create a custom Request class for this file format
|
72
72
|
subclass.const_set('Request', Class.new(RequestLogAnalyzer::Request)) unless subclass.const_defined?('Request')
|
73
73
|
else
|
74
74
|
|
75
75
|
# Copy the line and report definer from the parent class.
|
76
|
-
subclass.class_eval do
|
76
|
+
subclass.class_eval do
|
77
77
|
instance_variable_set(:@line_definer, superclass.line_definer.clone)
|
78
78
|
instance_variable_set(:@report_definer, superclass.report_definer.clone)
|
79
79
|
class << self; attr_accessor :line_definer, :report_definer; end
|
@@ -113,7 +113,7 @@ module RequestLogAnalyzer::FileFormat
|
|
113
113
|
return self.new(line_definer.line_definitions, report_definer.trackers)
|
114
114
|
end
|
115
115
|
|
116
|
-
def initialize(line_definitions =
|
116
|
+
def initialize(line_definitions = {}, report_trackers = [])
|
117
117
|
@line_definitions, @report_trackers = line_definitions, report_trackers
|
118
118
|
end
|
119
119
|
|
@@ -153,7 +153,7 @@ module RequestLogAnalyzer::FileFormat
|
|
153
153
|
match = definition.matches(line, &warning_handler)
|
154
154
|
return match if match
|
155
155
|
end
|
156
|
-
|
156
|
+
|
157
157
|
return nil
|
158
158
|
end
|
159
159
|
end
|
@@ -1,20 +1,20 @@
|
|
1
1
|
module RequestLogAnalyzer::FileFormat
|
2
2
|
|
3
|
-
# FileFormat for Amazon S3 access logs.
|
3
|
+
# FileFormat for Amazon S3 access logs.
|
4
4
|
#
|
5
5
|
# Access logs are disabled by default on Amazon S3. To enable logging, see
|
6
6
|
# http://docs.amazonwebservices.com/AmazonS3/latest/index.html?ServerLogs.html
|
7
7
|
class AmazonS3 < Base
|
8
|
-
|
8
|
+
|
9
9
|
line_definition :access do |line|
|
10
10
|
line.header = true
|
11
11
|
line.footer = true
|
12
12
|
line.regexp = /^([^\ ]+) ([^\ ]+) \[(\d{2}\/[A-Za-z]{3}\/\d{4}.\d{2}:\d{2}:\d{2})(?: .\d{4})?\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) ([^\ ]+) ([^\ ]+) (\w+(?:\.\w+)*) ([^\ ]+) "([^"]+)" (\d+) ([^\ ]+) (\d+) (\d+) (\d+) (\d+) "([^"]+)" "([^"]+)"/
|
13
|
-
line.captures << { :name => :bucket_owner, :type => :string } <<
|
14
|
-
{ :name => :bucket, :type => :string } <<
|
13
|
+
line.captures << { :name => :bucket_owner, :type => :string } <<
|
14
|
+
{ :name => :bucket, :type => :string } <<
|
15
15
|
{ :name => :timestamp, :type => :timestamp } <<
|
16
16
|
{ :name => :remote_ip, :type => :string } <<
|
17
|
-
{ :name => :requester, :type => :string } <<
|
17
|
+
{ :name => :requester, :type => :string } <<
|
18
18
|
{ :name => :request_id, :type => :string } <<
|
19
19
|
{ :name => :operation, :type => :string } <<
|
20
20
|
{ :name => :key, :type => :nillable_string } <<
|
@@ -28,44 +28,44 @@ module RequestLogAnalyzer::FileFormat
|
|
28
28
|
{ :name => :referer, :type => :referer } <<
|
29
29
|
{ :name => :user_agent, :type => :user_agent }
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
report do |analyze|
|
33
33
|
analyze.timespan
|
34
34
|
analyze.hourly_spread
|
35
35
|
|
36
|
-
analyze.frequency :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :
|
37
|
-
analyze.duration :duration => :total_time, :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :
|
38
|
-
analyze.traffic :traffic => :bytes_sent, :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :
|
36
|
+
analyze.frequency :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :title => "Most popular items"
|
37
|
+
analyze.duration :duration => :total_time, :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :title => "Request duration"
|
38
|
+
analyze.traffic :traffic => :bytes_sent, :category => lambda { |r| "#{r[:bucket]}/#{r[:key]}"}, :title => "Traffic"
|
39
39
|
analyze.frequency :category => :http_status, :title => 'HTTP status codes'
|
40
40
|
analyze.frequency :category => :error_code, :title => 'Error codes'
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
class Request < RequestLogAnalyzer::Request
|
44
|
-
|
44
|
+
|
45
45
|
MONTHS = {'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06',
|
46
46
|
'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' }
|
47
|
-
|
47
|
+
|
48
48
|
# Do not use DateTime.parse, but parse the timestamp ourselves to return an integer
|
49
49
|
# to speed up parsing.
|
50
50
|
def convert_timestamp(value, definition)
|
51
51
|
"#{value[7,4]}#{MONTHS[value[3,3]]}#{value[0,2]}#{value[12,2]}#{value[15,2]}#{value[18,2]}".to_i
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
# Make sure that the string '-' is parsed as a nil value.
|
55
55
|
def convert_nillable_string(value, definition)
|
56
56
|
value == '-' ? nil : value
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
# Can be implemented in subclasses for improved categorizations
|
60
60
|
def convert_referer(value, definition)
|
61
61
|
value == '-' ? nil : value
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
# Can be implemented in subclasses for improved categorizations
|
65
65
|
def convert_user_agent(value, definition)
|
66
66
|
value == '-' ? nil : value
|
67
67
|
end
|
68
68
|
end
|
69
|
-
|
69
|
+
|
70
70
|
end
|
71
71
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module RequestLogAnalyzer::FileFormat
|
2
|
-
|
3
|
-
# The Apache file format is able to log Apache access.log files.
|
2
|
+
|
3
|
+
# The Apache file format is able to log Apache access.log files.
|
4
4
|
#
|
5
5
|
# The access.log can be configured in Apache to have many different formats. In theory, this
|
6
6
|
# FileFormat can handle any format, but it must be aware of the log formatting that is used
|
@@ -22,7 +22,7 @@ module RequestLogAnalyzer::FileFormat
|
|
22
22
|
:referer => '%{Referer}i -> %U',
|
23
23
|
:agent => '%{User-agent}i'
|
24
24
|
}
|
25
|
-
|
25
|
+
|
26
26
|
# A hash that defines how the log format directives should be parsed.
|
27
27
|
LOG_DIRECTIVES = {
|
28
28
|
'%' => { :regexp => '%', :captures => [] },
|
@@ -48,8 +48,9 @@ module RequestLogAnalyzer::FileFormat
|
|
48
48
|
# It will set up the line definition and the report trackers according to the Apache access log format,
|
49
49
|
# which should be passed as first argument. By default, it uses the 'combined' log format.
|
50
50
|
def self.create(*args)
|
51
|
-
access_line =
|
52
|
-
|
51
|
+
access_line = access_line_definition(args.first)
|
52
|
+
trackers = report_trackers(access_line) + report_definer.trackers
|
53
|
+
self.new(line_definer.line_definitions.merge(:access => access_line), trackers)
|
53
54
|
end
|
54
55
|
|
55
56
|
# Creates the access log line definition based on the Apache log format string
|
@@ -62,7 +63,7 @@ module RequestLogAnalyzer::FileFormat
|
|
62
63
|
format_string.scan(/([^%]*)(?:%(?:\{([^\}]+)\})?>?([A-Za-z%]))?/) do |literal, arg, variable|
|
63
64
|
|
64
65
|
line_regexp << Regexp.quote(literal) # Make sure to parse the literal before the directive
|
65
|
-
|
66
|
+
|
66
67
|
if variable
|
67
68
|
# Check if we recognize the log directive
|
68
69
|
directive = LOG_DIRECTIVES[variable]
|
@@ -77,7 +78,7 @@ module RequestLogAnalyzer::FileFormat
|
|
77
78
|
end
|
78
79
|
end
|
79
80
|
end
|
80
|
-
|
81
|
+
|
81
82
|
# Return a new line definition object
|
82
83
|
return RequestLogAnalyzer::LineDefinition.new(:access, :regexp => Regexp.new(line_regexp),
|
83
84
|
:captures => captures, :header => true, :footer => true)
|
@@ -90,12 +91,12 @@ module RequestLogAnalyzer::FileFormat
|
|
90
91
|
analyze.timespan if line_definition.captures?(:timestamp)
|
91
92
|
analyze.hourly_spread if line_definition.captures?(:timestamp)
|
92
93
|
|
93
|
-
analyze.frequency :category => :http_method, :
|
94
|
-
analyze.frequency :category => :http_status, :
|
95
|
-
analyze.frequency :category => lambda { |r| r.category }, :
|
94
|
+
analyze.frequency :category => :http_method, :title => "HTTP methods" if line_definition.captures?(:http_method)
|
95
|
+
analyze.frequency :category => :http_status, :title => "HTTP statuses" if line_definition.captures?(:http_status)
|
96
|
+
analyze.frequency :category => lambda { |r| r.category }, :title => "Most popular URIs" if line_definition.captures?(:path)
|
96
97
|
|
97
|
-
analyze.frequency :category => :user_agent, :
|
98
|
-
analyze.frequency :category => :referer, :
|
98
|
+
analyze.frequency :category => :user_agent, :title => "User agents" if line_definition.captures?(:user_agent)
|
99
|
+
analyze.frequency :category => :referer, :title => "Referers" if line_definition.captures?(:referer)
|
99
100
|
|
100
101
|
analyze.duration :duration => :duration, :category => lambda { |r| r.category }, :title => 'Request duration' if line_definition.captures?(:duration)
|
101
102
|
analyze.traffic :traffic => :bytes_sent, :category => lambda { |r| r.category }, :title => 'Traffic' if line_definition.captures?(:bytes_sent)
|
@@ -105,32 +106,32 @@ module RequestLogAnalyzer::FileFormat
|
|
105
106
|
|
106
107
|
# Define a custom Request class for the Apache file format to speed up timestamp handling.
|
107
108
|
class Request < RequestLogAnalyzer::Request
|
108
|
-
|
109
|
+
|
109
110
|
def category
|
110
111
|
first(:path)
|
111
112
|
end
|
112
|
-
|
113
|
+
|
113
114
|
MONTHS = {'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06',
|
114
115
|
'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' }
|
115
|
-
|
116
|
+
|
116
117
|
# Do not use DateTime.parse, but parse the timestamp ourselves to return an integer
|
117
118
|
# to speed up parsing.
|
118
119
|
def convert_timestamp(value, definition)
|
119
120
|
"#{value[7,4]}#{MONTHS[value[3,3]]}#{value[0,2]}#{value[12,2]}#{value[15,2]}#{value[18,2]}".to_i
|
120
121
|
end
|
121
|
-
|
122
|
+
|
122
123
|
# This function can be overridden to rewrite the path for better categorization in the
|
123
124
|
# reports.
|
124
125
|
def convert_path(value, definition)
|
125
126
|
value
|
126
127
|
end
|
127
|
-
|
128
|
-
# This function can be overridden to simplify the user agent string for better
|
128
|
+
|
129
|
+
# This function can be overridden to simplify the user agent string for better
|
129
130
|
# categorization in the reports
|
130
131
|
def convert_user_agent(value, definition)
|
131
132
|
value # TODO
|
132
133
|
end
|
133
|
-
|
134
|
+
|
134
135
|
# Make sure that the string '-' is parsed as a nil value.
|
135
136
|
def convert_nillable_string(value, definition)
|
136
137
|
value == '-' ? nil : value
|