ngmoco-request-log-analyzer 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. data/.gitignore +10 -0
  2. data/DESIGN.rdoc +41 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +8 -0
  6. data/bin/request-log-analyzer +114 -0
  7. data/lib/cli/command_line_arguments.rb +301 -0
  8. data/lib/cli/database_console.rb +26 -0
  9. data/lib/cli/database_console_init.rb +43 -0
  10. data/lib/cli/progressbar.rb +213 -0
  11. data/lib/cli/tools.rb +46 -0
  12. data/lib/request_log_analyzer.rb +44 -0
  13. data/lib/request_log_analyzer/aggregator.rb +49 -0
  14. data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
  15. data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
  16. data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
  17. data/lib/request_log_analyzer/controller.rb +332 -0
  18. data/lib/request_log_analyzer/database.rb +102 -0
  19. data/lib/request_log_analyzer/database/base.rb +115 -0
  20. data/lib/request_log_analyzer/database/connection.rb +38 -0
  21. data/lib/request_log_analyzer/database/request.rb +22 -0
  22. data/lib/request_log_analyzer/database/source.rb +13 -0
  23. data/lib/request_log_analyzer/database/warning.rb +14 -0
  24. data/lib/request_log_analyzer/file_format.rb +160 -0
  25. data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
  26. data/lib/request_log_analyzer/file_format/apache.rb +141 -0
  27. data/lib/request_log_analyzer/file_format/merb.rb +67 -0
  28. data/lib/request_log_analyzer/file_format/rack.rb +11 -0
  29. data/lib/request_log_analyzer/file_format/rails.rb +176 -0
  30. data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
  31. data/lib/request_log_analyzer/filter.rb +30 -0
  32. data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
  33. data/lib/request_log_analyzer/filter/field.rb +42 -0
  34. data/lib/request_log_analyzer/filter/timespan.rb +45 -0
  35. data/lib/request_log_analyzer/line_definition.rb +111 -0
  36. data/lib/request_log_analyzer/log_processor.rb +99 -0
  37. data/lib/request_log_analyzer/mailer.rb +62 -0
  38. data/lib/request_log_analyzer/output.rb +113 -0
  39. data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
  40. data/lib/request_log_analyzer/output/html.rb +184 -0
  41. data/lib/request_log_analyzer/request.rb +175 -0
  42. data/lib/request_log_analyzer/source.rb +72 -0
  43. data/lib/request_log_analyzer/source/database_loader.rb +87 -0
  44. data/lib/request_log_analyzer/source/log_parser.rb +274 -0
  45. data/lib/request_log_analyzer/tracker.rb +206 -0
  46. data/lib/request_log_analyzer/tracker/duration.rb +104 -0
  47. data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
  48. data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
  49. data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
  50. data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
  51. data/request-log-analyzer.gemspec +40 -0
  52. data/spec/database.yml +23 -0
  53. data/spec/fixtures/apache_combined.log +5 -0
  54. data/spec/fixtures/apache_common.log +10 -0
  55. data/spec/fixtures/decompression.log +12 -0
  56. data/spec/fixtures/decompression.log.bz2 +0 -0
  57. data/spec/fixtures/decompression.log.gz +0 -0
  58. data/spec/fixtures/decompression.log.zip +0 -0
  59. data/spec/fixtures/decompression.tar.gz +0 -0
  60. data/spec/fixtures/decompression.tgz +0 -0
  61. data/spec/fixtures/header_and_footer.log +6 -0
  62. data/spec/fixtures/merb.log +84 -0
  63. data/spec/fixtures/merb_prefixed.log +9 -0
  64. data/spec/fixtures/multiple_files_1.log +5 -0
  65. data/spec/fixtures/multiple_files_2.log +2 -0
  66. data/spec/fixtures/rails.db +0 -0
  67. data/spec/fixtures/rails_1x.log +59 -0
  68. data/spec/fixtures/rails_22.log +12 -0
  69. data/spec/fixtures/rails_22_cached.log +10 -0
  70. data/spec/fixtures/rails_unordered.log +24 -0
  71. data/spec/fixtures/syslog_1x.log +5 -0
  72. data/spec/fixtures/test_file_format.log +13 -0
  73. data/spec/fixtures/test_language_combined.log +14 -0
  74. data/spec/fixtures/test_order.log +16 -0
  75. data/spec/integration/command_line_usage_spec.rb +84 -0
  76. data/spec/integration/munin_plugins_rails_spec.rb +58 -0
  77. data/spec/integration/scout_spec.rb +151 -0
  78. data/spec/lib/helpers.rb +52 -0
  79. data/spec/lib/macros.rb +18 -0
  80. data/spec/lib/matchers.rb +77 -0
  81. data/spec/lib/mocks.rb +76 -0
  82. data/spec/lib/testing_format.rb +46 -0
  83. data/spec/spec_helper.rb +24 -0
  84. data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
  85. data/spec/unit/aggregator/summarizer_spec.rb +26 -0
  86. data/spec/unit/controller/controller_spec.rb +41 -0
  87. data/spec/unit/controller/log_processor_spec.rb +18 -0
  88. data/spec/unit/database/base_class_spec.rb +183 -0
  89. data/spec/unit/database/connection_spec.rb +34 -0
  90. data/spec/unit/database/database_spec.rb +133 -0
  91. data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
  92. data/spec/unit/file_format/apache_format_spec.rb +203 -0
  93. data/spec/unit/file_format/file_format_api_spec.rb +69 -0
  94. data/spec/unit/file_format/line_definition_spec.rb +75 -0
  95. data/spec/unit/file_format/merb_format_spec.rb +52 -0
  96. data/spec/unit/file_format/rails_format_spec.rb +164 -0
  97. data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
  98. data/spec/unit/filter/field_filter_spec.rb +66 -0
  99. data/spec/unit/filter/filter_spec.rb +17 -0
  100. data/spec/unit/filter/timespan_filter_spec.rb +58 -0
  101. data/spec/unit/mailer_spec.rb +30 -0
  102. data/spec/unit/request_spec.rb +111 -0
  103. data/spec/unit/source/log_parser_spec.rb +119 -0
  104. data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
  105. data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
  106. data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
  107. data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
  108. data/spec/unit/tracker/tracker_api_spec.rb +124 -0
  109. data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
  110. data/tasks/github-gem.rake +323 -0
  111. data/tasks/request_log_analyzer.rake +26 -0
  112. metadata +220 -0
@@ -0,0 +1,71 @@
1
module RequestLogAnalyzer::FileFormat

  # FileFormat for Amazon S3 access logs.
  #
  # Access logs are disabled by default on Amazon S3. To enable logging, see
  # http://docs.amazonwebservices.com/AmazonS3/latest/index.html?ServerLogs.html
  class AmazonS3 < Base

    # The single :access line definition is flagged as both header and footer.
    # The capture list below follows the order of the groups in the regexp.
    line_definition :access do |line|
      line.header = true
      line.footer = true
      line.regexp = /^([^\ ]+) ([^\ ]+) \[(\d{2}\/[A-Za-z]{3}\/\d{4}.\d{2}:\d{2}:\d{2})(?: .\d{4})?\] (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) ([^\ ]+) ([^\ ]+) (\w+(?:\.\w+)*) ([^\ ]+) "([^"]+)" (\d+) ([^\ ]+) (\d+) (\d+) (\d+) (\d+) "([^"]+)" "([^"]+)"/

      line.captures <<
        { :name => :bucket_owner,    :type => :string } <<
        { :name => :bucket,          :type => :string } <<
        { :name => :timestamp,       :type => :timestamp } <<
        { :name => :remote_ip,       :type => :string } <<
        { :name => :requester,       :type => :string } <<
        { :name => :request_id,      :type => :string } <<
        { :name => :operation,       :type => :string } <<
        { :name => :key,             :type => :nillable_string } <<
        { :name => :request_uri,     :type => :string } <<
        { :name => :http_status,     :type => :integer } <<
        { :name => :error_code,      :type => :nillable_string } <<
        { :name => :bytes_sent,      :type => :traffic, :unit => :byte } <<
        { :name => :object_size,     :type => :traffic, :unit => :byte } <<
        { :name => :total_time,      :type => :duration, :unit => :msec } <<
        { :name => :turnaround_time, :type => :duration, :unit => :msec } <<
        { :name => :referer,         :type => :referer } <<
        { :name => :user_agent,      :type => :user_agent }
    end

    # Report definition: requests are grouped per "bucket/key" for the
    # popularity, duration and traffic trackers.
    report do |analyze|
      item = lambda { |r| "#{r[:bucket]}/#{r[:key]}"}

      analyze.timespan
      analyze.hourly_spread

      analyze.frequency :category => item, :title => "Most popular items"
      analyze.duration  :duration => :total_time, :category => item, :title => "Request duration"
      analyze.traffic   :traffic => :bytes_sent, :category => item, :title => "Traffic"
      analyze.frequency :category => :http_status, :title => 'HTTP status codes'
      analyze.frequency :category => :error_code, :title => 'Error codes'
    end

    # Request class with converters for the capture types used above.
    class Request < RequestLogAnalyzer::Request

      # Maps the English month abbreviation to its two-digit month number.
      MONTHS = {
        'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
        'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
        'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12'
      }

      # Converts a "dd/Mon/yyyy:hh:mm:ss" timestamp into an integer of the
      # form yyyymmddhhmmss by slicing the string at fixed offsets. This
      # avoids DateTime.parse to speed up parsing.
      def convert_timestamp(value, definition)
        year   = value[7, 4]
        month  = MONTHS[value[3, 3]]
        day    = value[0, 2]
        hour   = value[12, 2]
        minute = value[15, 2]
        second = value[18, 2]
        "#{year}#{month}#{day}#{hour}#{minute}#{second}".to_i
      end

      # Returns nil for the placeholder string '-', the value itself otherwise.
      def convert_nillable_string(value, definition)
        value unless value == '-'
      end

      # Returns nil for '-', the raw referer otherwise.
      # Can be implemented in subclasses for improved categorizations.
      def convert_referer(value, definition)
        value unless value == '-'
      end

      # Returns nil for '-', the raw user agent otherwise.
      # Can be implemented in subclasses for improved categorizations.
      def convert_user_agent(value, definition)
        value unless value == '-'
      end
    end

  end
end
@@ -0,0 +1,141 @@
1
module RequestLogAnalyzer::FileFormat

  # The Apache file format is able to parse Apache access.log files.
  #
  # The access.log can be configured in Apache to have many different formats. In theory, this
  # FileFormat can handle any format, but it must be aware of the log formatting that is used
  # by sending the formatting string as parameter to the create method, e.g.:
  #
  #     RequestLogAnalyzer::FileFormat::Apache.create('%h %l %u %t "%r" %>s %b')
  #
  # It also supports the predefined Apache log formats "common" and "combined". The line
  # definition and the report definition will be constructed using this file format string.
  # From the command line, you can provide the format string using the <tt>--apache-format</tt>
  # command line option.
  class Apache < Base

    # A hash of predefined Apache log formats
    LOG_FORMAT_DEFAULTS = {
      :common   => '%h %l %u %t "%r" %>s %b',
      :combined => '%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-agent}i"',
      :rack     => '%h %l %u %t "%r" %>s %b %T',
      :referer  => '%{Referer}i -> %U',
      :agent    => '%{User-agent}i'
    }

    # A hash that defines how the log format directives should be parsed.
    #
    # Each entry maps a directive letter to a regexp fragment (as a string) and the captures
    # that fragment produces. The 'i' entry is nested one level deeper: it is keyed by the
    # header name given between curly braces in the format string.
    LOG_DIRECTIVES = {
      '%' => { :regexp => '%', :captures => [] },
      'h' => { :regexp => '([A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+)', :captures => [{:name => :remote_host, :type => :string}] },
      'a' => { :regexp => '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', :captures => [{:name => :remote_ip, :type => :string}] },
      'b' => { :regexp => '(\d+|-)', :captures => [{:name => :bytes_sent, :type => :traffic}] },
      # The connection status is one of the literal characters X, + or -. A bare X is used
      # because "\X" is a grapheme-cluster escape in newer regexp engines and would match
      # any character.
      # NOTE(review): the capture type is :integer although the value is a character - confirm.
      'c' => { :regexp => '(\+|\-|X)', :captures => [{:name => :connection_status, :type => :integer}] },
      'D' => { :regexp => '(\d+|-)', :captures => [{:name => :duration, :type => :duration, :unit => :musec}] },
      'l' => { :regexp => '([\w-]+)', :captures => [{:name => :remote_logname, :type => :nillable_string}] },
      # The fractional part is optional: Apache logs whole seconds for %T, while the
      # :rack format above is expected to carry a decimal value.
      'T' => { :regexp => '((?:\d+(?:\.\d+)?)|-)', :captures => [{:name => :duration, :type => :duration, :unit => :sec}] },
      't' => { :regexp => '\[(\d{2}\/[A-Za-z]{3}\/\d{4}.\d{2}:\d{2}:\d{2})(?: .\d{4})?\]', :captures => [{:name => :timestamp, :type => :timestamp}] },
      's' => { :regexp => '(\d{3})', :captures => [{:name => :http_status, :type => :integer}] },
      'u' => { :regexp => '(\w+|-)', :captures => [{:name => :user, :type => :nillable_string}] },
      'U' => { :regexp => '(\/\S*)', :captures => [{:name => :path, :type => :string}] },
      'r' => { :regexp => '([A-Z]+) (\S+) HTTP\/(\d+(?:\.\d+)*)', :captures => [{:name => :http_method, :type => :string},
                    {:name => :path, :type => :path}, {:name => :http_version, :type => :string}]},
      'i' => { 'Referer'    => { :regexp => '(\S+)', :captures => [{:name => :referer, :type => :nillable_string}] },
               'User-agent' => { :regexp => '(.*)', :captures => [{:name => :user_agent, :type => :user_agent}] }
             }
    }

    # Creates the Apache log format language based on a Apache log format string.
    # It will set up the line definition and the report trackers according to the Apache access log format,
    # which should be passed as first argument. By default, it uses the 'combined' log format.
    #
    # Only the first argument is inspected; it may be a format string or the name of one
    # of the LOG_FORMAT_DEFAULTS entries.
    def self.create(*args)
      access_line = access_line_definition(args.first)
      trackers = report_trackers(access_line) + report_definer.trackers
      self.new(line_definer.line_definitions.merge(:access => access_line), trackers)
    end

    # Creates the access log line definition based on the Apache log format string.
    #
    # format_string:: a log format string, the name of a predefined format (string or
    #                 symbol), or nil to use :combined.
    #
    # The format string is scanned as a sequence of "literal text, optional directive"
    # pairs. Literal text is regexp-quoted; every recognized directive contributes its
    # regexp fragment and captures. Directives that are not recognized are reported on
    # standard output and matched with a non-capturing '.*'.
    #
    # Returns a RequestLogAnalyzer::LineDefinition named :access that is both header and footer.
    def self.access_line_definition(format_string)
      format_string ||= :combined
      format_string   = LOG_FORMAT_DEFAULTS[format_string.to_sym] || format_string

      line_regexp = ''
      captures    = []
      format_string.scan(/([^%]*)(?:%(?:\{([^\}]+)\})?>?([A-Za-z%]))?/) do |literal, arg, variable|

        line_regexp << Regexp.quote(literal) # Make sure to parse the literal before the directive

        if variable
          # Check if we recognize the log directive
          directive = LOG_DIRECTIVES[variable]
          directive = directive[arg] if directive && arg

          # A directive that needs an argument but did not get one (e.g. a bare %i) still
          # points at the nested hash here and has no :regexp; treat it as unsupported
          # instead of appending nil to the regexp string.
          if directive && directive[:regexp]
            line_regexp << directive[:regexp]   # Parse the value of the directive
            captures    += directive[:captures] # Add the directive's information to the captures
          else
            # Report the directive as it was written in the format string.
            unsupported = arg ? "%{#{arg}}#{variable}" : "%#{variable}"
            puts "#{unsupported} log directive not yet supported, field is ignored."
            line_regexp << '.*' # Just accept any input for this literal
          end
        end
      end

      # Return a new line definition object
      return RequestLogAnalyzer::LineDefinition.new(:access, :regexp => Regexp.new(line_regexp),
                                        :captures => captures, :header => true, :footer => true)
    end

    # Sets up the report trackers according to the fields captured by the access line definition.
    #
    # A tracker is only added when the line definition captures the field it reports on.
    # Returns the list of trackers collected by the definer.
    def self.report_trackers(line_definition)
      analyze = RequestLogAnalyzer::Aggregator::Summarizer::Definer.new

      analyze.timespan      if line_definition.captures?(:timestamp)
      analyze.hourly_spread if line_definition.captures?(:timestamp)

      analyze.frequency :category => :http_method, :title => "HTTP methods"  if line_definition.captures?(:http_method)
      analyze.frequency :category => :http_status, :title => "HTTP statuses" if line_definition.captures?(:http_status)
      analyze.frequency :category => lambda { |r| r.category }, :title => "Most popular URIs" if line_definition.captures?(:path)

      analyze.frequency :category => :user_agent, :title => "User agents" if line_definition.captures?(:user_agent)
      analyze.frequency :category => :referer,    :title => "Referers"    if line_definition.captures?(:referer)

      analyze.duration :duration => :duration,  :category => lambda { |r| r.category }, :title => 'Request duration' if line_definition.captures?(:duration)
      analyze.traffic  :traffic => :bytes_sent, :category => lambda { |r| r.category }, :title => 'Traffic' if line_definition.captures?(:bytes_sent)

      return analyze.trackers
    end

    # Define a custom Request class for the Apache file format to speed up timestamp handling.
    class Request < RequestLogAnalyzer::Request

      # The category used by the report trackers: the first captured :path value.
      def category
        first(:path)
      end

      # Maps the English month abbreviation to its two-digit month number.
      MONTHS = {'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06',
                'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' }

      # Do not use DateTime.parse, but parse the timestamp ourselves to return an integer
      # to speed up parsing. The value is expected in "dd/Mon/yyyy:hh:mm:ss" form and the
      # result has the form yyyymmddhhmmss.
      def convert_timestamp(value, definition)
        "#{value[7,4]}#{MONTHS[value[3,3]]}#{value[0,2]}#{value[12,2]}#{value[15,2]}#{value[18,2]}".to_i
      end

      # This function can be overridden to rewrite the path for better categorization in the
      # reports. By default the path is returned unchanged.
      def convert_path(value, definition)
        value
      end

      # This function can be overridden to simplify the user agent string for better
      # categorization in the reports. By default the string is returned unchanged.
      def convert_user_agent(value, definition)
        value # TODO
      end

      # Make sure that the string '-' is parsed as a nil value.
      def convert_nillable_string(value, definition)
        value == '-' ? nil : value
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module RequestLogAnalyzer::FileFormat

  # File format for Merb logs. Three kinds of lines are parsed: the request
  # header carrying the timestamp, the params line with the most important
  # request information, and the durations line whose timings are used for
  # analysis.
  class Merb < Base

    # Example:
    #  ~ Started request handling: Fri Aug 29 11:10:23 +0200 2008
    line_definition :started do |line|
      line.header = true
      line.teaser = /Started request handling\:/
      line.regexp = /Started request handling\:\ (.+)/
      line.captures << { :name => :timestamp, :type => :timestamp }
    end

    # Examples:
    #  ~ Params: {"action"=>"create", "controller"=>"session"}
    #  ~ Params: {"_method"=>"delete", "authenticity_token"=>"[FILTERED]", "action"=>"d}
    #
    # NOTE(review): the :eval capture type presumably evaluates the captured
    # hash text as Ruby - confirm that only trusted log files are parsed.
    line_definition :params do |line|
      line.teaser = /Params\:\ /
      line.regexp = /Params\:\ (\{.+\})/
      line.captures << {
        :name     => :params,
        :type     => :eval,
        :provides => {
          :namespace => :string, :controller => :string, :action => :string, :format => :string, :method => :string
        }
      }
    end

    # Example:
    #  ~ {:dispatch_time=>0.006117, :after_filters_time=>6.1e-05, :before_filters_time=>0.000712, :action_time=>0.005833}
    line_definition :completed do |line|
      line.footer = true
      # line.teaser = Regexp.new(Regexp.quote('~ {:'))
      line.regexp = /(\{.*\:dispatch_time\s*=>\s*\d+\.\d+.*\})/
      line.captures << {
        :name     => :times_hash,
        :type     => :eval,
        :provides => {
          :dispatch_time       => :duration,
          :after_filters_time  => :duration,
          :before_filters_time => :duration,
          :action_time         => :duration
        }
      }
    end

    # Builds the report category for a request: "controller#action",
    # prefixed with "namespace::" and suffixed with ".format" when the
    # request provides those fields.
    REQUEST_CATEGORIZER = Proc.new do |request|
      label = "#{request[:controller]}##{request[:action]}"
      label = "#{request[:namespace]}::#{label}" if request[:namespace]
      request[:format] ? "#{label}.#{request[:format]}" : label
    end

    report do |analyze|

      analyze.timespan
      analyze.hourly_spread

      analyze.frequency :category => REQUEST_CATEGORIZER, :title => "Top 20 by hits"
      analyze.duration :dispatch_time, :category => REQUEST_CATEGORIZER, :title => 'Request dispatch duration'

      # analyze.duration :action_time, :category => REQUEST_CATEGORIZER, :title => 'Request action duration'
      # analyze.duration :after_filters_time, :category => REQUEST_CATEGORIZER, :title => 'Request after_filter duration'
      # analyze.duration :before_filters_time, :category => REQUEST_CATEGORIZER, :title => 'Request before_filter duration'
    end

    # Request class with a hand-written timestamp converter.
    class Request < RequestLogAnalyzer::Request

      # Maps the English month abbreviation to its two-digit month number.
      MONTHS = {
        'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04',
        'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08',
        'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12'
      }

      # Speeds up timestamp conversion by slicing a value such as
      # "Fri Aug 29 11:10:23 +0200 2008" at fixed offsets; the result is
      # an integer of the form yyyymmddhhmmss.
      def convert_timestamp(value, definition)
        year   = value[26, 4]
        month  = MONTHS[value[4, 3]]
        day    = value[8, 2]
        hour   = value[11, 2]
        minute = value[14, 2]
        second = value[17, 2]
        "#{year}#{month}#{day}#{hour}#{minute}#{second}".to_i
      end
    end
  end

end
@@ -0,0 +1,11 @@
1
module RequestLogAnalyzer::FileFormat

  # Apache-based file format that always selects the :rack log format.
  class Rack < Apache

    class << self
      # Delegates to the superclass' create with :rack as the first
      # argument, followed by whatever arguments were given.
      def create(*args)
        super(:rack, *args)
      end
    end

  end

end
@@ -0,0 +1,176 @@
1
module RequestLogAnalyzer::FileFormat

  # Default FileFormat class for Rails logs.
  #
  # Instances will be created dynamically based on the lines you want it to parse. You can
  # specify what lines should be included in the parser by providing a list to the create
  # method as first argument.
  class Rails < Base

    # Creates a Rails FileFormat instance.
    #
    # The lines that will be parsed can be defined by the argument to this function,
    # which should be an array of line names, or a list of line names as comma separated
    # string. The resulting report depends on the lines that will be parsed. You can
    # also provide a string that describes a common set of lines: "minimal", "production",
    # "development" or "all" (see LINE_COLLECTIONS).
    #
    # lines:: an Array of names, a comma separated String of names, or a single Symbol.
    #         Whitespace around the individual names is ignored.
    #
    # Raises a RuntimeError when a name is neither a line collection nor a line definition.
    def self.create(lines = 'production')
      definitions_hash = line_definer.line_definitions.clone

      lines = lines.split(',')  if lines.kind_of?(String)
      lines = [lines.to_s]      if lines.kind_of?(Symbol)

      lines.each do |line|
        # Strip the name so that "processing, completed" is accepted as well.
        line = line.to_s.strip.to_sym
        if LINE_COLLECTIONS.has_key?(line)
          LINE_COLLECTIONS[line].each { |l| definitions_hash[l] = LINE_DEFINITIONS[l] }
        elsif LINE_DEFINITIONS.has_key?(line)
          definitions_hash[line] = LINE_DEFINITIONS[line]
        else
          raise "Unrecognized Rails log line name: #{line.inspect}!"
        end
      end

      return self.new(definitions_hash, report_trackers(definitions_hash))
    end

    # Creates trackers based on the specified line definitions.
    #
    # The more lines that will be parsed, the more information will appear in the report.
    #
    # lines:: a hash of line definitions keyed by line name; only the presence of the keys
    #         :cache_hit, :failure, :rendered and :query_executed is checked here.
    #
    # Returns the trackers defined here followed by the trackers of the report definer.
    def self.report_trackers(lines)
      analyze = RequestLogAnalyzer::Aggregator::Summarizer::Definer.new

      analyze.timespan
      analyze.hourly_spread

      analyze.frequency :category => REQUEST_CATEGORIZER, :title => 'Most requested'
      analyze.frequency :method, :title => 'HTTP methods'
      analyze.frequency :status, :title => 'HTTP statuses returned'

      if lines.has_key?(:cache_hit)
        analyze.frequency(:category => lambda { |request| request =~ :cache_hit ? 'Cache hit' : 'No hit' },
              :title => 'Rails action cache hits')
      end

      analyze.duration :duration, :category => REQUEST_CATEGORIZER, :title => "Request duration",    :line_type => :completed
      analyze.duration :view,     :category => REQUEST_CATEGORIZER, :title => "View rendering time", :line_type => :completed
      analyze.duration :db,       :category => REQUEST_CATEGORIZER, :title => "Database time",       :line_type => :completed

      analyze.frequency :category => REQUEST_CATEGORIZER, :title => 'Process blockers (> 1 sec duration)',
              :if => lambda { |request| request[:duration] && request[:duration] > 1.0 }

      if lines.has_key?(:failure)
        analyze.frequency :error, :title => 'Failed requests', :line_type => :failure
      end

      if lines.has_key?(:rendered)
        analyze.duration :render_duration, :category => :render_file, :multiple_per_request => true, :title => 'Partial rendering duration'
      end

      if lines.has_key?(:query_executed)
        analyze.duration :query_duration, :category => :query_sql, :multiple_per_request => true, :title => 'Query duration'
      end

      return analyze.trackers + report_definer.trackers
    end

    # Rails < 2.1 completed line example
    # Completed in 0.21665 (4 reqs/sec) | Rendering: 0.00926 (4%) | DB: 0.00000 (0%) | 200 OK [http://demo.nu/employees]
    RAILS_21_COMPLETED = /Completed in (\d+\.\d{5}) \(\d+ reqs\/sec\) (?:\| Rendering: (\d+\.\d{5}) \(\d+\%\) )?(?:\| DB: (\d+\.\d{5}) \(\d+\%\) )?\| (\d\d\d).+\[(http.+)\]/

    # Rails > 2.1 completed line example
    # Completed in 614ms (View: 120, DB: 31) | 200 OK [http://floorplanner.local/demo]
    RAILS_22_COMPLETED = /Completed in (\d+)ms \((?:View: (\d+), )?DB: (\d+)\) \| (\d\d\d).+\[(http.+)\]/

    # A hash of definitions for all common lines in Rails logs.
    LINE_DEFINITIONS = {
      :processing => RequestLogAnalyzer::LineDefinition.new(:processing, :header => true,
            :teaser   => /Processing /,
            :regexp   => /Processing ((?:\w+::)?\w+)#(\w+)(?: to (\w+))? \(for (\d+\.\d+\.\d+\.\d+) at (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)\) \[([A-Z]+)\]/,
            :captures => [{ :name => :controller, :type  => :string },
                          { :name => :action,     :type  => :string },
                          { :name => :format,     :type  => :string, :default => 'html' },
                          { :name => :ip,         :type  => :string },
                          { :name => :timestamp,  :type  => :timestamp },
                          { :name => :method,     :type  => :string }]),

      # The regexp is the union of both "Completed" variants, so the capture list holds the
      # captures of the old variant followed by the captures of the new variant.
      :completed => RequestLogAnalyzer::LineDefinition.new(:completed, :footer => true,
            :teaser   => /Completed in /,
            :regexp   => Regexp.union(RAILS_21_COMPLETED, RAILS_22_COMPLETED),
            :captures => [{ :name => :duration, :type => :duration, :unit => :sec },   # First old variant capture
                          { :name => :view,     :type => :duration, :unit => :sec },
                          { :name => :db,       :type => :duration, :unit => :sec },
                          { :name => :status,   :type => :integer },
                          { :name => :url,      :type => :string },                    # Last old variant capture
                          { :name => :duration, :type => :duration, :unit => :msec },  # First new variant capture
                          { :name => :view,     :type => :duration, :unit => :msec },
                          { :name => :db,       :type => :duration, :unit => :msec },
                          { :name => :status,   :type => :integer },
                          { :name => :url,      :type => :string }]),                  # Last new variant capture

      :failure => RequestLogAnalyzer::LineDefinition.new(:failure, :footer => true,
            :teaser   => /((?:[A-Z]\w*[a-z]\w+\:\:)*[A-Z]\w*[a-z]\w+) \((.*)\)(?: on line #(\d+) of (.+))?\:/,
            :regexp   => /((?:[A-Z]\w*[a-z]\w+\:\:)*[A-Z]\w*[a-z]\w+) \((.*)\)(?: on line #(\d+) of (.+))?\:\s*$/,
            :captures => [{ :name => :error,   :type => :string },
                          { :name => :message, :type => :string },
                          { :name => :line,    :type => :integer },
                          { :name => :file,    :type => :string }]),

      :cache_hit => RequestLogAnalyzer::LineDefinition.new(:cache_hit,
            :regexp => /Filter chain halted as \[\#<ActionController::Caching::Actions::ActionCacheFilter/),

      :parameters => RequestLogAnalyzer::LineDefinition.new(:parameters,
            :teaser   => / Parameters:/,
            :regexp   => / Parameters:\s+(\{.*\})/,
            :captures => [{ :name => :params, :type => :eval }]),

      :rendered => RequestLogAnalyzer::LineDefinition.new(:rendered,
            :teaser   => /Rendered /,
            :regexp   => /Rendered (\w+(?:\/\w+)+) \((\d+\.\d+)ms\)/,
            :captures => [{ :name => :render_file,     :type => :string },
                          { :name => :render_duration, :type => :duration, :unit => :msec }]),

      :query_executed => RequestLogAnalyzer::LineDefinition.new(:query_executed,
            :regexp   => /\s+(?:\e\[4;36;1m)?((?:\w+::)*\w+) Load \((\d+\.\d+)ms\)(?:\e\[0m)?\s+(?:\e\[0;1m)?([^\e]+) ?(?:\e\[0m)?/,
            :captures => [{ :name => :query_class,    :type => :string },
                          { :name => :query_duration, :type => :duration, :unit => :msec },
                          { :name => :query_sql,      :type => :sql }]),

      :query_cached => RequestLogAnalyzer::LineDefinition.new(:query_cached,
            :regexp   => /\s+(?:\e\[4;35;1m)?CACHE \((\d+\.\d+)ms\)(?:\e\[0m)?\s+(?:\e\[0m)?([^\e]+) ?(?:\e\[0m)?/,
            :captures => [{ :name => :cached_duration, :type => :duration, :unit => :msec },
                          { :name => :cached_sql,      :type => :sql }])
    }

    # Definitions of common combinations of lines that can be parsed
    LINE_COLLECTIONS = {
      :minimal     => [:processing, :completed],
      :production  => [:processing, :completed, :failure, :cache_hit],
      :development => [:processing, :completed, :failure, :rendered, :query_executed, :query_cached],
      :all         => LINE_DEFINITIONS.keys
    }


    # Simple function to categorize Rails requests using controller/actions/format and method.
    REQUEST_CATEGORIZER = Proc.new do |request|
      "#{request[:controller]}##{request[:action]}.#{request[:format]} [#{request[:method]}]"
    end

    # Define a custom Request class for the Rails file format to speed up timestamp handling
    # and to normalize SQL queries.
    class Request < RequestLogAnalyzer::Request

      # Do not use DateTime.parse: drop every non-digit character and convert the first
      # 14 digits (yyyymmddhhmmss) to an integer.
      def convert_timestamp(value, definition)
        value.gsub(/[^0-9]/, '')[0...14].to_i
      end

      # Sanitizes SQL queries so that they can be grouped: standalone numbers become
      # ':int', backticks around identifiers are removed, single-quoted strings become
      # ':string' and trailing whitespace is stripped.
      def convert_sql(sql, definition)
        sql.gsub(/\b\d+\b/, ':int').gsub(/`([^`]+)`/, '\1').gsub(/'[^']*'/, ':string').rstrip
      end
    end

  end

end