ngmoco-request-log-analyzer 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. data/.gitignore +10 -0
  2. data/DESIGN.rdoc +41 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +8 -0
  6. data/bin/request-log-analyzer +114 -0
  7. data/lib/cli/command_line_arguments.rb +301 -0
  8. data/lib/cli/database_console.rb +26 -0
  9. data/lib/cli/database_console_init.rb +43 -0
  10. data/lib/cli/progressbar.rb +213 -0
  11. data/lib/cli/tools.rb +46 -0
  12. data/lib/request_log_analyzer.rb +44 -0
  13. data/lib/request_log_analyzer/aggregator.rb +49 -0
  14. data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
  15. data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
  16. data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
  17. data/lib/request_log_analyzer/controller.rb +332 -0
  18. data/lib/request_log_analyzer/database.rb +102 -0
  19. data/lib/request_log_analyzer/database/base.rb +115 -0
  20. data/lib/request_log_analyzer/database/connection.rb +38 -0
  21. data/lib/request_log_analyzer/database/request.rb +22 -0
  22. data/lib/request_log_analyzer/database/source.rb +13 -0
  23. data/lib/request_log_analyzer/database/warning.rb +14 -0
  24. data/lib/request_log_analyzer/file_format.rb +160 -0
  25. data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
  26. data/lib/request_log_analyzer/file_format/apache.rb +141 -0
  27. data/lib/request_log_analyzer/file_format/merb.rb +67 -0
  28. data/lib/request_log_analyzer/file_format/rack.rb +11 -0
  29. data/lib/request_log_analyzer/file_format/rails.rb +176 -0
  30. data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
  31. data/lib/request_log_analyzer/filter.rb +30 -0
  32. data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
  33. data/lib/request_log_analyzer/filter/field.rb +42 -0
  34. data/lib/request_log_analyzer/filter/timespan.rb +45 -0
  35. data/lib/request_log_analyzer/line_definition.rb +111 -0
  36. data/lib/request_log_analyzer/log_processor.rb +99 -0
  37. data/lib/request_log_analyzer/mailer.rb +62 -0
  38. data/lib/request_log_analyzer/output.rb +113 -0
  39. data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
  40. data/lib/request_log_analyzer/output/html.rb +184 -0
  41. data/lib/request_log_analyzer/request.rb +175 -0
  42. data/lib/request_log_analyzer/source.rb +72 -0
  43. data/lib/request_log_analyzer/source/database_loader.rb +87 -0
  44. data/lib/request_log_analyzer/source/log_parser.rb +274 -0
  45. data/lib/request_log_analyzer/tracker.rb +206 -0
  46. data/lib/request_log_analyzer/tracker/duration.rb +104 -0
  47. data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
  48. data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
  49. data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
  50. data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
  51. data/request-log-analyzer.gemspec +40 -0
  52. data/spec/database.yml +23 -0
  53. data/spec/fixtures/apache_combined.log +5 -0
  54. data/spec/fixtures/apache_common.log +10 -0
  55. data/spec/fixtures/decompression.log +12 -0
  56. data/spec/fixtures/decompression.log.bz2 +0 -0
  57. data/spec/fixtures/decompression.log.gz +0 -0
  58. data/spec/fixtures/decompression.log.zip +0 -0
  59. data/spec/fixtures/decompression.tar.gz +0 -0
  60. data/spec/fixtures/decompression.tgz +0 -0
  61. data/spec/fixtures/header_and_footer.log +6 -0
  62. data/spec/fixtures/merb.log +84 -0
  63. data/spec/fixtures/merb_prefixed.log +9 -0
  64. data/spec/fixtures/multiple_files_1.log +5 -0
  65. data/spec/fixtures/multiple_files_2.log +2 -0
  66. data/spec/fixtures/rails.db +0 -0
  67. data/spec/fixtures/rails_1x.log +59 -0
  68. data/spec/fixtures/rails_22.log +12 -0
  69. data/spec/fixtures/rails_22_cached.log +10 -0
  70. data/spec/fixtures/rails_unordered.log +24 -0
  71. data/spec/fixtures/syslog_1x.log +5 -0
  72. data/spec/fixtures/test_file_format.log +13 -0
  73. data/spec/fixtures/test_language_combined.log +14 -0
  74. data/spec/fixtures/test_order.log +16 -0
  75. data/spec/integration/command_line_usage_spec.rb +84 -0
  76. data/spec/integration/munin_plugins_rails_spec.rb +58 -0
  77. data/spec/integration/scout_spec.rb +151 -0
  78. data/spec/lib/helpers.rb +52 -0
  79. data/spec/lib/macros.rb +18 -0
  80. data/spec/lib/matchers.rb +77 -0
  81. data/spec/lib/mocks.rb +76 -0
  82. data/spec/lib/testing_format.rb +46 -0
  83. data/spec/spec_helper.rb +24 -0
  84. data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
  85. data/spec/unit/aggregator/summarizer_spec.rb +26 -0
  86. data/spec/unit/controller/controller_spec.rb +41 -0
  87. data/spec/unit/controller/log_processor_spec.rb +18 -0
  88. data/spec/unit/database/base_class_spec.rb +183 -0
  89. data/spec/unit/database/connection_spec.rb +34 -0
  90. data/spec/unit/database/database_spec.rb +133 -0
  91. data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
  92. data/spec/unit/file_format/apache_format_spec.rb +203 -0
  93. data/spec/unit/file_format/file_format_api_spec.rb +69 -0
  94. data/spec/unit/file_format/line_definition_spec.rb +75 -0
  95. data/spec/unit/file_format/merb_format_spec.rb +52 -0
  96. data/spec/unit/file_format/rails_format_spec.rb +164 -0
  97. data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
  98. data/spec/unit/filter/field_filter_spec.rb +66 -0
  99. data/spec/unit/filter/filter_spec.rb +17 -0
  100. data/spec/unit/filter/timespan_filter_spec.rb +58 -0
  101. data/spec/unit/mailer_spec.rb +30 -0
  102. data/spec/unit/request_spec.rb +111 -0
  103. data/spec/unit/source/log_parser_spec.rb +119 -0
  104. data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
  105. data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
  106. data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
  107. data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
  108. data/spec/unit/tracker/tracker_api_spec.rb +124 -0
  109. data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
  110. data/tasks/github-gem.rake +323 -0
  111. data/tasks/request_log_analyzer.rake +26 -0
  112. metadata +220 -0
@@ -0,0 +1,12 @@
1
module RequestLogAnalyzer::FileFormat

  # Deprecated shortcut for the Rails file format in its 'development' flavour.
  # According to the original documentation of this class, that flavour covers
  # every line of the regular Rails format and additionally parses SQL query
  # lines and partial rendering lines.
  class RailsDevelopment < Rails
    class << self
      # Prints a deprecation notice to standard output and then delegates to
      # the parent class' +create+, passing 'development' as its only argument.
      # Returns whatever the parent's +create+ returns.
      def create
        puts 'DEPRECATED: use --rails-format development instead!'
        super('development')
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module RequestLogAnalyzer::Filter

  # Hook that Ruby invokes when an unknown constant is referenced inside this
  # module. The lookup is forwarded to RequestLogAnalyzer.load_default_class_file
  # together with this module and the name of the missing constant, so that the
  # file for a filter class is loaded based on the class name.
  def self.const_missing(const)
    RequestLogAnalyzer.load_default_class_file(self, const)
  end

  # Common ancestor for request filters.
  # Every concrete filter should inherit from this class.
  class Base

    attr_reader :file_format, :options

    # Remembers the construction arguments so subclasses can read them.
    # <tt>format</tt> The file format.
    # <tt>options</tt> Hash of settings handed to the filter.
    def initialize(format, options = {})
      @file_format, @options = format, options
    end

    # Decides what happens to a request: return the request to keep it,
    # return nil to drop it. This base implementation keeps every request.
    def filter(request)
      request
    end
  end

end
@@ -0,0 +1,39 @@
1
module RequestLogAnalyzer::Filter

  # Filter that anonymizes parsed values. It never drops a request; it only
  # rewrites some of the request's attributes in place:
  # * <tt>:ip</tt> is replaced by a randomly generated dotted-quad address.
  # * <tt>:url</tt> gets its leading scheme and host replaced by http://example.com/.
  # * Every attribute listed in FUZZED_FIELDS is multiplied by a random factor.
  #
  # This filter does not read any options of its own.
  class Anonymize < Base

    # Attribute names whose values are scaled by a random factor.
    FUZZED_FIELDS = [:duration, :view, :db, :type, :after_filters_time,
                     :before_filters_time, :action_time].freeze

    # Returns a random address string built from four numbers in 0..255.
    def generate_random_ip
      "#{rand(256)}.#{rand(256)}.#{rand(256)}.#{rand(256)}"
    end

    # Replaces a leading "http(s)://host/" part of +value+ by
    # "http://example.com/". Values without such a prefix are returned
    # unchanged; nil is passed through instead of raising.
    def anonymize_url(value)
      return value if value.nil?

      value.sub(/^https?\:\/\/[A-Za-z0-9\.-]+\//, "http://example.com/")
    end

    # Multiplies +value+ by a random factor between 0.75 and 1.24.
    # nil (an attribute without a parsed value) is passed through untouched,
    # because there is nothing to scale.
    def fuzz(value)
      return value if value.nil?

      value * ((75 + rand(50)) / 100.0)
    end

    # Anonymizes the attributes of +request+ in place and returns the request.
    # <tt>request</tt> Request object; only its +attributes+ hash is used.
    def filter(request)
      # TODO: request.attributes is bad practice
      attributes = request.attributes

      # Only values of existing keys are replaced, so the hash can be
      # updated safely while it is being iterated.
      attributes.each do |key, value|
        if key == :ip
          attributes[key] = generate_random_ip
        elsif key == :url
          attributes[key] = anonymize_url(value)
        elsif FUZZED_FIELDS.include?(key)
          attributes[key] = fuzz(value)
        end
      end

      request
    end
  end

end
@@ -0,0 +1,42 @@
1
module RequestLogAnalyzer::Filter

  # Filter that keeps or drops requests depending on the value of one field.
  # Options
  # * <tt>:mode</tt> :accept (the default; :select is treated the same way) or :reject.
  # * <tt>:field</tt> Specific field to accept or reject.
  # * <tt>:value</tt> Value that the field should match to be accepted or rejected.
  #   A string wrapped in slashes (e.g. "/foo/") is turned into a regular expression.
  class Field < Base

    # Modes in which only requests with a matching field value are kept.
    # :accept is the documented (and default) name; :select is the name the
    # filter logic historically checked for, so both are honoured.
    KEEP_MODES = [:accept, :select].freeze

    attr_reader :field, :value, :mode

    # <tt>file_format</tt> The file format.
    # <tt>options</tt> Hash with the :mode, :field and :value settings.
    def initialize(file_format, options = {})
      super(file_format, options)
      setup_filter
    end

    # Setup mode, field and value from the options hash.
    def setup_filter
      @mode  = (@options[:mode] || :accept).to_sym
      @field = @options[:field].to_sym

      # A value written as "/.../" is interpreted as a regular expression;
      # anything else is used as given.
      raw_value = @options[:value]
      if raw_value.kind_of?(String) && raw_value[0, 1] == '/' && raw_value[-1, 1] == '/'
        @value = Regexp.new(raw_value[1..-2])
      else
        @value = raw_value # TODO: convert value?
      end
    end

    # Returns the request when it should be kept and nil when it should be dropped.
    # * In :accept / :select mode the request is kept only if one of the field's
    #   values matches.
    # * In :reject mode the request is dropped if one of the field's values matches.
    # * Any other mode keeps every request.
    # Values are compared in their string form using <tt>===</tt>.
    # <tt>request</tt> Request object; must respond to +every+.
    def filter(request)
      found_field = request.every(@field).any? { |value| @value === value.to_s }
      return nil if !found_field && KEEP_MODES.include?(@mode)
      return nil if found_field && @mode == :reject

      request
    end
  end

end
@@ -0,0 +1,45 @@
1
module RequestLogAnalyzer::Filter

  # Reject all requests not in given timespan
  # Options
  # * <tt>:after</tt> Only keep requests after this DateTime.
  # * <tt>:before</tt> Only keep requests before this DateTime.
  # Both bounds are inclusive. The option values only need to respond to +strftime+.
  class Timespan < Base

    # strftime pattern that turns a point in time into a sortable number
    # (YYYYMMDDHHMMSS) for quick comparisons.
    TIMESTAMP_FORMAT = '%Y%m%d%H%M%S'

    attr_reader :before, :after

    # <tt>file_format</tt> The file format.
    # <tt>options</tt> Hash with the optional :after and :before settings.
    def initialize(file_format, options = {})
      @after  = nil
      @before = nil
      super(file_format, options)
      setup_filter
    end

    # Convert the configured bounds to integers for quick timestamp comparisons.
    # These are stored in the before and after attr_reader fields; a bound that
    # is not configured stays nil.
    def setup_filter
      @after  = @options[:after].strftime(TIMESTAMP_FORMAT).to_i  if @options[:after]
      @before = @options[:before].strftime(TIMESTAMP_FORMAT).to_i if @options[:before]
    end

    # Returns request if:
    # * @after <= request.timestamp <= @before
    # * @after <= request.timestamp (only :after configured)
    # * request.timestamp <= @before (only :before configured)
    # Returns nil otherwise, which includes the case in which no bound is
    # configured at all and the case in which the request has no timestamp
    # (previously a nil timestamp raised NoMethodError).
    # <tt>request</tt> Request object.
    def filter(request)
      return nil unless @after || @before

      timestamp = request.timestamp
      return nil if timestamp.nil?
      return nil if @after && timestamp < @after
      return nil if @before && timestamp > @before

      request
    end
  end

end
@@ -0,0 +1,111 @@
1
module RequestLogAnalyzer

  # A line definition describes one kind of line that should be parsed from a
  # log file. It can test a line against the definition and extract the
  # captured information from it. According to the original documentation it
  # is used by the LogParser class while parsing a log file.
  class LineDefinition

    # Collects line definitions by name. Every method call that the object
    # does not know is taken as the name of a new line definition.
    class Definer

      attr_accessor :line_definitions

      def initialize
        @line_definitions = {}
      end

      # Copy hook for dup/clone: the copy receives its own (shallow) duplicate
      # of the definitions hash.
      def initialize_copy(other)
        @line_definitions = other.line_definitions.dup
      end

      # Registers a line definition under +name+. With a block, the definition
      # is built by LineDefinition.define and configured inside the block;
      # without one, the first argument is passed on as the definition hash.
      def method_missing(name, *args, &block)
        @line_definitions[name] =
          if block_given?
            RequestLogAnalyzer::LineDefinition.define(name, &block)
          else
            RequestLogAnalyzer::LineDefinition.new(name, args.first)
          end
      end
    end

    attr_reader :name
    attr_accessor :teaser, :regexp, :captures
    attr_accessor :header, :footer

    alias_method :header?, :header
    alias_method :footer?, :footer

    # Builds a definition called +name+. Every key of the +definition+ hash is
    # assigned through the setter of the same name (e.g. :regexp => regexp=).
    def initialize(name, definition = {})
      @name     = name
      @captures = []
      @teaser   = nil
      definition.each { |attribute, setting| send(:"#{attribute}=", setting) }
    end

    # Creates a definition called +name+, hands it to the block (if any) for
    # configuration and returns it.
    def self.define(name, &block)
      definition = new(name)
      block.call(definition) if block
      definition
    end

    # Checks whether a given line matches this definition.
    # Returns false if the line does not match. If it matches, a hash with
    # this definition (:line_definition) and the captured strings (:captures)
    # is returned.
    # If the definition has a teaser and the teaser matches while the full
    # regular expression does not, the warning handler block (when given) is
    # called with :teaser_check_failed and a descriptive message.
    def matches(line, &warning_handler)
      return false unless @teaser.nil? || @teaser =~ line

      match_data = line.match(@regexp)
      return { :line_definition => self, :captures => match_data.captures } if match_data

      if @teaser && warning_handler
        warning_handler.call(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
      end
      false
    end

    alias_method :=~, :matches

    # Matches the line and, on success, converts the captured values with the
    # request's convert_value function. Returns false if the line does not match.
    def match_for(line, request, &warning_handler)
      match_info = matches(line, &warning_handler)
      return false unless match_info

      convert_captured_values(match_info[:captures], request)
    end

    # Builds a hash of capture name => converted value, using the request's
    # convert_value function, and handles the :provides option of a capture.
    # A name that already holds a truthy value is not overwritten.
    def convert_captured_values(values, request)
      value_hash = {}

      captures.each_with_index do |capture, position|
        # convert the raw string using the request's convert_value function
        converted = request.convert_value(values[position], capture)
        value_hash[capture[:name]] ||= converted

        # When the converted value is a hash and the capture declares a
        # :provides hash, the provided entries are added to the result too.
        next unless converted.kind_of?(Hash) && capture[:provides].kind_of?(Hash)

        capture[:provides].each do |provided_name, type|
          value_hash[provided_name] ||= request.convert_value(converted[provided_name], { :type => type })
        end
      end

      value_hash
    end

    # Returns true if this line captures values of the given name
    def captures?(name)
      captures.any? { |capture| capture[:name] == name }
    end

  end

end
@@ -0,0 +1,99 @@
1
module RequestLogAnalyzer

  # The LogProcessor class is used to perform simple processing actions over log files.
  # It goes over the log file/stream line by line, passes each line to a processor and
  # writes the result to the output file or stream. The processor can alter the
  # contents of the line, leave it intact or remove it altogether, based on the current
  # file format.
  #
  # Currently, one processor is supported:
  # * :strip will remove all irrelevant lines (according to the file format) from the
  #   sources. A compact, information packed log will remain.
  #
  class LogProcessor

    attr_reader :mode, :options, :sources, :file_format
    attr_accessor :output_file

    # Builds a logprocessor instance from the arguments given on the command line
    # <tt>command</tt> The command that was used to start the log processor. This will set the
    # processing mode. Currently, only :strip is supported.
    # <tt>arguments</tt> The parsed command line arguments (a CommandLine::Arguments instance)
    #
    # NOTE(review): the format is handed to the constructor as a Symbol, while
    # strip_line calls +line_definitions+ on the stored format - confirm that the
    # symbol is resolved to a file format object before run! is used.
    def self.build(command, arguments)
      options = {
        :discard_teaser_lines => arguments[:discard_teaser_lines],
        :keep_junk_lines      => arguments[:keep_junk_lines]
      }

      log_processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
      log_processor.output_file = arguments[:output] if arguments[:output]
      arguments.parameters.each { |input| log_processor.sources << input }

      log_processor
    end

    # Initializes a new LogProcessor instance.
    # <tt>format</tt> The file format to use (e.g. :rails).
    # <tt>mode</tt> The processing mode
    # <tt>options</tt> A hash with options to take into account
    def initialize(format, mode, options = {})
      @options     = options
      @mode        = mode
      @sources     = []
      @file_format = format
      # Instance state, not a global: the original assigned $output_file here,
      # which leaked into the whole process and left the attribute untouched.
      @output_file = nil
    end

    # Processes an input file by opening it and sending the file stream to
    # <code>process_io</code>, in which the actual processing is performed.
    # <tt>file</tt> The file to process
    def process_file(file)
      File.open(file, 'r') { |io| process_io(io) }
    end

    # Processes an input stream by iterating over each line and processing it
    # according to the current operation mode. Modes other than :strip do nothing.
    # The output stream is set up by run!.
    # <tt>io</tt> The IO instance to process.
    def process_io(io)
      case mode
      when :strip then io.each_line { |line| @output << strip_line(line) }
      end
    end

    # Returns the line itself if it matches any of the line definitions of the
    # file format. If no match is found, an empty string is returned, which
    # strips the line from the output.
    # <tt>line</tt> The line to strip
    def strip_line(line)
      file_format.line_definitions.any? { |name, definition| definition =~ line } ? line : ""
    end

    # Runs the log processing by setting up the output stream and iterating over all the
    # input sources. Input sources can either be filenames (String instances) or IO streams
    # (IO instances). The strings "-" and "STDIN" will be substituted for the $stdin variable.
    # Sources that are none of these (for instance names of files that do not exist) are skipped.
    # Output goes to $stdout unless output_file is set, in which case the file is
    # opened in append mode and closed again when processing ends.
    def run!
      @output = @output_file.nil? ? $stdout : File.new(@output_file, 'a')

      @sources.each do |source|
        if source.kind_of?(String) && File.exist?(source)
          process_file(source)
        elsif source.kind_of?(IO)
          process_io(source)
        elsif ['-', 'STDIN'].include?(source)
          process_io($stdin)
        end
      end

    ensure
      # Only close streams this method opened itself; never close $stdout.
      @output.close if @output.kind_of?(File)
    end
  end

end
@@ -0,0 +1,62 @@
1
module RequestLogAnalyzer

  # Mail report to a specified email address
  class Mailer

    attr_accessor :data, :to, :host, :content_type

    # Initialize a mailer
    # <tt>to</tt> email address to mail to
    # <tt>host</tt> the mailer host (defaults to localhost)
    # <tt>options</tt> Specific style options
    #
    # Options
    # * <tt>:debug</tt> Do not actually mail
    # * <tt>:from</tt> The sender address (defaults to contact@railsdoctors.com)
    # * <tt>:from_alias</tt> The from alias
    # * <tt>:to_alias</tt> The to alias
    # * <tt>:subject</tt> The message subject
    def initialize(to, host = 'localhost', options = {})
      require 'net/smtp' # loaded lazily, only when a mailer is actually created
      @to           = to
      @host         = host
      @options      = options
      @data         = []
      @content_type = nil
    end

    # Send all data in @data to the email address used during initialization.
    # The collected chunks are concatenated in the order in which they were added,
    # without any separator. (The original interpolated <tt>@data.to_s</tt>, which on
    # Ruby 1.9 and later yields the inspected array instead of the text.)
    # Nothing is sent when the :debug option is set.
    # Returns array containing [message_data, from_email_address, to_email_address] of sent email.
    def mail
      from       = @options[:from]       || 'contact@railsdoctors.com'
      from_alias = @options[:from_alias] || 'Request-log-analyzer reporter'
      to_alias   = @options[:to_alias]   || to
      subject    = @options[:subject]    || "Request log analyzer report - generated on #{Time.now}"
      # Stays nil (an empty header line) when no content type was configured.
      content_type_header = @content_type ? "Content-Type: #{@content_type}" : nil
      body = @data.join

      msg = <<END_OF_MESSAGE
From: #{from_alias} <#{from}>
To: #{to_alias} <#{@to}>
Subject: #{subject}
#{content_type_header}

#{body}
END_OF_MESSAGE

      unless @options[:debug]
        Net::SMTP.start(@host) do |smtp|
          smtp.send_message msg, from, to
        end
      end

      [msg, from, to]
    end

    # Appends +string+ to the data that will be mailed.
    def <<(string)
      data << string
    end

    # Appends +string+ to the data that will be mailed. No newline is added.
    def puts(string)
      data << string
    end

  end
end