ngmoco-request-log-analyzer 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. data/.gitignore +10 -0
  2. data/DESIGN.rdoc +41 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +8 -0
  6. data/bin/request-log-analyzer +114 -0
  7. data/lib/cli/command_line_arguments.rb +301 -0
  8. data/lib/cli/database_console.rb +26 -0
  9. data/lib/cli/database_console_init.rb +43 -0
  10. data/lib/cli/progressbar.rb +213 -0
  11. data/lib/cli/tools.rb +46 -0
  12. data/lib/request_log_analyzer.rb +44 -0
  13. data/lib/request_log_analyzer/aggregator.rb +49 -0
  14. data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
  15. data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
  16. data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
  17. data/lib/request_log_analyzer/controller.rb +332 -0
  18. data/lib/request_log_analyzer/database.rb +102 -0
  19. data/lib/request_log_analyzer/database/base.rb +115 -0
  20. data/lib/request_log_analyzer/database/connection.rb +38 -0
  21. data/lib/request_log_analyzer/database/request.rb +22 -0
  22. data/lib/request_log_analyzer/database/source.rb +13 -0
  23. data/lib/request_log_analyzer/database/warning.rb +14 -0
  24. data/lib/request_log_analyzer/file_format.rb +160 -0
  25. data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
  26. data/lib/request_log_analyzer/file_format/apache.rb +141 -0
  27. data/lib/request_log_analyzer/file_format/merb.rb +67 -0
  28. data/lib/request_log_analyzer/file_format/rack.rb +11 -0
  29. data/lib/request_log_analyzer/file_format/rails.rb +176 -0
  30. data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
  31. data/lib/request_log_analyzer/filter.rb +30 -0
  32. data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
  33. data/lib/request_log_analyzer/filter/field.rb +42 -0
  34. data/lib/request_log_analyzer/filter/timespan.rb +45 -0
  35. data/lib/request_log_analyzer/line_definition.rb +111 -0
  36. data/lib/request_log_analyzer/log_processor.rb +99 -0
  37. data/lib/request_log_analyzer/mailer.rb +62 -0
  38. data/lib/request_log_analyzer/output.rb +113 -0
  39. data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
  40. data/lib/request_log_analyzer/output/html.rb +184 -0
  41. data/lib/request_log_analyzer/request.rb +175 -0
  42. data/lib/request_log_analyzer/source.rb +72 -0
  43. data/lib/request_log_analyzer/source/database_loader.rb +87 -0
  44. data/lib/request_log_analyzer/source/log_parser.rb +274 -0
  45. data/lib/request_log_analyzer/tracker.rb +206 -0
  46. data/lib/request_log_analyzer/tracker/duration.rb +104 -0
  47. data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
  48. data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
  49. data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
  50. data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
  51. data/request-log-analyzer.gemspec +40 -0
  52. data/spec/database.yml +23 -0
  53. data/spec/fixtures/apache_combined.log +5 -0
  54. data/spec/fixtures/apache_common.log +10 -0
  55. data/spec/fixtures/decompression.log +12 -0
  56. data/spec/fixtures/decompression.log.bz2 +0 -0
  57. data/spec/fixtures/decompression.log.gz +0 -0
  58. data/spec/fixtures/decompression.log.zip +0 -0
  59. data/spec/fixtures/decompression.tar.gz +0 -0
  60. data/spec/fixtures/decompression.tgz +0 -0
  61. data/spec/fixtures/header_and_footer.log +6 -0
  62. data/spec/fixtures/merb.log +84 -0
  63. data/spec/fixtures/merb_prefixed.log +9 -0
  64. data/spec/fixtures/multiple_files_1.log +5 -0
  65. data/spec/fixtures/multiple_files_2.log +2 -0
  66. data/spec/fixtures/rails.db +0 -0
  67. data/spec/fixtures/rails_1x.log +59 -0
  68. data/spec/fixtures/rails_22.log +12 -0
  69. data/spec/fixtures/rails_22_cached.log +10 -0
  70. data/spec/fixtures/rails_unordered.log +24 -0
  71. data/spec/fixtures/syslog_1x.log +5 -0
  72. data/spec/fixtures/test_file_format.log +13 -0
  73. data/spec/fixtures/test_language_combined.log +14 -0
  74. data/spec/fixtures/test_order.log +16 -0
  75. data/spec/integration/command_line_usage_spec.rb +84 -0
  76. data/spec/integration/munin_plugins_rails_spec.rb +58 -0
  77. data/spec/integration/scout_spec.rb +151 -0
  78. data/spec/lib/helpers.rb +52 -0
  79. data/spec/lib/macros.rb +18 -0
  80. data/spec/lib/matchers.rb +77 -0
  81. data/spec/lib/mocks.rb +76 -0
  82. data/spec/lib/testing_format.rb +46 -0
  83. data/spec/spec_helper.rb +24 -0
  84. data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
  85. data/spec/unit/aggregator/summarizer_spec.rb +26 -0
  86. data/spec/unit/controller/controller_spec.rb +41 -0
  87. data/spec/unit/controller/log_processor_spec.rb +18 -0
  88. data/spec/unit/database/base_class_spec.rb +183 -0
  89. data/spec/unit/database/connection_spec.rb +34 -0
  90. data/spec/unit/database/database_spec.rb +133 -0
  91. data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
  92. data/spec/unit/file_format/apache_format_spec.rb +203 -0
  93. data/spec/unit/file_format/file_format_api_spec.rb +69 -0
  94. data/spec/unit/file_format/line_definition_spec.rb +75 -0
  95. data/spec/unit/file_format/merb_format_spec.rb +52 -0
  96. data/spec/unit/file_format/rails_format_spec.rb +164 -0
  97. data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
  98. data/spec/unit/filter/field_filter_spec.rb +66 -0
  99. data/spec/unit/filter/filter_spec.rb +17 -0
  100. data/spec/unit/filter/timespan_filter_spec.rb +58 -0
  101. data/spec/unit/mailer_spec.rb +30 -0
  102. data/spec/unit/request_spec.rb +111 -0
  103. data/spec/unit/source/log_parser_spec.rb +119 -0
  104. data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
  105. data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
  106. data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
  107. data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
  108. data/spec/unit/tracker/tracker_api_spec.rb +124 -0
  109. data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
  110. data/tasks/github-gem.rake +323 -0
  111. data/tasks/request_log_analyzer.rake +26 -0
  112. metadata +220 -0
@@ -0,0 +1,12 @@
1
module RequestLogAnalyzer::FileFormat
  # Deprecated entry point for the "development" flavour of the Rails file
  # format. According to the original author it is an extension of the default
  # Rails format that additionally parses SQL query and partial rendering
  # lines; the actual line definitions live in the Rails parent class.
  class RailsDevelopment < Rails
    class << self
      # Prints a deprecation notice on standard output and then delegates to
      # the parent's +create+ with the 'development' argument.
      def create
        puts 'DEPRECATED: use --rails-format development instead!'
        super('development')
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module RequestLogAnalyzer::Filter
  # Hook invoked by Ruby when an unknown constant is referenced inside this
  # module. The lookup is handed to RequestLogAnalyzer.load_default_class_file,
  # which is expected to load the file that defines the missing filter class.
  def self.const_missing(const)
    RequestLogAnalyzer.load_default_class_file(self, const)
  end

  # Common superclass for request filters. All filters should inherit from
  # this class and override #filter.
  class Base
    attr_reader :file_format, :options

    # Stores the file format and the filter options.
    # <tt>format</tt> The file format
    # <tt>options</tt> Hash of options, made available to the filter.
    def initialize(format, options = {})
      @file_format, @options = format, options
    end

    # Contract for subclasses: return the request to keep it, nil to drop it.
    # The base implementation keeps every request.
    def filter(request)
      request
    end
  end
end
@@ -0,0 +1,39 @@
1
module RequestLogAnalyzer::Filter
  # Filter that anonymizes parsed request values. It never drops a request;
  # it rewrites selected attributes in place:
  # * <tt>:ip</tt> is replaced by a random dotted-quad address.
  # * <tt>:url</tt> has its scheme and host replaced by http://example.com/.
  # * timing-like fields (see FUZZED_FIELDS) are scaled by a random factor.
  # This filter does not read any options.
  class Anonymize < Base
    # Attribute names whose (numeric) values are fuzzed.
    FUZZED_FIELDS = [:duration, :view, :db, :type, :after_filters_time,
                     :before_filters_time, :action_time].freeze

    # Returns a random IPv4-looking string; every octet is in 0..255.
    def generate_random_ip
      "#{rand(256)}.#{rand(256)}.#{rand(256)}.#{rand(256)}"
    end

    # Replaces a leading "http(s)://host/" of the given string with
    # "http://example.com/". Strings without such a prefix are returned
    # unchanged.
    def anonymize_url(value)
      value.sub(%r{^https?://[A-Za-z0-9.-]+/}, 'http://example.com/')
    end

    # Scales the value by a random factor between 0.75 and 1.24.
    def fuzz(value)
      value * ((75 + rand(50)) / 100.0)
    end

    # Anonymizes the attributes of the request and returns the request.
    # Values of an unexpected type (nil, or a non-numeric value in a fuzzed
    # field) are left untouched instead of raising or being mangled.
    # <tt>request</tt> Request object; must respond to +attributes+ with a Hash.
    def filter(request)
      # TODO: request.attributes is bad practice
      request.attributes.each do |key, value|
        case key
        when :ip
          request.attributes[key] = generate_random_ip
        when :url
          request.attributes[key] = anonymize_url(value) if value.kind_of?(String)
        when *FUZZED_FIELDS
          # Only numbers can be scaled meaningfully; String#* would repeat text.
          request.attributes[key] = fuzz(value) if value.kind_of?(Numeric)
        end
      end

      request
    end
  end
end
@@ -0,0 +1,42 @@
1
module RequestLogAnalyzer::Filter
  # Filter to select or reject requests based on the value of a field.
  # Options
  # * <tt>:mode</tt> :accept (default; :select is a synonym) or :reject.
  # * <tt>:field</tt> Name of the field to inspect.
  # * <tt>:value</tt> Value the field should match. A String of the form
  #   "/.../" is interpreted as a regular expression.
  class Field < Base
    attr_reader :field, :value, :mode

    def initialize(file_format, options = {})
      super(file_format, options)
      setup_filter
    end

    # Sets up mode, field and value from the options hash.
    def setup_filter
      @mode  = (@options[:mode] || :accept).to_sym
      @field = @options[:field].to_sym

      # A value written as "/pattern/" is turned into a Regexp; anything else
      # is compared as given.
      raw_value = @options[:value]
      if raw_value.kind_of?(String) && raw_value[0, 1] == '/' && raw_value[-1, 1] == '/'
        @value = Regexp.new(raw_value[1..-2])
      else
        @value = raw_value # TODO: convert value?
      end
    end

    # Keeps the request if the mode is :accept/:select and any value of the
    # field matches; drops it (returns nil) if none matches.
    # Drops the request if the mode is :reject and any value matches.
    # For any other mode the request is always kept.
    # Matching uses <tt>@value === field_value.to_s</tt>.
    # <tt>request</tt> Request object; must respond to +every(field)+.
    def filter(request)
      found_field = request.every(@field).any? { |value| @value === value.to_s }

      case @mode
      when :accept, :select
        # :accept is the documented default and used to fall through
        # unfiltered; it now behaves like :select.
        found_field ? request : nil
      when :reject
        found_field ? nil : request
      else
        request
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module RequestLogAnalyzer::Filter
  # Drops every request that falls outside a given timespan.
  # Options
  # * <tt>:after</tt> Only keep requests at or after this DateTime.
  # * <tt>:before</tt> Only keep requests at or before this DateTime.
  class Timespan < Base
    attr_reader :before, :after

    def initialize(file_format, options = {})
      @after = @before = nil
      super(file_format, options)
      setup_filter
    end

    # Converts the :after and :before options to integers of the form
    # YYYYMMDDHHMMSS so they can be compared cheaply. An option that is not
    # given leaves the corresponding bound nil.
    def setup_filter
      lower = options[:after]
      upper = options[:before]
      @after  = lower.strftime('%Y%m%d%H%M%S').to_i if lower
      @before = upper.strftime('%Y%m%d%H%M%S').to_i if upper
    end

    # Returns the request when its timestamp lies within the configured
    # bounds (both bounds inclusive; a missing bound is not checked).
    # Returns nil otherwise, including when neither bound is configured.
    # NOTE(review): presumably request.timestamp is an integer in the same
    # YYYYMMDDHHMMSS format as the bounds; confirm against Request.
    # <tt>request</tt> Request object.
    def filter(request)
      return nil unless @after || @before

      stamp = request.timestamp
      return nil if @before && !(stamp <= @before)
      return nil if @after && !(@after <= stamp)

      request
    end
  end
end
@@ -0,0 +1,111 @@
1
module RequestLogAnalyzer
  # A LineDefinition describes one kind of line that should be recognised in
  # a log file. It can test a line against its regular expression and turn
  # the captured groups into a hash of converted values.
  class LineDefinition
    # Small DSL helper that collects line definitions by name. Any method
    # called on it registers a definition under the name of that method.
    class Definer
      attr_accessor :line_definitions

      def initialize
        @line_definitions = {}
      end

      # Gives a duplicated Definer its own (shallow) copy of the definitions
      # hash, so that registering on the copy does not affect the source.
      def initialize_copy(other)
        @line_definitions = other.line_definitions.dup
      end

      # Registers a line definition named after the called method. With a
      # block the definition is built through LineDefinition.define; without
      # one the first argument is passed as the definition hash.
      def method_missing(name, *args, &block)
        @line_definitions[name] =
          if block_given?
            RequestLogAnalyzer::LineDefinition.define(name, &block)
          else
            RequestLogAnalyzer::LineDefinition.new(name, args.first)
          end
      end
    end

    attr_reader :name
    attr_accessor :teaser, :regexp, :captures
    attr_accessor :header, :footer

    alias_method :header?, :header
    alias_method :footer?, :footer

    # Creates a definition with the given name. Every key of the definition
    # hash is assigned through the writer of the same name
    # (e.g. :regexp => ... calls regexp=).
    def initialize(name, definition = {})
      @name     = name
      @captures = []
      @teaser   = nil
      definition.each { |key, value| send(:"#{key}=", value) }
    end

    # Creates a definition and hands it to the block (if any) for
    # configuration. Returns the definition.
    def self.define(name, &block)
      definition = new(name)
      yield(definition) if block_given?
      definition
    end

    # Checks whether the given line matches this definition.
    # Returns false when it does not. When it does, returns a hash with the
    # definition itself (:line_definition) and the raw captures (:captures).
    # If a teaser is set and matches while the full regular expression does
    # not, the warning handler block (if given) is called with
    # :teaser_check_failed and a message.
    def matches(line, &warning_handler)
      return false unless @teaser.nil? || @teaser =~ line

      match_data = line.match(@regexp)
      return { :line_definition => self, :captures => match_data.captures } if match_data

      if @teaser && warning_handler
        warning_handler.call(:teaser_check_failed, "Teaser matched for #{name.inspect}, but full line did not:\n#{line.inspect}")
      end
      false
    end

    alias_method :=~, :matches

    # Matches the line and, on success, converts the captured values with
    # the request's convert_value method. Returns false when there is no match.
    def match_for(line, request, &warning_handler)
      match_info = matches(line, &warning_handler)
      return false unless match_info

      convert_captured_values(match_info[:captures], request)
    end

    # Builds a hash of capture name => converted value. The first value seen
    # for a name wins. When a converted value is a Hash and the capture has a
    # :provides hash, each provided name is additionally filled from the
    # converted hash, converted with the type given in :provides.
    def convert_captured_values(values, request)
      converted_values = {}

      captures.each_with_index do |capture, position|
        converted = request.convert_value(values[position], capture)
        converted_values[capture[:name]] ||= converted

        provided = capture[:provides]
        next unless converted.kind_of?(Hash) && provided.kind_of?(Hash)

        provided.each do |field, type|
          converted_values[field] ||= request.convert_value(converted[field], { :type => type })
        end
      end

      converted_values
    end

    # Returns true if this line definition captures a value with the given name.
    def captures?(name)
      captures.any? { |capture| capture[:name] == name }
    end
  end
end
@@ -0,0 +1,99 @@
1
module RequestLogAnalyzer
  # The LogProcessor class performs simple processing actions over log files.
  # It goes over the log file/stream line by line, passes each line to a
  # processor and writes the result to the output file or stream.
  #
  # Currently, one processor is supported:
  # * :strip removes all lines that match none of the file format's line
  #   definitions, leaving a compact log.
  class LogProcessor
    attr_reader :mode, :options, :sources, :file_format
    attr_accessor :output_file

    # Builds a LogProcessor instance from the arguments given on the command line.
    # <tt>command</tt> The command that was used to start the log processor. This sets the
    #                  processing mode. Currently, only :strip is supported.
    # <tt>arguments</tt> The parsed command line arguments; must support [] lookup
    #                    and respond to +parameters+ with the list of inputs.
    def self.build(command, arguments)
      options = {
        :discard_teaser_lines => arguments[:discard_teaser_lines],
        :keep_junk_lines      => arguments[:keep_junk_lines],
      }

      # NOTE(review): the format is passed on as a Symbol, while #strip_line
      # calls +line_definitions+ on it. Presumably it has to be resolved to a
      # file format object somewhere; confirm against the caller.
      log_processor = RequestLogAnalyzer::LogProcessor.new(arguments[:format].to_sym, command, options)
      log_processor.output_file = arguments[:output] if arguments[:output]

      arguments.parameters.each do |input|
        log_processor.sources << input
      end

      log_processor
    end

    # Initializes a new LogProcessor instance.
    # <tt>format</tt> The file format to use (e.g. :rails).
    # <tt>mode</tt> The processing mode
    # <tt>options</tt> A hash with options to take into account
    def initialize(format, mode, options = {})
      @options     = options
      @mode        = mode
      @sources     = []
      @file_format = format
      # Instance state, not a global: the original assigned $output_file here.
      @output_file = nil
    end

    # Opens the given file for reading and sends the stream to
    # <code>process_io</code>, in which the actual processing is performed.
    # <tt>file</tt> Path of the file to process
    def process_file(file)
      File.open(file, 'r') { |io| process_io(io) }
    end

    # Processes an input stream line by line according to the current mode.
    # In :strip mode the result of #strip_line is appended to the output.
    # Any other mode does nothing.
    # <tt>io</tt> The IO instance to process.
    def process_io(io)
      case mode
      when :strip
        io.each_line { |line| @output << strip_line(line) }
      end
    end

    # Returns the line itself if it matches any of the line definitions of
    # the file format. Otherwise returns an empty string, which effectively
    # strips the line from the output.
    # <tt>line</tt> The line to strip
    def strip_line(line)
      file_format.line_definitions.any? { |_name, definition| definition =~ line } ? line : ""
    end

    # Runs the log processing: sets up the output stream ($stdout, or the
    # output file opened in append mode) and iterates over all input sources.
    # A source can be the name of an existing file, an IO instance, or one of
    # the strings "-" and "STDIN", which stand for $stdin. Other sources are
    # skipped. An output file opened here is closed afterwards.
    def run!
      if @output_file.nil?
        @output = $stdout
      else
        @output = File.new(@output_file, 'a')
      end

      @sources.each do |source|
        if source.kind_of?(String) && File.exist?(source)
          process_file(source)
        elsif source.kind_of?(IO)
          process_io(source)
        elsif ['-', 'STDIN'].include?(source)
          process_io($stdin)
        end
      end
    ensure
      @output.close if @output.kind_of?(File)
    end
  end
end
@@ -0,0 +1,62 @@
1
module RequestLogAnalyzer
  # Mails a report to a specified email address. Report content is collected
  # with #<< and #puts and sent with #mail.
  class Mailer
    attr_accessor :data, :to, :host, :content_type

    # Initialize a mailer
    # <tt>to</tt> email address to mail to
    # <tt>host</tt> the mailer host (defaults to localhost)
    # <tt>options</tt> Specific style options
    #
    # Options
    # * <tt>:debug</tt> Do not actually mail
    # * <tt>:from</tt> The sender address (defaults to contact@railsdoctors.com)
    # * <tt>:from_alias</tt> The from alias
    # * <tt>:to_alias</tt> The to alias
    # * <tt>:subject</tt> The message subject
    def initialize(to, host = 'localhost', options = {})
      require 'net/smtp' # loaded lazily, only when a mailer is created
      @to           = to
      @host         = host
      @options      = options
      @data         = []
      @content_type = nil
    end

    # Sends all data collected in @data to the email address given during
    # initialization, unless the :debug option is set.
    # Returns an array [message_data, from_email_address, to_email_address].
    def mail
      from       = @options[:from]       || 'contact@railsdoctors.com'
      from_alias = @options[:from_alias] || 'Request-log-analyzer reporter'
      to_alias   = @options[:to_alias]   || to
      subject    = @options[:subject]    || "Request log analyzer report - generated on #{Time.now.to_s}"
      # Stays nil (an empty line in the message) when no content type is set.
      content_type_header = "Content-Type: #{@content_type}" if @content_type
      # The collected chunks are concatenated as-is. Array#to_s would yield
      # the inspect form (["a", "b"]) on Ruby 1.9 and later.
      msg = <<END_OF_MESSAGE
From: #{from_alias} <#{from}>
To: #{to_alias} <#{@to}>
Subject: #{subject}
#{content_type_header}

#{@data.join}
END_OF_MESSAGE

      unless @options[:debug]
        Net::SMTP.start(@host) do |smtp|
          smtp.send_message msg, from, to
        end
      end

      [msg, from, to]
    end

    # Appends a chunk to the message data.
    def <<(string)
      data << string
    end

    # Appends a chunk to the message data; no newline is added.
    def puts(string)
      data << string
    end
  end
end