ngmoco-request-log-analyzer 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. data/.gitignore +10 -0
  2. data/DESIGN.rdoc +41 -0
  3. data/LICENSE +20 -0
  4. data/README.rdoc +39 -0
  5. data/Rakefile +8 -0
  6. data/bin/request-log-analyzer +114 -0
  7. data/lib/cli/command_line_arguments.rb +301 -0
  8. data/lib/cli/database_console.rb +26 -0
  9. data/lib/cli/database_console_init.rb +43 -0
  10. data/lib/cli/progressbar.rb +213 -0
  11. data/lib/cli/tools.rb +46 -0
  12. data/lib/request_log_analyzer.rb +44 -0
  13. data/lib/request_log_analyzer/aggregator.rb +49 -0
  14. data/lib/request_log_analyzer/aggregator/database_inserter.rb +83 -0
  15. data/lib/request_log_analyzer/aggregator/echo.rb +29 -0
  16. data/lib/request_log_analyzer/aggregator/summarizer.rb +175 -0
  17. data/lib/request_log_analyzer/controller.rb +332 -0
  18. data/lib/request_log_analyzer/database.rb +102 -0
  19. data/lib/request_log_analyzer/database/base.rb +115 -0
  20. data/lib/request_log_analyzer/database/connection.rb +38 -0
  21. data/lib/request_log_analyzer/database/request.rb +22 -0
  22. data/lib/request_log_analyzer/database/source.rb +13 -0
  23. data/lib/request_log_analyzer/database/warning.rb +14 -0
  24. data/lib/request_log_analyzer/file_format.rb +160 -0
  25. data/lib/request_log_analyzer/file_format/amazon_s3.rb +71 -0
  26. data/lib/request_log_analyzer/file_format/apache.rb +141 -0
  27. data/lib/request_log_analyzer/file_format/merb.rb +67 -0
  28. data/lib/request_log_analyzer/file_format/rack.rb +11 -0
  29. data/lib/request_log_analyzer/file_format/rails.rb +176 -0
  30. data/lib/request_log_analyzer/file_format/rails_development.rb +12 -0
  31. data/lib/request_log_analyzer/filter.rb +30 -0
  32. data/lib/request_log_analyzer/filter/anonymize.rb +39 -0
  33. data/lib/request_log_analyzer/filter/field.rb +42 -0
  34. data/lib/request_log_analyzer/filter/timespan.rb +45 -0
  35. data/lib/request_log_analyzer/line_definition.rb +111 -0
  36. data/lib/request_log_analyzer/log_processor.rb +99 -0
  37. data/lib/request_log_analyzer/mailer.rb +62 -0
  38. data/lib/request_log_analyzer/output.rb +113 -0
  39. data/lib/request_log_analyzer/output/fixed_width.rb +220 -0
  40. data/lib/request_log_analyzer/output/html.rb +184 -0
  41. data/lib/request_log_analyzer/request.rb +175 -0
  42. data/lib/request_log_analyzer/source.rb +72 -0
  43. data/lib/request_log_analyzer/source/database_loader.rb +87 -0
  44. data/lib/request_log_analyzer/source/log_parser.rb +274 -0
  45. data/lib/request_log_analyzer/tracker.rb +206 -0
  46. data/lib/request_log_analyzer/tracker/duration.rb +104 -0
  47. data/lib/request_log_analyzer/tracker/frequency.rb +95 -0
  48. data/lib/request_log_analyzer/tracker/hourly_spread.rb +107 -0
  49. data/lib/request_log_analyzer/tracker/timespan.rb +81 -0
  50. data/lib/request_log_analyzer/tracker/traffic.rb +106 -0
  51. data/request-log-analyzer.gemspec +40 -0
  52. data/spec/database.yml +23 -0
  53. data/spec/fixtures/apache_combined.log +5 -0
  54. data/spec/fixtures/apache_common.log +10 -0
  55. data/spec/fixtures/decompression.log +12 -0
  56. data/spec/fixtures/decompression.log.bz2 +0 -0
  57. data/spec/fixtures/decompression.log.gz +0 -0
  58. data/spec/fixtures/decompression.log.zip +0 -0
  59. data/spec/fixtures/decompression.tar.gz +0 -0
  60. data/spec/fixtures/decompression.tgz +0 -0
  61. data/spec/fixtures/header_and_footer.log +6 -0
  62. data/spec/fixtures/merb.log +84 -0
  63. data/spec/fixtures/merb_prefixed.log +9 -0
  64. data/spec/fixtures/multiple_files_1.log +5 -0
  65. data/spec/fixtures/multiple_files_2.log +2 -0
  66. data/spec/fixtures/rails.db +0 -0
  67. data/spec/fixtures/rails_1x.log +59 -0
  68. data/spec/fixtures/rails_22.log +12 -0
  69. data/spec/fixtures/rails_22_cached.log +10 -0
  70. data/spec/fixtures/rails_unordered.log +24 -0
  71. data/spec/fixtures/syslog_1x.log +5 -0
  72. data/spec/fixtures/test_file_format.log +13 -0
  73. data/spec/fixtures/test_language_combined.log +14 -0
  74. data/spec/fixtures/test_order.log +16 -0
  75. data/spec/integration/command_line_usage_spec.rb +84 -0
  76. data/spec/integration/munin_plugins_rails_spec.rb +58 -0
  77. data/spec/integration/scout_spec.rb +151 -0
  78. data/spec/lib/helpers.rb +52 -0
  79. data/spec/lib/macros.rb +18 -0
  80. data/spec/lib/matchers.rb +77 -0
  81. data/spec/lib/mocks.rb +76 -0
  82. data/spec/lib/testing_format.rb +46 -0
  83. data/spec/spec_helper.rb +24 -0
  84. data/spec/unit/aggregator/database_inserter_spec.rb +93 -0
  85. data/spec/unit/aggregator/summarizer_spec.rb +26 -0
  86. data/spec/unit/controller/controller_spec.rb +41 -0
  87. data/spec/unit/controller/log_processor_spec.rb +18 -0
  88. data/spec/unit/database/base_class_spec.rb +183 -0
  89. data/spec/unit/database/connection_spec.rb +34 -0
  90. data/spec/unit/database/database_spec.rb +133 -0
  91. data/spec/unit/file_format/amazon_s3_format_spec.rb +49 -0
  92. data/spec/unit/file_format/apache_format_spec.rb +203 -0
  93. data/spec/unit/file_format/file_format_api_spec.rb +69 -0
  94. data/spec/unit/file_format/line_definition_spec.rb +75 -0
  95. data/spec/unit/file_format/merb_format_spec.rb +52 -0
  96. data/spec/unit/file_format/rails_format_spec.rb +164 -0
  97. data/spec/unit/filter/anonymize_filter_spec.rb +21 -0
  98. data/spec/unit/filter/field_filter_spec.rb +66 -0
  99. data/spec/unit/filter/filter_spec.rb +17 -0
  100. data/spec/unit/filter/timespan_filter_spec.rb +58 -0
  101. data/spec/unit/mailer_spec.rb +30 -0
  102. data/spec/unit/request_spec.rb +111 -0
  103. data/spec/unit/source/log_parser_spec.rb +119 -0
  104. data/spec/unit/tracker/duration_tracker_spec.rb +130 -0
  105. data/spec/unit/tracker/frequency_tracker_spec.rb +88 -0
  106. data/spec/unit/tracker/hourly_spread_spec.rb +79 -0
  107. data/spec/unit/tracker/timespan_tracker_spec.rb +73 -0
  108. data/spec/unit/tracker/tracker_api_spec.rb +124 -0
  109. data/spec/unit/tracker/traffic_tracker_spec.rb +107 -0
  110. data/tasks/github-gem.rake +323 -0
  111. data/tasks/request_log_analyzer.rake +26 -0
  112. metadata +220 -0
@@ -0,0 +1,175 @@
1
module RequestLogAnalyzer

  # The Request class represents a parsed request from the log file.
  # Instances are created by the LogParser and are passed to the different aggregators, so they
  # can do their aggregating work.
  #
  # This class provides several methods to access the data that was parsed from the log files.
  # Request#first(field_name) returns the first (only) value corresponding to the given field
  # Request#every(field_name) returns all values corresponding to the given field name as array.
  class Request

    # Converter functions that turn the captured strings into native Ruby values,
    # based on the :type, :unit and :default keys of the capture definition hash.
    module Converters

      # Default converter function, which converts the parsed strings to a native Ruby type
      # using the type indication in the line definition. It will use a custom converter
      # method (convert_<type>) if one is available.
      # <tt>value</tt>::              The captured string (or nil if nothing was captured)
      # <tt>capture_definition</tt>:: Hash with at least :type; :default is returned for nil values.
      def convert_value(value, capture_definition)
        return capture_definition[:default] if value.nil?
        custom_converter_method = :"convert_#{capture_definition[:type]}"
        send(custom_converter_method, value, capture_definition)
      end

      def convert_string(value, capture_definition);  value;        end
      def convert_float(value, capture_definition);   value.to_f;   end
      def convert_decimal(value, capture_definition); value.to_f;   end
      def convert_int(value, capture_definition);     value.to_i;   end
      def convert_integer(value, capture_definition); value.to_i;   end
      def convert_sym(value, capture_definition);     value.to_sym; end
      def convert_symbol(value, capture_definition);  value.to_sym; end

      # Converts an :eval field, which should evaluate to a hash; the hash keys are symbolized.
      # Returns nil when the captured string is not valid Ruby.
      #
      # SECURITY NOTE: this executes arbitrary Ruby code found in the log line via Kernel#eval.
      # Only use the :eval capture type on log files from a trusted source.
      def convert_eval(value, capture_definition)
        eval(value).inject({}) { |h, (k, v)| h[k.to_sym] = v; h }
      rescue SyntaxError
        nil
      end

      # Slow default method to parse timestamps; returns an integer of the form YYYYMMDDhhmmss.
      # Reimplement this function in a file format specific Request class
      # to improve the timestamp parsing speed.
      def convert_timestamp(value, capture_definition)
        DateTime.parse(value).strftime('%Y%m%d%H%M%S').to_i
      end

      # Converts traffic fields to (whole) bytes based on the given :unit
      # (decimal units use powers of 1000, binary units powers of 1024).
      # Raises for units that are not recognized.
      def convert_traffic(value, capture_definition)
        case capture_definition[:unit]
        when nil, :b, :B, :byte      then value.to_i
        when :GB, :G, :gigabyte      then (value.to_f * 1_000_000_000).round
        when :GiB, :gibibyte         then (value.to_f * (2 ** 30)).round
        when :MB, :M, :megabyte      then (value.to_f * 1_000_000).round
        when :MiB, :mebibyte         then (value.to_f * (2 ** 20)).round
        when :KB, :K, :kilobyte, :kB then (value.to_f * 1_000).round
        when :KiB, :kibibyte         then (value.to_f * (2 ** 10)).round
        else raise "Unknown traffic unit"
        end
      end

      # Convert duration fields to float, and make sure the values are in seconds.
      # Raises for units that are not recognized.
      def convert_duration(value, capture_definition)
        case capture_definition[:unit]
        when nil, :sec, :s     then value.to_f
        when :microsec, :musec then value.to_f / 1000000.0
        when :msec, :millisec  then value.to_f / 1000.0
        else raise "Unknown duration unit"
        end
      end
    end

    # Install the default converter methods
    include Converters

    attr_reader :lines, :attributes, :file_format

    # Initializes a new Request object.
    # <tt>file_format</tt>:: The file format instance this request was parsed with.
    # <tt>attributes</tt>::  Initial attribute hash (field name => value).
    def initialize(file_format, attributes = {})
      @lines       = []
      @attributes  = attributes
      @file_format = file_format
    end

    # Creates a new request that was parsed from the log with the given FileFormat. The hashes
    # that are passed to this function are added as lines to this request.
    def self.create(file_format, *hashes)
      request = self.new(file_format)
      hashes.flatten.each { |hash| request << hash }
      return request
    end

    # Adds another line to the request when it is parsed in the LogParser.
    #
    # The line should be provided as a hash with the attributes :line_definition, :captures,
    # :lineno and :source set. This function is called from LogParser.
    def add_parsed_line(parsed_line)
      value_hash = parsed_line[:line_definition].convert_captured_values(parsed_line[:captures], self)
      value_hash[:line_type] = parsed_line[:line_definition].name
      value_hash[:lineno] = parsed_line[:lineno]
      value_hash[:source] = parsed_line[:source]
      add_line_hash(value_hash)
    end

    # Adds another line to the request using a plain hash.
    #
    # The line should be provided as a hash of the fields parsed from the line. Note that
    # existing attribute values are kept: values from earlier lines win over later ones.
    def add_line_hash(value_hash)
      @lines << value_hash
      @attributes = value_hash.merge(@attributes)
    end

    # Adds another line to the request. This method switches automatically between
    # the add_line_hash and add_parsed_line based on the keys of the provided hash.
    def <<(hash)
      hash[:line_definition] ? add_parsed_line(hash) : add_line_hash(hash)
    end

    # Checks whether the given line type was parsed from the log file for this request.
    # Returns a truthy value (true or the matching line hash) or nil.
    def has_line_type?(line_type)
      return true if @lines.length == 1 && @lines[0][:line_type] == line_type.to_sym
      @lines.detect { |l| l[:line_type] == line_type.to_sym }
    end

    alias :=~ :has_line_type?

    # Returns the value that was captured for the "field" of this request.
    # This function will return the first value that was captured if the field
    # was captured in multiple lines.
    def first(field)
      @attributes[field]
    end

    alias :[] :first

    # Returns an array of all the "field" values that were captured for this request.
    def every(field)
      @lines.inject([]) { |result, fields| result << fields[field] if fields.has_key?(field); result }
    end

    # Returns true if this request does not yet contain any parsed lines. This should only occur
    # during parsing. An empty request should never be sent to the aggregators.
    def empty?
      @lines.empty?
    end

    # Checks whether this request is completed. A completed request contains both a parsed header
    # line and a parsed footer line. Note that calling this function in single line mode will always
    # return false.
    def completed?
      header_found, footer_found = false, false
      @lines.each do |line|
        line_def = file_format.line_definitions[line[:line_type]]
        header_found = true if line_def.header
        footer_found = true if line_def.footer
      end
      header_found && footer_found
    end

    # This function is called before a Request is yielded. Hook for subclasses.
    def validate
    end

    # Returns the first timestamp encountered in a request.
    def timestamp
      first(:timestamp)
    end

    # Returns the lowest line number of the lines in this request (nil values ignored).
    def first_lineno
      @lines.map { |line| line[:lineno] }.compact.min
    end

    # Returns the highest line number of the lines in this request (nil values ignored).
    def last_lineno
      @lines.map { |line| line[:lineno] }.compact.max
    end
  end
end
@@ -0,0 +1,72 @@
1
# The RequestLogAnalyzer::Source module bundles all functionality that reads requests
# from some origin and feeds them into the pipeline for further processing. Requests
# (see RequestLogAnalyzer::Request) produced by a source are piped through filters
# (see RequestLogAnalyzer::Filter) and then handed to an aggregator
# (see RequestLogAnalyzer::Aggregator). A source instance is therefore the start of
# the RequestLogAnalyzer chain.
#
# - RequestLogAnalyzer::Source::Base is the base class; every source inherits from it.
# - RequestLogAnalyzer::Source::LogParser is currently the only implemented source.
module RequestLogAnalyzer::Source

  # Loads constants that reside in the RequestLogAnalyzer::Source namespace, using
  # RequestLogAnalyzer::load_default_class_file to locate the file that declares them.
  # <tt>const</tt>:: The constant to load in the RequestLogAnalyzer::Source namespace.
  def self.const_missing(const)
    RequestLogAnalyzer::load_default_class_file(self, const)
  end

  # The base Source class. All other sources should inherit from this class.
  #
  # A source implementation must at least provide an each_request method that yields
  # RequestLogAnalyzer::Request instances to be fed through the pipeline.
  class Base

    # A hash of options given at construction.
    attr_reader :options

    # The Request object currently being assembled.
    attr_reader :current_request

    # The total number of parsed lines.
    attr_reader :parsed_lines

    # The number of lines skipped because of warnings.
    attr_reader :skipped_lines

    # The total number of parsed requests.
    attr_reader :parsed_requests

    # The total number of requests skipped because of filters.
    attr_reader :skipped_requests

    # The FileFormat instance that describes the format of this source.
    attr_reader :file_format

    # Registers the file format and stores the provided options hash.
    # <tt>format</tt>::  The file format instance.
    # <tt>options</tt>:: A hash of options for the specific Source implementation.
    def initialize(format, options = {})
      @file_format = format
      @options     = options
    end

    # Called before each_request; override to perform any setup the source needs
    # before it can start producing Request instances.
    def prepare
    end

    # Produces the requests that are sent into the pipeline. Implementations should
    # yield RequestLogAnalyzer::Request instances.
    # <tt>options</tt>:: A Hash of options the implementation may use.
    def each_request(options = {}, &block) # :yields: request
      return true
    end

    # Called after each_request has finished; override to release resources or
    # perform any other wrap-up work.
    def finalize
    end

  end
end
@@ -0,0 +1,87 @@
1
require 'rubygems'
require 'activerecord'

module RequestLogAnalyzer::Source

  # ActiveRecord model for previously stored requests (Active Record hook).
  class Request < ActiveRecord::Base
    has_many :completed_lines
    has_many :processing_lines

    # Converts this database record into a RequestLogAnalyzer::Request instance,
    # merging in the attributes of the first completed and processing lines, if any.
    # <tt>file_format</tt>:: The file format instance for the new Request.
    def convert(file_format)
      send_attributes = self.attributes
      send_attributes.merge!(self.completed_lines.first.attributes) if self.completed_lines.first
      send_attributes.merge!(self.processing_lines.first.attributes) if self.processing_lines.first
      return RequestLogAnalyzer::Request.new(file_format, send_attributes)
    end
  end

  # ActiveRecord model for the completed (footer) lines of a request.
  class CompletedLine < ActiveRecord::Base
    belongs_to :request
  end

  # ActiveRecord model for the processing (header) lines of a request.
  class ProcessingLine < ActiveRecord::Base
    belongs_to :request
  end

  # The DatabaseLoader class gets log data from a SQLite database that was
  # previously filled by the database inserter aggregator.
  class DatabaseLoader < Base

    attr_reader :source_files, :file_format, :requests

    # Initializes the database loader instance.
    #
    # <tt>format</tt>::  The current file format instance.
    # <tt>options</tt>:: A hash of options; :source_files holds the database filename.
    def initialize(format, options = {})
      super(format, options)
      @source_files     = options[:source_files]
      @parsed_requests  = 0
      @requests         = []
      @progress_handler = nil
      @warning_handler  = nil
    end

    # Reads all stored requests from the SQLite database, converts each record
    # back into a RequestLogAnalyzer::Request, and yields them one by one.
    # Progress updates are emitted through the installed progress handler, if any.
    # <tt>options</tt>:: A Hash of options (currently unused by this source).
    def each_request(options = {}, &block) # :yields: request
      ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => @source_files)

      @progress_handler.call(:started, @source_files) if @progress_handler
      RequestLogAnalyzer::Source::Request.find(:all).each do |request|
        @parsed_requests += 1
        @progress_handler.call(:progress, @parsed_requests) if @progress_handler

        yield request.convert(self.file_format)
      end

      @progress_handler.call(:finished, @source_files) if @progress_handler
    end

    # Installs a progress handler proc that is called with status update messages.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @progress_handler = proc
    end

    # Installs a warning handler proc that is called with parse warning messages.
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # Called when any problems are encountered while loading; forwards the warning
    # to the installed warning handler, if any.
    #
    # BUG FIX: the original implementation passed <tt>@current_io.lineno</tt>, but
    # @current_io is never assigned in this class, so every warning raised a
    # NoMethodError on nil. A database source has no meaningful line number, so
    # nil is passed to the handler instead.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, nil) if @warning_handler
    end
  end
end
@@ -0,0 +1,274 @@
1
module RequestLogAnalyzer::Source

  # The LogParser class reads log data from a given source and uses a file format definition
  # to parse all relevant information about requests from the file. A FileFormat module should
  # be provided that contains the definitions of the lines that occur in the log data.
  #
  # The order in which lines occur is used to combine lines to a single request. If these lines
  # are mixed, requests cannot be combined properly. This can be the case if data is written to
  # the log file simultaneously by different mongrel processes. This problem is detected by the
  # parser. It will emit warnings when this occurs. LogParser supports multiple parse strategies
  # that deal differently with this problem.
  class LogParser < Base

    include Enumerable

    # The default parse strategy that will be used to parse the input.
    DEFAULT_PARSE_STRATEGY = 'assume-correct'

    # All available parse strategies.
    PARSE_STRATEGIES = ['cautious', 'assume-correct']

    attr_reader :source_files, :current_file, :current_lineno

    # Initializes the log file parser instance.
    # It will apply the language specific FileFormat module to this instance. It will use the line
    # definitions in this module to parse any input that it is given (see parse_io).
    #
    # <tt>format</tt>::  The current file format instance
    # <tt>options</tt>:: A hash of options that are used by the parser
    def initialize(format, options = {})
      super(format, options)
      @parsed_lines     = 0
      @parsed_requests  = 0
      @skipped_lines    = 0
      @skipped_requests = 0
      @current_request  = nil
      @current_source   = nil
      @current_file     = nil
      @current_lineno   = nil
      @source_files     = options[:source_files]
      @progress_handler = nil

      @options[:parse_strategy] ||= DEFAULT_PARSE_STRATEGY
      raise "Unknown parse strategy" unless PARSE_STRATEGIES.include?(@options[:parse_strategy])
    end

    # Reads the input, which can either be a file, sequence of files or STDIN to parse
    # lines specified in the FileFormat. These lines will be combined into Request instances,
    # that will be yielded. The actual parsing occurs in the parse_io method.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def each_request(options = {}, &block) # :yields: :request, request
      case @source_files
      when IO
        if @source_files == $stdin
          puts "Parsing from the standard input. Press CTRL+C to finish." # FIXME: not here
        end
        parse_stream(@source_files, options, &block)
      when String
        parse_file(@source_files, options, &block)
      when Array
        parse_files(@source_files, options, &block)
      else
        raise "Unknown source provided"
      end
    end

    # Make sure the Enumerable methods work as expected
    alias_method :each, :each_request

    # Parses a list of subsequent files of the same format, by calling parse_file for every
    # file in the array.
    # <tt>files</tt>::   The Array of files that should be parsed
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_files(files, options = {}, &block) # :yields: request
      files.each { |file| parse_file(file, options, &block) }
    end

    # Check if a file has a compressed extension in the filename.
    # If recognized, return the command string used to decompress the file;
    # an empty string is returned for files that are not compressed.
    #
    # BUG FIX: the decompression commands previously contained a broken placeholder
    # instead of interpolating the filename, so the commands never referenced the
    # file to decompress. The filename is now interpolated. The .tar.gz pattern also
    # escapes the inner dot so it cannot match arbitrary characters.
    #
    # SECURITY NOTE: the filename is interpolated into a shell command unescaped;
    # filenames containing shell metacharacters would be interpreted by the shell.
    # Consider Shellwords.escape if untrusted filenames are possible.
    def decompress_file?(filename)
      nice_command = "nice -n 5"

      return "#{nice_command} gunzip -c -d #{filename}" if filename.match(/\.tar\.gz$/) || filename.match(/\.tgz$/) || filename.match(/\.gz$/)
      return "#{nice_command} bunzip2 -c -d #{filename}" if filename.match(/\.bz2$/)
      return "#{nice_command} unzip -p #{filename}" if filename.match(/\.zip$/)

      return ""
    end

    # Parses a log file. Creates an IO stream for the provided file, and sends it to parse_io for
    # further handling. This method supports progress updates that can be used to display a progressbar
    #
    # If the logfile is compressed, it is uncompressed to stdout and read.
    # TODO: Check if IO.popen encounters problems with the given command line.
    # TODO: Fix progress bar that is broken for IO.popen, as it returns a single string.
    #
    # <tt>file</tt>::    The file that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_file(file, options = {}, &block)
      @current_source = File.expand_path(file)
      @source_changes_handler.call(:started, @current_source) if @source_changes_handler

      if decompress_file?(file).empty?
        # Plain file: progress updates are possible because the file size is known.
        @progress_handler = @dormant_progress_handler
        @progress_handler.call(:started, file) if @progress_handler

        File.open(file, 'r') { |f| parse_io(f, options, &block) }

        @progress_handler.call(:finished, file) if @progress_handler
        @progress_handler = nil
      else
        # Compressed file: read the decompression command's stdout instead.
        IO.popen(decompress_file?(file), 'r') { |f| parse_io(f, options, &block) }
      end

      @source_changes_handler.call(:finished, @current_source) if @source_changes_handler

      @current_source = nil
    end

    # Parses an IO stream. It will simply call parse_io. This function does not support progress updates
    # because the length of a stream is not known.
    # <tt>stream</tt>::  The IO stream that should be parsed.
    # <tt>options</tt>:: A Hash of options that will be passed to parse_io.
    def parse_stream(stream, options = {}, &block)
      parse_io(stream, options, &block)
    end

    # This method loops over each line of the input stream. It will try to parse this line as any of
    # the lines that are defined by the current file format (see RequestLogAnalyzer::FileFormat).
    # It will then combine these parsed lines into requests using heuristics. These requests (see
    # RequestLogAnalyzer::Request) will then be yielded for further processing in the pipeline.
    #
    # - RequestLogAnalyzer::LineDefinition#matches is called to test if a line matches a line definition of the file format.
    # - update_current_request is used to combine parsed lines into requests using heuristics.
    # - The method will yield progress updates if a progress handler is installed using progress=
    # - The method will yield parse warnings if a warning handler is installed using warning=
    #
    # <tt>io</tt>::      The IO instance to use as source
    # <tt>options</tt>:: A hash of options that can be used by the parser.
    def parse_io(io, options = {}, &block) # :yields: request
      @current_lineno = 1
      while line = io.gets
        # Only report progress every 255 lines to keep the overhead low.
        @progress_handler.call(:progress, io.pos) if @progress_handler && @current_lineno % 255 == 0

        if request_data = file_format.parse_line(line) { |wt, message| warn(wt, message) }
          @parsed_lines += 1
          update_current_request(request_data.merge(:source => @current_source, :lineno => @current_lineno), &block)
        end

        @current_lineno += 1
      end

      warn(:unfinished_request_on_eof, "End of file reached, but last request was not completed!") unless @current_request.nil?
      @current_lineno = nil
    end

    # Add a block to this method to install a progress handler while parsing.
    # <tt>proc</tt>:: The proc that will be called to handle progress update messages
    def progress=(proc)
      @dormant_progress_handler = proc
    end

    # Add a block to this method to install a warning handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle parse warning messages
    def warning=(proc)
      @warning_handler = proc
    end

    # Add a block to this method to install a source change handler while parsing,
    # <tt>proc</tt>:: The proc that will be called to handle source changes
    def source_changes=(proc)
      @source_changes_handler = proc
    end

    # This method is called by the parser if it encounters any parsing problems.
    # It will call the installed warning handler if any.
    #
    # By default, RequestLogAnalyzer::Controller will install a warning handler
    # that will pass the warnings to each aggregator so they can do something useful
    # with it.
    #
    # <tt>type</tt>::    The warning type (a Symbol)
    # <tt>message</tt>:: A message explaining the warning
    def warn(type, message)
      @warning_handler.call(type, message, @current_lineno) if @warning_handler
    end

    protected

    # Combines the different lines of a request into a single Request object. It will start a
    # new request when a header line is encountered and will emit the request when a footer line
    # is encountered.
    #
    # Combining the lines is done using heuristics. Problems can occur in this process. The
    # current parse strategy defines how these cases are handled.
    #
    # When using the 'assume-correct' parse strategy (default):
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - If a header line is found before the previous request was closed, the previous request
    #   will be yielded and a new request will be started.
    #
    # When using the 'cautious' parse strategy:
    # - Every line that is parsed before a header line is ignored as it cannot be included in
    #   any request. It will emit a :no_current_request warning.
    # - A header line that is parsed before a request is closed by a footer line, is a sign of
    #   an improperly ordered file. All data that is gathered for the request until then is
    #   discarded and the next request is ignored as well. An :unclosed_request warning is
    #   emitted.
    #
    # <tt>request_data</tt>:: A hash of data that was parsed from the last line.
    def update_current_request(request_data, &block) # :yields: request
      if header_line?(request_data)
        if @current_request
          case options[:parse_strategy]
          when 'assume-correct'
            handle_request(@current_request, &block)
            @current_request = @file_format.request(request_data)
          when 'cautious'
            @skipped_lines += 1
            warn(:unclosed_request, "Encountered header line (#{request_data[:line_definition].name.inspect}), but previous request was not closed!")
            @current_request = nil # remove all data that was parsed, skip next request as well.
          end
        elsif footer_line?(request_data)
          # A line that is both header and footer constitutes a complete single-line request.
          handle_request(@file_format.request(request_data), &block)
        else
          @current_request = @file_format.request(request_data)
        end
      else
        if @current_request
          @current_request << request_data
          if footer_line?(request_data)
            handle_request(@current_request, &block) # yield @current_request
            @current_request = nil
          end
        else
          @skipped_lines += 1
          # NOTE: typo fix in the warning message ("Parsebale" -> "Parseable").
          warn(:no_current_request, "Parseable line (#{request_data[:line_definition].name.inspect}) found outside of a request!")
        end
      end
    end

    # Handles the parsed request by sending it into the pipeline.
    #
    # - It will call RequestLogAnalyzer::Request#validate on the request instance
    # - It will send the request into the pipeline, checking whether it was accepted by all the filters.
    # - It will update the parsed_requests and skipped_requests variables accordingly
    #
    # <tt>request</tt>:: The parsed request instance (RequestLogAnalyzer::Request)
    def handle_request(request, &block) # :yields: :request, request
      @parsed_requests += 1
      request.validate
      accepted = block_given? ? yield(request) : true
      @skipped_requests += 1 unless accepted
    end

    # Checks whether a given line hash is a header line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def header_line?(hash)
      hash[:line_definition].header
    end

    # Checks whether a given line hash is a footer line according to the current file format.
    # <tt>hash</tt>:: A hash of data that was parsed from the line.
    def footer_line?(hash)
      hash[:line_definition].footer
    end
  end

end