apachecrunch 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,114 @@
1
+ require 'format_token_definition'
2
+ require 'derivation'
3
+
4
+ class ApacheCrunch
5
# Abstract base for a single token in a log format.
#
# Every method here raises NotImplementedError; concrete token classes
# are expected to override all of them.
class FormatToken
  # Performs whatever initial population is necessary for the token.
  #
  # Any arguments are accepted and ignored: the concrete token classes take
  # one or two arguments here, and without the splat an un-overridden call
  # made with arguments would raise ArgumentError instead of the intended
  # NotImplementedError.
  def populate!(*_args)
    raise NotImplementedError
  end

  # Name of the token.
  def name
    raise NotImplementedError
  end

  # Regular expression source text that matches the token's value.
  def regex
    raise NotImplementedError
  end

  # Whether the matched text should be captured.
  def captured?
    raise NotImplementedError
  end

  # Derivation rule object associated with the token.
  def derivation_rule
    raise NotImplementedError
  end
end
15
+
16
+
17
# A predefined token like %q or %r from the Apache log.
#
# Every query is answered by the token definition handed to populate!.
class PredefinedToken < FormatToken
  # Stores the definition object that the other methods read from.
  def populate!(token_definition)
    @token_definition = token_definition
  end

  # Name taken from the stored definition.
  def name
    @token_definition.name
  end

  # Regex text taken from the stored definition.
  def regex
    @token_definition.regex
  end

  # The definition's `captured` value.
  def captured?
    @token_definition.captured
  end

  # Derivation rule taken from the stored definition.
  def derivation_rule
    @token_definition.derivation_rule
  end
end
28
+
29
+
30
# A bare (literal) string appearing in a log format.
class StringToken < FormatToken
  # Remembers the literal string this token stands for.
  def populate!(string_value)
    @_string_value = string_value
  end

  # Literal strings have no name.
  def name
    nil
  end

  # The literal with every regex metacharacter escaped, so that it cannot
  # confuse the parsing that happens later.
  def regex
    Regexp.escape(@_string_value)
  end

  # Literal text is never captured.
  def captured?
    false
  end

  # Always a fresh NullDerivationRule.
  def derivation_rule
    NullDerivationRule.new
  end
end
48
+
49
+
50
# A token based on a request header.
class ReqheaderToken < FormatToken
  # Derives and stores the token name from the given header name.
  def populate!(header_name)
    @_name = _header_name_to_token_name(header_name)
  end

  # Symbol computed by populate!.
  def name
    @_name
  end

  # Any run of characters (possibly empty) that contains no double quote.
  def regex
    '[^"]*'
  end

  # Header values are always captured.
  def captured?
    true
  end

  # Always a fresh NullDerivationRule.
  def derivation_rule
    NullDerivationRule.new
  end

  # Lowercases the header name, turns hyphens into underscores, and prefixes
  # the result with "reqheader_", returning a Symbol.
  def _header_name_to_token_name(header_name)
    normalized = header_name.downcase.tr("-", "_")
    :"reqheader_#{normalized}"
  end
end
66
+
67
+
68
# A token based on an arbitrary regular expression.
class RegexToken < FormatToken
  # Stores the token name (the given name prefixed with "regex_", as a Symbol)
  # together with the regex text, which is kept unchanged.
  def populate!(regex_name, regex_text)
    @_name = :"regex_#{regex_name}"
    @_regex = regex_text
  end

  # Symbol computed by populate!.
  def name
    @_name
  end

  # Regex text exactly as passed to populate!.
  def regex
    @_regex
  end

  # Regex tokens are always captured.
  def captured?
    true
  end

  # Always a fresh NullDerivationRule.
  def derivation_rule
    NullDerivationRule.new
  end
end
80
+
81
+
82
# Generates FormatToken instances.
#
# This class does the work of figuring out which FormatToken subclass to make.
class FormatTokenFactory
  # Takes an Apache log format abbreviation and returns a corresponding,
  # already populated FormatToken.
  #
  # The checks run in this order:
  # 1. an abbreviation known to TokenDictionary -> PredefinedToken
  # 2. anything that does not begin with "%"     -> StringToken (literal text)
  # 3. "%%"                                      -> StringToken for "%"
  # 4. %{Header-Name}i                           -> ReqheaderToken
  # 5. %{name:regex}r                            -> RegexToken
  #
  # Raises a RuntimeError when none of the above applies.
  #
  # All patterns are anchored with \A rather than ^. With ^, a string that
  # merely contains a newline followed by "%{...}i" matched the header
  # pattern, and the text before the newline was silently discarded.
  def self.from_abbrev(abbrev)
    token_def = TokenDictionary.fetch(abbrev)

    if token_def
      # We found it in the dictionary, so just return a token based on it
      tok = PredefinedToken.new
      tok.populate!(token_def)
    elsif abbrev !~ /\A%/
      tok = StringToken.new
      tok.populate!(abbrev)
    elsif abbrev == "%%"
      tok = StringToken.new
      tok.populate!("%")
    elsif (header_match = /\A%\{([A-Za-z0-9-]+)\}i/.match(abbrev))
      # HTTP request header
      tok = ReqheaderToken.new
      tok.populate!(header_match[1])
    elsif (regex_match = /\A%\{(.*?):([^}]+)\}r/.match(abbrev))
      # Arbitrary regex: first capture is the name, second the regex text
      tok = RegexToken.new
      tok.populate!(regex_match[1], regex_match[2])
    else
      raise "Unable to parse format definition starting at '#{abbrev}'"
    end

    tok
  end
end
114
+ end
@@ -0,0 +1,183 @@
1
+ require 'cast'
2
+ require 'derivation'
3
+
4
+ class ApacheCrunch
5
# Defines the properties of a known Apache log format token (like %q or %h).
#
# The properties live on the class itself: the accessors below are class-level,
# so each subclass holds its own values in class-level instance variables.
class FormatTokenDefinition
  class << self
    attr_accessor :name, :abbrev, :regex, :caster, :derivation_rule, :captured
  end
end
9
+
10
+
11
# Definition of the "%h" token, named :remote_host.
class RemoteHostTokenDefinition < FormatTokenDefinition
  self.name = :remote_host
  self.abbrev = '%h'
  # One or more letters, digits, dots, or hyphens.
  self.regex = '[A-Za-z0-9.-]+'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
19
+
20
+
21
# Definition of the "%l" token, named :log_name.
class LogNameTokenDefinition < FormatTokenDefinition
  self.name = :log_name
  self.abbrev = '%l'
  # One or more non-whitespace characters.
  self.regex = '\S+'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
29
+
30
+
31
# Definition of the "%u" token, named :remote_user.
class RemoteUserTokenDefinition < FormatTokenDefinition
  self.name = :remote_user
  self.abbrev = '%u'
  # One or more characters, none of which is a colon.
  self.regex = '[^:]+'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
39
+
40
+
41
# Definition of the "%t" token, named :time.
class TimeTokenDefinition < FormatTokenDefinition
  self.name = :time
  self.abbrev = '%t'
  # Bracketed timestamp shaped like [dd/Mon/yyyy:hh:mm:ss +zzzz]: two digits,
  # three letters, four digits, three colon-separated two-digit groups, then a
  # space, a sign, and four digits.
  self.regex = '\[\d\d/[A-Za-z]{3}/\d\d\d\d:\d\d:\d\d:\d\d [-+]\d\d\d\d\]'
  self.caster = nil
  # Unlike most definitions, this one carries a TimeDerivationRule.
  self.derivation_rule = TimeDerivationRule.new
  self.captured = true
end
49
+
50
+
51
# Definition of the "%r" token, named :req_firstline.
class ReqFirstlineTokenDefinition < FormatTokenDefinition
  self.name = :req_firstline
  self.abbrev = '%r'
  # One or more characters, none of which is a double quote.
  self.regex = '[^"]+'
  self.caster = nil
  # Carries a ReqFirstlineDerivationRule rather than the null rule.
  self.derivation_rule = ReqFirstlineDerivationRule.new
  self.captured = true
end
59
+
60
+
61
# Definition of the "%s" token, named :status.
class StatusTokenDefinition < FormatTokenDefinition
  self.name = :status
  self.abbrev = '%s'
  # Either a run of digits or a single hyphen.
  self.regex = '\d+|-'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
69
+
70
+
71
# Definition of the "%b" token, named :bytes_sent.
#
# This class used to be written out twice in a row; the second copy reopened
# the class and overwrote the first copy's regex and caster. Only the values
# that were actually in effect after both copies ran are kept here.
class BytesSentTokenDefinition < FormatTokenDefinition
  @name = :bytes_sent
  @abbrev = "%b"
  # One or more digits and/or hyphens.
  # NOTE(review): presumably the hyphen covers Apache's CLF "-" for a response
  # with no body bytes -- confirm against the mod_log_config documentation.
  @regex = %q![\d-]+!
  @caster = CLFIntegerCast.new
  @derivation_rule = NullDerivationRule.new
  @captured = true
end
89
+
90
+
91
# Definition of the "%O" token, named :bytes_sent_with_headers.
class BytesSentWithHeadersTokenDefinition < FormatTokenDefinition
  self.name = :bytes_sent_with_headers
  self.abbrev = '%O'
  # One or more digits.
  self.regex = '\d+'
  self.caster = IntegerCast.new
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
99
+
100
+
101
# Definition of the "%D" token, named :serve_time_micro.
class ServeTimeMicroTokenDefinition < FormatTokenDefinition
  self.name = :serve_time_micro
  self.abbrev = '%D'
  # One or more digits.
  self.regex = '\d+'
  self.caster = IntegerCast.new
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
109
+
110
+
111
# Definition of the "%U" token, named :url_path.
class UrlPathTokenDefinition < FormatTokenDefinition
  self.name = :url_path
  self.abbrev = '%U'
  # A slash followed by any number of characters other than "?".
  self.regex = '/[^?]*'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
119
+
120
+
121
# Definition of the "%q" token, named :query_string.
class QueryStringTokenDefinition < FormatTokenDefinition
  self.name = :query_string
  self.abbrev = '%q'
  # An optional "?" followed by any number of non-whitespace characters, so
  # the empty string matches too.
  self.regex = '\??\S*'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
129
+
130
+
131
# Definition of the "%m" token, named :req_method.
class ReqMethodTokenDefinition < FormatTokenDefinition
  self.name = :req_method
  self.abbrev = '%m'
  # One or more uppercase ASCII letters.
  self.regex = '[A-Z]+'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
139
+
140
+
141
# Definition of the "%H" token, named :protocol.
class ProtocolTokenDefinition < FormatTokenDefinition
  self.name = :protocol
  self.abbrev = '%H'
  # One or more non-whitespace characters.
  self.regex = '\S+'
  self.caster = nil
  self.derivation_rule = NullDerivationRule.new
  self.captured = true
end
149
+
150
+
151
# Finds log format token definitions given information about them.
class TokenDictionary
  # Every known token definition class. Each class appears exactly once
  # (BytesSentTokenDefinition used to be listed twice, which was harmless for
  # lookups but redundant).
  @@_defs = [
    RemoteHostTokenDefinition,
    LogNameTokenDefinition,
    RemoteUserTokenDefinition,
    TimeTokenDefinition,
    ReqFirstlineTokenDefinition,
    StatusTokenDefinition,
    BytesSentTokenDefinition,
    BytesSentWithHeadersTokenDefinition,
    ServeTimeMicroTokenDefinition,
    UrlPathTokenDefinition,
    QueryStringTokenDefinition,
    ReqMethodTokenDefinition,
    ProtocolTokenDefinition
  ]

  # Returns the token definition class whose abbreviation equals `abbrev`.
  #
  # The list is searched in order and the first match wins. If none exists,
  # returns nil.
  def self.fetch(abbrev)
    @@_defs.find { |token_def| token_def.abbrev == abbrev }
  end
end
183
+ end
data/lib/log_parser.rb CHANGED
@@ -2,54 +2,58 @@ class ApacheCrunch
2
2
  # Parses a log file given a path and a Format instance
3
3
  class LogParser
4
4
  # Initializes the parser with the path to a log file and an EntryParser.
5
- def initialize(path, entry_parser, file_cls=File)
6
- @path = path
7
- @entry_parser = entry_parser
5
+ def initialize(entry_parser)
6
+ @_entry_parser = entry_parser
7
+ @_log_file = nil
8
8
 
9
- @_file_cls = file_cls
10
- @_file = nil
9
+ @_File = File
11
10
  end
12
11
 
13
- # Returns the next entry in the log file as a hash, or nil if we've reached EOF.
14
- #
15
- # The keys of the hash are names of LogFormatElements (e.g. :remote_host,
16
- # :reqheader_referer)
17
- def next_entry
18
- @_file = @_file_cls.open(@path) if @_file.nil?
12
+ # Handles dependency injection
13
+ def dep_inject!(file_cls)
14
+ @_File = file_cls
15
+ end
19
16
 
20
- while line_text = @_file.gets
17
+ # Returns the next parsed line in the log file as an Entry, or nil if we've reached EOF.
18
+ def next_entry
19
+ while line_text = @_log_file.gets
20
+ # This is if we've reached EOF:
21
21
  return nil if line_text.nil?
22
- logline = @entry_parser.from_text(line_text)
23
22
 
23
+ entry = @_entry_parser.parse(@_format, line_text)
24
24
  # The EntryParser returns nil and writes a warning if the line text doesn't
25
25
  # match our expected format.
26
- next if logline.nil?
26
+ next if entry.nil?
27
27
 
28
- return logline
28
+ return entry
29
29
  end
30
30
  end
31
31
 
32
32
  # Resets the LogParser's filehandle so we can start over.
33
- def reset
34
- @_file = nil
33
+ def reset_file!
34
+ @_log_file.close
35
+ @_log_file = @_File.open(@_log_file.path)
35
36
  end
36
37
 
37
- # Makes the LogParser close its current log file and start parsing a new one instead
38
+ # Makes the LogParser start parsing a new log file
38
39
  #
39
40
  # `new_target` is a writable file object that the parser should start parsing, and if
40
- # in_place is true, we actually replace the contents of the current target with those
41
+ # `in_place` is true, we actually replace the contents of the current target with those
41
42
  # of the new target.
42
- def replace_target(new_target, in_place)
43
- new_target.close
43
+ def set_file!(new_file)
44
+ @_log_file.close unless @_log_file.nil?
45
+ @_log_file = new_file
46
+ end
44
47
 
45
- if in_place
46
- old_path = @_file.path
47
- @_file_cls.rename(new_target.path, old_path)
48
- else
49
- @path = new_target.path
50
- end
48
+ # Replaces the LogParser current file with another. Like, for real, on the filesystem.
49
+ def replace_file!(new_file)
50
+ @_log_file.close
51
+ @_File.rename(new_file.path, @_log_file.path)
52
+ @_log_file = @_File.open(@_log_file.path)
53
+ end
51
54
 
52
- @_file = nil
55
+ def set_format!(format)
56
+ @_format = format
53
57
  end
54
58
  end
55
59
 
@@ -64,11 +68,15 @@ class ApacheCrunch
64
68
  # First we generate a Format instance based on the format definition we were given
65
69
  log_format = FormatFactory.from_format_def(format_def)
66
70
 
67
- # Now we generate a line parser
68
- log_line_parser = EntryParser.new(log_format, progress_meter)
71
+ # Now we generate a parser for the individual entries
72
+ entry_parser = EntryParser.new
73
+ entry_parser.add_progress_meter!(progress_meter)
69
74
 
70
75
  # And now we can instantiate and return a LogParser
71
- return LogParser.new(path, log_line_parser)
76
+ log_parser = LogParser.new(entry_parser)
77
+ log_parser.set_file!(open(path, "r"))
78
+ log_parser.set_format!(log_format)
79
+ log_parser
72
80
  end
73
81
  end
74
82
  end
data/lib/procedure_dsl.rb CHANGED
@@ -1,308 +1,318 @@
1
- # Abstract for a procedure routine.
2
- class ProcedureRoutine
3
- def initialize(log_parser)
4
- @_log_parser = log_parser
5
- @_current_entry = nil
6
- end
1
+ require 'element_value_fetcher'
2
+
3
+ class ApacheCrunch
4
+ # Abstract for a procedure routine.
5
+ class ProcedureRoutine
6
+ def initialize(log_parser)
7
+ @_log_parser = log_parser
8
+ @_current_entry = nil
9
+ end
7
10
 
8
- # Allows blocks passed to a DSL routine to access parameters from the current log entry
9
- def method_missing(sym, *args)
10
- @_current_entry[sym]
11
- end
11
+ # Allows blocks passed to a DSL routine to access parameters from the current log entry
12
+ def method_missing(sym, *args)
13
+ @_current_entry.fetch(sym)
14
+ end
12
15
 
13
- # Executes the DSL routine using the given block
14
- #
15
- # Abstract method
16
- def execute(&blk)
17
- raise "Not implemented"
18
- end
16
+ # Executes the DSL routine using the given block
17
+ #
18
+ # Abstract method
19
+ def execute(&blk)
20
+ raise "Not implemented"
21
+ end
19
22
 
20
- # Anything that needs to happen after the routine completes but before it returns its
21
- # result can go in here.
22
- def finish
23
- @_log_parser.reset
23
+ # Anything that needs to happen after the routine completes but before it returns its
24
+ # result can go in here.
25
+ def finish
26
+ @_log_parser.reset_file!
27
+ end
24
28
  end
25
- end
26
29
 
27
30
 
28
- # DSL routine that returns the number of log entries where the block evaluates to true
29
- class CountWhere < ProcedureRoutine
30
- def execute(&blk)
31
- count = 0
32
- while @_current_entry = @_log_parser.next_entry
33
- if instance_eval(&blk)
34
- count += 1
31
+ # DSL routine that returns the number of log entries where the block evaluates to true
32
+ class CountWhere < ProcedureRoutine
33
+ def execute(&blk)
34
+ count = 0
35
+ while @_current_entry = @_log_parser.next_entry
36
+ if instance_eval(&blk)
37
+ count += 1
38
+ end
35
39
  end
40
+ count
36
41
  end
37
- count
38
42
  end
39
- end
40
43
 
41
44
 
42
- # DSL routine that executes the block for every log entry
43
- class Each < ProcedureRoutine
44
- def execute(&blk)
45
- while @_current_entry = @_log_parser.next_entry
46
- instance_eval(&blk)
45
+ # DSL routine that executes the block for every log entry
46
+ class Each < ProcedureRoutine
47
+ def execute(&blk)
48
+ while @_current_entry = @_log_parser.next_entry
49
+ instance_eval(&blk)
50
+ end
47
51
  end
48
52
  end
49
- end
50
53
 
51
54
 
52
- # DSL routine(s) that filter(s) for entries for which the given block evaluates to true
53
- #
54
- # This can be called as 'filter()', which means the filtering happens in a temporary file, or
55
- # as 'filter(path)', which means the filtering happens in the given file. It can also be called
56
- # as 'filter!()', which means the filtering happens in place, clobbering what's in apachecrunch's
57
- # target file.
58
- class Filter < ProcedureRoutine
59
- def execute(path=nil, in_place=false, &blk)
60
- @_in_place = in_place
61
- @_results_file = _make_results_file(path, in_place)
62
-
63
- while @_current_entry = @_log_parser.next_entry
64
- if instance_eval(&blk)
65
- @_results_file.write(@_current_entry[:text])
55
+ # DSL routine(s) that filter(s) for entries for which the given block evaluates to true
56
+ #
57
+ # This can be called as 'filter()', which means the filtering happens in a temporary file, or
58
+ # as 'filter(path)', which means the filtering happens in the given file. It can also be called
59
+ # as 'filter!()', which means the filtering happens in place, clobbering what's in apachecrunch's
60
+ # target file.
61
+ class Filter < ProcedureRoutine
62
+ def execute(path=nil, in_place=false, &blk)
63
+ @_in_place = in_place
64
+ @_results_file = _make_results_file(path, in_place)
65
+
66
+ while @_current_entry = @_log_parser.next_entry
67
+ if instance_eval(&blk)
68
+ @_results_file.write(@_current_entry.fetch(:text))
69
+ end
66
70
  end
67
71
  end
68
- end
69
72
 
70
- def finish
71
- @_log_parser.replace_target(@_results_file, @_in_place)
72
- end
73
+ def finish
74
+ @_results_file.close
75
+ @_results_file = open(@_results_file.path)
76
+ if @_in_place
77
+ @_log_parser.replace_file!(@_results_file)
78
+ else
79
+ @_log_parser.set_file!(@_results_file)
80
+ end
81
+ end
73
82
 
74
- # Returns a writable file object to which the results of the filter should be written.
75
- def _make_results_file(path, in_place)
76
- if path.nil?
77
- # If no path passed (this includes the case where the filter is being performed
78
- # in place), we want a temp file.
79
- return Tempfile.new("apachecrunch")
80
- else
81
- return open(path, "w")
83
+ # Returns a writable file object to which the results of the filter should be written.
84
+ def _make_results_file(path, in_place)
85
+ if path.nil?
86
+ # If no path passed (this includes the case where the filter is being performed
87
+ # in place), we want a temp file.
88
+ return Tempfile.new("apachecrunch")
89
+ else
90
+ return open(path, "w")
91
+ end
82
92
  end
83
93
  end
84
- end
85
94
 
86
- # DSL routine that returns the count of entries with each found value of the given block
87
- #
88
- # You might for instance run this with the block { status }, and you'd get back something like
89
- # {"200" => 941, "301" => 41, "404" => 2, "500" => 0}
90
- class CountBy < ProcedureRoutine
91
- def execute(&blk)
92
- counts = {}
93
- while @_current_entry = @_log_parser.next_entry
94
- val = instance_eval(&blk)
95
- if counts.key?(val)
96
- counts[val] += 1
97
- else
98
- counts[val] = 1
95
+ # DSL routine that returns the count of entries with each found value of the given block
96
+ #
97
+ # You might for instance run this with the block { status }, and you'd get back something like
98
+ # {"200" => 941, "301" => 41, "404" => 2, "500" => 0}
99
+ class CountBy < ProcedureRoutine
100
+ def execute(&blk)
101
+ counts = {}
102
+ while @_current_entry = @_log_parser.next_entry
103
+ val = instance_eval(&blk)
104
+ if counts.key?(val)
105
+ counts[val] += 1
106
+ else
107
+ counts[val] = 1
108
+ end
99
109
  end
110
+ return counts
100
111
  end
101
- return counts
102
112
  end
103
- end
104
113
 
105
114
 
106
- # DSL routine that finds the distribution of (numeric) values to which the given block evaluates
107
- #
108
- # For example,
109
- #
110
- # distribution 100 do
111
- # bytes_sent
112
- # end
113
- #
114
- # would return a hash with keys from 0 up by multiples of 100, the value of each being the number
115
- # of entries for which bytes_sent is between that key and the next key.
116
- class Distribution < ProcedureRoutine
117
- def execute(bucket_width, &blk)
118
- dist = {}
119
- while @_current_entry = @_log_parser.next_entry
120
- val = instance_eval(&blk)
121
- k = _key_for(val, bucket_width)
122
- if dist.key?(k)
123
- dist[k] += 1
124
- else
125
- dist[k] = 1
115
+ # DSL routine that finds the distribution of (numeric) values to which the given block evaluates
116
+ #
117
+ # For example,
118
+ #
119
+ # distribution 100 do
120
+ # bytes_sent
121
+ # end
122
+ #
123
+ # would return a hash with keys from 0 up by multiples of 100, the value of each being the number
124
+ # of entries for which bytes_sent is between that key and the next key.
125
+ class Distribution < ProcedureRoutine
126
+ def execute(bucket_width, &blk)
127
+ dist = {}
128
+ while @_current_entry = @_log_parser.next_entry
129
+ val = instance_eval(&blk)
130
+ k = _key_for(val, bucket_width)
131
+ if dist.key?(k)
132
+ dist[k] += 1
133
+ else
134
+ dist[k] = 1
135
+ end
126
136
  end
127
- end
128
137
 
129
- # Backfill keys for which we didn't find a value
130
- 0.step(dist.keys.max, bucket_width).each do |k|
131
- dist[k] = 0 unless dist.key?(k)
138
+ # Backfill keys for which we didn't find a value
139
+ 0.step(dist.keys.max, bucket_width).each do |k|
140
+ dist[k] = 0 unless dist.key?(k)
141
+ end
142
+
143
+ dist
132
144
  end
133
145
 
134
- dist
146
+ # Determines the key for the distribution hash given the value and step
147
+ def _key_for(val, bucket_width)
148
+ (val.to_i / bucket_width) * bucket_width
149
+ end
135
150
  end
136
151
 
137
- # Determines the key for the distribution hash given the value and step
138
- def _key_for(val, bucket_width)
139
- (val.to_i / bucket_width) * bucket_width
140
- end
141
- end
142
152
 
153
+ # Same as Distribution, but the buckets get exponentially wider
154
+ class LogDistribution < ProcedureRoutine
155
+ def execute(width_base, &blk)
156
+ dist = {}
157
+ while @_current_entry = @_log_parser.next_entry
158
+ val = instance_eval(&blk)
159
+ k = _key_for(val, width_base)
160
+ if dist.key?(k)
161
+ dist[k] += 1
162
+ else
163
+ dist[k] = 1
164
+ end
165
+ end
143
166
 
144
- # Same as Distribution, but the buckets get exponentially wider
145
- class LogDistribution < ProcedureRoutine
146
- def execute(width_base, &blk)
147
- dist = {}
148
- while @_current_entry = @_log_parser.next_entry
149
- val = instance_eval(&blk)
150
- k = _key_for(val, width_base)
151
- if dist.key?(k)
152
- dist[k] += 1
153
- else
154
- dist[k] = 1
167
+ # Backfill keys for which we didn't find a value
168
+ k = dist.keys.min
169
+ max_key = dist.keys.max
170
+ while k *= width_base and k < max_key
171
+ dist[k] = 0 unless dist.key?(k)
155
172
  end
156
- end
157
173
 
158
- # Backfill keys for which we didn't find a value
159
- k = dist.keys.min
160
- max_key = dist.keys.max
161
- while k *= width_base and k < max_key
162
- dist[k] = 0 unless dist.key?(k)
174
+ dist
163
175
  end
164
176
 
165
- dist
166
- end
167
-
168
- # Determines the key for the distribution hash given the value and logarithmic base for
169
- # the bucket width
170
- def _key_for(val, width_base)
171
- exp = (Math.log(val) / Math.log(width_base)).to_i
172
- width_base ** exp
177
+ # Determines the key for the distribution hash given the value and logarithmic base for
178
+ # the bucket width
179
+ def _key_for(val, width_base)
180
+ exp = (Math.log(val) / Math.log(width_base)).to_i
181
+ width_base ** exp
182
+ end
173
183
  end
174
- end
175
184
 
176
185
 
177
- # DSL routine that determines a confidence interval for the values to which the block evaluates
178
- #
179
- # For example,
180
- #
181
- # confidence_interval 95 do
182
- # time_to_serve
183
- # end
184
- #
185
- # would return two numbers, the lower and upper bound of a 95% confidence interval for the values
186
- # of time_to_serve.
187
- class ConfidenceInterval < ProcedureRoutine
188
- def execute(confidence, &blk)
189
- # Build a list of all the values found
190
- values = []
191
- while @_current_entry = @_log_parser.next_entry
192
- values << instance_eval(&blk)
193
- end
194
- values.sort!
186
+ # DSL routine that determines a confidence interval for the values to which the block evaluates
187
+ #
188
+ # For example,
189
+ #
190
+ # confidence_interval 95 do
191
+ # time_to_serve
192
+ # end
193
+ #
194
+ # would return two numbers, the lower and upper bound of a 95% confidence interval for the values
195
+ # of time_to_serve.
196
+ class ConfidenceInterval < ProcedureRoutine
197
+ def execute(confidence, &blk)
198
+ # Build a list of all the values found
199
+ values = []
200
+ while @_current_entry = @_log_parser.next_entry
201
+ values << instance_eval(&blk)
202
+ end
203
+ values.sort!
195
204
 
196
- # Determine how many values are outside the bounds of the CI
197
- count_outside = (values.length * (1.0 - confidence/100.0)).to_i
205
+ # Determine how many values are outside the bounds of the CI
206
+ count_outside = (values.length * (1.0 - confidence/100.0)).to_i
198
207
 
199
- # Find the bounds of the confidence interval
200
- return values[count_outside / 2], values[-count_outside / 2]
208
+ # Find the bounds of the confidence interval
209
+ return values[count_outside / 2], values[-count_outside / 2]
210
+ end
201
211
  end
202
- end
203
-
204
212
 
205
- # DSL routine that finds the most common n values for the given block.
206
- #
207
- # Returns a list of lists, each of which is [value, count]. This list is sorted by count.
208
- class MostCommon < ProcedureRoutine
209
- def execute(n, &blk)
210
- counts = CountBy.new(@_log_parser).execute(&blk)
211
213
 
212
- # Sort the block values descending
213
- sorted_vals = counts.keys.sort do |val_a,val_b|
214
- - (counts[val_a] <=> counts[val_b])
215
- end
214
+ # DSL routine that finds the most common n values for the given block.
215
+ #
216
+ # Returns a list of lists, each of which is [value, count]. This list is sorted by count.
217
+ class MostCommon < ProcedureRoutine
218
+ def execute(n, &blk)
219
+ counts = CountBy.new(@_log_parser).execute(&blk)
220
+
221
+ # Sort the block values descending
222
+ sorted_vals = counts.keys.sort do |val_a,val_b|
223
+ - (counts[val_a] <=> counts[val_b])
224
+ end
216
225
 
217
- sorted_vals[0..n].map do |val|
218
- [val, counts[val]]
226
+ sorted_vals[0..n].map do |val|
227
+ [val, counts[val]]
228
+ end
219
229
  end
220
230
  end
221
- end
222
231
 
223
232
 
224
- # The environment in which a procedure file is evaluated.
225
- #
226
- # A procedure file is some ruby code that uses our DSL.
227
- class ProcedureEnvironment
228
- def initialize(log_parser)
229
- @_log_parser = log_parser
230
- end
233
+ # The environment in which a procedure file is evaluated.
234
+ #
235
+ # A procedure file is some ruby code that uses our DSL.
236
+ class ProcedureEnvironment
237
+ def initialize(log_parser)
238
+ @_log_parser = log_parser
239
+ end
231
240
 
232
- # Evaluates the given string as a procedure in our DSL
233
- def eval_procedure(proc_string)
234
- eval proc_string
235
- end
241
+ # Evaluates the given string as a procedure in our DSL
242
+ def eval_procedure(proc_string)
243
+ eval proc_string
244
+ end
236
245
 
237
- # DSL routine 'count_where'
238
- def count_where(&blk)
239
- routine = CountWhere.new(@_log_parser)
240
- rv = routine.execute(&blk)
241
- routine.finish
242
- rv
243
- end
246
+ # DSL routine 'count_where'
247
+ def count_where(&blk)
248
+ routine = CountWhere.new(@_log_parser)
249
+ rv = routine.execute(&blk)
250
+ routine.finish
251
+ rv
252
+ end
244
253
 
245
- # DSL routine 'filter!'
246
- def filter!(&blk)
247
- routine = Filter.new(@_log_parser)
248
- routine.execute(nil, true, &blk)
249
- routine.finish
250
- nil
251
- end
254
+ # DSL routine 'filter!'
255
+ def filter!(&blk)
256
+ routine = Filter.new(@_log_parser)
257
+ routine.execute(nil, true, &blk)
258
+ routine.finish
259
+ nil
260
+ end
252
261
 
253
- # DSL routine 'filter'
254
- def filter(target_path=nil, &blk)
255
- routine = Filter.new(@_log_parser)
256
- routine.execute(target_path, &blk)
257
- routine.finish
258
- nil
259
- end
262
+ # DSL routine 'filter'
263
+ def filter(target_path=nil, &blk)
264
+ routine = Filter.new(@_log_parser)
265
+ routine.execute(target_path, &blk)
266
+ routine.finish
267
+ nil
268
+ end
260
269
 
261
- # DSL routine 'each'
262
- def each(&blk)
263
- routine = Each.new(@_log_parser)
264
- routine.execute(&blk)
265
- routine.finish
266
- nil
267
- end
270
+ # DSL routine 'each'
271
+ def each(&blk)
272
+ routine = Each.new(@_log_parser)
273
+ routine.execute(&blk)
274
+ routine.finish
275
+ nil
276
+ end
268
277
 
269
- # DSL routine 'count_by'
270
- def count_by(&blk)
271
- routine = CountBy.new(@_log_parser)
272
- rv = routine.execute(&blk)
273
- routine.finish
274
- rv
275
- end
278
+ # DSL routine 'count_by'
279
+ def count_by(&blk)
280
+ routine = CountBy.new(@_log_parser)
281
+ rv = routine.execute(&blk)
282
+ routine.finish
283
+ rv
284
+ end
276
285
 
277
- # DSL routine 'distribution'
278
- def distribution(bucket_width, &blk)
279
- routine = Distribution.new(@_log_parser)
280
- rv = routine.execute(bucket_width, &blk)
281
- routine.finish
282
- rv
283
- end
286
+ # DSL routine 'distribution'
287
+ def distribution(bucket_width, &blk)
288
+ routine = Distribution.new(@_log_parser)
289
+ rv = routine.execute(bucket_width, &blk)
290
+ routine.finish
291
+ rv
292
+ end
284
293
 
285
- # DSL routine 'log_distribution'
286
- def log_distribution(width_base, &blk)
287
- routine = LogDistribution.new(@_log_parser)
288
- rv = routine.execute(width_base, &blk)
289
- routine.finish
290
- rv
291
- end
294
+ # DSL routine 'log_distribution'
295
+ def log_distribution(width_base, &blk)
296
+ routine = LogDistribution.new(@_log_parser)
297
+ rv = routine.execute(width_base, &blk)
298
+ routine.finish
299
+ rv
300
+ end
292
301
 
293
- # DSL routine 'confidence_interval'
294
- def confidence_interval(confidence, &blk)
295
- routine = ConfidenceInterval.new(@_log_parser)
296
- rv = routine.execute(confidence, &blk)
297
- routine.finish
298
- rv
299
- end
302
+ # DSL routine 'confidence_interval'
303
+ def confidence_interval(confidence, &blk)
304
+ routine = ConfidenceInterval.new(@_log_parser)
305
+ rv = routine.execute(confidence, &blk)
306
+ routine.finish
307
+ rv
308
+ end
300
309
 
301
- # DSL routine 'most_common'
302
- def most_common(n, &blk)
303
- routine = MostCommon.new(@_log_parser)
304
- rv = routine.execute(n, &blk)
305
- routine.finish
306
- rv
310
+ # DSL routine 'most_common'
311
+ def most_common(n, &blk)
312
+ routine = MostCommon.new(@_log_parser)
313
+ rv = routine.execute(n, &blk)
314
+ routine.finish
315
+ rv
316
+ end
307
317
  end
308
318
  end