apachecrunch 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,114 @@
1
+ require 'format_token_definition'
2
+ require 'derivation'
3
+
4
+ class ApacheCrunch
5
+ # Abstract base class for a token in a log format
6
+ class FormatToken
7
+ # Performs whatever initial population is necessary for the token.
8
+ def populate!; raise NotImplementedError; end
9
+
10
+ def name; raise NotImplementedError; end
11
+ def regex; raise NotImplementedError; end
12
+ def captured?; raise NotImplementedError; end
13
+ def derivation_rule; raise NotImplementedError; end
14
+ end
15
+
16
+
17
+ # A predefined token like %q or %r from the Apache log.
18
+ class PredefinedToken < FormatToken
19
+ def populate!(token_definition)
20
+ @token_definition = token_definition
21
+ end
22
+
23
+ def name; @token_definition.name; end
24
+ def regex; @token_definition.regex; end
25
+ def captured?; @token_definition.captured; end
26
+ def derivation_rule; @token_definition.derivation_rule; end
27
+ end
28
+
29
+
30
+ # A bare string in a log format.
31
+ class StringToken < FormatToken
32
+ # Initializes the instance given the string it represents
33
+ def populate!(string_value)
34
+ @_string_value = string_value
35
+ end
36
+
37
+ def name; nil; end
38
+
39
+ def regex
40
+ # Make sure there aren't any regex special characters in the string that will confuse
41
+ # the parsing later.
42
+ Regexp.escape(@_string_value)
43
+ end
44
+
45
+ def captured?; false; end
46
+ def derivation_rule; NullDerivationRule.new; end
47
+ end
48
+
49
+
50
+ # A token based on a request header.
51
+ class ReqheaderToken < FormatToken
52
+ def populate!(header_name)
53
+ @_name = _header_name_to_token_name(header_name)
54
+ end
55
+
56
+ def name; @_name; end
57
+ def regex; '[^"]*'; end
58
+ def captured?; true; end
59
+ def derivation_rule; NullDerivationRule.new; end
60
+
61
+ # Lowercases header name and turns hyphens into underscores
62
+ def _header_name_to_token_name(header_name)
63
+ ("reqheader_" + header_name.downcase().gsub("-", "_")).to_sym
64
+ end
65
+ end
66
+
67
+
68
+ # A token based on an arbitrary regular expression.
69
+ class RegexToken < FormatToken
70
+ def populate!(regex_name, regex_text)
71
+ @_name = "regex_#{regex_name}".to_sym
72
+ @_regex = regex_text
73
+ end
74
+
75
+ def name; @_name; end
76
+ def regex; @_regex; end
77
+ def captured?; true; end
78
+ def derivation_rule; NullDerivationRule.new; end
79
+ end
80
+
81
+
82
+ # Generates FormatToken instances.
83
+ #
84
+ # This class does the work of figuring out which FormatToken subclass to make.
85
+ class FormatTokenFactory
86
+ # Takes an Apache log format abbreviation and returns a corresponding FormatToken
87
+ def self.from_abbrev(abbrev)
88
+ token_def = TokenDictionary.fetch(abbrev)
89
+ if token_def
90
+ # We found it in the dictionary, so just return a Token based on it
91
+ tok = PredefinedToken.new
92
+ tok.populate!(token_def)
93
+ elsif abbrev !~ /^%/
94
+ tok = StringToken.new
95
+ tok.populate!(abbrev)
96
+ elsif abbrev == "%%"
97
+ tok = StringToken.new
98
+ tok.populate!("%")
99
+ elsif abbrev =~ /^%\{([A-Za-z0-9-]+)\}i/
100
+ # HTTP request header
101
+ tok = ReqheaderToken.new
102
+ tok.populate!($1)
103
+ elsif abbrev =~ /^%\{(.*?):([^}]+)\}r/
104
+ # Arbitrary regex
105
+ tok = RegexToken.new
106
+ tok.populate!($1, $2)
107
+ else
108
+ raise "Unable to parse format definition starting at '#{abbrev}'"
109
+ end
110
+
111
+ tok
112
+ end
113
+ end
114
+ end
@@ -0,0 +1,183 @@
1
+ require 'cast'
2
+ require 'derivation'
3
+
4
+ class ApacheCrunch
5
+ # Defines the properties of a known Apache log format token (like %q or %h)
6
+ class FormatTokenDefinition
7
+ class << self; attr_accessor :name, :abbrev, :regex, :caster, :derivation_rule, :captured; end
8
+ end
9
+
10
+
11
+ class RemoteHostTokenDefinition < FormatTokenDefinition
12
+ @name = :remote_host
13
+ @abbrev = "%h"
14
+ @regex = %q![A-Za-z0-9.-]+!
15
+ @caster = nil
16
+ @derivation_rule = NullDerivationRule.new
17
+ @captured = true
18
+ end
19
+
20
+
21
+ class LogNameTokenDefinition < FormatTokenDefinition
22
+ @name = :log_name
23
+ @abbrev = "%l"
24
+ @regex = %q!\S+!
25
+ @caster = nil
26
+ @derivation_rule = NullDerivationRule.new
27
+ @captured = true
28
+ end
29
+
30
+
31
+ class RemoteUserTokenDefinition < FormatTokenDefinition
32
+ @name = :remote_user
33
+ @abbrev = "%u"
34
+ @regex = %q![^:]+!
35
+ @caster = nil
36
+ @derivation_rule = NullDerivationRule.new
37
+ @captured = true
38
+ end
39
+
40
+
41
+ class TimeTokenDefinition < FormatTokenDefinition
42
+ @name = :time
43
+ @abbrev = "%t"
44
+ @regex = %q!\[\d\d/[A-Za-z]{3}/\d\d\d\d:\d\d:\d\d:\d\d [-+]\d\d\d\d\]!
45
+ @caster = nil
46
+ @derivation_rule = TimeDerivationRule.new
47
+ @captured = true
48
+ end
49
+
50
+
51
+ class ReqFirstlineTokenDefinition < FormatTokenDefinition
52
+ @name = :req_firstline
53
+ @abbrev = "%r"
54
+ @regex = %q![^"]+!
55
+ @caster = nil
56
+ @derivation_rule = ReqFirstlineDerivationRule.new
57
+ @captured = true
58
+ end
59
+
60
+
61
+ class StatusTokenDefinition < FormatTokenDefinition
62
+ @name = :status
63
+ @abbrev = "%s"
64
+ @regex = %q!\d+|-!
65
+ @caster = nil
66
+ @derivation_rule = NullDerivationRule.new
67
+ @captured = true
68
+ end
69
+
70
+
71
+ class BytesSentTokenDefinition < FormatTokenDefinition
72
+ @name = :bytes_sent
73
+ @abbrev = "%b"
74
+ @regex = %q!\d+!
75
+ @caster = IntegerCast.new
76
+ @derivation_rule = NullDerivationRule.new
77
+ @captured = true
78
+ end
79
+
80
+
81
+ class BytesSentTokenDefinition < FormatTokenDefinition
82
+ @name = :bytes_sent
83
+ @abbrev = "%b"
84
+ @regex = %q![\d-]+!
85
+ @caster = CLFIntegerCast.new
86
+ @derivation_rule = NullDerivationRule.new
87
+ @captured = true
88
+ end
89
+
90
+
91
+ class BytesSentWithHeadersTokenDefinition < FormatTokenDefinition
92
+ @name = :bytes_sent_with_headers
93
+ @abbrev = "%O"
94
+ @regex = %q!\d+!
95
+ @caster = IntegerCast.new
96
+ @derivation_rule = NullDerivationRule.new
97
+ @captured = true
98
+ end
99
+
100
+
101
+ class ServeTimeMicroTokenDefinition < FormatTokenDefinition
102
+ @name = :serve_time_micro
103
+ @abbrev = "%D"
104
+ @regex = %q!\d+!
105
+ @caster = IntegerCast.new
106
+ @derivation_rule = NullDerivationRule.new
107
+ @captured = true
108
+ end
109
+
110
+
111
+ class UrlPathTokenDefinition < FormatTokenDefinition
112
+ @name = :url_path
113
+ @abbrev = "%U"
114
+ @regex = %q!/[^?]*!
115
+ @caster = nil
116
+ @derivation_rule = NullDerivationRule.new
117
+ @captured = true
118
+ end
119
+
120
+
121
+ class QueryStringTokenDefinition < FormatTokenDefinition
122
+ @name = :query_string
123
+ @abbrev = "%q"
124
+ @regex = %q!\??\S*!
125
+ @caster = nil
126
+ @derivation_rule = NullDerivationRule.new
127
+ @captured = true
128
+ end
129
+
130
+
131
+ class ReqMethodTokenDefinition < FormatTokenDefinition
132
+ @name = :req_method
133
+ @abbrev = "%m"
134
+ @regex = %q![A-Z]+!
135
+ @caster = nil
136
+ @derivation_rule = NullDerivationRule.new
137
+ @captured = true
138
+ end
139
+
140
+
141
+ class ProtocolTokenDefinition < FormatTokenDefinition
142
+ @name = :protocol
143
+ @abbrev = "%H"
144
+ @regex = %q!\S+!
145
+ @caster = nil
146
+ @derivation_rule = NullDerivationRule.new
147
+ @captured = true
148
+ end
149
+
150
+
151
+ # Finds log format elements given information about them.
152
+ class TokenDictionary
153
+ @@_defs = [
154
+ RemoteHostTokenDefinition,
155
+ LogNameTokenDefinition,
156
+ RemoteUserTokenDefinition,
157
+ TimeTokenDefinition,
158
+ ReqFirstlineTokenDefinition,
159
+ StatusTokenDefinition,
160
+ BytesSentTokenDefinition,
161
+ BytesSentTokenDefinition,
162
+ BytesSentWithHeadersTokenDefinition,
163
+ ServeTimeMicroTokenDefinition,
164
+ UrlPathTokenDefinition,
165
+ QueryStringTokenDefinition,
166
+ ReqMethodTokenDefinition,
167
+ ProtocolTokenDefinition
168
+ ]
169
+
170
+ # Returns the FormatTokenDefinition subclass with the given abbreviation.
171
+ #
172
+ # If none exists, returns nil.
173
+ def self.fetch(abbrev)
174
+ @@_defs.each do |token_def|
175
+ if token_def.abbrev == abbrev
176
+ return token_def
177
+ end
178
+ end
179
+
180
+ nil
181
+ end
182
+ end
183
+ end
data/lib/log_parser.rb CHANGED
@@ -2,54 +2,58 @@ class ApacheCrunch
2
2
  # Parses a log file given a path and a Format instance
3
3
  class LogParser
4
4
  # Initializes the parser with the path to a log file and an EntryParser.
5
- def initialize(path, entry_parser, file_cls=File)
6
- @path = path
7
- @entry_parser = entry_parser
5
+ def initialize(entry_parser)
6
+ @_entry_parser = entry_parser
7
+ @_log_file = nil
8
8
 
9
- @_file_cls = file_cls
10
- @_file = nil
9
+ @_File = File
11
10
  end
12
11
 
13
- # Returns the next entry in the log file as a hash, or nil if we've reached EOF.
14
- #
15
- # The keys of the hash are names of LogFormatElements (e.g. :remote_host,
16
- # :reqheader_referer)
17
- def next_entry
18
- @_file = @_file_cls.open(@path) if @_file.nil?
12
+ # Handles dependency injection
13
+ def dep_inject!(file_cls)
14
+ @_File = file_cls
15
+ end
19
16
 
20
- while line_text = @_file.gets
17
+ # Returns the next parsed line in the log file as an Entry, or nil if we've reached EOF.
18
+ def next_entry
19
+ while line_text = @_log_file.gets
20
+ # This is if we've reached EOF:
21
21
  return nil if line_text.nil?
22
- logline = @entry_parser.from_text(line_text)
23
22
 
23
+ entry = @_entry_parser.parse(@_format, line_text)
24
24
  # The EntryParser returns nil and writes a warning if the line text doesn't
25
25
  # match our expected format.
26
- next if logline.nil?
26
+ next if entry.nil?
27
27
 
28
- return logline
28
+ return entry
29
29
  end
30
30
  end
31
31
 
32
32
  # Resets the LogParser's filehandle so we can start over.
33
- def reset
34
- @_file = nil
33
+ def reset_file!
34
+ @_log_file.close
35
+ @_log_file = @_File.open(@_log_file.path)
35
36
  end
36
37
 
37
- # Makes the LogParser close its current log file and start parsing a new one instead
38
+ # Makes the LogParser start parsing a new log file
38
39
  #
39
40
  # `new_target` is a writable file object that the parser should start parsing, and if
40
- # in_place is true, we actually replace the contents of the current target with those
41
+ # `in_place` is true, we actually replace the contents of the current target with those
41
42
  # of the new target.
42
- def replace_target(new_target, in_place)
43
- new_target.close
43
+ def set_file!(new_file)
44
+ @_log_file.close unless @_log_file.nil?
45
+ @_log_file = new_file
46
+ end
44
47
 
45
- if in_place
46
- old_path = @_file.path
47
- @_file_cls.rename(new_target.path, old_path)
48
- else
49
- @path = new_target.path
50
- end
48
+ # Replaces the LogParser's current file with another. Like, for real, on the filesystem.
49
+ def replace_file!(new_file)
50
+ @_log_file.close
51
+ @_File.rename(new_file.path, @_log_file.path)
52
+ @_log_file = @_File.open(@_log_file.path)
53
+ end
51
54
 
52
- @_file = nil
55
+ def set_format!(format)
56
+ @_format = format
53
57
  end
54
58
  end
55
59
 
@@ -64,11 +68,15 @@ class ApacheCrunch
64
68
  # First we generate a Format instance based on the format definition we were given
65
69
  log_format = FormatFactory.from_format_def(format_def)
66
70
 
67
- # Now we generate a line parser
68
- log_line_parser = EntryParser.new(log_format, progress_meter)
71
+ # Now we generate a parser for the individual entries
72
+ entry_parser = EntryParser.new
73
+ entry_parser.add_progress_meter!(progress_meter)
69
74
 
70
75
  # And now we can instantiate and return a LogParser
71
- return LogParser.new(path, log_line_parser)
76
+ log_parser = LogParser.new(entry_parser)
77
+ log_parser.set_file!(open(path, "r"))
78
+ log_parser.set_format!(log_format)
79
+ log_parser
72
80
  end
73
81
  end
74
82
  end
data/lib/procedure_dsl.rb CHANGED
@@ -1,308 +1,318 @@
1
- # Abstract for a procedure routine.
2
- class ProcedureRoutine
3
- def initialize(log_parser)
4
- @_log_parser = log_parser
5
- @_current_entry = nil
6
- end
1
+ require 'element_value_fetcher'
2
+
3
+ class ApacheCrunch
4
+ # Abstract base class for a procedure routine.
5
+ class ProcedureRoutine
6
+ def initialize(log_parser)
7
+ @_log_parser = log_parser
8
+ @_current_entry = nil
9
+ end
7
10
 
8
- # Allows blocks passed to a DSL routine to access parameters from the current log entry
9
- def method_missing(sym, *args)
10
- @_current_entry[sym]
11
- end
11
+ # Allows blocks passed to a DSL routine to access parameters from the current log entry
12
+ def method_missing(sym, *args)
13
+ @_current_entry.fetch(sym)
14
+ end
12
15
 
13
- # Executes the DSL routine using the given block
14
- #
15
- # Abstract method
16
- def execute(&blk)
17
- raise "Not implemented"
18
- end
16
+ # Executes the DSL routine using the given block
17
+ #
18
+ # Abstract method
19
+ def execute(&blk)
20
+ raise "Not implemented"
21
+ end
19
22
 
20
- # Anything that needs to happen after the routine completes but before it returns its
21
- # result can go in here.
22
- def finish
23
- @_log_parser.reset
23
+ # Anything that needs to happen after the routine completes but before it returns its
24
+ # result can go in here.
25
+ def finish
26
+ @_log_parser.reset_file!
27
+ end
24
28
  end
25
- end
26
29
 
27
30
 
28
- # DSL routine that returns the number of log entries where the block evaluates to true
29
- class CountWhere < ProcedureRoutine
30
- def execute(&blk)
31
- count = 0
32
- while @_current_entry = @_log_parser.next_entry
33
- if instance_eval(&blk)
34
- count += 1
31
+ # DSL routine that returns the number of log entries where the block evaluates to true
32
+ class CountWhere < ProcedureRoutine
33
+ def execute(&blk)
34
+ count = 0
35
+ while @_current_entry = @_log_parser.next_entry
36
+ if instance_eval(&blk)
37
+ count += 1
38
+ end
35
39
  end
40
+ count
36
41
  end
37
- count
38
42
  end
39
- end
40
43
 
41
44
 
42
- # DSL routine that executes the block for every log entry
43
- class Each < ProcedureRoutine
44
- def execute(&blk)
45
- while @_current_entry = @_log_parser.next_entry
46
- instance_eval(&blk)
45
+ # DSL routine that executes the block for every log entry
46
+ class Each < ProcedureRoutine
47
+ def execute(&blk)
48
+ while @_current_entry = @_log_parser.next_entry
49
+ instance_eval(&blk)
50
+ end
47
51
  end
48
52
  end
49
- end
50
53
 
51
54
 
52
- # DSL routine(s) that filter(s) for entries for which the given block evaluates to true
53
- #
54
- # This can be called as 'filter()', which means the filtering happens in a temporary file, or
55
- # as 'filter(path)', which means the filtering happens in the given file. It can also be called
56
- # as 'filter!()', which means the filtering happens in place, clobbering what's in apachecrunch's
57
- # target file.
58
- class Filter < ProcedureRoutine
59
- def execute(path=nil, in_place=false, &blk)
60
- @_in_place = in_place
61
- @_results_file = _make_results_file(path, in_place)
62
-
63
- while @_current_entry = @_log_parser.next_entry
64
- if instance_eval(&blk)
65
- @_results_file.write(@_current_entry[:text])
55
+ # DSL routine(s) that filter(s) for entries for which the given block evaluates to true
56
+ #
57
+ # This can be called as 'filter()', which means the filtering happens in a temporary file, or
58
+ # as 'filter(path)', which means the filtering happens in the given file. It can also be called
59
+ # as 'filter!()', which means the filtering happens in place, clobbering what's in apachecrunch's
60
+ # target file.
61
+ class Filter < ProcedureRoutine
62
+ def execute(path=nil, in_place=false, &blk)
63
+ @_in_place = in_place
64
+ @_results_file = _make_results_file(path, in_place)
65
+
66
+ while @_current_entry = @_log_parser.next_entry
67
+ if instance_eval(&blk)
68
+ @_results_file.write(@_current_entry.fetch(:text))
69
+ end
66
70
  end
67
71
  end
68
- end
69
72
 
70
- def finish
71
- @_log_parser.replace_target(@_results_file, @_in_place)
72
- end
73
+ def finish
74
+ @_results_file.close
75
+ @_results_file = open(@_results_file.path)
76
+ if @_in_place
77
+ @_log_parser.replace_file!(@_results_file)
78
+ else
79
+ @_log_parser.set_file!(@_results_file)
80
+ end
81
+ end
73
82
 
74
- # Returns a writable file object to which the results of the filter should be written.
75
- def _make_results_file(path, in_place)
76
- if path.nil?
77
- # If no path passed (this includes the case where the filter is being performed
78
- # in place), we want a temp file.
79
- return Tempfile.new("apachecrunch")
80
- else
81
- return open(path, "w")
83
+ # Returns a writable file object to which the results of the filter should be written.
84
+ def _make_results_file(path, in_place)
85
+ if path.nil?
86
+ # If no path passed (this includes the case where the filter is being performed
87
+ # in place), we want a temp file.
88
+ return Tempfile.new("apachecrunch")
89
+ else
90
+ return open(path, "w")
91
+ end
82
92
  end
83
93
  end
84
- end
85
94
 
86
- # DSL routine that returns the count of entries with each found value of the given block
87
- #
88
- # You might for instance run this with the block { status }, and you'd get back something like
89
- # {"200" => 941, "301" => 41, "404" => 2, "500" => 0}
90
- class CountBy < ProcedureRoutine
91
- def execute(&blk)
92
- counts = {}
93
- while @_current_entry = @_log_parser.next_entry
94
- val = instance_eval(&blk)
95
- if counts.key?(val)
96
- counts[val] += 1
97
- else
98
- counts[val] = 1
95
+ # DSL routine that returns the count of entries with each found value of the given block
96
+ #
97
+ # You might for instance run this with the block { status }, and you'd get back something like
98
+ # {"200" => 941, "301" => 41, "404" => 2, "500" => 0}
99
+ class CountBy < ProcedureRoutine
100
+ def execute(&blk)
101
+ counts = {}
102
+ while @_current_entry = @_log_parser.next_entry
103
+ val = instance_eval(&blk)
104
+ if counts.key?(val)
105
+ counts[val] += 1
106
+ else
107
+ counts[val] = 1
108
+ end
99
109
  end
110
+ return counts
100
111
  end
101
- return counts
102
112
  end
103
- end
104
113
 
105
114
 
106
- # DSL routine that finds the distribution of (numeric) values to which the given block evaluates
107
- #
108
- # For example,
109
- #
110
- # distribution 100 do
111
- # bytes_sent
112
- # end
113
- #
114
- # would return a hash with keys from 0 up by multiples of 100, the value of each being the number
115
- # of entries for which bytes_sent is between that key and the next key.
116
- class Distribution < ProcedureRoutine
117
- def execute(bucket_width, &blk)
118
- dist = {}
119
- while @_current_entry = @_log_parser.next_entry
120
- val = instance_eval(&blk)
121
- k = _key_for(val, bucket_width)
122
- if dist.key?(k)
123
- dist[k] += 1
124
- else
125
- dist[k] = 1
115
+ # DSL routine that finds the distribution of (numeric) values to which the given block evaluates
116
+ #
117
+ # For example,
118
+ #
119
+ # distribution 100 do
120
+ # bytes_sent
121
+ # end
122
+ #
123
+ # would return a hash with keys from 0 up by multiples of 100, the value of each being the number
124
+ # of entries for which bytes_sent is between that key and the next key.
125
+ class Distribution < ProcedureRoutine
126
+ def execute(bucket_width, &blk)
127
+ dist = {}
128
+ while @_current_entry = @_log_parser.next_entry
129
+ val = instance_eval(&blk)
130
+ k = _key_for(val, bucket_width)
131
+ if dist.key?(k)
132
+ dist[k] += 1
133
+ else
134
+ dist[k] = 1
135
+ end
126
136
  end
127
- end
128
137
 
129
- # Backfill keys for which we didn't find a value
130
- 0.step(dist.keys.max, bucket_width).each do |k|
131
- dist[k] = 0 unless dist.key?(k)
138
+ # Backfill keys for which we didn't find a value
139
+ 0.step(dist.keys.max, bucket_width).each do |k|
140
+ dist[k] = 0 unless dist.key?(k)
141
+ end
142
+
143
+ dist
132
144
  end
133
145
 
134
- dist
146
+ # Determines the key for the distribution hash given the value and step
147
+ def _key_for(val, bucket_width)
148
+ (val.to_i / bucket_width) * bucket_width
149
+ end
135
150
  end
136
151
 
137
- # Determines the key for the distribution hash given the value and step
138
- def _key_for(val, bucket_width)
139
- (val.to_i / bucket_width) * bucket_width
140
- end
141
- end
142
152
 
153
+ # Same as Distribution, but the buckets get exponentially wider
154
+ class LogDistribution < ProcedureRoutine
155
+ def execute(width_base, &blk)
156
+ dist = {}
157
+ while @_current_entry = @_log_parser.next_entry
158
+ val = instance_eval(&blk)
159
+ k = _key_for(val, width_base)
160
+ if dist.key?(k)
161
+ dist[k] += 1
162
+ else
163
+ dist[k] = 1
164
+ end
165
+ end
143
166
 
144
- # Same as Distribution, but the buckets get expenentially wider
145
- class LogDistribution < ProcedureRoutine
146
- def execute(width_base, &blk)
147
- dist = {}
148
- while @_current_entry = @_log_parser.next_entry
149
- val = instance_eval(&blk)
150
- k = _key_for(val, width_base)
151
- if dist.key?(k)
152
- dist[k] += 1
153
- else
154
- dist[k] = 1
167
+ # Backfill keys for which we didn't find a value
168
+ k = dist.keys.min
169
+ max_key = dist.keys.max
170
+ while k *= width_base and k < max_key
171
+ dist[k] = 0 unless dist.key?(k)
155
172
  end
156
- end
157
173
 
158
- # Backfill keys for which we didn't find a value
159
- k = dist.keys.min
160
- max_key = dist.keys.max
161
- while k *= width_base and k < max_key
162
- dist[k] = 0 unless dist.key?(k)
174
+ dist
163
175
  end
164
176
 
165
- dist
166
- end
167
-
168
- # Determines the key for the distribution hash given the value and logarithmic base for
169
- # the bucket width
170
- def _key_for(val, width_base)
171
- exp = (Math.log(val) / Math.log(width_base)).to_i
172
- width_base ** exp
177
+ # Determines the key for the distribution hash given the value and logarithmic base for
178
+ # the bucket width
179
+ def _key_for(val, width_base)
180
+ exp = (Math.log(val) / Math.log(width_base)).to_i
181
+ width_base ** exp
182
+ end
173
183
  end
174
- end
175
184
 
176
185
 
177
- # DSL routine that determines a confidence interval for the values to which the block evaluates
178
- #
179
- # For example,
180
- #
181
- # confidence_interval 95 do
182
- # time_to_serve
183
- # end
184
- #
185
- # would return two numbers, the lower and upper bound of a 95% confidence interval for the values
186
- # of time_to_serve.
187
- class ConfidenceInterval < ProcedureRoutine
188
- def execute(confidence, &blk)
189
- # Build a list of all the values found
190
- values = []
191
- while @_current_entry = @_log_parser.next_entry
192
- values << instance_eval(&blk)
193
- end
194
- values.sort!
186
+ # DSL routine that determines a confidence interval for the values to which the block evaluates
187
+ #
188
+ # For example,
189
+ #
190
+ # confidence_interval 95 do
191
+ # time_to_serve
192
+ # end
193
+ #
194
+ # would return two numbers, the lower and upper bound of a 95% confidence interval for the values
195
+ # of time_to_serve.
196
+ class ConfidenceInterval < ProcedureRoutine
197
+ def execute(confidence, &blk)
198
+ # Build a list of all the values found
199
+ values = []
200
+ while @_current_entry = @_log_parser.next_entry
201
+ values << instance_eval(&blk)
202
+ end
203
+ values.sort!
195
204
 
196
- # Determine how many values are outside the bounds of the CI
197
- count_outside = (values.length * (1.0 - confidence/100.0)).to_i
205
+ # Determine how many values are outside the bounds of the CI
206
+ count_outside = (values.length * (1.0 - confidence/100.0)).to_i
198
207
 
199
- # Find the bounds of the confidence interval
200
- return values[count_outside / 2], values[-count_outside / 2]
208
+ # Find the bounds of the confidence interval
209
+ return values[count_outside / 2], values[-count_outside / 2]
210
+ end
201
211
  end
202
- end
203
-
204
212
 
205
- # DSL routine that finds the most common n values for the given block.
206
- #
207
- # Returns a list of lists, each of which is [value, count]. This list is sorted by count.
208
- class MostCommon < ProcedureRoutine
209
- def execute(n, &blk)
210
- counts = CountBy.new(@_log_parser).execute(&blk)
211
213
 
212
- # Sort the block values descending
213
- sorted_vals = counts.keys.sort do |val_a,val_b|
214
- - (counts[val_a] <=> counts[val_b])
215
- end
214
+ # DSL routine that finds the most common n values for the given block.
215
+ #
216
+ # Returns a list of lists, each of which is [value, count]. This list is sorted by count.
217
+ class MostCommon < ProcedureRoutine
218
+ def execute(n, &blk)
219
+ counts = CountBy.new(@_log_parser).execute(&blk)
220
+
221
+ # Sort the block values descending
222
+ sorted_vals = counts.keys.sort do |val_a,val_b|
223
+ - (counts[val_a] <=> counts[val_b])
224
+ end
216
225
 
217
- sorted_vals[0..n].map do |val|
218
- [val, counts[val]]
226
+ sorted_vals[0..n].map do |val|
227
+ [val, counts[val]]
228
+ end
219
229
  end
220
230
  end
221
- end
222
231
 
223
232
 
224
- # The environment in which a procedure file is evaluated.
225
- #
226
- # A procedure file is some ruby code that uses our DSL.
227
- class ProcedureEnvironment
228
- def initialize(log_parser)
229
- @_log_parser = log_parser
230
- end
233
+ # The environment in which a procedure file is evaluated.
234
+ #
235
+ # A procedure file is some ruby code that uses our DSL.
236
+ class ProcedureEnvironment
237
+ def initialize(log_parser)
238
+ @_log_parser = log_parser
239
+ end
231
240
 
232
- # Evaluates the given string as a procedure in our DSL
233
- def eval_procedure(proc_string)
234
- eval proc_string
235
- end
241
+ # Evaluates the given string as a procedure in our DSL
242
+ def eval_procedure(proc_string)
243
+ eval proc_string
244
+ end
236
245
 
237
- # DSL routine 'count_where'
238
- def count_where(&blk)
239
- routine = CountWhere.new(@_log_parser)
240
- rv = routine.execute(&blk)
241
- routine.finish
242
- rv
243
- end
246
+ # DSL routine 'count_where'
247
+ def count_where(&blk)
248
+ routine = CountWhere.new(@_log_parser)
249
+ rv = routine.execute(&blk)
250
+ routine.finish
251
+ rv
252
+ end
244
253
 
245
- # DSL routine 'filter!'
246
- def filter!(&blk)
247
- routine = Filter.new(@_log_parser)
248
- routine.execute(nil, true, &blk)
249
- routine.finish
250
- nil
251
- end
254
+ # DSL routine 'filter!'
255
+ def filter!(&blk)
256
+ routine = Filter.new(@_log_parser)
257
+ routine.execute(nil, true, &blk)
258
+ routine.finish
259
+ nil
260
+ end
252
261
 
253
- # DSL routine 'filter'
254
- def filter(target_path=nil, &blk)
255
- routine = Filter.new(@_log_parser)
256
- routine.execute(target_path, &blk)
257
- routine.finish
258
- nil
259
- end
262
+ # DSL routine 'filter'
263
+ def filter(target_path=nil, &blk)
264
+ routine = Filter.new(@_log_parser)
265
+ routine.execute(target_path, &blk)
266
+ routine.finish
267
+ nil
268
+ end
260
269
 
261
- # DSL routine 'each'
262
- def each(&blk)
263
- routine = Each.new(@_log_parser)
264
- routine.execute(&blk)
265
- routine.finish
266
- nil
267
- end
270
+ # DSL routine 'each'
271
+ def each(&blk)
272
+ routine = Each.new(@_log_parser)
273
+ routine.execute(&blk)
274
+ routine.finish
275
+ nil
276
+ end
268
277
 
269
- # DSL routine 'count_by'
270
- def count_by(&blk)
271
- routine = CountBy.new(@_log_parser)
272
- rv = routine.execute(&blk)
273
- routine.finish
274
- rv
275
- end
278
+ # DSL routine 'count_by'
279
+ def count_by(&blk)
280
+ routine = CountBy.new(@_log_parser)
281
+ rv = routine.execute(&blk)
282
+ routine.finish
283
+ rv
284
+ end
276
285
 
277
- # DSL routine 'distribution'
278
- def distribution(bucket_width, &blk)
279
- routine = Distribution.new(@_log_parser)
280
- rv = routine.execute(bucket_width, &blk)
281
- routine.finish
282
- rv
283
- end
286
+ # DSL routine 'distribution'
287
+ def distribution(bucket_width, &blk)
288
+ routine = Distribution.new(@_log_parser)
289
+ rv = routine.execute(bucket_width, &blk)
290
+ routine.finish
291
+ rv
292
+ end
284
293
 
285
- # DSL routine 'log_distribution'
286
- def log_distribution(width_base, &blk)
287
- routine = LogDistribution.new(@_log_parser)
288
- rv = routine.execute(width_base, &blk)
289
- routine.finish
290
- rv
291
- end
294
+ # DSL routine 'log_distribution'
295
+ def log_distribution(width_base, &blk)
296
+ routine = LogDistribution.new(@_log_parser)
297
+ rv = routine.execute(width_base, &blk)
298
+ routine.finish
299
+ rv
300
+ end
292
301
 
293
- # DSL routine 'confidence_interval'
294
- def confidence_interval(confidence, &blk)
295
- routine = ConfidenceInterval.new(@_log_parser)
296
- rv = routine.execute(confidence, &blk)
297
- routine.finish
298
- rv
299
- end
302
+ # DSL routine 'confidence_interval'
303
+ def confidence_interval(confidence, &blk)
304
+ routine = ConfidenceInterval.new(@_log_parser)
305
+ rv = routine.execute(confidence, &blk)
306
+ routine.finish
307
+ rv
308
+ end
300
309
 
301
- # DSL routine 'most_common'
302
- def most_common(n, &blk)
303
- routine = MostCommon.new(@_log_parser)
304
- rv = routine.execute(n, &blk)
305
- routine.finish
306
- rv
310
+ # DSL routine 'most_common'
311
+ def most_common(n, &blk)
312
+ routine = MostCommon.new(@_log_parser)
313
+ rv = routine.execute(n, &blk)
314
+ routine.finish
315
+ rv
316
+ end
307
317
  end
308
318
  end