apachecrunch 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/apachecrunch +1 -1
- data/lib/apachecrunch.rb +5 -15
- data/lib/cast.rb +21 -0
- data/lib/derivation.rb +113 -0
- data/lib/element.rb +16 -0
- data/lib/element_value_fetcher.rb +72 -0
- data/lib/entry.rb +64 -54
- data/lib/format.rb +21 -63
- data/lib/format_token.rb +114 -0
- data/lib/format_token_definition.rb +183 -0
- data/lib/log_parser.rb +39 -31
- data/lib/procedure_dsl.rb +254 -244
- data/lib/progress.rb +1 -1
- data/test/mock.rb +37 -0
- data/test/runner.rb +13 -1
- data/test/stub.rb +66 -36
- data/test/test_derived_value_fetcher.rb +36 -0
- data/test/test_element.rb +18 -0
- data/test/test_element_value_fetcher.rb +45 -0
- data/test/test_entry_parser.rb +39 -0
- data/test/test_format.rb +13 -51
- data/test/test_format_parser.rb +22 -13
- data/test/test_log_parser.rb +88 -0
- data/test/test_raw_value_fetcher.rb +36 -0
- data/test/test_regex_token.rb +17 -0
- data/test/test_req_firstline_derivation_rule.rb +41 -0
- data/test/test_reqheader_token.rb +26 -0
- data/test/test_string_token.rb +27 -0
- data/test/test_time_derivation_rule.rb +29 -0
- metadata +23 -18
- data/lib/log_element.rb +0 -351
- data/test/test_entry.rb +0 -28
data/lib/format_token.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'format_token_definition'
|
2
|
+
require 'derivation'
|
3
|
+
|
4
|
+
class ApacheCrunch
  # Abstract for a token in a log format.
  #
  # Every method here raises NotImplementedError; concrete token classes
  # override all of them.
  class FormatToken
    # Performs whatever initial population is necessary for the token.
    #
    # Accepts (and ignores) any arguments: the concrete subclasses take one or
    # two, and calling the un-overridden method should report
    # NotImplementedError rather than an arity error.
    def populate!(*_args); raise NotImplementedError; end

    # Name under which the token's value is exposed (nil when it has none).
    def name; raise NotImplementedError; end

    # Regex source text (a String) matching the token in a log line.
    def regex; raise NotImplementedError; end

    # Whether the matched text should be captured as a value.
    def captured?; raise NotImplementedError; end

    # Rule object describing values derivable from this token.
    def derivation_rule; raise NotImplementedError; end
  end


  # A predefined token like %q or %r from the Apache log.
  #
  # Every property is delegated to the token definition given to populate!.
  class PredefinedToken < FormatToken
    # token_definition: object responding to name, regex, captured and
    # derivation_rule (what TokenDictionary.fetch returns).
    def populate!(token_definition)
      @token_definition = token_definition
    end

    def name; @token_definition.name; end
    def regex; @token_definition.regex; end
    def captured?; @token_definition.captured; end
    def derivation_rule; @token_definition.derivation_rule; end
  end


  # A bare string in a log format.
  class StringToken < FormatToken
    # Initializes the instance given the string it represents
    def populate!(string_value)
      @_string_value = string_value
    end

    # Literal strings have no name because they are never captured.
    def name; nil; end

    # The literal text, escaped so regex special characters in it cannot
    # confuse the parsing later.
    def regex
      Regexp.escape(@_string_value)
    end

    def captured?; false; end
    def derivation_rule; NullDerivationRule.new; end
  end


  # A token based on a request header.
  class ReqheaderToken < FormatToken
    # header_name: the header name as written in the format, e.g. "User-Agent".
    def populate!(header_name)
      @_name = _header_name_to_token_name(header_name)
    end

    def name; @_name; end
    def regex; '[^"]*'; end
    def captured?; true; end
    def derivation_rule; NullDerivationRule.new; end

    # Lowercases header name and turns hyphens into underscores,
    # e.g. "User-Agent" => :reqheader_user_agent
    def _header_name_to_token_name(header_name)
      "reqheader_#{header_name.downcase.tr('-', '_')}".to_sym
    end
  end


  # A token based on an arbitrary regular expression.
  class RegexToken < FormatToken
    # regex_name: suffix for the token name (exposed as :regex_<name>).
    # regex_text: regex source text used verbatim to match the value.
    def populate!(regex_name, regex_text)
      @_name = "regex_#{regex_name}".to_sym
      @_regex = regex_text
    end

    def name; @_name; end
    def regex; @_regex; end
    def captured?; true; end
    def derivation_rule; NullDerivationRule.new; end
  end


  # Generates FormatToken instances.
  #
  # This class does the work of figuring out which FormatToken subclass to make.
  class FormatTokenFactory
    # Takes an Apache log format abbreviation and returns a corresponding FormatToken
    #
    # Resolution order:
    #   1. an abbreviation known to TokenDictionary => PredefinedToken
    #   2. text not starting with "%"               => StringToken (literal)
    #   3. "%%"                                     => StringToken for "%"
    #   4. "%{Header-Name}i"                        => ReqheaderToken
    #   5. "%{name:regex}r"                         => RegexToken
    #
    # Raises RuntimeError when the abbreviation matches none of these.
    def self.from_abbrev(abbrev)
      token_def = TokenDictionary.fetch(abbrev)
      # We found it in the dictionary, so just return a Token based on it
      return _build(PredefinedToken, token_def) if token_def

      # \A (not ^) so only the very start of the abbreviation is examined.
      return _build(StringToken, abbrev) if abbrev !~ /\A%/
      return _build(StringToken, "%") if abbrev == "%%"

      # HTTP request header
      header = /\A%\{([A-Za-z0-9-]+)\}i/.match(abbrev)
      return _build(ReqheaderToken, header[1]) if header

      # Arbitrary regex
      custom = /\A%\{(.*?):([^}]+)\}r/.match(abbrev)
      return _build(RegexToken, custom[1], custom[2]) if custom

      raise "Unable to parse format definition starting at '#{abbrev}'"
    end

    # Instantiates token_class and populates it with the given arguments.
    def self._build(token_class, *args)
      tok = token_class.new
      tok.populate!(*args)
      tok
    end
  end
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'cast'
|
2
|
+
require 'derivation'
|
3
|
+
|
4
|
+
class ApacheCrunch
  # Defines the properties of a known Apache log format token (like %q or %h)
  #
  # Definitions are never instantiated: each subclass stores its properties in
  # class-level instance variables, read through the class-level accessors
  # declared here.
  #
  #   name            - Symbol under which the value is exposed
  #   abbrev          - the abbreviation as written in a log format string
  #   regex           - regex source text (String) matching the value
  #   caster          - object used to cast the raw text, or nil for none
  #   derivation_rule - rule object for values derived from this token
  #   captured        - whether the matched text is captured
  class FormatTokenDefinition
    class << self; attr_accessor :name, :abbrev, :regex, :caster, :derivation_rule, :captured; end
  end


  class RemoteHostTokenDefinition < FormatTokenDefinition
    @name = :remote_host
    @abbrev = "%h"
    @regex = %q![A-Za-z0-9.-]+!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class LogNameTokenDefinition < FormatTokenDefinition
    @name = :log_name
    @abbrev = "%l"
    @regex = %q!\S+!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class RemoteUserTokenDefinition < FormatTokenDefinition
    @name = :remote_user
    @abbrev = "%u"
    @regex = %q![^:]+!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class TimeTokenDefinition < FormatTokenDefinition
    @name = :time
    @abbrev = "%t"
    # Matches e.g. [10/Oct/2000:13:55:36 -0700]
    @regex = %q!\[\d\d/[A-Za-z]{3}/\d\d\d\d:\d\d:\d\d:\d\d [-+]\d\d\d\d\]!
    @caster = nil
    @derivation_rule = TimeDerivationRule.new
    @captured = true
  end


  class ReqFirstlineTokenDefinition < FormatTokenDefinition
    @name = :req_firstline
    @abbrev = "%r"
    @regex = %q![^"]+!
    @caster = nil
    @derivation_rule = ReqFirstlineDerivationRule.new
    @captured = true
  end


  class StatusTokenDefinition < FormatTokenDefinition
    @name = :status
    @abbrev = "%s"
    @regex = %q!\d+|-!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  # Bytes sent, where the value may be a dash instead of a number.
  #
  # This class used to be defined twice in a row with the same "%b"
  # abbreviation. The second definition overwrote every attribute of the
  # first, so only those (effective) values are kept here.
  # NOTE(review): the discarded values (\d+ with IntegerCast) look like what
  # Apache's "%B" would need -- confirm whether a separate %B definition was
  # intended.
  class BytesSentTokenDefinition < FormatTokenDefinition
    @name = :bytes_sent
    @abbrev = "%b"
    @regex = %q![\d-]+!
    @caster = CLFIntegerCast.new
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class BytesSentWithHeadersTokenDefinition < FormatTokenDefinition
    @name = :bytes_sent_with_headers
    @abbrev = "%O"
    @regex = %q!\d+!
    @caster = IntegerCast.new
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class ServeTimeMicroTokenDefinition < FormatTokenDefinition
    @name = :serve_time_micro
    @abbrev = "%D"
    @regex = %q!\d+!
    @caster = IntegerCast.new
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class UrlPathTokenDefinition < FormatTokenDefinition
    @name = :url_path
    @abbrev = "%U"
    @regex = %q!/[^?]*!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class QueryStringTokenDefinition < FormatTokenDefinition
    @name = :query_string
    @abbrev = "%q"
    @regex = %q!\??\S*!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class ReqMethodTokenDefinition < FormatTokenDefinition
    @name = :req_method
    @abbrev = "%m"
    @regex = %q![A-Z]+!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  class ProtocolTokenDefinition < FormatTokenDefinition
    @name = :protocol
    @abbrev = "%H"
    @regex = %q!\S+!
    @caster = nil
    @derivation_rule = NullDerivationRule.new
    @captured = true
  end


  # Finds log format token definitions given information about them.
  class TokenDictionary
    # Every known definition, each listed exactly once. Lookup returns the
    # first match, so order only matters if two definitions share an abbrev.
    @@_defs = [
      RemoteHostTokenDefinition,
      LogNameTokenDefinition,
      RemoteUserTokenDefinition,
      TimeTokenDefinition,
      ReqFirstlineTokenDefinition,
      StatusTokenDefinition,
      BytesSentTokenDefinition,
      BytesSentWithHeadersTokenDefinition,
      ServeTimeMicroTokenDefinition,
      UrlPathTokenDefinition,
      QueryStringTokenDefinition,
      ReqMethodTokenDefinition,
      ProtocolTokenDefinition
    ]

    # Returns the FormatTokenDefinition subclass with the given abbreviation.
    #
    # If none exists, returns nil.
    def self.fetch(abbrev)
      @@_defs.find { |token_def| token_def.abbrev == abbrev }
    end
  end
end
|
data/lib/log_parser.rb
CHANGED
@@ -2,54 +2,58 @@ class ApacheCrunch
|
|
2
2
|
# Parses a log file, one entry at a time, using an EntryParser and a Format.
#
# The file and the format are supplied after construction through set_file!
# and set_format!; next_entry must not be called before a file has been set.
class LogParser
  # Initializes the parser with the EntryParser used to parse each line.
  def initialize(entry_parser)
    @_entry_parser = entry_parser
    @_log_file = nil
    @_format = nil

    # Class used to open and rename files; replaceable via dep_inject!.
    @_File = File
  end

  # Handles dependency injection
  #
  # file_cls must respond to open(path) and rename(from, to) like File does.
  def dep_inject!(file_cls)
    @_File = file_cls
  end

  # Returns the next parsed line in the log file as an Entry, or nil if we've reached EOF.
  def next_entry
    # gets returns nil at EOF, which is what ends the loop.
    while (line_text = @_log_file.gets)
      entry = @_entry_parser.parse(@_format, line_text)

      # The EntryParser returns nil and writes a warning if the line text doesn't
      # match our expected format, in which case we move on to the next line.
      return entry unless entry.nil?
    end

    nil
  end

  # Resets the LogParser's filehandle so we can start over.
  #
  # The current file is closed and reopened by path, so it has to respond to path.
  def reset_file!
    @_log_file.close
    @_log_file = @_File.open(@_log_file.path)
  end

  # Makes the LogParser start parsing a new log file
  #
  # `new_file` is an open, readable file object. The file being parsed so far
  # (if any) is closed first; nothing is changed on the filesystem.
  def set_file!(new_file)
    @_log_file.close unless @_log_file.nil?
    @_log_file = new_file
  end

  # Replaces the LogParser current file with another. Like, for real, on the filesystem.
  #
  # The file at new_file.path is renamed over the path of the current log
  # file, which is then reopened. new_file itself is not closed here.
  def replace_file!(new_file)
    @_log_file.close
    @_File.rename(new_file.path, @_log_file.path)
    @_log_file = @_File.open(@_log_file.path)
  end

  # Sets the Format passed to the EntryParser along with every line.
  def set_format!(format)
    @_format = format
  end
end
|
55
59
|
|
@@ -64,11 +68,15 @@ class ApacheCrunch
|
|
64
68
|
# First we generate a Format instance based on the format definition we were given
|
65
69
|
log_format = FormatFactory.from_format_def(format_def)
|
66
70
|
|
67
|
-
# Now we generate a
|
68
|
-
|
71
|
+
# Now we generate a parser for the individual entries
|
72
|
+
entry_parser = EntryParser.new
|
73
|
+
entry_parser.add_progress_meter!(progress_meter)
|
69
74
|
|
70
75
|
# And now we can instantiate and return a LogParser
|
71
|
-
|
76
|
+
log_parser = LogParser.new(entry_parser)
|
77
|
+
log_parser.set_file!(open(path, "r"))
|
78
|
+
log_parser.set_format!(log_format)
|
79
|
+
log_parser
|
72
80
|
end
|
73
81
|
end
|
74
82
|
end
|
data/lib/procedure_dsl.rb
CHANGED
@@ -1,308 +1,318 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require 'element_value_fetcher'
|
2
|
+
|
3
|
+
class ApacheCrunch
  # Abstract for a procedure routine.
  #
  # A routine pulls entries from a log parser one at a time and evaluates the
  # user's block against each of them with the routine itself as `self`, so
  # that bare names in the block resolve to elements of the current entry.
  class ProcedureRoutine
    # log_parser must respond to next_entry and reset_file!.
    def initialize(log_parser)
      @_log_parser = log_parser
      @_current_entry = nil
    end

    # Allows blocks passed to a DSL routine to access parameters from the current log entry
    #
    # Outside of an iteration there is no entry to consult, so the call is
    # passed up and reported as an ordinary NoMethodError for `sym`.
    def method_missing(sym, *args)
      return super if @_current_entry.nil?

      @_current_entry.fetch(sym)
    end

    # Executes the DSL routine using the given block
    #
    # Abstract method
    def execute(&blk)
      raise "Not implemented"
    end

    # Anything that needs to happen after the routine completes but before it returns its
    # result can go in here.
    #
    # By default the log file is rewound so the next routine starts at the top.
    def finish
      @_log_parser.reset_file!
    end
  end


  # DSL routine that returns the number of log entries where the block evaluates to true
  class CountWhere < ProcedureRoutine
    def execute(&blk)
      count = 0
      while (@_current_entry = @_log_parser.next_entry)
        count += 1 if instance_eval(&blk)
      end
      count
    end
  end


  # DSL routine that executes the block for every log entry
  class Each < ProcedureRoutine
    def execute(&blk)
      while (@_current_entry = @_log_parser.next_entry)
        instance_eval(&blk)
      end
    end
  end


  # DSL routine(s) that filter(s) for entries for which the given block evaluates to true
  #
  # This can be called as 'filter()', which means the filtering happens in a temporary file, or
  # as 'filter(path)', which means the filtering happens in the given file. It can also be called
  # as 'filter!()', which means the filtering happens in place, clobbering what's in apachecrunch's
  # target file.
  class Filter < ProcedureRoutine
    # Writes the text of every entry for which the block is true to the results file.
    #
    # path:     where to write the results, or nil for a temporary file
    # in_place: whether finish should replace the parser's file on disk
    def execute(path=nil, in_place=false, &blk)
      @_in_place = in_place
      @_results_file = _make_results_file(path, in_place)

      while (@_current_entry = @_log_parser.next_entry)
        @_results_file.write(@_current_entry.fetch(:text)) if instance_eval(&blk)
      end
    end

    # Reopens the results for reading and hands them to the log parser, either
    # as a replacement on disk (in place) or as the new file to parse.
    #
    # NOTE(review): for a temporary results file the Tempfile object is dropped
    # here; if Ruby deletes the file when that object is collected, a later
    # reopen by path would fail -- confirm.
    def finish
      @_results_file.close
      # File.open rather than Kernel#open: the latter treats a leading "|" as a command.
      @_results_file = File.open(@_results_file.path)
      if @_in_place
        @_log_parser.replace_file!(@_results_file)
      else
        @_log_parser.set_file!(@_results_file)
      end
    end

    # Returns a writable file object to which the results of the filter should be written.
    def _make_results_file(path, in_place)
      # If no path passed (this includes the case where the filter is being performed
      # in place), we want a temp file.
      return Tempfile.new("apachecrunch") if path.nil?

      File.open(path, "w")
    end
  end

  # DSL routine that returns the count of entries with each found value of the given block
  #
  # You might for instance run this with the block { status }, and you'd get back something like
  # {"200" => 941, "301" => 41, "404" => 2}
  class CountBy < ProcedureRoutine
    def execute(&blk)
      counts = {}
      while (@_current_entry = @_log_parser.next_entry)
        val = instance_eval(&blk)
        counts[val] = counts.fetch(val, 0) + 1
      end
      counts
    end
  end


  # DSL routine that finds the distribution of (numeric) values to which the given block evaluates
  #
  # For example,
  #
  #   distribution 100 do
  #     bytes_sent
  #   end
  #
  # would return a hash with keys from 0 up by multiples of 100, the value of each being the number
  # of entries for which bytes_sent is between that key and the next key.
  class Distribution < ProcedureRoutine
    def execute(bucket_width, &blk)
      dist = {}
      while (@_current_entry = @_log_parser.next_entry)
        k = _key_for(instance_eval(&blk), bucket_width)
        dist[k] = dist.fetch(k, 0) + 1
      end

      # With no entries there is no maximum key to backfill up to.
      return dist if dist.empty?

      # Backfill keys for which we didn't find a value
      0.step(dist.keys.max, bucket_width) do |k|
        dist[k] = 0 unless dist.key?(k)
      end

      dist
    end

    # Determines the key for the distribution hash given the value and step
    def _key_for(val, bucket_width)
      (val.to_i / bucket_width) * bucket_width
    end
  end


  # Same as Distribution, but the buckets get exponentially wider
  class LogDistribution < ProcedureRoutine
    def execute(width_base, &blk)
      dist = {}
      while (@_current_entry = @_log_parser.next_entry)
        k = _key_for(instance_eval(&blk), width_base)
        dist[k] = dist.fetch(k, 0) + 1
      end

      # With no entries there are no minimum and maximum keys to backfill between.
      return dist if dist.empty?

      # Backfill keys for which we didn't find a value
      max_key = dist.keys.max
      k = dist.keys.min * width_base
      while k < max_key
        dist[k] = 0 unless dist.key?(k)
        k *= width_base
      end

      dist
    end

    # Determines the key for the distribution hash given the value and logarithmic base for
    # the bucket width
    def _key_for(val, width_base)
      exp = (Math.log(val) / Math.log(width_base)).to_i

      # The quotient of two floating-point logs can land just below a whole
      # number (1000 in base 10 gives 2.9999999999999996), which would put an
      # exact power into the bucket below its own.
      exp += 1 if width_base > 1 && width_base ** (exp + 1) <= val

      width_base ** exp
    end
  end


  # DSL routine that determines a confidence interval for the values to which the block evaluates
  #
  # For example,
  #
  #   confidence_interval 95 do
  #     time_to_serve
  #   end
  #
  # would return two numbers, the lower and upper bound of a 95% confidence interval for the values
  # of time_to_serve.
  class ConfidenceInterval < ProcedureRoutine
    def execute(confidence, &blk)
      # Build a list of all the values found
      values = []
      while (@_current_entry = @_log_parser.next_entry)
        values << instance_eval(&blk)
      end
      values.sort!

      # Determine how many values are outside the bounds of the CI. Rounding
      # before truncating keeps float error (10 * 0.2 => 1.9999999999999996)
      # from dropping a whole value.
      count_outside = (values.length * (1.0 - confidence / 100.0)).round(9).to_i

      # Find the bounds of the confidence interval: the same number of values
      # is cut off each end. The "- 1" is needed because -1, not 0, is the
      # index of the last element.
      per_tail = count_outside / 2
      return values[per_tail], values[-per_tail - 1]
    end
  end


  # DSL routine that finds the most common n values for the given block.
  #
  # Returns a list of lists, each of which is [value, count]. This list is sorted by count
  # (descending) and holds at most n items.
  class MostCommon < ProcedureRoutine
    def execute(n, &blk)
      counts = CountBy.new(@_log_parser).execute(&blk)

      # Sort the block values descending by count
      sorted_vals = counts.keys.sort do |val_a, val_b|
        counts[val_b] <=> counts[val_a]
      end

      sorted_vals.first(n).map do |val|
        [val, counts[val]]
      end
    end
  end


  # The environment in which a procedure file is evaluated.
  #
  # A procedure file is some ruby code that uses our DSL.
  class ProcedureEnvironment
    def initialize(log_parser)
      @_log_parser = log_parser
    end

    # Evaluates the given string as a procedure in our DSL
    #
    # NOTE: this is a plain eval, so the string runs as arbitrary Ruby with
    # this object as self. Only pass it procedures you trust.
    def eval_procedure(proc_string)
      eval proc_string
    end

    # DSL routine 'count_where'
    def count_where(&blk)
      _run_routine(CountWhere, &blk)
    end

    # DSL routine 'filter!'
    def filter!(&blk)
      _run_routine(Filter, nil, true, &blk)
      nil
    end

    # DSL routine 'filter'
    def filter(target_path=nil, &blk)
      _run_routine(Filter, target_path, &blk)
      nil
    end

    # DSL routine 'each'
    def each(&blk)
      _run_routine(Each, &blk)
      nil
    end

    # DSL routine 'count_by'
    def count_by(&blk)
      _run_routine(CountBy, &blk)
    end

    # DSL routine 'distribution'
    def distribution(bucket_width, &blk)
      _run_routine(Distribution, bucket_width, &blk)
    end

    # DSL routine 'log_distribution'
    def log_distribution(width_base, &blk)
      _run_routine(LogDistribution, width_base, &blk)
    end

    # DSL routine 'confidence_interval'
    def confidence_interval(confidence, &blk)
      _run_routine(ConfidenceInterval, confidence, &blk)
    end

    # DSL routine 'most_common'
    def most_common(n, &blk)
      _run_routine(MostCommon, n, &blk)
    end

    # Runs one routine against our log parser: instantiate, execute with the
    # given arguments and block, finish, and return what execute returned.
    def _run_routine(routine_class, *args, &blk)
      routine = routine_class.new(@_log_parser)
      rv = routine.execute(*args, &blk)
      routine.finish
      rv
    end
  end
end
|