apachecrunch 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/apachecrunch +5 -5
- data/lib/apachecrunch.rb +12 -310
- data/lib/config.rb +44 -0
- data/lib/entry.rb +90 -0
- data/lib/format.rb +128 -0
- data/lib/log_parser.rb +74 -0
- data/lib/progress.rb +1 -1
- data/test/runner.rb +16 -0
- data/test/stub.rb +56 -0
- data/test/test_entry.rb +28 -0
- data/test/test_format.rb +63 -0
- data/test/test_format_parser.rb +26 -0
- metadata +13 -4
data/bin/apachecrunch
CHANGED
@@ -60,11 +60,11 @@ end
|
|
60
60
|
|
61
61
|
options = parse_args
|
62
62
|
|
63
|
-
|
63
|
+
format_def = ApacheCrunch::FormatDefinitionFinder.new.find(options[:format])
|
64
64
|
progress_meter = ProgressMeterFactory.from_options(options)
|
65
|
-
log_parser = LogParserFactory.log_parser(
|
66
|
-
|
67
|
-
|
68
|
-
progress_meter
|
65
|
+
log_parser = ApacheCrunch::LogParserFactory.log_parser(
|
66
|
+
format_def,
|
67
|
+
options[:logfile],
|
68
|
+
progress_meter)
|
69
69
|
proc_env = ProcedureEnvironment.new(log_parser)
|
70
70
|
proc_env.eval_procedure(open(options[:procedure]).read())
|
data/lib/apachecrunch.rb
CHANGED
@@ -1,320 +1,22 @@
|
|
1
1
|
require "date"
|
2
2
|
require "tempfile"
|
3
3
|
|
4
|
+
require 'config'
|
5
|
+
require 'entry'
|
6
|
+
require 'format'
|
7
|
+
require 'log_parser'
|
4
8
|
require 'log_element'
|
5
9
|
|
6
|
-
|
7
|
-
# A
|
8
|
-
#
|
9
|
-
# Acts like a hash, in that you get at the log elements (e.g. "url_path", "remote_host") by
|
10
|
-
# as entry[name].
|
11
|
-
class LogEntry
|
12
|
-
def initialize(derivation_map)
|
13
|
-
@_derivation_map = derivation_map
|
14
|
-
@_attributes = {}
|
15
|
-
end
|
16
|
-
|
17
|
-
def []=(name, value)
|
18
|
-
@_attributes[name] = value
|
19
|
-
end
|
20
|
-
|
21
|
-
def [](name)
|
22
|
-
return @_attributes[name] if @_attributes.key?(name)
|
23
|
-
|
24
|
-
derived_from_cls = @_derivation_map[name]
|
25
|
-
return nil if derived_from_cls.nil?
|
26
|
-
|
27
|
-
derived_from_cls.derive(name, @_attributes[derived_from_cls.name])
|
28
|
-
end
|
29
|
-
|
30
|
-
def merge!(hsh)
|
31
|
-
@_attributes.merge!(hsh)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
# A bare string in a log format
|
37
|
-
#
|
38
|
-
# Exposes 'regex' for consistency with LogFormatElement, but there shouldn't be anything other
|
39
|
-
# than one-to-one character matching in there.
|
40
|
-
class LogFormatString
|
41
|
-
attr_accessor :regex
|
42
|
-
|
43
|
-
def initialize(regex)
|
44
|
-
@regex = regex
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
|
49
|
-
# Represents a particular Apache log format
|
50
|
-
class LogFormat
|
51
|
-
attr_accessor :format_string, :tokens
|
52
|
-
|
53
|
-
def initialize
|
54
|
-
@tokens = []
|
55
|
-
@_regex = nil
|
56
|
-
end
|
57
|
-
|
58
|
-
# Appends a given token (a LogFormatElement or LogFormatString) to the tokens list
|
59
|
-
def append(token)
|
60
|
-
@tokens << token
|
61
|
-
end
|
62
|
-
|
63
|
-
# Returns a compiled regex to match a log line in this format
|
64
|
-
def regex
|
65
|
-
return @_regex unless @_regex.nil?
|
66
|
-
|
67
|
-
r = "^"
|
68
|
-
@tokens.each do |tok|
|
69
|
-
# We only care to remember the LogFormatElements. No need to put parentheses
|
70
|
-
# around LogFormatString shit.
|
71
|
-
if tok.respond_to?(:name)
|
72
|
-
r += "(" + tok.regex + ")"
|
73
|
-
else
|
74
|
-
r += tok.regex
|
75
|
-
end
|
76
|
-
end
|
77
|
-
r += "$"
|
78
|
-
|
79
|
-
@_regex = Regexp.compile(r)
|
80
|
-
@_regex
|
81
|
-
end
|
82
|
-
|
83
|
-
# Returns the list of LogFormatElements, in order, of the interpolated things in the format.
|
10
|
+
class ApacheCrunch
|
11
|
+
# A bare string in a log format
|
84
12
|
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
|
88
|
-
|
89
|
-
tok.respond_to?(:name)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Returns hash mapping names of elements to the element class from which they can be derived.
|
94
|
-
def derivation_map
|
95
|
-
hsh = {}
|
96
|
-
elements.each do |tok|
|
97
|
-
tok.derived_elements.each do |derived_element|
|
98
|
-
hsh[derived_element.name] = tok.class
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
hsh
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
|
107
|
-
# Turns a string specifying an Apache log format into a LogFormat instance
|
108
|
-
class LogFormatFactory
|
109
|
-
def initialize
|
110
|
-
@element_factory = LogFormatElementFactory.new
|
111
|
-
end
|
112
|
-
|
113
|
-
# Constructs and returns a LogFormat instance based on the given Apache log format string
|
114
|
-
def from_format_string(f_string)
|
115
|
-
logformat = LogFormat.new
|
116
|
-
logformat.format_string = f_string
|
13
|
+
# Exposes 'regex' for consistency with LogFormatElement, but there shouldn't be anything other
|
14
|
+
# than one-to-one character matching in there.
|
15
|
+
class LogFormatString
|
16
|
+
attr_accessor :regex
|
117
17
|
|
118
|
-
|
119
|
-
|
120
|
-
logformat.append(token)
|
18
|
+
def initialize(regex)
|
19
|
+
@regex = regex
|
121
20
|
end
|
122
|
-
|
123
|
-
logformat
|
124
|
-
end
|
125
|
-
|
126
|
-
# Finds the first token (a LogFormatElement or LogFormatString) in a format string
|
127
|
-
#
|
128
|
-
# Returns a list containing the token and the new format string (with the characters that
|
129
|
-
# correspond to the token removed)
|
130
|
-
def _shift_token(f_string)
|
131
|
-
if f_string =~ /^%%(.*)/
|
132
|
-
# Literal "%"
|
133
|
-
return [LogFormatString.new("%%"), $1]
|
134
|
-
elsif f_string =~ /^(%[A-Za-z])(.*)/
|
135
|
-
# Simple element (e.g. "%h", "%u")
|
136
|
-
return [@element_factory.from_abbrev($1), $2]
|
137
|
-
elsif f_string =~ /^%[<>]([A-Za-z])(.*)/
|
138
|
-
# No idea how to handle mod_log_config's "which request" system yet, so we
|
139
|
-
# ignore it.
|
140
|
-
return [@element_factory.from_abbrev("%" + $1), $2]
|
141
|
-
elsif f_string =~ /^(%\{.+?\}[Ceinor])(.*)/
|
142
|
-
# "Contents of" element (e.g. "%{Accept}i")
|
143
|
-
return [@element_factory.from_abbrev($1), $2]
|
144
|
-
elsif f_string =~ /^(.+?)(%.*|$)/
|
145
|
-
# Bare string up until the next %, or up until the end of the format string
|
146
|
-
return [LogFormatString.new($1), $2]
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
|
152
|
-
# Makes log line hashes based on log file text
|
153
|
-
class LogLineParser
|
154
|
-
# Initializes the instance given a LogFormat instance
|
155
|
-
def initialize(log_format, progress_meter)
|
156
|
-
@log_format = log_format
|
157
|
-
@progress_meter = progress_meter
|
158
|
-
|
159
|
-
@_elements = log_format.elements
|
160
|
-
@_derivation_map = log_format.derivation_map
|
161
|
-
end
|
162
|
-
|
163
|
-
# Returns a log line hash built from a line of text, or nil if the line was malformatted
|
164
|
-
#
|
165
|
-
# The keys of the hash are names of LogFormatElements (e.g. "remote_host", "reqheader_referer")
|
166
|
-
def from_text(log_text)
|
167
|
-
match = (log_text =~ @log_format.regex)
|
168
|
-
if match.nil?
|
169
|
-
warn "Log line did not match expected format: #{log_text}"
|
170
|
-
return nil
|
171
|
-
end
|
172
|
-
|
173
|
-
# Make a hash mapping all parsed elements to their values in the entry
|
174
|
-
match_groups = Regexp.last_match.to_a
|
175
|
-
match_groups.shift # First value is the whole matched string, which we do not want
|
176
|
-
element_values = Hash[*@_elements.zip(match_groups).flatten]
|
177
|
-
|
178
|
-
# Start building the return value
|
179
|
-
entry = LogEntry.new(@_derivation_map)
|
180
|
-
entry[:text] = log_text
|
181
|
-
# Insert all the elements specified in the LogFormat
|
182
|
-
entry.merge!(_elements_to_hash(element_values))
|
183
|
-
|
184
|
-
@progress_meter.output_progress(entry)
|
185
|
-
entry
|
186
|
-
end
|
187
|
-
|
188
|
-
# Returns a hash of "element name" => value pairs based on a hash of element => value pairs.
|
189
|
-
def _elements_to_hash(element_values)
|
190
|
-
hsh = {}
|
191
|
-
element_values.each_pair do |element, value|
|
192
|
-
hsh[element.name] = value
|
193
|
-
end
|
194
|
-
|
195
|
-
hsh
|
196
|
-
end
|
197
|
-
|
198
|
-
# Returns hash of derived "element name" => value pairs from a hash of element => value pairs.
|
199
|
-
#
|
200
|
-
# That is, we go through the elements passed and if any offers derived elements, we include
|
201
|
-
# those in the return value.
|
202
|
-
def _derived_elements(element_values)
|
203
|
-
hsh = {}
|
204
|
-
element_values.each_pair do |element, value|
|
205
|
-
hsh.merge!(element.derived_values(value))
|
206
|
-
end
|
207
|
-
|
208
|
-
hsh
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
|
213
|
-
# Parses a log file given a path and a LogFormat instance
|
214
|
-
class LogParser
|
215
|
-
# Initializes the parser with the path to a log file and a LogLineParser.
|
216
|
-
def initialize(path, ll_parser)
|
217
|
-
@path = path
|
218
|
-
@ll_parser = ll_parser
|
219
|
-
|
220
|
-
@_file = nil
|
221
|
-
end
|
222
|
-
|
223
|
-
# Returns the next entry in the log file as a hash, or nil if we've reached EOF.
|
224
|
-
#
|
225
|
-
# The keys of the hash are names of LogFormatElements (e.g. "remote_host", "reqheader_referer")
|
226
|
-
def next_entry
|
227
|
-
@_file = open(@path) if @_file.nil?
|
228
|
-
|
229
|
-
while line_text = @_file.gets
|
230
|
-
return nil if line_text.nil?
|
231
|
-
logline = @ll_parser.from_text(line_text)
|
232
|
-
|
233
|
-
# The LogLineFactory returns nil and writes a warning if the line text doesn't
|
234
|
-
# match our expected format.
|
235
|
-
next if logline.nil?
|
236
|
-
|
237
|
-
return logline
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
# Resets the LogParser's filehandle so we can start over.
|
242
|
-
def reset
|
243
|
-
@_file = nil
|
244
|
-
end
|
245
|
-
|
246
|
-
# Makes the LogParser close its current log file and start parsing a new one instead
|
247
|
-
#
|
248
|
-
# `new_target` is a writable file object that the parser should start parsing, and if
|
249
|
-
# in_place is true, we actually replace the contents of the current target with those
|
250
|
-
# of the new target.
|
251
|
-
def replace_target(new_target, in_place)
|
252
|
-
new_target.close
|
253
|
-
|
254
|
-
if in_place
|
255
|
-
old_path = @_file.path
|
256
|
-
File.rename(new_target.path, old_path)
|
257
|
-
else
|
258
|
-
@path = new_target.path
|
259
|
-
end
|
260
|
-
|
261
|
-
@_file = nil
|
262
|
-
end
|
263
|
-
end
|
264
|
-
|
265
|
-
# Makes a LogParser given the parameters we want to work with.
|
266
|
-
#
|
267
|
-
# This is the class that most external code should instatiate to begin using this library.
|
268
|
-
class LogParserFactory
|
269
|
-
# Returns a new LogParser instance for the given log file, which should have the given Apache
|
270
|
-
# log format.
|
271
|
-
def self.log_parser(format_string, path, progress_meter)
|
272
|
-
# First we generate a LogFormat instance based on the format string we were given
|
273
|
-
format_factory = LogFormatFactory.new
|
274
|
-
log_format = format_factory.from_format_string(format_string)
|
275
|
-
|
276
|
-
# Now we generate a line parser
|
277
|
-
log_line_parser = LogLineParser.new(log_format, progress_meter)
|
278
|
-
|
279
|
-
# And now we can instantiate and return a LogParser
|
280
|
-
return LogParser.new(path, log_line_parser)
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
|
285
|
-
# Finds a named log format string in the configuration file(s)
|
286
|
-
class FormatStringFinder
|
287
|
-
@@FILE_NAME = "log_formats.rb"
|
288
|
-
@@DEFAULT_FORMATS = {
|
289
|
-
:ncsa => %q!%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"!,
|
290
|
-
:ubuntu => %q!%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"!
|
291
|
-
}
|
292
|
-
|
293
|
-
# Finds the given format string in the configuration file(s)
|
294
|
-
#
|
295
|
-
# If none exists, returns nil.
|
296
|
-
def find(format_name)
|
297
|
-
name_as_symbol = format_name.to_sym
|
298
|
-
|
299
|
-
formats = @@DEFAULT_FORMATS.clone
|
300
|
-
_search_path.each do |dir|
|
301
|
-
config_path = File.join(dir, @@FILE_NAME)
|
302
|
-
if File.readable?(config_path)
|
303
|
-
config_file = open(File.join(dir, @@FILE_NAME))
|
304
|
-
eval config_file.read
|
305
|
-
end
|
306
|
-
|
307
|
-
if formats.key?(format_name.to_sym)
|
308
|
-
return formats[format_name.to_sym].gsub(/\\"/, '"')
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
raise "Failed to find the format '#{format_name}' in the search path: #{_search_path.inspect}"
|
313
|
-
end
|
314
|
-
|
315
|
-
def _search_path
|
316
|
-
[".", "./etc",
|
317
|
-
File.join(ENV["HOME"], ".apachecrunch"),
|
318
|
-
"/etc/apachecrunch"]
|
319
21
|
end
|
320
22
|
end
|
data/lib/config.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
class ApacheCrunch
|
2
|
+
# Finds a named log format string in the configuration file(s)
|
3
|
+
class FormatDefinitionFinder
|
4
|
+
@@FILE_NAME = "log_formats.rb"
|
5
|
+
@@DEFAULT_FORMATS = {
|
6
|
+
:ncsa => %q!%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"!,
|
7
|
+
:ubuntu => %q!%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"!
|
8
|
+
}
|
9
|
+
|
10
|
+
# Initializes the FormatDefinitionFinder.
|
11
|
+
def initialize(file_cls=File, env=ENV)
|
12
|
+
@_file_cls=file_cls
|
13
|
+
@_env=env
|
14
|
+
end
|
15
|
+
|
16
|
+
# Finds the given format string in the configuration file(s)
|
17
|
+
#
|
18
|
+
# If none exists, raises a RuntimeError naming the format and the search path.
|
19
|
+
def find(format_name)
|
20
|
+
name_as_symbol = format_name.to_sym
|
21
|
+
|
22
|
+
formats = @@DEFAULT_FORMATS.clone
|
23
|
+
_search_path.each do |dir|
|
24
|
+
config_path = @_file_cls.join(dir, @@FILE_NAME)
|
25
|
+
if @_file_cls.readable?(config_path)
|
26
|
+
config_file = @_file_cls.open(@_file_cls.join(dir, @@FILE_NAME))
|
27
|
+
eval config_file.read
|
28
|
+
end
|
29
|
+
|
30
|
+
if formats.key?(format_name.to_sym)
|
31
|
+
return formats[format_name.to_sym].gsub(/\\"/, '"')
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
raise "Failed to find the format '#{format_name}' in the search path: #{_search_path.inspect}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def _search_path
|
39
|
+
[".", "./etc",
|
40
|
+
@_file_cls.join(@_env["HOME"], ".apachecrunch"),
|
41
|
+
"/etc/apachecrunch"]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/entry.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
class ApacheCrunch
|
2
|
+
# A parsed entry from the log.
|
3
|
+
#
|
4
|
+
# Acts like a hash, in that you get at the log elements (e.g. "url_path", "remote_host") by
|
5
|
+
# accessing them as entry[name].
|
6
|
+
class Entry
|
7
|
+
def initialize(derivation_map)
|
8
|
+
@_derivation_map = derivation_map
|
9
|
+
@_attributes = {}
|
10
|
+
end
|
11
|
+
|
12
|
+
def []=(name, value)
|
13
|
+
@_attributes[name] = value
|
14
|
+
end
|
15
|
+
|
16
|
+
def [](name)
|
17
|
+
return @_attributes[name] if @_attributes.key?(name)
|
18
|
+
|
19
|
+
derived_from_cls = @_derivation_map[name]
|
20
|
+
return nil if derived_from_cls.nil?
|
21
|
+
|
22
|
+
derived_from_cls.derive(name, @_attributes[derived_from_cls.name])
|
23
|
+
end
|
24
|
+
|
25
|
+
def merge!(hsh)
|
26
|
+
@_attributes.merge!(hsh)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
# Makes Entry instances based on log file text
|
32
|
+
class EntryParser
|
33
|
+
# Initializes the instance given a Format instance and a progress meter
|
34
|
+
def initialize(log_format, progress_meter)
|
35
|
+
@log_format = log_format
|
36
|
+
@progress_meter = progress_meter
|
37
|
+
|
38
|
+
@_elements = log_format.elements
|
39
|
+
@_derivation_map = log_format.derivation_map
|
40
|
+
end
|
41
|
+
|
42
|
+
# Returns a log line hash built from a line of text, or nil if the line was malformatted
|
43
|
+
#
|
44
|
+
# The keys of the hash are names of LogFormatElements (e.g. "remote_host", "reqheader_referer")
|
45
|
+
def from_text(log_text)
|
46
|
+
match = (log_text =~ @log_format.regex)
|
47
|
+
if match.nil?
|
48
|
+
warn "Log line did not match expected format: #{log_text}"
|
49
|
+
return nil
|
50
|
+
end
|
51
|
+
|
52
|
+
# Make a hash mapping all parsed elements to their values in the entry
|
53
|
+
match_groups = Regexp.last_match.to_a
|
54
|
+
match_groups.shift # First value is the whole matched string, which we do not want
|
55
|
+
element_values = Hash[*@_elements.zip(match_groups).flatten]
|
56
|
+
|
57
|
+
# Start building the return value
|
58
|
+
entry = Entry.new(@_derivation_map)
|
59
|
+
entry[:text] = log_text
|
60
|
+
# Insert all the elements specified in the LogFormat
|
61
|
+
entry.merge!(_elements_to_hash(element_values))
|
62
|
+
|
63
|
+
@progress_meter.output_progress(entry)
|
64
|
+
entry
|
65
|
+
end
|
66
|
+
|
67
|
+
# Returns a hash of "element name" => value pairs based on a hash of element => value pairs.
|
68
|
+
def _elements_to_hash(element_values)
|
69
|
+
hsh = {}
|
70
|
+
element_values.each_pair do |element, value|
|
71
|
+
hsh[element.name] = value
|
72
|
+
end
|
73
|
+
|
74
|
+
hsh
|
75
|
+
end
|
76
|
+
|
77
|
+
# Returns hash of derived "element name" => value pairs from a hash of element => value pairs.
|
78
|
+
#
|
79
|
+
# That is, we go through the elements passed and if any offers derived elements, we include
|
80
|
+
# those in the return value.
|
81
|
+
def _derived_elements(element_values)
|
82
|
+
hsh = {}
|
83
|
+
element_values.each_pair do |element, value|
|
84
|
+
hsh.merge!(element.derived_values(value))
|
85
|
+
end
|
86
|
+
|
87
|
+
hsh
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
data/lib/format.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
class ApacheCrunch
|
2
|
+
# Represents a particular Apache log format
|
3
|
+
class Format
|
4
|
+
attr_accessor :format_def, :tokens
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@tokens = []
|
8
|
+
@_regex = nil
|
9
|
+
end
|
10
|
+
|
11
|
+
# Appends a given token (a LogFormatElement or LogFormatString) to the tokens list
|
12
|
+
def append(token)
|
13
|
+
@tokens << token
|
14
|
+
end
|
15
|
+
|
16
|
+
# Returns a compiled regex to match a log line in this format
|
17
|
+
#
|
18
|
+
# Each group matched will correspond to an element in the log format.
|
19
|
+
def regex
|
20
|
+
return @_regex unless @_regex.nil?
|
21
|
+
|
22
|
+
r = "^"
|
23
|
+
@tokens.each do |tok|
|
24
|
+
# We only care to remember the LogFormatElements. No need to put parentheses
|
25
|
+
# around LogFormatString shit.
|
26
|
+
if tok.respond_to?(:name)
|
27
|
+
r += "(" + tok.regex + ")"
|
28
|
+
else
|
29
|
+
r += tok.regex
|
30
|
+
end
|
31
|
+
end
|
32
|
+
r += "$"
|
33
|
+
|
34
|
+
@_regex = Regexp.compile(r)
|
35
|
+
@_regex
|
36
|
+
end
|
37
|
+
|
38
|
+
# Returns the list of LogFormatElements, in order, of the interpolated things in the format.
|
39
|
+
#
|
40
|
+
# For example, if the log format definition were "%h %u %{Referer}i", this would return the
|
41
|
+
# LogFormatElement instances for "%h", "%u", and "%{Referer}i".
|
42
|
+
def elements
|
43
|
+
@tokens.find_all do |tok|
|
44
|
+
tok.respond_to?(:name)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# Returns hash mapping names of elements to the element class from which they can be derived.
|
49
|
+
def derivation_map
|
50
|
+
hsh = {}
|
51
|
+
elements.each do |tok|
|
52
|
+
tok.derived_elements.each do |derived_element|
|
53
|
+
hsh[derived_element.name] = tok.class
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
hsh
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Parses a log format definition
|
62
|
+
class FormatParser
|
63
|
+
# Initializes the FormatParser
|
64
|
+
#
|
65
|
+
# Takes a FormatElementFactory instance, and you can inject a replacement for the
|
66
|
+
# LogFormatString class.
|
67
|
+
def initialize(format_element_factory, format_string_cls=LogFormatString)
|
68
|
+
@_element_factory = format_element_factory
|
69
|
+
@_format_string_cls = format_string_cls
|
70
|
+
end
|
71
|
+
|
72
|
+
# Parses the given format_def (e.g. "%h %u %s %{Referer}i") and returns a list of tokens.
|
73
|
+
#
|
74
|
+
# These tokens are all instances of LogFormatString or LogFormatElement.
|
75
|
+
def parse_def(format_def)
|
76
|
+
s = format_def
|
77
|
+
tokens = []
|
78
|
+
|
79
|
+
until s.empty?
|
80
|
+
token, s = _shift_token(s)
|
81
|
+
tokens << token
|
82
|
+
end
|
83
|
+
|
84
|
+
tokens
|
85
|
+
end
|
86
|
+
|
87
|
+
# Finds the first token (a LogFormatElement or LogFormatString) in a format definition
|
88
|
+
#
|
89
|
+
# Returns a list containing the token and the new format definition (with the characters
|
90
|
+
# that correspond to the token removed)
|
91
|
+
def _shift_token(format_def)
|
92
|
+
if format_def =~ /^%%(.*)/
|
93
|
+
# Literal "%"
|
94
|
+
return [@_format_string_cls.new("%%"), $1]
|
95
|
+
elsif format_def =~ /^(%[A-Za-z])(.*)/
|
96
|
+
# Simple element (e.g. "%h", "%u")
|
97
|
+
return [@_element_factory.from_abbrev($1), $2]
|
98
|
+
elsif format_def =~ /^%[<>]([A-Za-z])(.*)/
|
99
|
+
# No idea how to handle mod_log_config's "which request" system yet, so we
|
100
|
+
# ignore it.
|
101
|
+
return [@_element_factory.from_abbrev("%" + $1), $2]
|
102
|
+
elsif format_def =~ /^(%\{.+?\}[Ceinor])(.*)/
|
103
|
+
# "Contents of" element (e.g. "%{Accept}i")
|
104
|
+
return [@_element_factory.from_abbrev($1), $2]
|
105
|
+
elsif format_def =~ /^(.+?)(%.*|$)/
|
106
|
+
# Bare string up until the next %, or up until the end of the format definition
|
107
|
+
return [@_format_string_cls.new($1), $2]
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
|
113
|
+
# Turns a string specifying an Apache log format into a Format instance
|
114
|
+
class FormatFactory
|
115
|
+
# Constructs and returns a Format instance based on the given Apache log format string
|
116
|
+
def self.from_format_def(format_def)
|
117
|
+
logformat = Format.new
|
118
|
+
logformat.format_def = format_def
|
119
|
+
|
120
|
+
element_factory = LogFormatElementFactory.new
|
121
|
+
|
122
|
+
format_parser = FormatParser.new(element_factory)
|
123
|
+
logformat.tokens = format_parser.parse_def(format_def)
|
124
|
+
|
125
|
+
logformat
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
data/lib/log_parser.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
class ApacheCrunch
|
2
|
+
# Parses a log file given a path and a Format instance
|
3
|
+
class LogParser
|
4
|
+
# Initializes the parser with the path to a log file and an EntryParser.
|
5
|
+
def initialize(path, entry_parser, file_cls=File)
|
6
|
+
@path = path
|
7
|
+
@entry_parser = entry_parser
|
8
|
+
|
9
|
+
@_file_cls = file_cls
|
10
|
+
@_file = nil
|
11
|
+
end
|
12
|
+
|
13
|
+
# Returns the next entry in the log file as a hash, or nil if we've reached EOF.
|
14
|
+
#
|
15
|
+
# The keys of the hash are names of LogFormatElements (e.g. :remote_host,
|
16
|
+
# :reqheader_referer)
|
17
|
+
def next_entry
|
18
|
+
@_file = @_file_cls.open(@path) if @_file.nil?
|
19
|
+
|
20
|
+
while line_text = @_file.gets
|
21
|
+
return nil if line_text.nil?
|
22
|
+
logline = @entry_parser.from_text(line_text)
|
23
|
+
|
24
|
+
# The EntryParser returns nil and writes a warning if the line text doesn't
|
25
|
+
# match our expected format.
|
26
|
+
next if logline.nil?
|
27
|
+
|
28
|
+
return logline
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Resets the LogParser's filehandle so we can start over.
|
33
|
+
def reset
|
34
|
+
@_file = nil
|
35
|
+
end
|
36
|
+
|
37
|
+
# Makes the LogParser close its current log file and start parsing a new one instead
|
38
|
+
#
|
39
|
+
# `new_target` is a writable file object that the parser should start parsing, and if
|
40
|
+
# in_place is true, we actually replace the contents of the current target with those
|
41
|
+
# of the new target.
|
42
|
+
def replace_target(new_target, in_place)
|
43
|
+
new_target.close
|
44
|
+
|
45
|
+
if in_place
|
46
|
+
old_path = @_file.path
|
47
|
+
@_file_cls.rename(new_target.path, old_path)
|
48
|
+
else
|
49
|
+
@path = new_target.path
|
50
|
+
end
|
51
|
+
|
52
|
+
@_file = nil
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
|
57
|
+
# Makes a LogParser given the parameters we want to work with.
|
58
|
+
#
|
59
|
+
# This is the class that most external code should instantiate to begin using this library.
|
60
|
+
class LogParserFactory
|
61
|
+
# Returns a new LogParser instance for the given log file, which should have the given
|
62
|
+
# Apache log format.
|
63
|
+
def self.log_parser(format_def, path, progress_meter)
|
64
|
+
# First we generate a Format instance based on the format definition we were given
|
65
|
+
log_format = FormatFactory.from_format_def(format_def)
|
66
|
+
|
67
|
+
# Now we generate a line parser
|
68
|
+
log_line_parser = EntryParser.new(log_format, progress_meter)
|
69
|
+
|
70
|
+
# And now we can instantiate and return a LogParser
|
71
|
+
return LogParser.new(path, log_line_parser)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/progress.rb
CHANGED
data/test/runner.rb
ADDED
data/test/stub.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
class StubFormatElement < LogFormatElement
|
2
|
+
@abbrev = "%z"
|
3
|
+
@name = :stub
|
4
|
+
@regex = %q!.*!
|
5
|
+
end
|
6
|
+
|
7
|
+
class StubAlphanumericFormatElement < LogFormatElement
|
8
|
+
@abbrev = "%Z"
|
9
|
+
@name = :alnum
|
10
|
+
@regex = %q![A-Za-z0-9]+!
|
11
|
+
end
|
12
|
+
|
13
|
+
class StubNumericFormatElement < LogFormatElement
|
14
|
+
@abbrev = "%y"
|
15
|
+
@name = :num
|
16
|
+
@regex = %q!\d+!
|
17
|
+
end
|
18
|
+
|
19
|
+
class StubFormatString
|
20
|
+
attr_accessor :regex
|
21
|
+
def initialize(regex)
|
22
|
+
@regex = regex
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class StubDerivedElement < LogFormatElement
|
27
|
+
@abbrev = ""
|
28
|
+
@name = :derived
|
29
|
+
@regex = %q!.*!
|
30
|
+
end
|
31
|
+
|
32
|
+
class StubDerivationSourceElement < LogFormatElement
|
33
|
+
@name = :derivation_source
|
34
|
+
|
35
|
+
def derived_elements
|
36
|
+
[StubDerivedElement]
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.derive(name, our_own_value)
|
40
|
+
if name == :derived
|
41
|
+
return "derived from #{our_own_value}"
|
42
|
+
end
|
43
|
+
|
44
|
+
nil
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class StubLogFormatElementFactory
|
49
|
+
def from_abbrev(abbrev)
|
50
|
+
if abbrev =~ /^%/
|
51
|
+
return StubFormatElement.new
|
52
|
+
else
|
53
|
+
return StubFormatString.new
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/test/test_entry.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'test/stub'
|
2
|
+
|
3
|
+
class TestEntry < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@inst = ApacheCrunch::Entry.new({:derived => StubDerivationSourceElement})
|
6
|
+
end
|
7
|
+
|
8
|
+
def teardown
|
9
|
+
@inst = nil
|
10
|
+
end
|
11
|
+
|
12
|
+
# Tests direct assignment of an element.
|
13
|
+
def test_assign
|
14
|
+
@inst[:bar] = "test_value"
|
15
|
+
assert_equal(@inst[:bar], "test_value")
|
16
|
+
end
|
17
|
+
|
18
|
+
# Tests derivation of one element from another.
|
19
|
+
def test_derive
|
20
|
+
@inst[:derivation_source] = "source text"
|
21
|
+
assert_equal(@inst[:derived], "derived from source text")
|
22
|
+
end
|
23
|
+
|
24
|
+
# Tests access to an absent element.
|
25
|
+
def test_access_absent
|
26
|
+
assert_nil(@inst[:nonexistent])
|
27
|
+
end
|
28
|
+
end
|
data/test/test_format.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'test/stub'
|
2
|
+
|
3
|
+
class TestFormat < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@inst = ApacheCrunch::Format.new
|
6
|
+
end
|
7
|
+
|
8
|
+
def teardown
|
9
|
+
@inst = nil
|
10
|
+
end
|
11
|
+
|
12
|
+
# Tests appending an element to the format
|
13
|
+
def test_append
|
14
|
+
format_element = StubFormatElement.new
|
15
|
+
@inst.append(format_element)
|
16
|
+
assert_same(@inst.tokens[-1], format_element)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Tests regex compilation for a simple format
|
20
|
+
def test_regex_simple
|
21
|
+
@inst.append(StubAlphanumericFormatElement.new)
|
22
|
+
"abc123\n" =~ @inst.regex
|
23
|
+
assert_equal($1, "abc123")
|
24
|
+
|
25
|
+
"!abc123\n" =~ @inst.regex
|
26
|
+
assert_nil($1)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Tests regex compilation for a more complex format
|
30
|
+
def test_regex_complex
|
31
|
+
@inst.append(StubNumericFormatElement.new)
|
32
|
+
@inst.append(StubFormatString.new(' \(some stuff\) '))
|
33
|
+
@inst.append(StubAlphanumericFormatElement.new)
|
34
|
+
|
35
|
+
"54321 (some stuff) alphaNumericStuff" =~ @inst.regex
|
36
|
+
assert_equal([Regexp.last_match(1), Regexp.last_match(2)],
|
37
|
+
["54321", "alphaNumericStuff"])
|
38
|
+
|
39
|
+
"54321 (doesn't match) alphaNumericStuff" =~ @inst.regex
|
40
|
+
assert_equal([Regexp.last_match(1), Regexp.last_match(2)],
|
41
|
+
[nil, nil])
|
42
|
+
end
|
43
|
+
|
44
|
+
# Tests the list of matchable elements
|
45
|
+
def test_elements
|
46
|
+
num_element = StubNumericFormatElement.new
|
47
|
+
alnum_element = StubAlphanumericFormatElement.new
|
48
|
+
@inst.append(num_element)
|
49
|
+
@inst.append(StubFormatString.new(' \(some stuff\) '))
|
50
|
+
@inst.append(alnum_element)
|
51
|
+
|
52
|
+
assert_equal(@inst.elements, [num_element, alnum_element])
|
53
|
+
end
|
54
|
+
|
55
|
+
# Tests the derivation map
|
56
|
+
def test_derivation_map
|
57
|
+
@inst.append(StubNumericFormatElement.new)
|
58
|
+
@inst.append(StubFormatString.new(' \(some stuff\) '))
|
59
|
+
@inst.append(StubDerivationSourceElement.new)
|
60
|
+
|
61
|
+
assert(@inst.derivation_map, {:derived => StubDerivationSourceElement})
|
62
|
+
end
|
63
|
+
end
|
data/test/test_format_parser.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'test/stub'
|
2
|
+
|
3
|
+
class TestFormatParser < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@inst = ApacheCrunch::FormatParser.new(StubLogFormatElementFactory.new,
|
6
|
+
StubFormatString)
|
7
|
+
end
|
8
|
+
|
9
|
+
def teardown
|
10
|
+
@inst = nil
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_parse_simple
|
14
|
+
tokens = @inst.parse_def("%Z %z")
|
15
|
+
[StubFormatElement, StubFormatString, StubFormatElement].each_with_index do |c,i|
|
16
|
+
assert_instance_of(c, tokens[i])
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_parse_complex
|
21
|
+
tokens = @inst.parse_def("%{Foo-Bar}i %{baz:\d+}r")
|
22
|
+
[StubFormatElement, StubFormatString, StubFormatElement].each_with_index do |c,i|
|
23
|
+
assert_instance_of(c, tokens[i])
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apachecrunch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.3"
|
8
|
+
- 4
|
9
|
+
version: "0.4"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Dan Slimmon
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-07-
|
17
|
+
date: 2011-07-12 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
@@ -31,11 +31,20 @@ extra_rdoc_files: []
|
|
31
31
|
|
32
32
|
files:
|
33
33
|
- lib/apachecrunch.rb
|
34
|
+
- lib/config.rb
|
35
|
+
- lib/entry.rb
|
36
|
+
- lib/format.rb
|
34
37
|
- lib/log_element.rb
|
38
|
+
- lib/log_parser.rb
|
35
39
|
- lib/procedure_dsl.rb
|
36
40
|
- lib/progress.rb
|
37
41
|
- bin/apachecrunch
|
38
42
|
- LICENSE
|
43
|
+
- test/runner.rb
|
44
|
+
- test/stub.rb
|
45
|
+
- test/test_entry.rb
|
46
|
+
- test/test_format.rb
|
47
|
+
- test/test_format_parser.rb
|
39
48
|
has_rdoc: true
|
40
49
|
homepage: https://github.com/danslimmon/apachecrunch/
|
41
50
|
licenses:
|