danielsdeleo-teeth 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -0,0 +1,111 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Teeth::ScannerError</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Teeth::ScannerError</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/scanner_rb.html">
|
59
|
+
lib/scanner.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
StandardError
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
</div>
|
87
|
+
|
88
|
+
|
89
|
+
<!-- if includes -->
|
90
|
+
|
91
|
+
<div id="section">
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<!-- if method_list -->
|
101
|
+
|
102
|
+
|
103
|
+
</div>
|
104
|
+
|
105
|
+
|
106
|
+
<div id="validator-badges">
|
107
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
108
|
+
</div>
|
109
|
+
|
110
|
+
</body>
|
111
|
+
</html>
|
@@ -0,0 +1,129 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Module: Teeth</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Module</strong></td>
|
53
|
+
<td class="class-name-in-header">Teeth</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/rule_statement_rb.html">
|
59
|
+
lib/rule_statement.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
<a href="../files/lib/scanner_rb.html">
|
63
|
+
lib/scanner.rb
|
64
|
+
</a>
|
65
|
+
<br />
|
66
|
+
<a href="../files/lib/scanner_definition_rb.html">
|
67
|
+
lib/scanner_definition.rb
|
68
|
+
</a>
|
69
|
+
<br />
|
70
|
+
</td>
|
71
|
+
</tr>
|
72
|
+
|
73
|
+
</table>
|
74
|
+
</div>
|
75
|
+
<!-- banner header -->
|
76
|
+
|
77
|
+
<div id="bodyContent">
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
<div id="contextContent">
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
</div>
|
86
|
+
|
87
|
+
|
88
|
+
</div>
|
89
|
+
|
90
|
+
|
91
|
+
<!-- if includes -->
|
92
|
+
|
93
|
+
<div id="section">
|
94
|
+
|
95
|
+
<div id="class-list">
|
96
|
+
<h3 class="section-bar">Classes and Modules</h3>
|
97
|
+
|
98
|
+
Class <a href="Teeth/DuplicateDefinitionError.html" class="link">Teeth::DuplicateDefinitionError</a><br />
|
99
|
+
Class <a href="Teeth/DuplicateRuleError.html" class="link">Teeth::DuplicateRuleError</a><br />
|
100
|
+
Class <a href="Teeth/InvalidDefaultDefinitionName.html" class="link">Teeth::InvalidDefaultDefinitionName</a><br />
|
101
|
+
Class <a href="Teeth/InvalidExtensionDirectory.html" class="link">Teeth::InvalidExtensionDirectory</a><br />
|
102
|
+
Class <a href="Teeth/RuleStatement.html" class="link">Teeth::RuleStatement</a><br />
|
103
|
+
Class <a href="Teeth/RuleStatementGroup.html" class="link">Teeth::RuleStatementGroup</a><br />
|
104
|
+
Class <a href="Teeth/Scanner.html" class="link">Teeth::Scanner</a><br />
|
105
|
+
Class <a href="Teeth/ScannerDefinition.html" class="link">Teeth::ScannerDefinition</a><br />
|
106
|
+
Class <a href="Teeth/ScannerDefinitionArgumentError.html" class="link">Teeth::ScannerDefinitionArgumentError</a><br />
|
107
|
+
Class <a href="Teeth/ScannerDefinitionGroup.html" class="link">Teeth::ScannerDefinitionGroup</a><br />
|
108
|
+
Class <a href="Teeth/ScannerError.html" class="link">Teeth::ScannerError</a><br />
|
109
|
+
|
110
|
+
</div>
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
<!-- if method_list -->
|
119
|
+
|
120
|
+
|
121
|
+
</div>
|
122
|
+
|
123
|
+
|
124
|
+
<div id="validator-badges">
|
125
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
126
|
+
</div>
|
127
|
+
|
128
|
+
</body>
|
129
|
+
</html>
|
data/doc/created.rid
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Sun, 29 Mar 2009 17:53:14 -0600
|
@@ -0,0 +1,314 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: README.rdoc</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>README.rdoc</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>README.rdoc
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Mar 29 17:53:10 -0600 2009</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<h1><a href="../classes/Teeth.html">Teeth</a></h1>
|
73
|
+
<p>
|
74
|
+
<a href="../classes/Teeth.html">Teeth</a> is a library for fast parsing of
|
75
|
+
log files such as Apache access and error logs. It uses C extensions
|
76
|
+
generated by <a href="http://flex.sourceforge.net/index.html">flex</a> (as
|
77
|
+
in Flex and Bison). If you only want to use the built-in scanners, you
|
78
|
+
don't need flex. If you want to add support for new/different log
|
79
|
+
formats, you'll need to have flex installed.
|
80
|
+
</p>
|
81
|
+
<h1>Example</h1>
|
82
|
+
<pre>
|
83
|
+
require "teeth"
|
84
|
+
|
85
|
+
access_log = %q{myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}
|
86
|
+
access_log.scan_apache_logs
|
87
|
+
=> {:strings=>["241"],
|
88
|
+
:apache_access_datetime=>["13/Dec/2008:19:26:11 -0500"],
|
89
|
+
:absolute_url=>["http://172.16.115.130/"],
|
90
|
+
:message=>"myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] \"GET /favicon.ico HTTP/1.1\" 404 241 \"http://172.16.115.130/\" \"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1\"",
|
91
|
+
:http_method=>["GET"],
|
92
|
+
:browser_string=>["Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"],
|
93
|
+
:relative_url=>["/favicon.ico"],
|
94
|
+
:http_version=>["HTTP/1.1"],
|
95
|
+
:host=>["myhost.localdomain:80"],
|
96
|
+
:id=>"8AD5CBCC1CB011DE8CE10017F22FF48F",
|
97
|
+
:http_response=>["404"],
|
98
|
+
:ipv4_addr=>["172.16.115.1"]}
|
99
|
+
</pre>
|
100
|
+
<h1>Supported Log Formats</h1>
|
101
|
+
<ul>
|
102
|
+
<li>Apache (access and error logs)
|
103
|
+
|
104
|
+
</li>
|
105
|
+
<li>Rails
|
106
|
+
|
107
|
+
</li>
|
108
|
+
</ul>
|
109
|
+
<p>
|
110
|
+
Support for other web servers, app servers, and applications as well as
|
111
|
+
other types of servers (e.g., SMTP, etc.) and generic syslog logs is
|
112
|
+
planned for the future.
|
113
|
+
</p>
|
114
|
+
<h2>Creating Your Own Scanners</h2>
|
115
|
+
<p>
|
116
|
+
<a href="../classes/Teeth.html">Teeth</a> includes a library that can
|
117
|
+
generate a flex scanner definition using a simplified definition written in
|
118
|
+
ruby. This cuts down on the repetition involved in writing all the C code
|
119
|
+
by hand. The included scanners for Apache and Rails logs are defined this
|
120
|
+
way. You can find them in the scanners directory.
|
121
|
+
</p>
|
122
|
+
<p>
|
123
|
+
Here's an example based on the definition for the Rails log scanner:
|
124
|
+
</p>
|
125
|
+
<pre>
|
126
|
+
require File.dirname(__FILE__) + "/../lib/teeth"
|
127
|
+
scanner = Teeth::Scanner.new(:rails_logs, File.dirname(__FILE__) + '/../ext/scan_rails_logs/')
|
128
|
+
</pre>
|
129
|
+
<p>
|
130
|
+
Flex definitions are kinda like macros for regular expressions. We include
|
131
|
+
some of the available defaults here to make writing the scanner easier
|
132
|
+
</p>
|
133
|
+
<pre>
|
134
|
+
scanner.load_default_definitions_for(:whitespace, :ip, :time, :web)
|
135
|
+
</pre>
|
136
|
+
<p>
|
137
|
+
Add some more definitions
|
138
|
+
</p>
|
139
|
+
<pre>
|
140
|
+
scanner.definitions do |define|
|
141
|
+
define.RAILS_TEASER '(processing|filter\ chain\ halted|rendered)'
|
142
|
+
define.CONTROLLER_ACTION '[a-z0-9]+#[a-z0-9]+'
|
143
|
+
</pre>
|
144
|
+
<p>
|
145
|
+
Scanner is case insensitive
|
146
|
+
</p>
|
147
|
+
<pre>
|
148
|
+
define.RAILS_ERROR_CLASS '([a-z]+\:\:)*[a-z]+error'
|
149
|
+
</pre>
|
150
|
+
<p>
|
151
|
+
"start conditions" are a feature of flex that allows us to have
|
152
|
+
some regular expressions that are only active when we tell the scanner to
|
153
|
+
enter a certain state. Here we define the "REQUEST_COMPLETED"
|
154
|
+
state, and specify that it is exclusive. This means that if the scanner is
|
155
|
+
in this state, it only matches rules written for this state
|
156
|
+
</p>
|
157
|
+
<pre>
|
158
|
+
define.REQUEST_COMPLETED :start_condition => :exclusive
|
159
|
+
end
|
160
|
+
</pre>
|
161
|
+
<p>
|
162
|
+
Define rules. These are the actions that the scanner executes when it sees
|
163
|
+
text that matches a regular expression. The default action is to add
|
164
|
+
:action_name => [matched_text] to the results Hash, or push the matched
|
165
|
+
text on the end of the array if it already exists.
|
166
|
+
</p>
|
167
|
+
<pre>
|
168
|
+
scanner.rules do |r|
|
169
|
+
</pre>
|
170
|
+
<p>
|
171
|
+
This will add something like :teaser => ["Processing"] to the
|
172
|
+
results
|
173
|
+
</p>
|
174
|
+
<pre>
|
175
|
+
r.teaser '{RAILS_TEASER}'
|
176
|
+
r.controller_action '{CONTROLLER_ACTION}'
|
177
|
+
</pre>
|
178
|
+
<p>
|
179
|
+
Use some of the default definitions we added above.
|
180
|
+
</p>
|
181
|
+
<pre>
|
182
|
+
r.datetime '{YEAR}"-"{MONTH_NUM}"-"{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}'
|
183
|
+
r.http_method '{HTTP_VERB}'
|
184
|
+
</pre>
|
185
|
+
<p>
|
186
|
+
With :skip_line => true, scanner stops processing the line immediately
|
187
|
+
</p>
|
188
|
+
<pre>
|
189
|
+
r.skip_lines '{RAILS_SKIP_LINES}', :skip_line => true
|
190
|
+
r.error '{RAILS_ERROR_CLASS}'
|
191
|
+
</pre>
|
192
|
+
<p>
|
193
|
+
With :strip_ends => true, scanner removes first and last characters from
|
194
|
+
matched text
|
195
|
+
</p>
|
196
|
+
<pre>
|
197
|
+
r.error_message '\(({WS}|{NON_WS})+\)', :strip_ends => true
|
198
|
+
</pre>
|
199
|
+
<p>
|
200
|
+
Puts scanner in the "REQUEST_COMPLETED" state we defined
|
201
|
+
above. The scanner only matches rules beginning with
|
202
|
+
"<REQUEST_COMPLETED>" now
|
203
|
+
</p>
|
204
|
+
<pre>
|
205
|
+
r.teaser 'completed\ in', :begin => "REQUEST_COMPLETED"
|
206
|
+
</pre>
|
207
|
+
<p>
|
208
|
+
These rules only apply to the "REQUEST_COMPLETED" state
|
209
|
+
</p>
|
210
|
+
<pre>
|
211
|
+
r.duration_s '<REQUEST_COMPLETED>[0-9]+\.[0-9]+'
|
212
|
+
r.duration_ms '<REQUEST_COMPLETED>[0-9]+/ms'
|
213
|
+
r.http_response '<REQUEST_COMPLETED>{HTTPCODE}'
|
214
|
+
</pre>
|
215
|
+
<p>
|
216
|
+
Need a "catchall" rule — flex scanner "jams" if
|
217
|
+
there isn't a default rule (the catchall rule for the default/INITIAL
|
218
|
+
state is automatically included). Note that :ignore => true makes the
|
219
|
+
scanner ignore what it matches but doesn't stop processing of the
|
220
|
+
line.
|
221
|
+
</p>
|
222
|
+
<pre>
|
223
|
+
r.ignore_others '<REQUEST_COMPLETED>{CATCHALL}', :ignore => true
|
224
|
+
</pre>
|
225
|
+
<p>
|
226
|
+
The "strings" action is special. It keeps track of whether the
|
227
|
+
last token was also a string, and if it was, the new string is appended to
|
228
|
+
the last string instead of being pushed to the array. For example, when
|
229
|
+
scanning an Apache error log, "Invalid URI in request" will
|
230
|
+
be extracted as a complete string (instead of ["Invalid",
|
231
|
+
"URI", "in", "request"])
|
232
|
+
</p>
|
233
|
+
<pre>
|
234
|
+
r.strings '{NON_WS}{NON_WS}*'
|
235
|
+
end
|
236
|
+
</pre>
|
237
|
+
<p>
|
238
|
+
Writes the generated scanner and an extconf.rb for it to the directory we
|
239
|
+
specified when we initialized the scanner.
|
240
|
+
</p>
|
241
|
+
<pre>
|
242
|
+
scanner.write!
|
243
|
+
</pre>
|
244
|
+
<p>
|
245
|
+
There isn't much in the way of documentation for the scanner generator,
|
246
|
+
but you can refer to the specs and the definitions for Apache and Rails
|
247
|
+
logs to get a sense of how it works. It would probably help a lot to learn
|
248
|
+
about flex's regex syntax and other features.
|
249
|
+
</p>
|
250
|
+
<h1>Ruby 1.9</h1>
|
251
|
+
<p>
|
252
|
+
Ruby 1.9 is supported on the master branch. Don't use the ruby1.9
|
253
|
+
branch, it is orphaned.
|
254
|
+
</p>
|
255
|
+
<h1>Shortcomings and Known Issues</h1>
|
256
|
+
<p>
|
257
|
+
In addition to the lack of support for formats other than Apache and Rails
|
258
|
+
described above:
|
259
|
+
</p>
|
260
|
+
<ul>
|
261
|
+
<li>It's a new project, so expect lots of API changes
|
262
|
+
|
263
|
+
</li>
|
264
|
+
<li>Does not convert datetimes to Ruby Time objects
|
265
|
+
|
266
|
+
</li>
|
267
|
+
<li>Does not always use context or knowledge of the log format to its
|
268
|
+
advantage. This is improving now that the scanner can utilize start
|
269
|
+
conditions.
|
270
|
+
|
271
|
+
</li>
|
272
|
+
</ul>
|
273
|
+
<h1>Performance</h1>
|
274
|
+
<p>
|
275
|
+
On my laptop, a white MacBook 2.0 GHz Intel Core Duo, teeth can process
|
276
|
+
more than 30k lines of Apache access logs per second. So it's pretty
|
277
|
+
fast. If modified to not create a UUID or keep the full message, this can
|
278
|
+
be increased to around 45k lines/sec. One could potentially do pretty well
|
279
|
+
on the <a
|
280
|
+
href="http://www.tbray.org/ongoing/When/200x/2008/05/01/Wide-Finder-2">wide_finder2</a>…
|
281
|
+
</p>
|
282
|
+
|
283
|
+
</div>
|
284
|
+
|
285
|
+
|
286
|
+
</div>
|
287
|
+
|
288
|
+
|
289
|
+
</div>
|
290
|
+
|
291
|
+
|
292
|
+
<!-- if includes -->
|
293
|
+
|
294
|
+
<div id="section">
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
|
302
|
+
|
303
|
+
<!-- if method_list -->
|
304
|
+
|
305
|
+
|
306
|
+
</div>
|
307
|
+
|
308
|
+
|
309
|
+
<div id="validator-badges">
|
310
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
311
|
+
</div>
|
312
|
+
|
313
|
+
</body>
|
314
|
+
</html>
|
@@ -0,0 +1,101 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: scan_apache_logs.yy.c</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>scan_apache_logs.yy.c</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>ext/scan_apache_logs/scan_apache_logs.yy.c
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Mar 29 17:35:41 -0600 2009</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
</div>
|
74
|
+
|
75
|
+
|
76
|
+
</div>
|
77
|
+
|
78
|
+
|
79
|
+
<!-- if includes -->
|
80
|
+
|
81
|
+
<div id="section">
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
<!-- if method_list -->
|
91
|
+
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<div id="validator-badges">
|
97
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
98
|
+
</div>
|
99
|
+
|
100
|
+
</body>
|
101
|
+
</html>
|