danielsdeleo-teeth 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -0,0 +1,111 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: Teeth::ScannerError</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">Teeth::ScannerError</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/scanner_rb.html">
|
59
|
+
lib/scanner.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
StandardError
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
</div>
|
87
|
+
|
88
|
+
|
89
|
+
<!-- if includes -->
|
90
|
+
|
91
|
+
<div id="section">
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
<!-- if method_list -->
|
101
|
+
|
102
|
+
|
103
|
+
</div>
|
104
|
+
|
105
|
+
|
106
|
+
<div id="validator-badges">
|
107
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
108
|
+
</div>
|
109
|
+
|
110
|
+
</body>
|
111
|
+
</html>
|
@@ -0,0 +1,129 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Module: Teeth</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Module</strong></td>
|
53
|
+
<td class="class-name-in-header">Teeth</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/rule_statement_rb.html">
|
59
|
+
lib/rule_statement.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
<a href="../files/lib/scanner_rb.html">
|
63
|
+
lib/scanner.rb
|
64
|
+
</a>
|
65
|
+
<br />
|
66
|
+
<a href="../files/lib/scanner_definition_rb.html">
|
67
|
+
lib/scanner_definition.rb
|
68
|
+
</a>
|
69
|
+
<br />
|
70
|
+
</td>
|
71
|
+
</tr>
|
72
|
+
|
73
|
+
</table>
|
74
|
+
</div>
|
75
|
+
<!-- banner header -->
|
76
|
+
|
77
|
+
<div id="bodyContent">
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
<div id="contextContent">
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
</div>
|
86
|
+
|
87
|
+
|
88
|
+
</div>
|
89
|
+
|
90
|
+
|
91
|
+
<!-- if includes -->
|
92
|
+
|
93
|
+
<div id="section">
|
94
|
+
|
95
|
+
<div id="class-list">
|
96
|
+
<h3 class="section-bar">Classes and Modules</h3>
|
97
|
+
|
98
|
+
Class <a href="Teeth/DuplicateDefinitionError.html" class="link">Teeth::DuplicateDefinitionError</a><br />
|
99
|
+
Class <a href="Teeth/DuplicateRuleError.html" class="link">Teeth::DuplicateRuleError</a><br />
|
100
|
+
Class <a href="Teeth/InvalidDefaultDefinitionName.html" class="link">Teeth::InvalidDefaultDefinitionName</a><br />
|
101
|
+
Class <a href="Teeth/InvalidExtensionDirectory.html" class="link">Teeth::InvalidExtensionDirectory</a><br />
|
102
|
+
Class <a href="Teeth/RuleStatement.html" class="link">Teeth::RuleStatement</a><br />
|
103
|
+
Class <a href="Teeth/RuleStatementGroup.html" class="link">Teeth::RuleStatementGroup</a><br />
|
104
|
+
Class <a href="Teeth/Scanner.html" class="link">Teeth::Scanner</a><br />
|
105
|
+
Class <a href="Teeth/ScannerDefinition.html" class="link">Teeth::ScannerDefinition</a><br />
|
106
|
+
Class <a href="Teeth/ScannerDefinitionArgumentError.html" class="link">Teeth::ScannerDefinitionArgumentError</a><br />
|
107
|
+
Class <a href="Teeth/ScannerDefinitionGroup.html" class="link">Teeth::ScannerDefinitionGroup</a><br />
|
108
|
+
Class <a href="Teeth/ScannerError.html" class="link">Teeth::ScannerError</a><br />
|
109
|
+
|
110
|
+
</div>
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
<!-- if method_list -->
|
119
|
+
|
120
|
+
|
121
|
+
</div>
|
122
|
+
|
123
|
+
|
124
|
+
<div id="validator-badges">
|
125
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
126
|
+
</div>
|
127
|
+
|
128
|
+
</body>
|
129
|
+
</html>
|
data/doc/created.rid
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Sun, 29 Mar 2009 17:53:14 -0600
|
@@ -0,0 +1,314 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: README.rdoc</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>README.rdoc</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>README.rdoc
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Mar 29 17:53:10 -0600 2009</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
<div id="description">
|
72
|
+
<h1><a href="../classes/Teeth.html">Teeth</a></h1>
|
73
|
+
<p>
|
74
|
+
<a href="../classes/Teeth.html">Teeth</a> is a library for fast parsing of
|
75
|
+
log files such as Apache access and error logs. It uses C extensions
|
76
|
+
generated by <a href="http://flex.sourceforge.net/index.html">flex</a> (as
|
77
|
+
in Flex and Bison). If you only want to use the built-in scanners, you
|
78
|
+
don't need flex. If you want to add support for new/different log
|
79
|
+
formats, you'll need to have flex installed.
|
80
|
+
</p>
|
81
|
+
<h1>Example</h1>
|
82
|
+
<pre>
|
83
|
+
require "teeth"
|
84
|
+
|
85
|
+
access_log = %q{myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}
|
86
|
+
access_log.scan_apache_logs
|
87
|
+
=> {:strings=>["241"],
|
88
|
+
:apache_access_datetime=>["13/Dec/2008:19:26:11 -0500"],
|
89
|
+
:absolute_url=>["http://172.16.115.130/"],
|
90
|
+
:message=>"myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] \"GET /favicon.ico HTTP/1.1\" 404 241 \"http://172.16.115.130/\" \"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1\"",
|
91
|
+
:http_method=>["GET"],
|
92
|
+
:browser_string=>["Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"],
|
93
|
+
:relative_url=>["/favicon.ico"],
|
94
|
+
:http_version=>["HTTP/1.1"],
|
95
|
+
:host=>["myhost.localdomain:80"],
|
96
|
+
:id=>"8AD5CBCC1CB011DE8CE10017F22FF48F",
|
97
|
+
:http_response=>["404"],
|
98
|
+
:ipv4_addr=>["172.16.115.1"]}
|
99
|
+
</pre>
|
100
|
+
<h1>Supported Log Formats</h1>
|
101
|
+
<ul>
|
102
|
+
<li>Apache (access and error logs)
|
103
|
+
|
104
|
+
</li>
|
105
|
+
<li>Rails
|
106
|
+
|
107
|
+
</li>
|
108
|
+
</ul>
|
109
|
+
<p>
|
110
|
+
Support for other web servers, app servers, and applications as well as
|
111
|
+
other types of servers (e.g., SMTP, etc.) and generic syslog logs is
|
112
|
+
planned for the future.
|
113
|
+
</p>
|
114
|
+
<h2>Creating Your Own Scanners</h2>
|
115
|
+
<p>
|
116
|
+
<a href="../classes/Teeth.html">Teeth</a> includes a library that can
|
117
|
+
generate a flex scanner definition using a simplified definition written in
|
118
|
+
ruby. This cuts down on the repetition involved in writing all the C code
|
119
|
+
by hand. The included scanners for Apache and Rails logs are defined this
|
120
|
+
way. You can find them in the scanners directory.
|
121
|
+
</p>
|
122
|
+
<p>
|
123
|
+
Here's an example based on the definition for the Rails log scanner:
|
124
|
+
</p>
|
125
|
+
<pre>
|
126
|
+
require File.dirname(__FILE__) + "/../lib/teeth"
|
127
|
+
scanner = Teeth::Scanner.new(:rails_logs, File.dirname(__FILE__) + '/../ext/scan_rails_logs/')
|
128
|
+
</pre>
|
129
|
+
<p>
|
130
|
+
Flex definitions are kinda like macros for regular expressions. We include
|
131
|
+
some of the available defaults here to make writing the scanner easier.
|
132
|
+
</p>
|
133
|
+
<pre>
|
134
|
+
scanner.load_default_definitions_for(:whitespace, :ip, :time, :web)
|
135
|
+
</pre>
|
136
|
+
<p>
|
137
|
+
Add some more definitions
|
138
|
+
</p>
|
139
|
+
<pre>
|
140
|
+
scanner.definitions do |define|
|
141
|
+
define.RAILS_TEASER '(processing|filter\ chain\ halted|rendered)'
|
142
|
+
define.CONTROLLER_ACTION '[a-z0-9]+#[a-z0-9]+'
|
143
|
+
</pre>
|
144
|
+
<p>
|
145
|
+
Scanner is case insensitive
|
146
|
+
</p>
|
147
|
+
<pre>
|
148
|
+
define.RAILS_ERROR_CLASS '([a-z]+\:\:)*[a-z]+error'
|
149
|
+
</pre>
|
150
|
+
<p>
|
151
|
+
"start conditions" are a feature of flex that allows us to have
|
152
|
+
some regular expressions that are only active when we tell the scanner to
|
153
|
+
enter a certain state. Here we define the "REQUEST_COMPLETED"
|
154
|
+
state, and specify that it is exclusive. This means that if the scanner is
|
155
|
+
in this state, it only matches rules written for this state
|
156
|
+
</p>
|
157
|
+
<pre>
|
158
|
+
define.REQUEST_COMPLETED :start_condition => :exclusive
|
159
|
+
end
|
160
|
+
</pre>
|
161
|
+
<p>
|
162
|
+
Define rules. These are the actions that the scanner executes when it sees
|
163
|
+
text that matches a regular expression. The default action is to add
|
164
|
+
:action_name => [matched_text] to the results Hash, or push the matched
|
165
|
+
text on the end of the array if it already exists.
|
166
|
+
</p>
|
167
|
+
<pre>
|
168
|
+
scanner.rules do |r|
|
169
|
+
</pre>
|
170
|
+
<p>
|
171
|
+
This will add something like :teaser => ["Processing"] to the
|
172
|
+
results
|
173
|
+
</p>
|
174
|
+
<pre>
|
175
|
+
r.teaser '{RAILS_TEASER}'
|
176
|
+
r.controller_action '{CONTROLLER_ACTION}'
|
177
|
+
</pre>
|
178
|
+
<p>
|
179
|
+
Use some of the default definitions we added above.
|
180
|
+
</p>
|
181
|
+
<pre>
|
182
|
+
r.datetime '{YEAR}"-"{MONTH_NUM}"-"{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}'
|
183
|
+
r.http_method '{HTTP_VERB}'
|
184
|
+
</pre>
|
185
|
+
<p>
|
186
|
+
With :skip_line => true, scanner stops processing the line immediately
|
187
|
+
</p>
|
188
|
+
<pre>
|
189
|
+
r.skip_lines '{RAILS_SKIP_LINES}', :skip_line => true
|
190
|
+
r.error '{RAILS_ERROR_CLASS}'
|
191
|
+
</pre>
|
192
|
+
<p>
|
193
|
+
With :strip_ends => true, scanner removes first and last characters from
|
194
|
+
matched text
|
195
|
+
</p>
|
196
|
+
<pre>
|
197
|
+
r.error_message '\(({WS}|{NON_WS})+\)', :strip_ends => true
|
198
|
+
</pre>
|
199
|
+
<p>
|
200
|
+
Puts the scanner in the "REQUEST_COMPLETED" state we defined
|
201
|
+
above. The scanner only matches rules beginning with
|
202
|
+
"<REQUEST_COMPLETED>" now.
|
203
|
+
</p>
|
204
|
+
<pre>
|
205
|
+
r.teaser 'completed\ in', :begin => "REQUEST_COMPLETED"
|
206
|
+
</pre>
|
207
|
+
<p>
|
208
|
+
These rules only apply to the "REQUEST_COMPLETED" state.
|
209
|
+
</p>
|
210
|
+
<pre>
|
211
|
+
r.duration_s '<REQUEST_COMPLETED>[0-9]+\.[0-9]+'
|
212
|
+
r.duration_ms '<REQUEST_COMPLETED>[0-9]+/ms'
|
213
|
+
r.http_response '<REQUEST_COMPLETED>{HTTPCODE}'
|
214
|
+
</pre>
|
215
|
+
<p>
|
216
|
+
Need a "catchall" rule — flex scanner "jams" if
|
217
|
+
there isn't a default rule (the catchall rule for the default/INITIAL
|
218
|
+
state is automatically included). Note that :ignore => true makes the
|
219
|
+
scanner ignore what it matches but doesn't stop processing of the
|
220
|
+
line.
|
221
|
+
</p>
|
222
|
+
<pre>
|
223
|
+
r.ignore_others '<REQUEST_COMPLETED>{CATCHALL}', :ignore => true
|
224
|
+
</pre>
|
225
|
+
<p>
|
226
|
+
The "strings" action is special. It keeps track of whether the
|
227
|
+
last token was also a string, and if it was, the new string is appended to
|
228
|
+
the last string instead of being pushed to the array. For example, when
|
229
|
+
scanning an Apache error log, "Invalid URI in request" will
|
230
|
+
be extracted as a complete string (instead of ["Invalid",
|
231
|
+
"URI", "in", "request"])
|
232
|
+
</p>
|
233
|
+
<pre>
|
234
|
+
r.strings '{NON_WS}{NON_WS}*'
|
235
|
+
end
|
236
|
+
</pre>
|
237
|
+
<p>
|
238
|
+
Writes the generated scanner and an extconf.rb for it to the directory we
|
239
|
+
specified when we initialized the scanner.
|
240
|
+
</p>
|
241
|
+
<pre>
|
242
|
+
scanner.write!
|
243
|
+
</pre>
|
244
|
+
<p>
|
245
|
+
There isn't much in the way of documentation for the scanner generator,
|
246
|
+
but you can refer to the specs and the definitions for Apache and Rails
|
247
|
+
logs to get a sense of how it works. It would probably help a lot to learn
|
248
|
+
about flex's regex syntax and other features.
|
249
|
+
</p>
|
250
|
+
<h1>Ruby 1.9</h1>
|
251
|
+
<p>
|
252
|
+
Ruby 1.9 is supported on the master branch. Don't use the ruby1.9
|
253
|
+
branch; it is orphaned.
|
254
|
+
</p>
|
255
|
+
<h1>Shortcomings and Known Issues</h1>
|
256
|
+
<p>
|
257
|
+
In addition to the lack of support for formats other than Apache and Rails
|
258
|
+
described above:
|
259
|
+
</p>
|
260
|
+
<ul>
|
261
|
+
<li>It's a new project, lots of API changes
|
262
|
+
|
263
|
+
</li>
|
264
|
+
<li>Does not convert datetimes to Ruby Time objects
|
265
|
+
|
266
|
+
</li>
|
267
|
+
<li>Does not always use context or knowledge of the log format to its
|
268
|
+
advantage. This is improving now that the scanner can utilize start
|
269
|
+
conditions.
|
270
|
+
|
271
|
+
</li>
|
272
|
+
</ul>
|
273
|
+
<h1>Performance</h1>
|
274
|
+
<p>
|
275
|
+
On my laptop, a white MacBook 2.0 GHz Intel Core Duo, teeth can process
|
276
|
+
more than 30k lines of Apache access logs per second. So it's pretty
|
277
|
+
fast. If modified to not create a UUID or keep the full message, this can
|
278
|
+
be increased to around 45k lines/sec. One could potentially do pretty well
|
279
|
+
on the <a
|
280
|
+
href="http://www.tbray.org/ongoing/When/200x/2008/05/01/Wide-Finder-2">wide_finder2</a>…
|
281
|
+
</p>
|
282
|
+
|
283
|
+
</div>
|
284
|
+
|
285
|
+
|
286
|
+
</div>
|
287
|
+
|
288
|
+
|
289
|
+
</div>
|
290
|
+
|
291
|
+
|
292
|
+
<!-- if includes -->
|
293
|
+
|
294
|
+
<div id="section">
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
|
302
|
+
|
303
|
+
<!-- if method_list -->
|
304
|
+
|
305
|
+
|
306
|
+
</div>
|
307
|
+
|
308
|
+
|
309
|
+
<div id="validator-badges">
|
310
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
311
|
+
</div>
|
312
|
+
|
313
|
+
</body>
|
314
|
+
</html>
|
@@ -0,0 +1,101 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: scan_apache_logs.yy.c</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>scan_apache_logs.yy.c</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>ext/scan_apache_logs/scan_apache_logs.yy.c
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Sun Mar 29 17:35:41 -0600 2009</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
</div>
|
74
|
+
|
75
|
+
|
76
|
+
</div>
|
77
|
+
|
78
|
+
|
79
|
+
<!-- if includes -->
|
80
|
+
|
81
|
+
<div id="section">
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
<!-- if method_list -->
|
91
|
+
|
92
|
+
|
93
|
+
</div>
|
94
|
+
|
95
|
+
|
96
|
+
<div id="validator-badges">
|
97
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
98
|
+
</div>
|
99
|
+
|
100
|
+
</body>
|
101
|
+
</html>
|