danielsdeleo-teeth 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
data/ext/tokenize_apache_logs.yy
DELETED
@@ -1,215 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* uuid_unparse_upper_sans_dash is derived from the uuid library
|
3
|
-
* from OS X / darwin, therefore:
|
4
|
-
*
|
5
|
-
* Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
|
6
|
-
*
|
7
|
-
* %Begin-Header%
|
8
|
-
* Redistribution and use in source and binary forms, with or without
|
9
|
-
* modification, are permitted provided that the following conditions
|
10
|
-
* are met:
|
11
|
-
* 1. Redistributions of source code must retain the above copyright
|
12
|
-
* notice, and the entire permission notice in its entirety,
|
13
|
-
* including the disclaimer of warranties.
|
14
|
-
* 2. Redistributions in binary form must reproduce the above copyright
|
15
|
-
* notice, this list of conditions and the following disclaimer in the
|
16
|
-
* documentation and/or other materials provided with the distribution.
|
17
|
-
* 3. The name of the author may not be used to endorse or promote
|
18
|
-
* products derived from this software without specific prior
|
19
|
-
* written permission.
|
20
|
-
*
|
21
|
-
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
22
|
-
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
23
|
-
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
|
24
|
-
* WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
|
25
|
-
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
26
|
-
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
27
|
-
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
28
|
-
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
-
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
30
|
-
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
31
|
-
* USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
|
32
|
-
* DAMAGE.
|
33
|
-
* %End-Header%
|
34
|
-
*/
|
35
|
-
|
36
|
-
/* %option prefix="vor_yy" */
|
37
|
-
%option full
|
38
|
-
%{
|
39
|
-
#include <ruby.h>
|
40
|
-
#include <uuid/uuid.h>
|
41
|
-
|
42
|
-
/* prototypes */
|
43
|
-
static VALUE vor_curr_tok_hsh;
|
44
|
-
char *strip_ends(char *);
|
45
|
-
void push_token_to_hash(char *, char *);
|
46
|
-
VALUE t_tokenize_apache_logs(VALUE);
|
47
|
-
void new_uuid(char *str_ptr);
|
48
|
-
%}
|
49
|
-
/* Definitions */
|
50
|
-
|
51
|
-
IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
52
|
-
|
53
|
-
WDAY (Mon|Tue|Wed|Thu|Fri|Sat|Sun)
|
54
|
-
|
55
|
-
MON (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
|
56
|
-
|
57
|
-
MDAY (3[0-1]|[1-2][0-9]|0[1-9])
|
58
|
-
|
59
|
-
HOUR (2[0-3]|[0-1][0-9])
|
60
|
-
|
61
|
-
MINSEC [0-5][0-9]
|
62
|
-
|
63
|
-
YEAR [0-9][0-9][0-9][0-9]
|
64
|
-
|
65
|
-
REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
|
66
|
-
|
67
|
-
PROTO (http:|https:)
|
68
|
-
|
69
|
-
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
|
70
|
-
|
71
|
-
ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
|
72
|
-
|
73
|
-
PLUSMINUS (\+|\-)
|
74
|
-
|
75
|
-
HTTP_VERS HTTP\/(1.0|1.1)
|
76
|
-
|
77
|
-
HTTP_VERB (get|head|put|post|delete|trace|connect)
|
78
|
-
|
79
|
-
HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
|
80
|
-
|
81
|
-
WS [\000-\s]
|
82
|
-
|
83
|
-
/*
|
84
|
-
Covers most of the bases, but there are a F*-ton of these: http://www.zytrax.com/tech/web/browser_ids.htm
|
85
|
-
Also, handling of quotes is naive. If it becomes a problem, try something like
|
86
|
-
http://flex.sourceforge.net/manual/How-can-I-match-C_002dstyle-comments_003f.html#How-can-I-match-C_002dstyle-comments_003f
|
87
|
-
*/
|
88
|
-
BROWSER_STR \"(moz|msie|lynx).+\"
|
89
|
-
|
90
|
-
NON_WS ([a-z]|[0-9]|[:punct:])
|
91
|
-
|
92
|
-
%%
|
93
|
-
/*
|
94
|
-
Actions
|
95
|
-
*/
|
96
|
-
|
97
|
-
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
|
98
|
-
push_token_to_hash("ipv4_addr", yytext);
|
99
|
-
}
|
100
|
-
|
101
|
-
{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}\:{MINSEC}\:{MINSEC}{WS}{YEAR} {
|
102
|
-
push_token_to_hash("apache_err_datetime", yytext);
|
103
|
-
}
|
104
|
-
|
105
|
-
{MDAY}\/{MON}\/{YEAR}\:{HOUR}\:{MINSEC}\:{MINSEC}{WS}{PLUSMINUS}{YEAR} {
|
106
|
-
push_token_to_hash("apache_access_datetime", yytext);
|
107
|
-
}
|
108
|
-
|
109
|
-
{HTTP_VERS} { push_token_to_hash("http_version", yytext);}
|
110
|
-
|
111
|
-
{BROWSER_STR} { push_token_to_hash("browser_string", strip_ends(yytext));}
|
112
|
-
|
113
|
-
{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
|
114
|
-
push_token_to_hash("absolute_url", yytext);
|
115
|
-
}
|
116
|
-
|
117
|
-
{HOST} {push_token_to_hash("host", yytext);}
|
118
|
-
|
119
|
-
{REL_URL} {
|
120
|
-
push_token_to_hash("relative_url", yytext);
|
121
|
-
}
|
122
|
-
|
123
|
-
{ERR_LVL} {
|
124
|
-
push_token_to_hash("error_level", yytext);
|
125
|
-
}
|
126
|
-
|
127
|
-
{HTTPCODE} { push_token_to_hash("http_response", yytext); }
|
128
|
-
|
129
|
-
{HTTP_VERB} { push_token_to_hash("http_method", yytext);}
|
130
|
-
|
131
|
-
{NON_WS}{NON_WS}* {
|
132
|
-
push_token_to_hash("word", yytext);
|
133
|
-
}
|
134
|
-
|
135
|
-
(.|"\n") /* ignore */
|
136
|
-
%%
|
137
|
-
|
138
|
-
char *strip_ends(char *string) {
|
139
|
-
string[yyleng-1] = '\0';
|
140
|
-
++string;
|
141
|
-
return string;
|
142
|
-
}
|
143
|
-
|
144
|
-
void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
|
145
|
-
{
|
146
|
-
sprintf(out,
|
147
|
-
"%02X%02X%02X%02X"
|
148
|
-
"%02X%02X"
|
149
|
-
"%02X%02X"
|
150
|
-
"%02X%02X"
|
151
|
-
"%02X%02X%02X%02X%02X%02X",
|
152
|
-
uu[0], uu[1], uu[2], uu[3],
|
153
|
-
uu[4], uu[5],
|
154
|
-
uu[6], uu[7],
|
155
|
-
uu[8], uu[9],
|
156
|
-
uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
|
157
|
-
}
|
158
|
-
|
159
|
-
void new_uuid(char *str_ptr){
|
160
|
-
uuid_t new_uuid;
|
161
|
-
uuid_generate_time(new_uuid);
|
162
|
-
uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
|
163
|
-
}
|
164
|
-
|
165
|
-
/* Processes a line from an Apache log file (error or access log) and returns a
|
166
|
-
* Hash of the form {:token_type => ["value1", "value2"...] ...}
|
167
|
-
* The types of tokens extracted are IPv4 addresses, HTTP verbs, response codes
|
168
|
-
* and version strings, hostnames, relative and absolute URIs, browser strings,
|
169
|
-
* error levels, and other words */
|
170
|
-
VALUE t_tokenize_apache_logs(VALUE self) {
|
171
|
-
char new_uuid_str[33];
|
172
|
-
vor_curr_tok_hsh = rb_hash_new();
|
173
|
-
rb_global_variable(&vor_curr_tok_hsh);
|
174
|
-
/* error out on absurdly large strings */
|
175
|
-
if( strlen(RSTRING(self)->ptr) > 1000000){
|
176
|
-
rb_raise(rb_eArgError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars");
|
177
|
-
}
|
178
|
-
else{
|
179
|
-
/* {:message => self} */
|
180
|
-
VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
|
181
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key_msg, self);
|
182
|
-
/* {:id => generated_uuid} */
|
183
|
-
new_uuid(new_uuid_str);
|
184
|
-
VALUE hsh_key_id = ID2SYM(rb_intern("id"));
|
185
|
-
VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
|
186
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key_id, hsh_val_id);
|
187
|
-
yy_scan_string(RSTRING(self)->ptr);
|
188
|
-
yylex();
|
189
|
-
yy_delete_buffer(YY_CURRENT_BUFFER);
|
190
|
-
}
|
191
|
-
return rb_obj_dup(vor_curr_tok_hsh);
|
192
|
-
}
|
193
|
-
|
194
|
-
void push_token_to_hash(char * token_type, char * token_val) {
|
195
|
-
VALUE hsh_key = ID2SYM(rb_intern(token_type));
|
196
|
-
VALUE hsh_value = rb_hash_aref(vor_curr_tok_hsh, hsh_key);
|
197
|
-
VALUE ary_for_token_type = rb_ary_new();
|
198
|
-
switch (TYPE(hsh_value)) {
|
199
|
-
case T_NIL:
|
200
|
-
rb_ary_push(ary_for_token_type, rb_tainted_str_new2(token_val));
|
201
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key, ary_for_token_type);
|
202
|
-
break;
|
203
|
-
case T_ARRAY:
|
204
|
-
rb_ary_push(hsh_value, rb_tainted_str_new2(token_val));
|
205
|
-
break;
|
206
|
-
default:
|
207
|
-
/* raise exception */
|
208
|
-
rb_raise(rb_eTypeError, "expecting member of hash to be nil or array");
|
209
|
-
break;
|
210
|
-
}
|
211
|
-
}
|
212
|
-
|
213
|
-
void Init_tokenize_apache_logs() {
|
214
|
-
rb_define_method(rb_cString, "tokenize_apache_logs", t_tokenize_apache_logs, 0);
|
215
|
-
}
|