danielsdeleo-teeth 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
data/ext/tokenize_apache_logs.yy
DELETED
@@ -1,215 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* uuid_unparse_upper_sans_dash is derived from the uuid library
|
3
|
-
* from OS X / darwin, therefore:
|
4
|
-
*
|
5
|
-
* Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
|
6
|
-
*
|
7
|
-
* %Begin-Header%
|
8
|
-
* Redistribution and use in source and binary forms, with or without
|
9
|
-
* modification, are permitted provided that the following conditions
|
10
|
-
* are met:
|
11
|
-
* 1. Redistributions of source code must retain the above copyright
|
12
|
-
* notice, and the entire permission notice in its entirety,
|
13
|
-
* including the disclaimer of warranties.
|
14
|
-
* 2. Redistributions in binary form must reproduce the above copyright
|
15
|
-
* notice, this list of conditions and the following disclaimer in the
|
16
|
-
* documentation and/or other materials provided with the distribution.
|
17
|
-
* 3. The name of the author may not be used to endorse or promote
|
18
|
-
* products derived from this software without specific prior
|
19
|
-
* written permission.
|
20
|
-
*
|
21
|
-
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
22
|
-
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
23
|
-
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
|
24
|
-
* WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
|
25
|
-
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
26
|
-
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
27
|
-
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
28
|
-
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
-
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
30
|
-
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
31
|
-
* USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
|
32
|
-
* DAMAGE.
|
33
|
-
* %End-Header%
|
34
|
-
*/
|
35
|
-
|
36
|
-
/* %option prefix="vor_yy" */
|
37
|
-
%option full
|
38
|
-
%{
|
39
|
-
#include <ruby.h>
|
40
|
-
#include <uuid/uuid.h>
|
41
|
-
|
42
|
-
/* prototypes */
|
43
|
-
static VALUE vor_curr_tok_hsh;
|
44
|
-
char *strip_ends(char *);
|
45
|
-
void push_token_to_hash(char *, char *);
|
46
|
-
VALUE t_tokenize_apache_logs(VALUE);
|
47
|
-
void new_uuid(char *str_ptr);
|
48
|
-
%}
|
49
|
-
/* Definitions */
|
50
|
-
|
51
|
-
IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
52
|
-
|
53
|
-
WDAY (Mon|Tue|Wed|Thu|Fri|Sat|Sun)
|
54
|
-
|
55
|
-
MON (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
|
56
|
-
|
57
|
-
MDAY (3[0-1]|[1-2][0-9]|0[1-9])
|
58
|
-
|
59
|
-
HOUR (2[0-3]|[0-1][0-9])
|
60
|
-
|
61
|
-
MINSEC [0-5][0-9]
|
62
|
-
|
63
|
-
YEAR [0-9][0-9][0-9][0-9]
|
64
|
-
|
65
|
-
REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
|
66
|
-
|
67
|
-
PROTO (http:|https:)
|
68
|
-
|
69
|
-
/* Host name made of dot-separated labels with an optional ":port" suffix.
 * NOTE(review): the separator after the second label is an unescaped `.`,
 * which flex treats as "any character except newline", not a literal dot.
 * Presumably `\.` was intended; confirm before changing, because tightening
 * it alters which inputs this pattern accepts. */
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
|
70
|
-
|
71
|
-
ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
|
72
|
-
|
73
|
-
PLUSMINUS (\+|\-)
|
74
|
-
|
75
|
-
/* HTTP protocol version token: HTTP/1.0 or HTTP/1.1.
 * The dots are escaped so that only a literal '.' matches; an unescaped dot
 * is the flex "any character" operator and would also accept e.g. HTTP/1x0. */
HTTP_VERS HTTP\/(1\.0|1\.1)
|
76
|
-
|
77
|
-
HTTP_VERB (get|head|put|post|delete|trace|connect)
|
78
|
-
|
79
|
-
HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
|
80
|
-
|
81
|
-
/* Whitespace and control characters: every byte from NUL through space
 * (octal 040). Flex has no `\s` escape -- `\s` is just the letter 's' -- so
 * the upper bound is spelled as an octal escape to keep digits, uppercase
 * letters and the letters a-s out of the range. */
WS [\000-\040]
|
82
|
-
|
83
|
-
/*
|
84
|
-
Covers most of the bases, but there are a F*-ton of these: http://www.zytrax.com/tech/web/browser_ids.htm
|
85
|
-
Also, handling of quotes is naive. If it becomes a problem, try something like
|
86
|
-
http://flex.sourceforge.net/manual/How-can-I-match-C_002dstyle-comments_003f.html#How-can-I-match-C_002dstyle-comments_003f
|
87
|
-
*/
|
88
|
-
BROWSER_STR \"(moz|msie|lynx).+\"
|
89
|
-
|
90
|
-
/* One character of a generic "word" token: a lowercase letter, a digit, or a
 * member of the third alternative.
 * NOTE(review): `[:punct:]` written outside a bracket expression is an
 * ordinary character set containing ':', 'p', 'u', 'n', 'c' and 't'; the
 * POSIX punctuation class would be `[[:punct:]]`. Switching to the class
 * would let the word rule swallow quotes and brackets (e.g. "[error]") ahead
 * of the more specific rules, so confirm the intent before changing it. */
NON_WS ([a-z]|[0-9]|[:punct:])
|
91
|
-
|
92
|
-
%%
|
93
|
-
/*
|
94
|
-
Actions
|
95
|
-
*/
|
96
|
-
|
97
|
-
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
|
98
|
-
push_token_to_hash("ipv4_addr", yytext);
|
99
|
-
}
|
100
|
-
|
101
|
-
{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}\:{MINSEC}\:{MINSEC}{WS}{YEAR} {
|
102
|
-
push_token_to_hash("apache_err_datetime", yytext);
|
103
|
-
}
|
104
|
-
|
105
|
-
{MDAY}\/{MON}\/{YEAR}\:{HOUR}\:{MINSEC}\:{MINSEC}{WS}{PLUSMINUS}{YEAR} {
|
106
|
-
push_token_to_hash("apache_access_datetime", yytext);
|
107
|
-
}
|
108
|
-
|
109
|
-
{HTTP_VERS} { push_token_to_hash("http_version", yytext);}
|
110
|
-
|
111
|
-
{BROWSER_STR} { push_token_to_hash("browser_string", strip_ends(yytext));}
|
112
|
-
|
113
|
-
{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
|
114
|
-
push_token_to_hash("absolute_url", yytext);
|
115
|
-
}
|
116
|
-
|
117
|
-
{HOST} {push_token_to_hash("host", yytext);}
|
118
|
-
|
119
|
-
{REL_URL} {
|
120
|
-
push_token_to_hash("relative_url", yytext);
|
121
|
-
}
|
122
|
-
|
123
|
-
{ERR_LVL} {
|
124
|
-
push_token_to_hash("error_level", yytext);
|
125
|
-
}
|
126
|
-
|
127
|
-
{HTTPCODE} { push_token_to_hash("http_response", yytext); }
|
128
|
-
|
129
|
-
{HTTP_VERB} { push_token_to_hash("http_method", yytext);}
|
130
|
-
|
131
|
-
{NON_WS}{NON_WS}* {
|
132
|
-
push_token_to_hash("word", yytext);
|
133
|
-
}
|
134
|
-
|
135
|
-
(.|"\n") /* ignore */
|
136
|
-
%%
|
137
|
-
|
138
|
-
/*
 * Removes the first and last character of a NUL-terminated string in place,
 * e.g. to drop the surrounding quotes from a matched browser string.
 *
 * string: writable, NUL-terminated buffer; it is modified.
 * Returns a pointer into the same buffer (no allocation). Strings shorter
 * than two characters are truncated to the empty string.
 *
 * The length is measured from the argument itself instead of being read from
 * the scanner's global yyleng, so the result does not depend on scanner state
 * and an empty string no longer causes a write before the buffer.
 */
char *strip_ends(char *string) {
  size_t len = strlen(string);

  if (len < 2) {
    /* Nothing left once both ends are removed. */
    string[0] = '\0';
    return string;
  }

  string[len - 1] = '\0';  /* drop the trailing character */
  return string + 1;       /* skip the leading character */
}
|
143
|
-
|
144
|
-
void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
|
145
|
-
{
|
146
|
-
sprintf(out,
|
147
|
-
"%02X%02X%02X%02X"
|
148
|
-
"%02X%02X"
|
149
|
-
"%02X%02X"
|
150
|
-
"%02X%02X"
|
151
|
-
"%02X%02X%02X%02X%02X%02X",
|
152
|
-
uu[0], uu[1], uu[2], uu[3],
|
153
|
-
uu[4], uu[5],
|
154
|
-
uu[6], uu[7],
|
155
|
-
uu[8], uu[9],
|
156
|
-
uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
|
157
|
-
}
|
158
|
-
|
159
|
-
void new_uuid(char *str_ptr){
|
160
|
-
uuid_t new_uuid;
|
161
|
-
uuid_generate_time(new_uuid);
|
162
|
-
uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
|
163
|
-
}
|
164
|
-
|
165
|
-
/* Processes a line from an Apache log file (error or access log) and returns a
|
166
|
-
* Hash of the form {:token_type => ["value1", "value2"...] ...}
|
167
|
-
* The types of tokens extracted are IPv4 addresses, HTTP verbs, response codes
|
168
|
-
* and version strings, hostnames, relative and absolute URIs, browser strings,
|
169
|
-
* error levels, and other words */
|
170
|
-
VALUE t_tokenize_apache_logs(VALUE self) {
|
171
|
-
char new_uuid_str[33];
|
172
|
-
vor_curr_tok_hsh = rb_hash_new();
|
173
|
-
rb_global_variable(&vor_curr_tok_hsh);
|
174
|
-
/* error out on absurdly large strings */
|
175
|
-
if( strlen(RSTRING(self)->ptr) > 1000000){
|
176
|
-
rb_raise(rb_eArgError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars");
|
177
|
-
}
|
178
|
-
else{
|
179
|
-
/* {:message => self} */
|
180
|
-
VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
|
181
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key_msg, self);
|
182
|
-
/* {:id => generated_uuid} */
|
183
|
-
new_uuid(new_uuid_str);
|
184
|
-
VALUE hsh_key_id = ID2SYM(rb_intern("id"));
|
185
|
-
VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
|
186
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key_id, hsh_val_id);
|
187
|
-
yy_scan_string(RSTRING(self)->ptr);
|
188
|
-
yylex();
|
189
|
-
yy_delete_buffer(YY_CURRENT_BUFFER);
|
190
|
-
}
|
191
|
-
return rb_obj_dup(vor_curr_tok_hsh);
|
192
|
-
}
|
193
|
-
|
194
|
-
void push_token_to_hash(char * token_type, char * token_val) {
|
195
|
-
VALUE hsh_key = ID2SYM(rb_intern(token_type));
|
196
|
-
VALUE hsh_value = rb_hash_aref(vor_curr_tok_hsh, hsh_key);
|
197
|
-
VALUE ary_for_token_type = rb_ary_new();
|
198
|
-
switch (TYPE(hsh_value)) {
|
199
|
-
case T_NIL:
|
200
|
-
rb_ary_push(ary_for_token_type, rb_tainted_str_new2(token_val));
|
201
|
-
rb_hash_aset(vor_curr_tok_hsh, hsh_key, ary_for_token_type);
|
202
|
-
break;
|
203
|
-
case T_ARRAY:
|
204
|
-
rb_ary_push(hsh_value, rb_tainted_str_new2(token_val));
|
205
|
-
break;
|
206
|
-
default:
|
207
|
-
/* raise exception */
|
208
|
-
rb_raise(rb_eTypeError, "expecting member of hash to be nil or array");
|
209
|
-
break;
|
210
|
-
}
|
211
|
-
}
|
212
|
-
|
213
|
-
void Init_tokenize_apache_logs() {
|
214
|
-
rb_define_method(rb_cString, "tokenize_apache_logs", t_tokenize_apache_logs, 0);
|
215
|
-
}
|