danielsdeleo-teeth 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/LICENSE +11 -0
  2. data/README.rdoc +107 -10
  3. data/Rakefile +47 -31
  4. data/VERSION.yml +4 -0
  5. data/doc/classes/String.html +182 -0
  6. data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
  7. data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
  8. data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
  9. data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
  10. data/doc/classes/Teeth/RuleStatement.html +291 -0
  11. data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
  12. data/doc/classes/Teeth/Scanner.html +535 -0
  13. data/doc/classes/Teeth/ScannerDefinition.html +253 -0
  14. data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
  15. data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
  16. data/doc/classes/Teeth/ScannerError.html +111 -0
  17. data/doc/classes/Teeth.html +129 -0
  18. data/doc/created.rid +1 -0
  19. data/doc/files/README_rdoc.html +314 -0
  20. data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
  21. data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
  22. data/doc/files/lib/rule_statement_rb.html +101 -0
  23. data/doc/files/lib/scanner_definition_rb.html +101 -0
  24. data/doc/files/lib/scanner_rb.html +108 -0
  25. data/doc/files/lib/teeth_rb.html +111 -0
  26. data/doc/fr_class_index.html +39 -0
  27. data/doc/fr_file_index.html +33 -0
  28. data/doc/fr_method_index.html +60 -0
  29. data/doc/index.html +24 -0
  30. data/doc/rdoc-style.css +208 -0
  31. data/ext/scan_apache_logs/Makefile +158 -0
  32. data/ext/scan_apache_logs/extconf.rb +3 -0
  33. data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
  34. data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
  35. data/ext/scan_rails_logs/Makefile +158 -0
  36. data/ext/scan_rails_logs/extconf.rb +3 -0
  37. data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
  38. data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
  39. data/lib/rule_statement.rb +61 -0
  40. data/lib/scanner.rb +98 -0
  41. data/lib/scanner_definition.rb +116 -0
  42. data/lib/teeth.rb +5 -1
  43. data/scanners/scan_apache_logs.rb +27 -0
  44. data/scanners/scan_rails_logs.rb +70 -0
  45. data/spec/fixtures/rails_1x.log +59 -0
  46. data/spec/fixtures/rails_22.log +12 -0
  47. data/spec/fixtures/rails_22_cached.log +10 -0
  48. data/spec/fixtures/rails_unordered.log +24 -0
  49. data/spec/playground/show_apache_processing.rb +13 -0
  50. data/spec/spec_helper.rb +6 -1
  51. data/spec/unit/rule_statement_spec.rb +60 -0
  52. data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
  53. data/spec/unit/scan_rails_logs_spec.rb +90 -0
  54. data/spec/unit/scaner_definition_spec.rb +65 -0
  55. data/spec/unit/scanner_spec.rb +109 -0
  56. data/teeth.gemspec +31 -0
  57. data/templates/tokenizer.yy.erb +168 -0
  58. metadata +60 -15
  59. data/ext/extconf.rb +0 -4
  60. data/ext/tokenize_apache_logs.yy +0 -215
  61. data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -1,215 +0,0 @@
1
- /*
2
- * uuid_unparse_upper_sans_dash is derived from the uuid library
3
- * from OS X / darwin, therefore:
4
- *
5
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
6
- *
7
- * %Begin-Header%
8
- * Redistribution and use in source and binary forms, with or without
9
- * modification, are permitted provided that the following conditions
10
- * are met:
11
- * 1. Redistributions of source code must retain the above copyright
12
- * notice, and the entire permission notice in its entirety,
13
- * including the disclaimer of warranties.
14
- * 2. Redistributions in binary form must reproduce the above copyright
15
- * notice, this list of conditions and the following disclaimer in the
16
- * documentation and/or other materials provided with the distribution.
17
- * 3. The name of the author may not be used to endorse or promote
18
- * products derived from this software without specific prior
19
- * written permission.
20
- *
21
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
24
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
25
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
27
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
31
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
32
- * DAMAGE.
33
- * %End-Header%
34
- */
35
-
36
- /* %option prefix="vor_yy" */
37
- %option full
38
- %{
39
- #include <ruby.h>
40
- #include <uuid/uuid.h>
41
-
42
- /* prototypes */
43
- static VALUE vor_curr_tok_hsh;
44
- char *strip_ends(char *);
45
- void push_token_to_hash(char *, char *);
46
- VALUE t_tokenize_apache_logs(VALUE);
47
- void new_uuid(char *str_ptr);
48
- %}
49
/* Definitions */

/* One octet of a dotted-quad IPv4 address: one or two digits, or 100-255. */
IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]

/* Three-letter weekday name. */
WDAY (Mon|Tue|Wed|Thu|Fri|Sat|Sun)

/* Three-letter month name. */
MON (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)

/* Two-digit day of month, 01-31. */
MDAY (3[0-1]|[1-2][0-9]|0[1-9])

/* Two-digit hour, 00-23. */
HOUR (2[0-3]|[0-1][0-9])

/* Two-digit minute or second, 00-59. */
MINSEC [0-5][0-9]

/* Any four digits. */
YEAR [0-9][0-9][0-9][0-9]

/* A path-like string: starts with '/', '\' or '.', followed by URL characters. */
REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*

PROTO (http:|https:)

/*
NOTE(review): the dot between the second and third label is NOT escaped, so it
matches any character rather than a literal '.'. Escaping it would change which
inputs are recognised as hosts, so it is left as-is here; confirm the intended
behaviour before changing it.
*/
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?

/* Error-level keywords. */
ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)

PLUSMINUS (\+|\-)

/* The dots are escaped so that only "HTTP/1.0" and "HTTP/1.1" match (previously
   any character was accepted between the digits, e.g. "HTTP/1x0"). */
HTTP_VERS HTTP\/(1\.0|1\.1)

/* NOTE(review): the verbs are spelled in lower case; presumably the scanner is
   generated case-insensitively (flex -i) -- TODO confirm against the build. */
HTTP_VERB (get|head|put|post|delete|trace|connect)

HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])

/*
Control characters and the space character (octal 000 through 040).
flex does not treat \s as a whitespace class: it is just an escaped letter 's',
so the previous range [\000-\s] ran from NUL to 's' and matched digits and most
letters as "whitespace".
*/
WS [\000-\040]

/*
Covers most of the bases, but there are a huge number of these: http://www.zytrax.com/tech/web/browser_ids.htm
Also, handling of quotes is naive. If it becomes a problem try something like
http://flex.sourceforge.net/manual/How-can-I-match-C_002dstyle-comments_003f.html#How-can-I-match-C_002dstyle-comments_003f
*/
BROWSER_STR \"(moz|msie|lynx).+\"

/*
NOTE(review): [:punct:] is only a character-class expression when it appears
inside a bracket expression, i.e. [[:punct:]]. Written as below it is an
ordinary class containing the characters ':', 'p', 'u', 'n', 'c' and 't'.
Correcting it would make the catch-all "word" rule match much longer strings
and could change which rule wins, so it is deliberately left unchanged here.
*/
NON_WS ([a-z]|[0-9]|[:punct:])
91
-
92
- %%
93
- /*
94
- Actions
95
- */
96
-
97
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
    /* Dotted-quad IPv4 address. */
    push_token_to_hash("ipv4_addr", yytext);
}

{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}\:{MINSEC}\:{MINSEC}{WS}{YEAR} {
    /* Weekday, month, day, time and year, each separated by one WS character. */
    push_token_to_hash("apache_err_datetime", yytext);
}

{MDAY}\/{MON}\/{YEAR}\:{HOUR}\:{MINSEC}\:{MINSEC}{WS}{PLUSMINUS}{YEAR} {
    /* day/month/year:time, then a signed four-digit field (YEAR is reused for it). */
    push_token_to_hash("apache_access_datetime", yytext);
}

{HTTP_VERS} {
    push_token_to_hash("http_version", yytext);
}

{BROWSER_STR} {
    /* strip_ends() removes the first and last character of the match,
       i.e. the surrounding double quotes. */
    push_token_to_hash("browser_string", strip_ends(yytext));
}

{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
    /* Scheme, "//", a host name or IPv4 address, and an optional path. */
    push_token_to_hash("absolute_url", yytext);
}

{HOST} {
    push_token_to_hash("host", yytext);
}

{REL_URL} {
    push_token_to_hash("relative_url", yytext);
}

{ERR_LVL} {
    push_token_to_hash("error_level", yytext);
}

{HTTPCODE} {
    push_token_to_hash("http_response", yytext);
}

{HTTP_VERB} {
    push_token_to_hash("http_method", yytext);
}

{NON_WS}{NON_WS}* {
    /* One or more NON_WS characters that no rule above claimed. */
    push_token_to_hash("word", yytext);
}

(.|"\n") {
    /* ignore */
}
136
- %%
137
-
138
/*
 * Removes the first and last character of `string` in place and returns a
 * pointer to the shortened text (used to drop the quotes around a matched
 * browser string).
 *
 * The last character is overwritten with '\0' and the returned pointer is
 * `string + 1`, so the result aliases the caller's buffer.
 *
 * The length is taken from `string` itself rather than from the scanner
 * global `yyleng`, so the function is correct for any NUL-terminated buffer
 * and not only for the current match. An empty string is returned unchanged
 * (previously this wrote one byte before the start of the buffer).
 */
char *strip_ends(char *string) {
    size_t len = strlen(string);

    if (len == 0) {
        return string;
    }

    string[len - 1] = '\0';
    return string + 1;
}
143
-
144
- void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
145
- {
146
- sprintf(out,
147
- "%02X%02X%02X%02X"
148
- "%02X%02X"
149
- "%02X%02X"
150
- "%02X%02X"
151
- "%02X%02X%02X%02X%02X%02X",
152
- uu[0], uu[1], uu[2], uu[3],
153
- uu[4], uu[5],
154
- uu[6], uu[7],
155
- uu[8], uu[9],
156
- uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
157
- }
158
-
159
- void new_uuid(char *str_ptr){
160
- uuid_t new_uuid;
161
- uuid_generate_time(new_uuid);
162
- uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
163
- }
164
-
165
- /* Processes a line from an Apache log file (error or access log) and returns a
166
- * Hash of the form {:token_type => ["value1", "value2"...] ...}
167
- * The types of tokens extracted are IPv4 addresses, HTTP verbs, response codes
168
- * and version strings, hostnames, relative and absolute URIs, browser strings,
169
- * error levels, and other words */
170
- VALUE t_tokenize_apache_logs(VALUE self) {
171
- char new_uuid_str[33];
172
- vor_curr_tok_hsh = rb_hash_new();
173
- rb_global_variable(&vor_curr_tok_hsh);
174
- /* error out on absurdly large strings */
175
- if( strlen(RSTRING(self)->ptr) > 1000000){
176
- rb_raise(rb_eArgError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars");
177
- }
178
- else{
179
- /* {:message => self} */
180
- VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
181
- rb_hash_aset(vor_curr_tok_hsh, hsh_key_msg, self);
182
- /* {:id => generated_uuid} */
183
- new_uuid(new_uuid_str);
184
- VALUE hsh_key_id = ID2SYM(rb_intern("id"));
185
- VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
186
- rb_hash_aset(vor_curr_tok_hsh, hsh_key_id, hsh_val_id);
187
- yy_scan_string(RSTRING(self)->ptr);
188
- yylex();
189
- yy_delete_buffer(YY_CURRENT_BUFFER);
190
- }
191
- return rb_obj_dup(vor_curr_tok_hsh);
192
- }
193
-
194
- void push_token_to_hash(char * token_type, char * token_val) {
195
- VALUE hsh_key = ID2SYM(rb_intern(token_type));
196
- VALUE hsh_value = rb_hash_aref(vor_curr_tok_hsh, hsh_key);
197
- VALUE ary_for_token_type = rb_ary_new();
198
- switch (TYPE(hsh_value)) {
199
- case T_NIL:
200
- rb_ary_push(ary_for_token_type, rb_tainted_str_new2(token_val));
201
- rb_hash_aset(vor_curr_tok_hsh, hsh_key, ary_for_token_type);
202
- break;
203
- case T_ARRAY:
204
- rb_ary_push(hsh_value, rb_tainted_str_new2(token_val));
205
- break;
206
- default:
207
- /* raise exception */
208
- rb_raise(rb_eTypeError, "expecting member of hash to be nil or array");
209
- break;
210
- }
211
- }
212
-
213
- void Init_tokenize_apache_logs() {
214
- rb_define_method(rb_cString, "tokenize_apache_logs", t_tokenize_apache_logs, 0);
215
- }