danielsdeleo-teeth 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/LICENSE +11 -0
  2. data/README.rdoc +107 -10
  3. data/Rakefile +47 -31
  4. data/VERSION.yml +4 -0
  5. data/doc/classes/String.html +182 -0
  6. data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
  7. data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
  8. data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
  9. data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
  10. data/doc/classes/Teeth/RuleStatement.html +291 -0
  11. data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
  12. data/doc/classes/Teeth/Scanner.html +535 -0
  13. data/doc/classes/Teeth/ScannerDefinition.html +253 -0
  14. data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
  15. data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
  16. data/doc/classes/Teeth/ScannerError.html +111 -0
  17. data/doc/classes/Teeth.html +129 -0
  18. data/doc/created.rid +1 -0
  19. data/doc/files/README_rdoc.html +314 -0
  20. data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
  21. data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
  22. data/doc/files/lib/rule_statement_rb.html +101 -0
  23. data/doc/files/lib/scanner_definition_rb.html +101 -0
  24. data/doc/files/lib/scanner_rb.html +108 -0
  25. data/doc/files/lib/teeth_rb.html +111 -0
  26. data/doc/fr_class_index.html +39 -0
  27. data/doc/fr_file_index.html +33 -0
  28. data/doc/fr_method_index.html +60 -0
  29. data/doc/index.html +24 -0
  30. data/doc/rdoc-style.css +208 -0
  31. data/ext/scan_apache_logs/Makefile +158 -0
  32. data/ext/scan_apache_logs/extconf.rb +3 -0
  33. data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
  34. data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
  35. data/ext/scan_rails_logs/Makefile +158 -0
  36. data/ext/scan_rails_logs/extconf.rb +3 -0
  37. data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
  38. data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
  39. data/lib/rule_statement.rb +61 -0
  40. data/lib/scanner.rb +98 -0
  41. data/lib/scanner_definition.rb +116 -0
  42. data/lib/teeth.rb +5 -1
  43. data/scanners/scan_apache_logs.rb +27 -0
  44. data/scanners/scan_rails_logs.rb +70 -0
  45. data/spec/fixtures/rails_1x.log +59 -0
  46. data/spec/fixtures/rails_22.log +12 -0
  47. data/spec/fixtures/rails_22_cached.log +10 -0
  48. data/spec/fixtures/rails_unordered.log +24 -0
  49. data/spec/playground/show_apache_processing.rb +13 -0
  50. data/spec/spec_helper.rb +6 -1
  51. data/spec/unit/rule_statement_spec.rb +60 -0
  52. data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
  53. data/spec/unit/scan_rails_logs_spec.rb +90 -0
  54. data/spec/unit/scaner_definition_spec.rb +65 -0
  55. data/spec/unit/scanner_spec.rb +109 -0
  56. data/teeth.gemspec +31 -0
  57. data/templates/tokenizer.yy.erb +168 -0
  58. metadata +60 -15
  59. data/ext/extconf.rb +0 -4
  60. data/ext/tokenize_apache_logs.yy +0 -215
  61. data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -1,215 +0,0 @@
1
- /*
2
- * uuid_unparse_upper_sans_dash is derived from the uuid library
3
- * from OS X / darwin, therefore:
4
- *
5
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
6
- *
7
- * %Begin-Header%
8
- * Redistribution and use in source and binary forms, with or without
9
- * modification, are permitted provided that the following conditions
10
- * are met:
11
- * 1. Redistributions of source code must retain the above copyright
12
- * notice, and the entire permission notice in its entirety,
13
- * including the disclaimer of warranties.
14
- * 2. Redistributions in binary form must reproduce the above copyright
15
- * notice, this list of conditions and the following disclaimer in the
16
- * documentation and/or other materials provided with the distribution.
17
- * 3. The name of the author may not be used to endorse or promote
18
- * products derived from this software without specific prior
19
- * written permission.
20
- *
21
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
24
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
25
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
27
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
31
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
32
- * DAMAGE.
33
- * %End-Header%
34
- */
35
-
36
- /* %option prefix="vor_yy" */
37
- %option full
38
- %{
39
- #include <ruby.h> /* VALUE, rb_hash_*, rb_ary_*, rb_raise, ... used by the C code in this file */
40
- #include <uuid/uuid.h> /* uuid_t and uuid_generate_time(), used by new_uuid() */
41
-
42
- /* Prototypes for the helpers defined after the rules section, plus the file-scope hash those helpers share. */
43
- static VALUE vor_curr_tok_hsh; /* token hash for the string being scanned; reassigned at the start of t_tokenize_apache_logs() and filled by push_token_to_hash() */
44
- char *strip_ends(char *); /* drops the first and last character of the current match */
45
- void push_token_to_hash(char *, char *); /* (token type, token text) -> appended to vor_curr_tok_hsh */
46
- VALUE t_tokenize_apache_logs(VALUE); /* C body of String#tokenize_apache_logs */
47
- void new_uuid(char *str_ptr); /* writes a 32-hex-digit UUID string into str_ptr */
48
- %}
49
- /* Definitions: named sub-patterns, referenced as {NAME} by the rules in the next section */
50
-
51
- IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
52
-
53
- WDAY (Mon|Tue|Wed|Thu|Fri|Sat|Sun)
54
-
55
- MON (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
56
-
57
- MDAY (3[0-1]|[1-2][0-9]|0[1-9])
58
-
59
- HOUR (2[0-3]|[0-1][0-9])
60
-
61
- MINSEC [0-5][0-9]
62
-
63
- YEAR [0-9][0-9][0-9][0-9]
64
-
65
- REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
66
-
67
- PROTO (http:|https:)
68
-
69
- HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
70
-
71
- ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
72
-
73
- PLUSMINUS (\+|\-)
74
-
75
- HTTP_VERS HTTP\/(1.0|1.1)
76
-
77
- HTTP_VERB (get|head|put|post|delete|trace|connect)
78
-
79
- HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
80
-
81
- WS [\000-\s]
82
-
83
- /*
84
- BROWSER_STR covers most of the bases, but there are a F*-ton of these: http://www.zytrax.com/tech/web/browser_ids.htm
85
- Also, handling of quotes is naive: ".+" is greedy, so the match runs to the last double quote on the line rather than the next one. If it becomes a problem, try something like
86
- http://flex.sourceforge.net/manual/How-can-I-match-C_002dstyle-comments_003f.html#How-can-I-match-C_002dstyle-comments_003f
87
- NOTE(review), to confirm against the flex manual: (1) in WS above, \s is presumably meant as "whitespace", but flex may treat it as a literal 's', making the range far wider than intended; (2) in NON_WS below, [:punct:] is not wrapped in an outer [...], so it may be read as a plain set of the characters ':', 'p', 'u', 'n', 'c', 't' instead of the punctuation class; (3) the bare '.' in the middle of HOST and inside HTTP_VERS matches any character except newline, not only a literal dot. */
88
- BROWSER_STR \"(moz|msie|lynx).+\"
89
-
90
- NON_WS ([a-z]|[0-9]|[:punct:])
91
-
92
- %%
93
- /*
94
- Actions: every rule files its match into the token hash, under the quoted key, via push_token_to_hash(). flex takes the longest match; when two rules match the same length, the rule listed first wins, so the order below matters. No action returns, so yylex() keeps scanning until the input is exhausted.
95
- */
96
-
97
- {IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
98
- push_token_to_hash("ipv4_addr", yytext); /* dotted quad, each part limited to 0-255 by IP4_OCT */
99
- }
100
-
101
- {WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}\:{MINSEC}\:{MINSEC}{WS}{YEAR} {
102
- push_token_to_hash("apache_err_datetime", yytext); /* shape: Wday Mon DD HH:MM:SS YYYY */
103
- }
104
-
105
- {MDAY}\/{MON}\/{YEAR}\:{HOUR}\:{MINSEC}\:{MINSEC}{WS}{PLUSMINUS}{YEAR} {
106
- push_token_to_hash("apache_access_datetime", yytext); /* shape: DD/Mon/YYYY:HH:MM:SS +NNNN; the trailing {YEAR} is reused as a four-digit field after the sign */
107
- }
108
-
109
- {HTTP_VERS} { push_token_to_hash("http_version", yytext);}
110
-
111
- {BROWSER_STR} { push_token_to_hash("browser_string", strip_ends(yytext)); /* stored without the surrounding double quotes */}
112
-
113
- {PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
114
- push_token_to_hash("absolute_url", yytext); /* scheme + host or IPv4 address + optional path */
115
- }
116
-
117
- {HOST} {push_token_to_hash("host", yytext);}
118
-
119
- {REL_URL} {
120
- push_token_to_hash("relative_url", yytext); /* anything starting with '/', '\' or '.' followed by URL characters */
121
- }
122
-
123
- {ERR_LVL} {
124
- push_token_to_hash("error_level", yytext);
125
- }
126
-
127
- {HTTPCODE} { push_token_to_hash("http_response", yytext); }
128
-
129
- {HTTP_VERB} { push_token_to_hash("http_method", yytext);}
130
-
131
- {NON_WS}{NON_WS}* {
132
- push_token_to_hash("word", yytext); /* fallback for runs of NON_WS characters not claimed by a rule above */
133
- }
134
-
135
- (.|"\n") /* ignore: any single character (or newline) that no rule above matched is dropped */
136
- %%
137
-
138
- char *strip_ends(char *string) { /* Drops the first and last character of the current flex match, in place, and returns a pointer into the same buffer (no allocation). */
139
- string[yyleng-1] = '\0'; /* overwrite the last matched character; uses the scanner's global yyleng rather than strlen(string), so this is only meaningful when string is yytext */
140
- ++string; /* step past the first character */
141
- return string;
142
- }
143
-
144
- void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out) /* Writes uu[0]..uu[15] to out as upper-case hex digits with no dashes; sprintf appends the terminating NUL. */
145
- {
146
- sprintf(out, /* unbounded write: out must have room for 33 bytes (32 digits + NUL), assuming each uu[i] is a single byte -- uuid_t comes from <uuid/uuid.h>, not shown here */
147
- "%02X%02X%02X%02X"
148
- "%02X%02X"
149
- "%02X%02X"
150
- "%02X%02X"
151
- "%02X%02X%02X%02X%02X%02X", /* same 4-2-2-2-6 grouping as the dashed UUID text form, just without the separators */
152
- uu[0], uu[1], uu[2], uu[3],
153
- uu[4], uu[5],
154
- uu[6], uu[7],
155
- uu[8], uu[9],
156
- uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
157
- }
158
-
159
- void new_uuid(char *str_ptr){ /* Fills str_ptr with the dash-less upper-case hex form of a newly generated UUID; str_ptr must have room for 33 bytes (32 digits + NUL). */
160
- uuid_t new_uuid; /* local shares its name with this function and hides it for the rest of the body */
161
- uuid_generate_time(new_uuid); /* libuuid call; presumably the time-based generator -- see the libuuid docs */
162
- uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
163
- }
164
-
165
- /* Processes a line from an Apache log file (error or access log) and returns a
166
- * Hash of the form {:token_type => ["value1", "value2"...] ...}
167
- * The types of tokens extracted are IPv4 addresses, HTTP verbs, response codes
168
- * and version strings, hostnames, relative and absolute URIs, browser strings,
169
- * error levels, error-log and access-log datetimes, and other words. The Hash also holds :message (the receiver string itself) and :id (a newly generated 32-character UUID string). Raises ArgumentError when the string is longer than 1,000,000 chars. */
170
- VALUE t_tokenize_apache_logs(VALUE self) {
171
- char new_uuid_str[33]; /* 32 hex digits + terminating NUL, as written by new_uuid() */
172
- vor_curr_tok_hsh = rb_hash_new(); /* start from an empty hash on every call */
173
- rb_global_variable(&vor_curr_tok_hsh); /* NOTE(review): this registration runs on every call; it probably only needs to happen once (e.g. in Init_tokenize_apache_logs) -- confirm against the Ruby C API docs */
174
- /* error out on absurdly large strings (length is measured with strlen, so counting stops at the first NUL byte) */
175
- if( strlen(RSTRING(self)->ptr) > 1000000){
176
- rb_raise(rb_eArgError, "string too long for tokenize_apache_logs! max length is 1,000,000 chars");
177
- }
178
- else{
179
- /* {:message => self} */
180
- VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
181
- rb_hash_aset(vor_curr_tok_hsh, hsh_key_msg, self);
182
- /* {:id => generated_uuid} */
183
- new_uuid(new_uuid_str);
184
- VALUE hsh_key_id = ID2SYM(rb_intern("id"));
185
- VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
186
- rb_hash_aset(vor_curr_tok_hsh, hsh_key_id, hsh_val_id);
187
- yy_scan_string(RSTRING(self)->ptr); /* point the scanner at the receiver's bytes instead of a file */
188
- yylex(); /* run the rules; each match is filed into vor_curr_tok_hsh by push_token_to_hash() */
189
- yy_delete_buffer(YY_CURRENT_BUFFER); /* release the scan buffer set up by yy_scan_string() above */
190
- }
191
- return rb_obj_dup(vor_curr_tok_hsh); /* hand back a dup of the hash rather than the file-scope object itself */
192
- }
193
-
194
- void push_token_to_hash(char * token_type, char * token_val) { /* Appends token_val (as a new tainted Ruby string) to the array stored in vor_curr_tok_hsh under the symbol named by token_type, creating that array on first use. Raises TypeError if the key already holds something other than an array. */
195
- VALUE hsh_key = ID2SYM(rb_intern(token_type));
196
- VALUE hsh_value = rb_hash_aref(vor_curr_tok_hsh, hsh_key); /* current entry for this token type, if any */
197
- VALUE ary_for_token_type = rb_ary_new(); /* allocated on every call, but only used by the T_NIL branch below */
198
- switch (TYPE(hsh_value)) {
199
- case T_NIL: /* first token of this type: store a one-element array */
200
- rb_ary_push(ary_for_token_type, rb_tainted_str_new2(token_val));
201
- rb_hash_aset(vor_curr_tok_hsh, hsh_key, ary_for_token_type);
202
- break;
203
- case T_ARRAY: /* type seen before: append to the existing array */
204
- rb_ary_push(hsh_value, rb_tainted_str_new2(token_val));
205
- break;
206
- default:
207
- /* raise exception: the key holds a non-array value, as the :message and :id entries set by t_tokenize_apache_logs() do */
208
- rb_raise(rb_eTypeError, "expecting member of hash to be nil or array");
209
- break;
210
- }
211
- }
212
-
213
- void Init_tokenize_apache_logs() { /* Presumably the hook Ruby calls when the tokenize_apache_logs extension is loaded (Init_<name> convention -- confirm). */
214
- rb_define_method(rb_cString, "tokenize_apache_logs", t_tokenize_apache_logs, 0); /* adds String#tokenize_apache_logs, taking no arguments, backed by t_tokenize_apache_logs() */
215
- }