danielsdeleo-teeth 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/LICENSE +11 -0
  2. data/README.rdoc +107 -10
  3. data/Rakefile +47 -31
  4. data/VERSION.yml +4 -0
  5. data/doc/classes/String.html +182 -0
  6. data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
  7. data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
  8. data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
  9. data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
  10. data/doc/classes/Teeth/RuleStatement.html +291 -0
  11. data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
  12. data/doc/classes/Teeth/Scanner.html +535 -0
  13. data/doc/classes/Teeth/ScannerDefinition.html +253 -0
  14. data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
  15. data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
  16. data/doc/classes/Teeth/ScannerError.html +111 -0
  17. data/doc/classes/Teeth.html +129 -0
  18. data/doc/created.rid +1 -0
  19. data/doc/files/README_rdoc.html +314 -0
  20. data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
  21. data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
  22. data/doc/files/lib/rule_statement_rb.html +101 -0
  23. data/doc/files/lib/scanner_definition_rb.html +101 -0
  24. data/doc/files/lib/scanner_rb.html +108 -0
  25. data/doc/files/lib/teeth_rb.html +111 -0
  26. data/doc/fr_class_index.html +39 -0
  27. data/doc/fr_file_index.html +33 -0
  28. data/doc/fr_method_index.html +60 -0
  29. data/doc/index.html +24 -0
  30. data/doc/rdoc-style.css +208 -0
  31. data/ext/scan_apache_logs/Makefile +158 -0
  32. data/ext/scan_apache_logs/extconf.rb +3 -0
  33. data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
  34. data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
  35. data/ext/scan_rails_logs/Makefile +158 -0
  36. data/ext/scan_rails_logs/extconf.rb +3 -0
  37. data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
  38. data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
  39. data/lib/rule_statement.rb +61 -0
  40. data/lib/scanner.rb +98 -0
  41. data/lib/scanner_definition.rb +116 -0
  42. data/lib/teeth.rb +5 -1
  43. data/scanners/scan_apache_logs.rb +27 -0
  44. data/scanners/scan_rails_logs.rb +70 -0
  45. data/spec/fixtures/rails_1x.log +59 -0
  46. data/spec/fixtures/rails_22.log +12 -0
  47. data/spec/fixtures/rails_22_cached.log +10 -0
  48. data/spec/fixtures/rails_unordered.log +24 -0
  49. data/spec/playground/show_apache_processing.rb +13 -0
  50. data/spec/spec_helper.rb +6 -1
  51. data/spec/unit/rule_statement_spec.rb +60 -0
  52. data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
  53. data/spec/unit/scan_rails_logs_spec.rb +90 -0
  54. data/spec/unit/scaner_definition_spec.rb +65 -0
  55. data/spec/unit/scanner_spec.rb +109 -0
  56. data/teeth.gemspec +31 -0
  57. data/templates/tokenizer.yy.erb +168 -0
  58. metadata +60 -15
  59. data/ext/extconf.rb +0 -4
  60. data/ext/tokenize_apache_logs.yy +0 -215
  61. data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
/* Heading colour override.
   NOTE(review): only the first selector in this list is scoped to
   div#description; the bare h2..h6 selectors match those headings anywhere
   on the page. Left as-is because other headings currently rely on it. */
div#description h1,h2,h3,h4,h5,h6 {
  color: #125;
  background: transparent;
}
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
@@ -0,0 +1,158 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = ./
7
+ topdir = /opt/local/lib/ruby/1.8/i686-darwin8.11.1
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ exec_prefix = $(prefix)
11
+ prefix = $(DESTDIR)/opt/local
12
+ sharedstatedir = $(prefix)/com
13
+ mandir = $(DESTDIR)/opt/local/share/man
14
+ psdir = $(docdir)
15
+ oldincludedir = $(DESTDIR)/usr/include
16
+ localedir = $(datarootdir)/locale
17
+ bindir = $(exec_prefix)/bin
18
+ libexecdir = $(exec_prefix)/libexec
19
+ sitedir = $(libdir)/ruby/site_ruby
20
+ htmldir = $(docdir)
21
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
22
+ includedir = $(prefix)/include
23
+ infodir = $(datarootdir)/info
24
+ vendorlibdir = $(vendordir)/$(ruby_version)
25
+ sysconfdir = $(prefix)/etc
26
+ libdir = $(exec_prefix)/lib
27
+ sbindir = $(exec_prefix)/sbin
28
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
29
+ docdir = $(datarootdir)/doc/$(PACKAGE)
30
+ dvidir = $(docdir)
31
+ vendordir = $(DESTDIR)/opt/local/lib/ruby/vendor_ruby
32
+ datarootdir = $(prefix)/share
33
+ pdfdir = $(docdir)
34
+ archdir = $(rubylibdir)/$(arch)
35
+ sitearchdir = $(sitelibdir)/$(sitearch)
36
+ datadir = $(datarootdir)
37
+ localstatedir = $(prefix)/var
38
+ sitelibdir = $(sitedir)/$(ruby_version)
39
+
40
+ CC = /usr/bin/gcc-4.0
41
+ LIBRUBY = $(LIBRUBY_SO)
42
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
43
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
44
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
45
+
46
+ RUBY_EXTCONF_H =
47
+ CFLAGS = -fno-common -O2 -fno-common -pipe -fno-common $(cflags) -Wall
48
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
49
+ DEFS =
50
+ CPPFLAGS = -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
51
+ CXXFLAGS = $(CFLAGS)
52
+ ldflags = -L. -L/opt/local/lib
53
+ dldflags =
54
+ archflag =
55
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
56
+ LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
57
+ AR = ar
58
+ EXEEXT =
59
+
60
+ RUBY_INSTALL_NAME = ruby
61
+ RUBY_SO_NAME = ruby
62
+ arch = i686-darwin8.11.1
63
+ sitearch = i686-darwin8.11.1
64
+ vendorarch = i686-darwin8.11.1
65
+ ruby_version = 1.8
66
+ ruby = /opt/local/bin/ruby
67
+ RUBY = $(ruby)
68
+ RM = rm -f
69
+ MAKEDIRS = mkdir -p
70
+ INSTALL = /usr/bin/install
71
+ INSTALL_PROG = $(INSTALL) -m 0755
72
+ INSTALL_DATA = $(INSTALL) -m 644
73
+ COPY = cp
74
+
75
+ #### End of system configuration section. ####
76
+
77
+ preload =
78
+
79
+ libpath = . $(libdir)
80
+ LIBPATH = -L. -L$(libdir)
81
+ DEFFILE =
82
+
83
+ CLEANFILES = mkmf.log
84
+ DISTCLEANFILES =
85
+
86
+ extout =
87
+ extout_prefix =
88
+ target_prefix = /teeth
89
+ LOCAL_LIBS =
90
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
91
+ SRCS = scan_apache_logs.yy.c
92
+ OBJS = scan_apache_logs.yy.o
93
+ TARGET = scan_apache_logs
94
+ DLLIB = $(TARGET).bundle
95
+ EXTSTATIC =
96
+ STATIC_LIB =
97
+
98
+ BINDIR = $(bindir)
99
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
100
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
101
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
102
+
103
+ TARGET_SO = $(DLLIB)
104
+ CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
105
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
106
+
107
+ all: $(DLLIB)
108
+ static: $(STATIC_LIB)
109
+
110
+ clean:
111
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
112
+
113
+ distclean: clean
114
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
115
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
116
+
117
+ realclean: distclean
118
+ install: install-so install-rb
119
+
120
+ install-so: $(RUBYARCHDIR)
121
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
122
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
123
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
124
+ install-rb: pre-install-rb install-rb-default
125
+ install-rb-default: pre-install-rb-default
126
+ pre-install-rb: Makefile
127
+ pre-install-rb-default: Makefile
128
+ $(RUBYARCHDIR):
129
+ $(MAKEDIRS) $@
130
+
131
+ site-install: site-install-so site-install-rb
132
+ site-install-so: install-so
133
+ site-install-rb: install-rb
134
+
135
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
136
+
137
+ .cc.o:
138
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
139
+
140
+ .cxx.o:
141
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
142
+
143
+ .cpp.o:
144
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
145
+
146
+ .C.o:
147
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
148
+
149
+ .c.o:
150
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
151
+
152
+ $(DLLIB): $(OBJS)
153
+ @-$(RM) $@
154
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
155
+
156
+
157
+
158
+ $(OBJS): ruby.h defines.h
@@ -0,0 +1,3 @@
1
require "mkmf"

# Build the generated scanner with all compiler warnings enabled.
$CFLAGS += " -Wall"

# Emit a Makefile for the extension target teeth/scan_apache_logs, using the
# current directory as the source prefix.
create_makefile("teeth/scan_apache_logs", "./")
@@ -0,0 +1,267 @@
1
+ %option prefix="apache_logs_yy"
2
+ %option full
3
+ %option never-interactive
4
+ %option read
5
+ %option nounput
6
+ %option noyywrap noreject noyymore nodefault
7
+ %{
8
#include <ruby.h>
#include <string.h>
#include <uuid/uuid.h>
10
+ /* Data types */
11
+ typedef struct {
12
+ char *key;
13
+ char *value;
14
+ } KVPAIR;
15
+ const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
16
+ /* prototypes */
17
+ char *strip_ends(char *);
18
+ VALUE t_scan_apache_logs(VALUE);
19
+ void new_uuid(char *str_ptr);
20
+ void raise_error_for_string_too_long(VALUE string);
21
+ void include_message_in_token_hash(VALUE message, VALUE token_hash);
22
+ void add_uuid_to_token_hash(VALUE token_hash);
23
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
24
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
25
+ /* Set the scanner name, and return type */
26
+ #define YY_DECL KVPAIR scan_apache_logs(void)
27
+ #define yyterminate() return EOF_KVPAIR
28
+ /* Ruby 1.8 and 1.9 compatibility */
29
+ #if !defined(RSTRING_LEN)
30
+ # define RSTRING_LEN(x) (RSTRING(x)->len)
31
+ # define RSTRING_PTR(x) (RSTRING(x)->ptr)
32
+ #endif
33
+
34
+ %}
35
+
36
+ /* Definitions */
37
+
38
/* Matches any single character, including newline; used by the final
   "ignore" rule. */
CATCHALL (.|"\n")

WS [[:space:]]

/* NOTE(review): [:punct:] is not nested inside a bracket expression here, so
   it appears to be read as the literal character set ':', 'p', 'u', 'n',
   'c', 't' rather than the POSIX punctuation class. Changing it alters
   tokenization, so confirm intent before touching it. */
NON_WS ([a-z]|[0-9]|[:punct:])

IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]

/* NOTE(review): the '.' between the second and third label is unescaped and
   therefore matches any character; confirm whether a literal dot was
   intended. */
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?

WDAY mon|tue|wed|thu|fri|sat|sun

MON jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec

MONTH_NUM 0[1-9]|1[0-2]

MDAY 3[0-1]|[1-2][0-9]|0[1-9]

HOUR 2[0-3]|[0-1][0-9]

MINSEC [0-5][0-9]|60

YEAR [0-9][0-9][0-9][0-9]

PLUSMINUS (\+|\-)

REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*

PROTO (http:|https:)

ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)

/* The dots are escaped so that only "1.0" and "1.1" match, not "1x0" etc. */
HTTP_VERS HTTP\/(1\.0|1\.1)

HTTP_VERB (get|head|put|post|delete|trace|connect)

HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])

BROWSER_STR \"(moz|msie|lynx).+\"
78
+
79
+
80
+ %%
81
+ /*
82
+ Actions
83
+ */
84
+
85
+
86
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
  /* dotted-quad IPv4 address */
  KVPAIR token = {"ipv4_addr", yytext};
  return token;
}

{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR} {
  /* error-log timestamp: weekday month day hh:mm:ss year */
  KVPAIR token = {"apache_err_datetime", yytext};
  return token;
}

{MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR} {
  /* access-log timestamp: day/month/year:hh:mm:ss +zone */
  KVPAIR token = {"apache_access_datetime", yytext};
  return token;
}

{HTTP_VERS} {
  KVPAIR token = {"http_version", yytext};
  return token;
}

{BROWSER_STR} {
  /* strip_ends drops the surrounding double quotes from the match */
  KVPAIR token = {"browser_string", strip_ends(yytext)};
  return token;
}

{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
  KVPAIR token = {"absolute_url", yytext};
  return token;
}

{HOST} {
  KVPAIR token = {"host", yytext};
  return token;
}

{REL_URL} {
  KVPAIR token = {"relative_url", yytext};
  return token;
}

{ERR_LVL} {
  KVPAIR token = {"error_level", yytext};
  return token;
}

{HTTPCODE} {
  KVPAIR token = {"http_response", yytext};
  return token;
}

{HTTP_VERB} {
  KVPAIR token = {"http_method", yytext};
  return token;
}

{NON_WS}{NON_WS}* {
  /* any other word; the caller joins consecutive words into one string */
  KVPAIR token = {"strings", yytext};
  return token;
}

{CATCHALL} /* ignore */
147
+ %%
148
+
149
/* Removes the first and last character of `string` in place (used to drop
 * the double quotes around a browser string).
 *
 * The last character is overwritten with NUL and a pointer to the second
 * character is returned, so the result aliases the caller's buffer. The
 * length is taken from the string itself rather than the scanner's yyleng,
 * and an empty string is returned unchanged instead of writing before the
 * start of the buffer. */
char *strip_ends(char *string) {
  size_t len = strlen(string);
  if (len == 0) {
    return string;
  }
  string[len - 1] = '\0';
  return string + 1;
}
154
+
155
+ void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
156
+ {
157
+ sprintf(out,
158
+ "%02X%02X%02X%02X"
159
+ "%02X%02X"
160
+ "%02X%02X"
161
+ "%02X%02X"
162
+ "%02X%02X%02X%02X%02X%02X",
163
+ uu[0], uu[1], uu[2], uu[3],
164
+ uu[4], uu[5],
165
+ uu[6], uu[7],
166
+ uu[8], uu[9],
167
+ uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
168
+ }
169
+
170
+ void new_uuid(char *str_ptr){
171
+ uuid_t new_uuid;
172
+ uuid_generate_time(new_uuid);
173
+ uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
174
+ }
175
+
176
+ void raise_error_for_string_too_long(VALUE string){
177
+ if( RSTRING_LEN(string) > 1000000){
178
+ rb_raise(rb_eArgError, "string too long for scan_apache_logs! max length is 1,000,000 chars");
179
+ }
180
+ }
181
+
182
+ /* Scans self, which is expected to be a single line from an Apache error or
183
+ * access log, and returns a Hash of the components of the log message. The
184
+ * following parts of the log message are returned if they are present:
185
+ * IPv4 address, datetime, HTTP Version used, the browser string given by the
186
+ * client, any absolute or relative URLs, the error level, HTTP response code,
187
+ * HTTP Method (verb), and any other uncategorized strings present. */
188
+ VALUE t_scan_apache_logs(VALUE self) {
189
+ KVPAIR kv_result;
190
+ int scan_complete = 0;
191
+ int building_words_to_string = 0;
192
+ VALUE token_hash = rb_hash_new();
193
+
194
+ BEGIN(INITIAL);
195
+
196
+ /* error out on absurdly large strings */
197
+ raise_error_for_string_too_long(self);
198
+ /* {:message => self()} */
199
+ include_message_in_token_hash(self, token_hash);
200
+ /* {:id => UUID} */
201
+ add_uuid_to_token_hash(token_hash);
202
+ yy_scan_string(RSTRING_PTR(self));
203
+ while (scan_complete == 0) {
204
+ kv_result = scan_apache_logs();
205
+ if (kv_result.key == "EOF"){
206
+ scan_complete = 1;
207
+ }
208
+ else if (kv_result.key == "strings"){
209
+ /* build a string until we get a non-word */
210
+ if (building_words_to_string == 0){
211
+ building_words_to_string = 1;
212
+ push_kv_pair_to_hash(kv_result, token_hash);
213
+ }
214
+ else{
215
+ concat_word_to_string(kv_result, token_hash);
216
+ }
217
+ }
218
+ else {
219
+ building_words_to_string = 0;
220
+ push_kv_pair_to_hash(kv_result, token_hash);
221
+ }
222
+ }
223
+ yy_delete_buffer(YY_CURRENT_BUFFER);
224
+ return rb_obj_dup(token_hash);
225
+ }
226
+
227
+ void add_uuid_to_token_hash(VALUE token_hash) {
228
+ char new_uuid_str[33];
229
+ new_uuid(new_uuid_str);
230
+ VALUE hsh_key_id = ID2SYM(rb_intern("id"));
231
+ VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
232
+ rb_hash_aset(token_hash, hsh_key_id, hsh_val_id);
233
+ }
234
+
235
+ void include_message_in_token_hash(VALUE message, VALUE token_hash) {
236
+ /* {:message => self()} */
237
+ VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
238
+ rb_hash_aset(token_hash, hsh_key_msg, message);
239
+ }
240
+
241
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
242
+ char * space = " ";
243
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
244
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
245
+ VALUE string = rb_ary_entry(hsh_value, -1);
246
+ rb_str_cat(string, space, 1);
247
+ rb_str_cat(string, key_value.value, yyleng);
248
+ }
249
+
250
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
251
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
252
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
253
+ VALUE ary_for_token_type = rb_ary_new();
254
+ switch (TYPE(hsh_value)) {
255
+ case T_NIL:
256
+ rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
257
+ rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
258
+ break;
259
+ case T_ARRAY:
260
+ rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
261
+ break;
262
+ }
263
+ }
264
+
265
+ void Init_scan_apache_logs() {
266
+ rb_define_method(rb_cString, "scan_apache_logs", t_scan_apache_logs, 0);
267
+ }