danielsdeleo-teeth 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/LICENSE +11 -0
  2. data/README.rdoc +107 -10
  3. data/Rakefile +47 -31
  4. data/VERSION.yml +4 -0
  5. data/doc/classes/String.html +182 -0
  6. data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
  7. data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
  8. data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
  9. data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
  10. data/doc/classes/Teeth/RuleStatement.html +291 -0
  11. data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
  12. data/doc/classes/Teeth/Scanner.html +535 -0
  13. data/doc/classes/Teeth/ScannerDefinition.html +253 -0
  14. data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
  15. data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
  16. data/doc/classes/Teeth/ScannerError.html +111 -0
  17. data/doc/classes/Teeth.html +129 -0
  18. data/doc/created.rid +1 -0
  19. data/doc/files/README_rdoc.html +314 -0
  20. data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
  21. data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
  22. data/doc/files/lib/rule_statement_rb.html +101 -0
  23. data/doc/files/lib/scanner_definition_rb.html +101 -0
  24. data/doc/files/lib/scanner_rb.html +108 -0
  25. data/doc/files/lib/teeth_rb.html +111 -0
  26. data/doc/fr_class_index.html +39 -0
  27. data/doc/fr_file_index.html +33 -0
  28. data/doc/fr_method_index.html +60 -0
  29. data/doc/index.html +24 -0
  30. data/doc/rdoc-style.css +208 -0
  31. data/ext/scan_apache_logs/Makefile +158 -0
  32. data/ext/scan_apache_logs/extconf.rb +3 -0
  33. data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
  34. data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
  35. data/ext/scan_rails_logs/Makefile +158 -0
  36. data/ext/scan_rails_logs/extconf.rb +3 -0
  37. data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
  38. data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
  39. data/lib/rule_statement.rb +61 -0
  40. data/lib/scanner.rb +98 -0
  41. data/lib/scanner_definition.rb +116 -0
  42. data/lib/teeth.rb +5 -1
  43. data/scanners/scan_apache_logs.rb +27 -0
  44. data/scanners/scan_rails_logs.rb +70 -0
  45. data/spec/fixtures/rails_1x.log +59 -0
  46. data/spec/fixtures/rails_22.log +12 -0
  47. data/spec/fixtures/rails_22_cached.log +10 -0
  48. data/spec/fixtures/rails_unordered.log +24 -0
  49. data/spec/playground/show_apache_processing.rb +13 -0
  50. data/spec/spec_helper.rb +6 -1
  51. data/spec/unit/rule_statement_spec.rb +60 -0
  52. data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
  53. data/spec/unit/scan_rails_logs_spec.rb +90 -0
  54. data/spec/unit/scaner_definition_spec.rb +65 -0
  55. data/spec/unit/scanner_spec.rb +109 -0
  56. data/teeth.gemspec +31 -0
  57. data/templates/tokenizer.yy.erb +168 -0
  58. metadata +60 -15
  59. data/ext/extconf.rb +0 -4
  60. data/ext/tokenize_apache_logs.yy +0 -215
  61. data/ext/tokenize_apache_logs.yy.c +0 -12067
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
+ div#description h1,h2,h3,h4,h5,h6 {
100
+ color: #125;;
101
+ background: transparent;
102
+ }
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
@@ -0,0 +1,158 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = ./
7
+ topdir = /opt/local/lib/ruby/1.8/i686-darwin8.11.1
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ exec_prefix = $(prefix)
11
+ prefix = $(DESTDIR)/opt/local
12
+ sharedstatedir = $(prefix)/com
13
+ mandir = $(DESTDIR)/opt/local/share/man
14
+ psdir = $(docdir)
15
+ oldincludedir = $(DESTDIR)/usr/include
16
+ localedir = $(datarootdir)/locale
17
+ bindir = $(exec_prefix)/bin
18
+ libexecdir = $(exec_prefix)/libexec
19
+ sitedir = $(libdir)/ruby/site_ruby
20
+ htmldir = $(docdir)
21
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
22
+ includedir = $(prefix)/include
23
+ infodir = $(datarootdir)/info
24
+ vendorlibdir = $(vendordir)/$(ruby_version)
25
+ sysconfdir = $(prefix)/etc
26
+ libdir = $(exec_prefix)/lib
27
+ sbindir = $(exec_prefix)/sbin
28
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
29
+ docdir = $(datarootdir)/doc/$(PACKAGE)
30
+ dvidir = $(docdir)
31
+ vendordir = $(DESTDIR)/opt/local/lib/ruby/vendor_ruby
32
+ datarootdir = $(prefix)/share
33
+ pdfdir = $(docdir)
34
+ archdir = $(rubylibdir)/$(arch)
35
+ sitearchdir = $(sitelibdir)/$(sitearch)
36
+ datadir = $(datarootdir)
37
+ localstatedir = $(prefix)/var
38
+ sitelibdir = $(sitedir)/$(ruby_version)
39
+
40
+ CC = /usr/bin/gcc-4.0
41
+ LIBRUBY = $(LIBRUBY_SO)
42
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
43
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
44
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
45
+
46
+ RUBY_EXTCONF_H =
47
+ CFLAGS = -fno-common -O2 -fno-common -pipe -fno-common $(cflags) -Wall
48
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
49
+ DEFS =
50
+ CPPFLAGS = -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
51
+ CXXFLAGS = $(CFLAGS)
52
+ ldflags = -L. -L/opt/local/lib
53
+ dldflags =
54
+ archflag =
55
+ DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
56
+ LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
57
+ AR = ar
58
+ EXEEXT =
59
+
60
+ RUBY_INSTALL_NAME = ruby
61
+ RUBY_SO_NAME = ruby
62
+ arch = i686-darwin8.11.1
63
+ sitearch = i686-darwin8.11.1
64
+ vendorarch = i686-darwin8.11.1
65
+ ruby_version = 1.8
66
+ ruby = /opt/local/bin/ruby
67
+ RUBY = $(ruby)
68
+ RM = rm -f
69
+ MAKEDIRS = mkdir -p
70
+ INSTALL = /usr/bin/install
71
+ INSTALL_PROG = $(INSTALL) -m 0755
72
+ INSTALL_DATA = $(INSTALL) -m 644
73
+ COPY = cp
74
+
75
+ #### End of system configuration section. ####
76
+
77
+ preload =
78
+
79
+ libpath = . $(libdir)
80
+ LIBPATH = -L. -L$(libdir)
81
+ DEFFILE =
82
+
83
+ CLEANFILES = mkmf.log
84
+ DISTCLEANFILES =
85
+
86
+ extout =
87
+ extout_prefix =
88
+ target_prefix = /teeth
89
+ LOCAL_LIBS =
90
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
91
+ SRCS = scan_apache_logs.yy.c
92
+ OBJS = scan_apache_logs.yy.o
93
+ TARGET = scan_apache_logs
94
+ DLLIB = $(TARGET).bundle
95
+ EXTSTATIC =
96
+ STATIC_LIB =
97
+
98
+ BINDIR = $(bindir)
99
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
100
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
101
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
102
+
103
+ TARGET_SO = $(DLLIB)
104
+ CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
105
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
106
+
107
+ all: $(DLLIB)
108
+ static: $(STATIC_LIB)
109
+
110
+ clean:
111
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
112
+
113
+ distclean: clean
114
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
115
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
116
+
117
+ realclean: distclean
118
+ install: install-so install-rb
119
+
120
+ install-so: $(RUBYARCHDIR)
121
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
122
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
123
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
124
+ install-rb: pre-install-rb install-rb-default
125
+ install-rb-default: pre-install-rb-default
126
+ pre-install-rb: Makefile
127
+ pre-install-rb-default: Makefile
128
+ $(RUBYARCHDIR):
129
+ $(MAKEDIRS) $@
130
+
131
+ site-install: site-install-so site-install-rb
132
+ site-install-so: install-so
133
+ site-install-rb: install-rb
134
+
135
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
136
+
137
+ .cc.o:
138
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
139
+
140
+ .cxx.o:
141
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
142
+
143
+ .cpp.o:
144
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
145
+
146
+ .C.o:
147
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
148
+
149
+ .c.o:
150
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
151
+
152
+ $(DLLIB): $(OBJS)
153
+ @-$(RM) $@
154
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
155
+
156
+
157
+
158
+ $(OBJS): ruby.h defines.h
@@ -0,0 +1,3 @@
1
+ require "mkmf"
2
+ $CFLAGS += " -Wall"
3
+ create_makefile "teeth/scan_apache_logs", "./"
@@ -0,0 +1,267 @@
1
+ %option prefix="apache_logs_yy"
2
+ %option full
3
+ %option never-interactive
4
+ %option read
5
+ %option nounput
6
+ %option noyywrap noreject noyymore nodefault
7
+ %{
8
#include <ruby.h>
#include <string.h>
#include <uuid/uuid.h>
10
+ /* Data types */
11
+ typedef struct {
12
+ char *key;
13
+ char *value;
14
+ } KVPAIR;
15
+ const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
16
+ /* prototypes */
17
+ char *strip_ends(char *);
18
+ VALUE t_scan_apache_logs(VALUE);
19
+ void new_uuid(char *str_ptr);
20
+ void raise_error_for_string_too_long(VALUE string);
21
+ void include_message_in_token_hash(VALUE message, VALUE token_hash);
22
+ void add_uuid_to_token_hash(VALUE token_hash);
23
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
24
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
25
+ /* Set the scanner name, and return type */
26
+ #define YY_DECL KVPAIR scan_apache_logs(void)
27
+ #define yyterminate() return EOF_KVPAIR
28
+ /* Ruby 1.8 and 1.9 compatibility */
29
+ #if !defined(RSTRING_LEN)
30
+ # define RSTRING_LEN(x) (RSTRING(x)->len)
31
+ # define RSTRING_PTR(x) (RSTRING(x)->ptr)
32
+ #endif
33
+
34
+ %}
35
+
36
+ /* Definitions */
37
+
38
+ CATCHALL (.|"\n")
39
+
40
+
41
+ WS [[:space:]]
42
+
43
+ NON_WS ([a-z]|[0-9]|[:punct:])
44
+
45
+ IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
46
+
47
+ HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
48
+
49
+ WDAY mon|tue|wed|thu|fri|sat|sun
50
+
51
+ MON jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec
52
+
53
+ MONTH_NUM 0[1-9]|1[0-2]
54
+
55
+ MDAY 3[0-1]|[1-2][0-9]|0[1-9]
56
+
57
+ HOUR 2[0-3]|[0-1][0-9]
58
+
59
+ MINSEC [0-5][0-9]|60
60
+
61
+ YEAR [0-9][0-9][0-9][0-9]
62
+
63
+ PLUSMINUS (\+|\-)
64
+
65
+ REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
66
+
67
+ PROTO (http:|https:)
68
+
69
+ ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
70
+
71
+ HTTP_VERS HTTP\/(1.0|1.1)
72
+
73
+ HTTP_VERB (get|head|put|post|delete|trace|connect)
74
+
75
+ HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
76
+
77
+ BROWSER_STR \"(moz|msie|lynx).+\"
78
+
79
+
80
+ %%
81
+ /*
82
+ Actions
83
+ */
84
+
85
+
86
+ {IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
87
+ KVPAIR ipv4_addr = {"ipv4_addr", yytext};
88
+ return ipv4_addr;
89
+ }
90
+
91
+ {WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR} {
92
+ KVPAIR apache_err_datetime = {"apache_err_datetime", yytext};
93
+ return apache_err_datetime;
94
+ }
95
+
96
+ {MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR} {
97
+ KVPAIR apache_access_datetime = {"apache_access_datetime", yytext};
98
+ return apache_access_datetime;
99
+ }
100
+
101
+ {HTTP_VERS} {
102
+ KVPAIR http_version = {"http_version", yytext};
103
+ return http_version;
104
+ }
105
+
106
+ {BROWSER_STR} {
107
+ KVPAIR browser_string = {"browser_string", strip_ends(yytext)};
108
+ return browser_string;
109
+ }
110
+
111
+ {PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
112
+ KVPAIR absolute_url = {"absolute_url", yytext};
113
+ return absolute_url;
114
+ }
115
+
116
+ {HOST} {
117
+ KVPAIR host = {"host", yytext};
118
+ return host;
119
+ }
120
+
121
+ {REL_URL} {
122
+ KVPAIR relative_url = {"relative_url", yytext};
123
+ return relative_url;
124
+ }
125
+
126
+ {ERR_LVL} {
127
+ KVPAIR error_level = {"error_level", yytext};
128
+ return error_level;
129
+ }
130
+
131
+ {HTTPCODE} {
132
+ KVPAIR http_response = {"http_response", yytext};
133
+ return http_response;
134
+ }
135
+
136
+ {HTTP_VERB} {
137
+ KVPAIR http_method = {"http_method", yytext};
138
+ return http_method;
139
+ }
140
+
141
+ {NON_WS}{NON_WS}* {
142
+ KVPAIR strings = {"strings", yytext};
143
+ return strings;
144
+ }
145
+
146
+ {CATCHALL} /* ignore */
147
+ %%
148
+
149
/*
 * strip_ends - drop the first and last character of a NUL-terminated string.
 *
 * Used by the BROWSER_STR rule to remove the double quotes surrounding the
 * matched text. The string is modified in place: its final character is
 * overwritten with '\0' and a pointer to its second character is returned,
 * so the result aliases the caller's buffer and must not be freed.
 *
 * The length is measured from the string itself rather than read from the
 * scanner global yyleng, so the helper no longer depends on scanner state.
 * A string shorter than two characters has no ends to strip; it is emptied
 * and returned unchanged instead of being written out of bounds.
 */
char *strip_ends(char *string) {
  size_t length = strlen(string);

  if (length < 2) {
    string[0] = '\0';
    return string;
  }
  string[length - 1] = '\0';
  return string + 1;
}
154
+
155
+ void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
156
+ {
157
+ sprintf(out,
158
+ "%02X%02X%02X%02X"
159
+ "%02X%02X"
160
+ "%02X%02X"
161
+ "%02X%02X"
162
+ "%02X%02X%02X%02X%02X%02X",
163
+ uu[0], uu[1], uu[2], uu[3],
164
+ uu[4], uu[5],
165
+ uu[6], uu[7],
166
+ uu[8], uu[9],
167
+ uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
168
+ }
169
+
170
+ void new_uuid(char *str_ptr){
171
+ uuid_t new_uuid;
172
+ uuid_generate_time(new_uuid);
173
+ uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
174
+ }
175
+
176
+ void raise_error_for_string_too_long(VALUE string){
177
+ if( RSTRING_LEN(string) > 1000000){
178
+ rb_raise(rb_eArgError, "string too long for scan_apache_logs! max length is 1,000,000 chars");
179
+ }
180
+ }
181
+
182
+ /* Scans self, which is expected to be a single line from an Apache error or
183
+ * access log, and returns a Hash of the components of the log message. The
184
+ * following parts of the log message are returned if they are present:
185
+ * IPv4 address, datetime, HTTP Version used, the browser string given by the
186
+ * client, any absolute or relative URLs, the error level, HTTP response code,
187
+ * HTTP Method (verb), and any other uncategorized strings present. */
188
+ VALUE t_scan_apache_logs(VALUE self) {
189
+ KVPAIR kv_result;
190
+ int scan_complete = 0;
191
+ int building_words_to_string = 0;
192
+ VALUE token_hash = rb_hash_new();
193
+
194
+ BEGIN(INITIAL);
195
+
196
+ /* error out on absurdly large strings */
197
+ raise_error_for_string_too_long(self);
198
+ /* {:message => self()} */
199
+ include_message_in_token_hash(self, token_hash);
200
+ /* {:id => UUID} */
201
+ add_uuid_to_token_hash(token_hash);
202
+ yy_scan_string(RSTRING_PTR(self));
203
+ while (scan_complete == 0) {
204
+ kv_result = scan_apache_logs();
205
+ if (kv_result.key == "EOF"){
206
+ scan_complete = 1;
207
+ }
208
+ else if (kv_result.key == "strings"){
209
+ /* build a string until we get a non-word */
210
+ if (building_words_to_string == 0){
211
+ building_words_to_string = 1;
212
+ push_kv_pair_to_hash(kv_result, token_hash);
213
+ }
214
+ else{
215
+ concat_word_to_string(kv_result, token_hash);
216
+ }
217
+ }
218
+ else {
219
+ building_words_to_string = 0;
220
+ push_kv_pair_to_hash(kv_result, token_hash);
221
+ }
222
+ }
223
+ yy_delete_buffer(YY_CURRENT_BUFFER);
224
+ return rb_obj_dup(token_hash);
225
+ }
226
+
227
+ void add_uuid_to_token_hash(VALUE token_hash) {
228
+ char new_uuid_str[33];
229
+ new_uuid(new_uuid_str);
230
+ VALUE hsh_key_id = ID2SYM(rb_intern("id"));
231
+ VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
232
+ rb_hash_aset(token_hash, hsh_key_id, hsh_val_id);
233
+ }
234
+
235
+ void include_message_in_token_hash(VALUE message, VALUE token_hash) {
236
+ /* {:message => self()} */
237
+ VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
238
+ rb_hash_aset(token_hash, hsh_key_msg, message);
239
+ }
240
+
241
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
242
+ char * space = " ";
243
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
244
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
245
+ VALUE string = rb_ary_entry(hsh_value, -1);
246
+ rb_str_cat(string, space, 1);
247
+ rb_str_cat(string, key_value.value, yyleng);
248
+ }
249
+
250
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
251
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
252
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
253
+ VALUE ary_for_token_type = rb_ary_new();
254
+ switch (TYPE(hsh_value)) {
255
+ case T_NIL:
256
+ rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
257
+ rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
258
+ break;
259
+ case T_ARRAY:
260
+ rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
261
+ break;
262
+ }
263
+ }
264
+
265
+ void Init_scan_apache_logs() {
266
+ rb_define_method(rb_cString, "scan_apache_logs", t_scan_apache_logs, 0);
267
+ }