danielsdeleo-teeth 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
data/doc/rdoc-style.css
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
body {
|
3
|
+
font-family: Verdana,Arial,Helvetica,sans-serif;
|
4
|
+
font-size: 90%;
|
5
|
+
margin: 0;
|
6
|
+
margin-left: 40px;
|
7
|
+
padding: 0;
|
8
|
+
background: white;
|
9
|
+
}
|
10
|
+
|
11
|
+
h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
|
12
|
+
h1 { font-size: 150%; }
|
13
|
+
h2,h3,h4 { margin-top: 1em; }
|
14
|
+
|
15
|
+
a { background: #eef; color: #039; text-decoration: none; }
|
16
|
+
a:hover { background: #039; color: #eef; }
|
17
|
+
|
18
|
+
/* Override the base stylesheet's Anchor inside a table cell */
|
19
|
+
td > a {
|
20
|
+
background: transparent;
|
21
|
+
color: #039;
|
22
|
+
text-decoration: none;
|
23
|
+
}
|
24
|
+
|
25
|
+
/* and inside a section title */
|
26
|
+
.section-title > a {
|
27
|
+
background: transparent;
|
28
|
+
color: #eee;
|
29
|
+
text-decoration: none;
|
30
|
+
}
|
31
|
+
|
32
|
+
/* === Structural elements =================================== */
|
33
|
+
|
34
|
+
div#index {
|
35
|
+
margin: 0;
|
36
|
+
margin-left: -40px;
|
37
|
+
padding: 0;
|
38
|
+
font-size: 90%;
|
39
|
+
}
|
40
|
+
|
41
|
+
|
42
|
+
div#index a {
|
43
|
+
margin-left: 0.7em;
|
44
|
+
}
|
45
|
+
|
46
|
+
div#index .section-bar {
|
47
|
+
margin-left: 0px;
|
48
|
+
padding-left: 0.7em;
|
49
|
+
background: #ccc;
|
50
|
+
font-size: small;
|
51
|
+
}
|
52
|
+
|
53
|
+
|
54
|
+
div#classHeader, div#fileHeader {
|
55
|
+
width: auto;
|
56
|
+
color: white;
|
57
|
+
padding: 0.5em 1.5em 0.5em 1.5em;
|
58
|
+
margin: 0;
|
59
|
+
margin-left: -40px;
|
60
|
+
border-bottom: 3px solid #006;
|
61
|
+
}
|
62
|
+
|
63
|
+
div#classHeader a, div#fileHeader a {
|
64
|
+
background: inherit;
|
65
|
+
color: white;
|
66
|
+
}
|
67
|
+
|
68
|
+
div#classHeader td, div#fileHeader td {
|
69
|
+
background: inherit;
|
70
|
+
color: white;
|
71
|
+
}
|
72
|
+
|
73
|
+
|
74
|
+
div#fileHeader {
|
75
|
+
background: #057;
|
76
|
+
}
|
77
|
+
|
78
|
+
div#classHeader {
|
79
|
+
background: #048;
|
80
|
+
}
|
81
|
+
|
82
|
+
|
83
|
+
.class-name-in-header {
|
84
|
+
font-size: 180%;
|
85
|
+
font-weight: bold;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
div#bodyContent {
|
90
|
+
padding: 0 1.5em 0 1.5em;
|
91
|
+
}
|
92
|
+
|
93
|
+
div#description {
|
94
|
+
padding: 0.5em 1.5em;
|
95
|
+
background: #efefef;
|
96
|
+
border: 1px dotted #999;
|
97
|
+
}
|
98
|
+
|
99
|
+
div#description h1,h2,h3,h4,h5,h6 {
|
100
|
+
color: #125;;
|
101
|
+
background: transparent;
|
102
|
+
}
|
103
|
+
|
104
|
+
div#validator-badges {
|
105
|
+
text-align: center;
|
106
|
+
}
|
107
|
+
div#validator-badges img { border: 0; }
|
108
|
+
|
109
|
+
div#copyright {
|
110
|
+
color: #333;
|
111
|
+
background: #efefef;
|
112
|
+
font: 0.75em sans-serif;
|
113
|
+
margin-top: 5em;
|
114
|
+
margin-bottom: 0;
|
115
|
+
padding: 0.5em 2em;
|
116
|
+
}
|
117
|
+
|
118
|
+
|
119
|
+
/* === Classes =================================== */
|
120
|
+
|
121
|
+
table.header-table {
|
122
|
+
color: white;
|
123
|
+
font-size: small;
|
124
|
+
}
|
125
|
+
|
126
|
+
.type-note {
|
127
|
+
font-size: small;
|
128
|
+
color: #DEDEDE;
|
129
|
+
}
|
130
|
+
|
131
|
+
.xxsection-bar {
|
132
|
+
background: #eee;
|
133
|
+
color: #333;
|
134
|
+
padding: 3px;
|
135
|
+
}
|
136
|
+
|
137
|
+
.section-bar {
|
138
|
+
color: #333;
|
139
|
+
border-bottom: 1px solid #999;
|
140
|
+
margin-left: -20px;
|
141
|
+
}
|
142
|
+
|
143
|
+
|
144
|
+
.section-title {
|
145
|
+
background: #79a;
|
146
|
+
color: #eee;
|
147
|
+
padding: 3px;
|
148
|
+
margin-top: 2em;
|
149
|
+
margin-left: -30px;
|
150
|
+
border: 1px solid #999;
|
151
|
+
}
|
152
|
+
|
153
|
+
.top-aligned-row { vertical-align: top }
|
154
|
+
.bottom-aligned-row { vertical-align: bottom }
|
155
|
+
|
156
|
+
/* --- Context section classes ----------------------- */
|
157
|
+
|
158
|
+
.context-row { }
|
159
|
+
.context-item-name { font-family: monospace; font-weight: bold; color: black; }
|
160
|
+
.context-item-value { font-size: small; color: #448; }
|
161
|
+
.context-item-desc { color: #333; padding-left: 2em; }
|
162
|
+
|
163
|
+
/* --- Method classes -------------------------- */
|
164
|
+
.method-detail {
|
165
|
+
background: #efefef;
|
166
|
+
padding: 0;
|
167
|
+
margin-top: 0.5em;
|
168
|
+
margin-bottom: 1em;
|
169
|
+
border: 1px dotted #ccc;
|
170
|
+
}
|
171
|
+
.method-heading {
|
172
|
+
color: black;
|
173
|
+
background: #ccc;
|
174
|
+
border-bottom: 1px solid #666;
|
175
|
+
padding: 0.2em 0.5em 0 0.5em;
|
176
|
+
}
|
177
|
+
.method-signature { color: black; background: inherit; }
|
178
|
+
.method-name { font-weight: bold; }
|
179
|
+
.method-args { font-style: italic; }
|
180
|
+
.method-description { padding: 0 0.5em 0 0.5em; }
|
181
|
+
|
182
|
+
/* --- Source code sections -------------------- */
|
183
|
+
|
184
|
+
a.source-toggle { font-size: 90%; }
|
185
|
+
div.method-source-code {
|
186
|
+
background: #262626;
|
187
|
+
color: #ffdead;
|
188
|
+
margin: 1em;
|
189
|
+
padding: 0.5em;
|
190
|
+
border: 1px dashed #999;
|
191
|
+
overflow: hidden;
|
192
|
+
}
|
193
|
+
|
194
|
+
div.method-source-code pre { color: #ffdead; overflow: hidden; }
|
195
|
+
|
196
|
+
/* --- Ruby keyword styles --------------------- */
|
197
|
+
|
198
|
+
.standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
|
199
|
+
|
200
|
+
.ruby-constant { color: #7fffd4; background: transparent; }
|
201
|
+
.ruby-keyword { color: #00ffff; background: transparent; }
|
202
|
+
.ruby-ivar { color: #eedd82; background: transparent; }
|
203
|
+
.ruby-operator { color: #00ffee; background: transparent; }
|
204
|
+
.ruby-identifier { color: #ffdead; background: transparent; }
|
205
|
+
.ruby-node { color: #ffa07a; background: transparent; }
|
206
|
+
.ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
|
207
|
+
.ruby-regexp { color: #ffa07a; background: transparent; }
|
208
|
+
.ruby-value { color: #7fffd4; background: transparent; }
|
@@ -0,0 +1,158 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = ./
|
7
|
+
topdir = /opt/local/lib/ruby/1.8/i686-darwin8.11.1
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
exec_prefix = $(prefix)
|
11
|
+
prefix = $(DESTDIR)/opt/local
|
12
|
+
sharedstatedir = $(prefix)/com
|
13
|
+
mandir = $(DESTDIR)/opt/local/share/man
|
14
|
+
psdir = $(docdir)
|
15
|
+
oldincludedir = $(DESTDIR)/usr/include
|
16
|
+
localedir = $(datarootdir)/locale
|
17
|
+
bindir = $(exec_prefix)/bin
|
18
|
+
libexecdir = $(exec_prefix)/libexec
|
19
|
+
sitedir = $(libdir)/ruby/site_ruby
|
20
|
+
htmldir = $(docdir)
|
21
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
22
|
+
includedir = $(prefix)/include
|
23
|
+
infodir = $(datarootdir)/info
|
24
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
25
|
+
sysconfdir = $(prefix)/etc
|
26
|
+
libdir = $(exec_prefix)/lib
|
27
|
+
sbindir = $(exec_prefix)/sbin
|
28
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
29
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
30
|
+
dvidir = $(docdir)
|
31
|
+
vendordir = $(DESTDIR)/opt/local/lib/ruby/vendor_ruby
|
32
|
+
datarootdir = $(prefix)/share
|
33
|
+
pdfdir = $(docdir)
|
34
|
+
archdir = $(rubylibdir)/$(arch)
|
35
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
36
|
+
datadir = $(datarootdir)
|
37
|
+
localstatedir = $(prefix)/var
|
38
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
39
|
+
|
40
|
+
CC = /usr/bin/gcc-4.0
|
41
|
+
LIBRUBY = $(LIBRUBY_SO)
|
42
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
43
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
44
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
45
|
+
|
46
|
+
RUBY_EXTCONF_H =
|
47
|
+
CFLAGS = -fno-common -O2 -fno-common -pipe -fno-common $(cflags) -Wall
|
48
|
+
INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
|
49
|
+
DEFS =
|
50
|
+
CPPFLAGS = -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
|
51
|
+
CXXFLAGS = $(CFLAGS)
|
52
|
+
ldflags = -L. -L/opt/local/lib
|
53
|
+
dldflags =
|
54
|
+
archflag =
|
55
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
56
|
+
LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
|
57
|
+
AR = ar
|
58
|
+
EXEEXT =
|
59
|
+
|
60
|
+
RUBY_INSTALL_NAME = ruby
|
61
|
+
RUBY_SO_NAME = ruby
|
62
|
+
arch = i686-darwin8.11.1
|
63
|
+
sitearch = i686-darwin8.11.1
|
64
|
+
vendorarch = i686-darwin8.11.1
|
65
|
+
ruby_version = 1.8
|
66
|
+
ruby = /opt/local/bin/ruby
|
67
|
+
RUBY = $(ruby)
|
68
|
+
RM = rm -f
|
69
|
+
MAKEDIRS = mkdir -p
|
70
|
+
INSTALL = /usr/bin/install
|
71
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
72
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
73
|
+
COPY = cp
|
74
|
+
|
75
|
+
#### End of system configuration section. ####
|
76
|
+
|
77
|
+
preload =
|
78
|
+
|
79
|
+
libpath = . $(libdir)
|
80
|
+
LIBPATH = -L. -L$(libdir)
|
81
|
+
DEFFILE =
|
82
|
+
|
83
|
+
CLEANFILES = mkmf.log
|
84
|
+
DISTCLEANFILES =
|
85
|
+
|
86
|
+
extout =
|
87
|
+
extout_prefix =
|
88
|
+
target_prefix = /teeth
|
89
|
+
LOCAL_LIBS =
|
90
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
|
91
|
+
SRCS = scan_apache_logs.yy.c
|
92
|
+
OBJS = scan_apache_logs.yy.o
|
93
|
+
TARGET = scan_apache_logs
|
94
|
+
DLLIB = $(TARGET).bundle
|
95
|
+
EXTSTATIC =
|
96
|
+
STATIC_LIB =
|
97
|
+
|
98
|
+
BINDIR = $(bindir)
|
99
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
100
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
101
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
102
|
+
|
103
|
+
TARGET_SO = $(DLLIB)
|
104
|
+
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
|
105
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
106
|
+
|
107
|
+
all: $(DLLIB)
|
108
|
+
static: $(STATIC_LIB)
|
109
|
+
|
110
|
+
clean:
|
111
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
112
|
+
|
113
|
+
distclean: clean
|
114
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
115
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
116
|
+
|
117
|
+
realclean: distclean
|
118
|
+
install: install-so install-rb
|
119
|
+
|
120
|
+
install-so: $(RUBYARCHDIR)
|
121
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
122
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
123
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
124
|
+
install-rb: pre-install-rb install-rb-default
|
125
|
+
install-rb-default: pre-install-rb-default
|
126
|
+
pre-install-rb: Makefile
|
127
|
+
pre-install-rb-default: Makefile
|
128
|
+
$(RUBYARCHDIR):
|
129
|
+
$(MAKEDIRS) $@
|
130
|
+
|
131
|
+
site-install: site-install-so site-install-rb
|
132
|
+
site-install-so: install-so
|
133
|
+
site-install-rb: install-rb
|
134
|
+
|
135
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
136
|
+
|
137
|
+
.cc.o:
|
138
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
139
|
+
|
140
|
+
.cxx.o:
|
141
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
142
|
+
|
143
|
+
.cpp.o:
|
144
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
145
|
+
|
146
|
+
.C.o:
|
147
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
148
|
+
|
149
|
+
.c.o:
|
150
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
151
|
+
|
152
|
+
$(DLLIB): $(OBJS)
|
153
|
+
@-$(RM) $@
|
154
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
$(OBJS): ruby.h defines.h
|
@@ -0,0 +1,267 @@
|
|
1
|
+
%option prefix="apache_logs_yy"
|
2
|
+
%option full
|
3
|
+
%option never-interactive
|
4
|
+
%option read
|
5
|
+
%option nounput
|
6
|
+
%option noyywrap noreject noyymore nodefault
|
7
|
+
%{
|
8
|
+
#include <ruby.h>
|
9
|
+
#include <uuid/uuid.h>
|
10
|
+
/* Data types */
|
11
|
+
typedef struct {
|
12
|
+
char *key;
|
13
|
+
char *value;
|
14
|
+
} KVPAIR;
|
15
|
+
const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
|
16
|
+
/* prototypes */
|
17
|
+
char *strip_ends(char *);
|
18
|
+
VALUE t_scan_apache_logs(VALUE);
|
19
|
+
void new_uuid(char *str_ptr);
|
20
|
+
void raise_error_for_string_too_long(VALUE string);
|
21
|
+
void include_message_in_token_hash(VALUE message, VALUE token_hash);
|
22
|
+
void add_uuid_to_token_hash(VALUE token_hash);
|
23
|
+
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
|
24
|
+
void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
|
25
|
+
/* Set the scanner name, and return type */
|
26
|
+
#define YY_DECL KVPAIR scan_apache_logs(void)
|
27
|
+
#define yyterminate() return EOF_KVPAIR
|
28
|
+
/* Ruby 1.8 and 1.9 compatibility */
|
29
|
+
#if !defined(RSTRING_LEN)
|
30
|
+
# define RSTRING_LEN(x) (RSTRING(x)->len)
|
31
|
+
# define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
%}
|
35
|
+
|
36
|
+
/* Definitions */
|
37
|
+
|
38
|
+
CATCHALL (.|"\n")
|
39
|
+
|
40
|
+
|
41
|
+
WS [[:space:]]
|
42
|
+
|
43
|
+
NON_WS ([a-z]|[0-9]|[:punct:])
|
44
|
+
|
45
|
+
IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
46
|
+
|
47
|
+
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
|
48
|
+
|
49
|
+
WDAY mon|tue|wed|thu|fri|sat|sun
|
50
|
+
|
51
|
+
MON jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec
|
52
|
+
|
53
|
+
MONTH_NUM 0[1-9]|1[0-2]
|
54
|
+
|
55
|
+
MDAY 3[0-1]|[1-2][0-9]|0[1-9]
|
56
|
+
|
57
|
+
HOUR 2[0-3]|[0-1][0-9]
|
58
|
+
|
59
|
+
MINSEC [0-5][0-9]|60
|
60
|
+
|
61
|
+
YEAR [0-9][0-9][0-9][0-9]
|
62
|
+
|
63
|
+
PLUSMINUS (\+|\-)
|
64
|
+
|
65
|
+
REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
|
66
|
+
|
67
|
+
PROTO (http:|https:)
|
68
|
+
|
69
|
+
ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
|
70
|
+
|
71
|
+
HTTP_VERS HTTP\/(1.0|1.1)
|
72
|
+
|
73
|
+
HTTP_VERB (get|head|put|post|delete|trace|connect)
|
74
|
+
|
75
|
+
HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
|
76
|
+
|
77
|
+
BROWSER_STR \"(moz|msie|lynx).+\"
|
78
|
+
|
79
|
+
|
80
|
+
%%
|
81
|
+
/*
|
82
|
+
Actions
|
83
|
+
*/
|
84
|
+
|
85
|
+
|
86
|
+
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
|
87
|
+
KVPAIR ipv4_addr = {"ipv4_addr", yytext};
|
88
|
+
return ipv4_addr;
|
89
|
+
}
|
90
|
+
|
91
|
+
{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR} {
|
92
|
+
KVPAIR apache_err_datetime = {"apache_err_datetime", yytext};
|
93
|
+
return apache_err_datetime;
|
94
|
+
}
|
95
|
+
|
96
|
+
{MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR} {
|
97
|
+
KVPAIR apache_access_datetime = {"apache_access_datetime", yytext};
|
98
|
+
return apache_access_datetime;
|
99
|
+
}
|
100
|
+
|
101
|
+
{HTTP_VERS} {
|
102
|
+
KVPAIR http_version = {"http_version", yytext};
|
103
|
+
return http_version;
|
104
|
+
}
|
105
|
+
|
106
|
+
{BROWSER_STR} {
|
107
|
+
KVPAIR browser_string = {"browser_string", strip_ends(yytext)};
|
108
|
+
return browser_string;
|
109
|
+
}
|
110
|
+
|
111
|
+
{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
|
112
|
+
KVPAIR absolute_url = {"absolute_url", yytext};
|
113
|
+
return absolute_url;
|
114
|
+
}
|
115
|
+
|
116
|
+
{HOST} {
|
117
|
+
KVPAIR host = {"host", yytext};
|
118
|
+
return host;
|
119
|
+
}
|
120
|
+
|
121
|
+
{REL_URL} {
|
122
|
+
KVPAIR relative_url = {"relative_url", yytext};
|
123
|
+
return relative_url;
|
124
|
+
}
|
125
|
+
|
126
|
+
{ERR_LVL} {
|
127
|
+
KVPAIR error_level = {"error_level", yytext};
|
128
|
+
return error_level;
|
129
|
+
}
|
130
|
+
|
131
|
+
{HTTPCODE} {
|
132
|
+
KVPAIR http_response = {"http_response", yytext};
|
133
|
+
return http_response;
|
134
|
+
}
|
135
|
+
|
136
|
+
{HTTP_VERB} {
|
137
|
+
KVPAIR http_method = {"http_method", yytext};
|
138
|
+
return http_method;
|
139
|
+
}
|
140
|
+
|
141
|
+
{NON_WS}{NON_WS}* {
|
142
|
+
KVPAIR strings = {"strings", yytext};
|
143
|
+
return strings;
|
144
|
+
}
|
145
|
+
|
146
|
+
{CATCHALL} /* ignore */
|
147
|
+
%%
|
148
|
+
|
149
|
+
/* Strips the first and last character of string, in place. It is used on a
 * matched BROWSER_STR token to drop the surrounding double quotes.
 *
 * The last character is overwritten with a NUL terminator and a pointer to
 * the second character is returned, so the result aliases the caller's
 * buffer and nothing is allocated.
 *
 * The length is taken from the argument itself rather than from the
 * scanner's global yyleng, so the function is correct for any NUL-terminated
 * buffer and not only for the current yytext. A string shorter than two
 * characters has no pair of ends to strip and yields the empty string.
 */
char *strip_ends(char *string) {
  size_t len = strlen(string);

  if (len < 2) {
    /* nothing lies between the ends; never write to string[-1] */
    string[0] = '\0';
    return string;
  }

  string[len - 1] = '\0';
  return string + 1;
}
|
154
|
+
|
155
|
+
/* Writes the 16 bytes of uu to out as 32 uppercase hexadecimal digits with
 * no dash separators, followed by the NUL terminator sprintf appends.
 * out must therefore have room for at least 33 chars. */
void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
{
  const unsigned char *b = uu;

  sprintf(out,
          "%02X%02X%02X%02X%02X%02X%02X%02X"
          "%02X%02X%02X%02X%02X%02X%02X%02X",
          b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
          b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
}
|
169
|
+
|
170
|
+
void new_uuid(char *str_ptr){
|
171
|
+
uuid_t new_uuid;
|
172
|
+
uuid_generate_time(new_uuid);
|
173
|
+
uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
|
174
|
+
}
|
175
|
+
|
176
|
+
/* Raises ArgumentError when string is longer than 1,000,000 chars;
 * otherwise does nothing. */
void raise_error_for_string_too_long(VALUE string){
  if (RSTRING_LEN(string) <= 1000000) {
    return;
  }
  rb_raise(rb_eArgError, "string too long for scan_apache_logs! max length is 1,000,000 chars");
}
|
181
|
+
|
182
|
+
/* Scans self, which is expected to be a single line from an Apache error or
|
183
|
+
* access log, and returns a Hash of the components of the log message. The
|
184
|
+
* following parts of the log message are returned if they are present:
|
185
|
+
* IPv4 address, datetime, HTTP Version used, the browser string given by the
|
186
|
+
* client, any absolute or relative URLs, the error level, HTTP response code,
|
187
|
+
* HTTP Method (verb), and any other uncategorized strings present. */
|
188
|
+
VALUE t_scan_apache_logs(VALUE self) {
|
189
|
+
KVPAIR kv_result;
|
190
|
+
int scan_complete = 0;
|
191
|
+
int building_words_to_string = 0;
|
192
|
+
VALUE token_hash = rb_hash_new();
|
193
|
+
|
194
|
+
BEGIN(INITIAL);
|
195
|
+
|
196
|
+
/* error out on absurdly large strings */
|
197
|
+
raise_error_for_string_too_long(self);
|
198
|
+
/* {:message => self()} */
|
199
|
+
include_message_in_token_hash(self, token_hash);
|
200
|
+
/* {:id => UUID} */
|
201
|
+
add_uuid_to_token_hash(token_hash);
|
202
|
+
yy_scan_string(RSTRING_PTR(self));
|
203
|
+
while (scan_complete == 0) {
|
204
|
+
kv_result = scan_apache_logs();
|
205
|
+
if (kv_result.key == "EOF"){
|
206
|
+
scan_complete = 1;
|
207
|
+
}
|
208
|
+
else if (kv_result.key == "strings"){
|
209
|
+
/* build a string until we get a non-word */
|
210
|
+
if (building_words_to_string == 0){
|
211
|
+
building_words_to_string = 1;
|
212
|
+
push_kv_pair_to_hash(kv_result, token_hash);
|
213
|
+
}
|
214
|
+
else{
|
215
|
+
concat_word_to_string(kv_result, token_hash);
|
216
|
+
}
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
building_words_to_string = 0;
|
220
|
+
push_kv_pair_to_hash(kv_result, token_hash);
|
221
|
+
}
|
222
|
+
}
|
223
|
+
yy_delete_buffer(YY_CURRENT_BUFFER);
|
224
|
+
return rb_obj_dup(token_hash);
|
225
|
+
}
|
226
|
+
|
227
|
+
/* Stores a freshly generated UUID string in token_hash under the :id key.
 * The buffer holds the 32 hex digits written by new_uuid() plus the NUL. */
void add_uuid_to_token_hash(VALUE token_hash) {
  char uuid_hex[33];
  VALUE uuid_str;

  new_uuid(uuid_hex);
  uuid_str = rb_tainted_str_new2(uuid_hex);
  rb_hash_aset(token_hash, ID2SYM(rb_intern("id")), uuid_str);
}
|
234
|
+
|
235
|
+
/* Stores message in token_hash under the :message key,
 * i.e. {:message => message}. */
void include_message_in_token_hash(VALUE message, VALUE token_hash) {
  rb_hash_aset(token_hash, ID2SYM(rb_intern("message")), message);
}
|
240
|
+
|
241
|
+
/* Appends a space and key_value.value to the last String in the Array stored
 * in token_hash under key_value.key.
 *
 * The caller must already have pushed at least one entry for that key (see
 * push_kv_pair_to_hash); this function does not create the Array.
 *
 * The number of bytes appended is the length of key_value.value itself, not
 * the scanner's global yyleng, so the result does not depend on scanner
 * state at the time of the call. */
void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
  VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
  VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
  VALUE string = rb_ary_entry(hsh_value, -1);

  rb_str_cat(string, " ", 1);
  rb_str_cat(string, key_value.value, (long)strlen(key_value.value));
}
|
249
|
+
|
250
|
+
/* Records key_value.value in token_hash under the Symbol for key_value.key.
 *
 * Token values are collected in an Array per key. The first value for a key
 * creates the Array, and later values are pushed onto it. The Array is only
 * allocated when it is needed, rather than on every call.
 *
 * If the key already holds something that is neither nil nor an Array, the
 * hash is left untouched. */
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
  VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
  VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);

  switch (TYPE(hsh_value)) {
    case T_NIL: {
      /* first token of this type: start a new Array for it */
      VALUE ary_for_token_type = rb_ary_new();
      rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
      rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
      break;
    }
    case T_ARRAY:
      rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
      break;
    default:
      /* unexpected value type under this key: leave it alone */
      break;
  }
}
|
264
|
+
|
265
|
+
void Init_scan_apache_logs() {
|
266
|
+
rb_define_method(rb_cString, "scan_apache_logs", t_scan_apache_logs, 0);
|
267
|
+
}
|