danielsdeleo-teeth 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +11 -0
- data/README.rdoc +107 -10
- data/Rakefile +47 -31
- data/VERSION.yml +4 -0
- data/doc/classes/String.html +182 -0
- data/doc/classes/Teeth/DuplicateDefinitionError.html +113 -0
- data/doc/classes/Teeth/DuplicateRuleError.html +113 -0
- data/doc/classes/Teeth/InvalidDefaultDefinitionName.html +113 -0
- data/doc/classes/Teeth/InvalidExtensionDirectory.html +113 -0
- data/doc/classes/Teeth/RuleStatement.html +291 -0
- data/doc/classes/Teeth/RuleStatementGroup.html +195 -0
- data/doc/classes/Teeth/Scanner.html +535 -0
- data/doc/classes/Teeth/ScannerDefinition.html +253 -0
- data/doc/classes/Teeth/ScannerDefinitionArgumentError.html +113 -0
- data/doc/classes/Teeth/ScannerDefinitionGroup.html +269 -0
- data/doc/classes/Teeth/ScannerError.html +111 -0
- data/doc/classes/Teeth.html +129 -0
- data/doc/created.rid +1 -0
- data/doc/files/README_rdoc.html +314 -0
- data/doc/files/ext/scan_apache_logs/scan_apache_logs_yy_c.html +101 -0
- data/doc/files/ext/scan_rails_logs/scan_rails_logs_yy_c.html +101 -0
- data/doc/files/lib/rule_statement_rb.html +101 -0
- data/doc/files/lib/scanner_definition_rb.html +101 -0
- data/doc/files/lib/scanner_rb.html +108 -0
- data/doc/files/lib/teeth_rb.html +111 -0
- data/doc/fr_class_index.html +39 -0
- data/doc/fr_file_index.html +33 -0
- data/doc/fr_method_index.html +60 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/ext/scan_apache_logs/Makefile +158 -0
- data/ext/scan_apache_logs/extconf.rb +3 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +267 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +8355 -0
- data/ext/scan_rails_logs/Makefile +158 -0
- data/ext/scan_rails_logs/extconf.rb +3 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +376 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11127 -0
- data/lib/rule_statement.rb +61 -0
- data/lib/scanner.rb +98 -0
- data/lib/scanner_definition.rb +116 -0
- data/lib/teeth.rb +5 -1
- data/scanners/scan_apache_logs.rb +27 -0
- data/scanners/scan_rails_logs.rb +70 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec_helper.rb +6 -1
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/{tokenize_apache_spec.rb → scan_apache_spec.rb} +16 -11
- data/spec/unit/scan_rails_logs_spec.rb +90 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +109 -0
- data/teeth.gemspec +31 -0
- data/templates/tokenizer.yy.erb +168 -0
- metadata +60 -15
- data/ext/extconf.rb +0 -4
- data/ext/tokenize_apache_logs.yy +0 -215
- data/ext/tokenize_apache_logs.yy.c +0 -12067
data/doc/rdoc-style.css
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
|
2
|
+
body {
|
3
|
+
font-family: Verdana,Arial,Helvetica,sans-serif;
|
4
|
+
font-size: 90%;
|
5
|
+
margin: 0;
|
6
|
+
margin-left: 40px;
|
7
|
+
padding: 0;
|
8
|
+
background: white;
|
9
|
+
}
|
10
|
+
|
11
|
+
h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
|
12
|
+
h1 { font-size: 150%; }
|
13
|
+
h2,h3,h4 { margin-top: 1em; }
|
14
|
+
|
15
|
+
a { background: #eef; color: #039; text-decoration: none; }
|
16
|
+
a:hover { background: #039; color: #eef; }
|
17
|
+
|
18
|
+
/* Override the base stylesheet's Anchor inside a table cell */
|
19
|
+
td > a {
|
20
|
+
background: transparent;
|
21
|
+
color: #039;
|
22
|
+
text-decoration: none;
|
23
|
+
}
|
24
|
+
|
25
|
+
/* and inside a section title */
|
26
|
+
.section-title > a {
|
27
|
+
background: transparent;
|
28
|
+
color: #eee;
|
29
|
+
text-decoration: none;
|
30
|
+
}
|
31
|
+
|
32
|
+
/* === Structural elements =================================== */
|
33
|
+
|
34
|
+
div#index {
|
35
|
+
margin: 0;
|
36
|
+
margin-left: -40px;
|
37
|
+
padding: 0;
|
38
|
+
font-size: 90%;
|
39
|
+
}
|
40
|
+
|
41
|
+
|
42
|
+
div#index a {
|
43
|
+
margin-left: 0.7em;
|
44
|
+
}
|
45
|
+
|
46
|
+
div#index .section-bar {
|
47
|
+
margin-left: 0px;
|
48
|
+
padding-left: 0.7em;
|
49
|
+
background: #ccc;
|
50
|
+
font-size: small;
|
51
|
+
}
|
52
|
+
|
53
|
+
|
54
|
+
div#classHeader, div#fileHeader {
|
55
|
+
width: auto;
|
56
|
+
color: white;
|
57
|
+
padding: 0.5em 1.5em 0.5em 1.5em;
|
58
|
+
margin: 0;
|
59
|
+
margin-left: -40px;
|
60
|
+
border-bottom: 3px solid #006;
|
61
|
+
}
|
62
|
+
|
63
|
+
div#classHeader a, div#fileHeader a {
|
64
|
+
background: inherit;
|
65
|
+
color: white;
|
66
|
+
}
|
67
|
+
|
68
|
+
div#classHeader td, div#fileHeader td {
|
69
|
+
background: inherit;
|
70
|
+
color: white;
|
71
|
+
}
|
72
|
+
|
73
|
+
|
74
|
+
div#fileHeader {
|
75
|
+
background: #057;
|
76
|
+
}
|
77
|
+
|
78
|
+
div#classHeader {
|
79
|
+
background: #048;
|
80
|
+
}
|
81
|
+
|
82
|
+
|
83
|
+
.class-name-in-header {
|
84
|
+
font-size: 180%;
|
85
|
+
font-weight: bold;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
div#bodyContent {
|
90
|
+
padding: 0 1.5em 0 1.5em;
|
91
|
+
}
|
92
|
+
|
93
|
+
div#description {
|
94
|
+
padding: 0.5em 1.5em;
|
95
|
+
background: #efefef;
|
96
|
+
border: 1px dotted #999;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Heading colour inside the description box.
 * NOTE(review): in this selector list only `h1` is scoped to div#description;
 * `h2`..`h6` match those elements anywhere on the page. Left as written,
 * because narrowing the list would change how headings elsewhere are coloured. */
div#description h1,h2,h3,h4,h5,h6 {
  color: #125;
  background: transparent;
}
|
103
|
+
|
104
|
+
div#validator-badges {
|
105
|
+
text-align: center;
|
106
|
+
}
|
107
|
+
div#validator-badges img { border: 0; }
|
108
|
+
|
109
|
+
div#copyright {
|
110
|
+
color: #333;
|
111
|
+
background: #efefef;
|
112
|
+
font: 0.75em sans-serif;
|
113
|
+
margin-top: 5em;
|
114
|
+
margin-bottom: 0;
|
115
|
+
padding: 0.5em 2em;
|
116
|
+
}
|
117
|
+
|
118
|
+
|
119
|
+
/* === Classes =================================== */
|
120
|
+
|
121
|
+
table.header-table {
|
122
|
+
color: white;
|
123
|
+
font-size: small;
|
124
|
+
}
|
125
|
+
|
126
|
+
.type-note {
|
127
|
+
font-size: small;
|
128
|
+
color: #DEDEDE;
|
129
|
+
}
|
130
|
+
|
131
|
+
.xxsection-bar {
|
132
|
+
background: #eee;
|
133
|
+
color: #333;
|
134
|
+
padding: 3px;
|
135
|
+
}
|
136
|
+
|
137
|
+
.section-bar {
|
138
|
+
color: #333;
|
139
|
+
border-bottom: 1px solid #999;
|
140
|
+
margin-left: -20px;
|
141
|
+
}
|
142
|
+
|
143
|
+
|
144
|
+
.section-title {
|
145
|
+
background: #79a;
|
146
|
+
color: #eee;
|
147
|
+
padding: 3px;
|
148
|
+
margin-top: 2em;
|
149
|
+
margin-left: -30px;
|
150
|
+
border: 1px solid #999;
|
151
|
+
}
|
152
|
+
|
153
|
+
.top-aligned-row { vertical-align: top }
|
154
|
+
.bottom-aligned-row { vertical-align: bottom }
|
155
|
+
|
156
|
+
/* --- Context section classes ----------------------- */
|
157
|
+
|
158
|
+
.context-row { }
|
159
|
+
.context-item-name { font-family: monospace; font-weight: bold; color: black; }
|
160
|
+
.context-item-value { font-size: small; color: #448; }
|
161
|
+
.context-item-desc { color: #333; padding-left: 2em; }
|
162
|
+
|
163
|
+
/* --- Method classes -------------------------- */
|
164
|
+
.method-detail {
|
165
|
+
background: #efefef;
|
166
|
+
padding: 0;
|
167
|
+
margin-top: 0.5em;
|
168
|
+
margin-bottom: 1em;
|
169
|
+
border: 1px dotted #ccc;
|
170
|
+
}
|
171
|
+
.method-heading {
|
172
|
+
color: black;
|
173
|
+
background: #ccc;
|
174
|
+
border-bottom: 1px solid #666;
|
175
|
+
padding: 0.2em 0.5em 0 0.5em;
|
176
|
+
}
|
177
|
+
.method-signature { color: black; background: inherit; }
|
178
|
+
.method-name { font-weight: bold; }
|
179
|
+
.method-args { font-style: italic; }
|
180
|
+
.method-description { padding: 0 0.5em 0 0.5em; }
|
181
|
+
|
182
|
+
/* --- Source code sections -------------------- */
|
183
|
+
|
184
|
+
a.source-toggle { font-size: 90%; }
|
185
|
+
div.method-source-code {
|
186
|
+
background: #262626;
|
187
|
+
color: #ffdead;
|
188
|
+
margin: 1em;
|
189
|
+
padding: 0.5em;
|
190
|
+
border: 1px dashed #999;
|
191
|
+
overflow: hidden;
|
192
|
+
}
|
193
|
+
|
194
|
+
div.method-source-code pre { color: #ffdead; overflow: hidden; }
|
195
|
+
|
196
|
+
/* --- Ruby keyword styles --------------------- */
|
197
|
+
|
198
|
+
.standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
|
199
|
+
|
200
|
+
.ruby-constant { color: #7fffd4; background: transparent; }
|
201
|
+
.ruby-keyword { color: #00ffff; background: transparent; }
|
202
|
+
.ruby-ivar { color: #eedd82; background: transparent; }
|
203
|
+
.ruby-operator { color: #00ffee; background: transparent; }
|
204
|
+
.ruby-identifier { color: #ffdead; background: transparent; }
|
205
|
+
.ruby-node { color: #ffa07a; background: transparent; }
|
206
|
+
.ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
|
207
|
+
.ruby-regexp { color: #ffa07a; background: transparent; }
|
208
|
+
.ruby-value { color: #7fffd4; background: transparent; }
|
@@ -0,0 +1,158 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = ./
|
7
|
+
topdir = /opt/local/lib/ruby/1.8/i686-darwin8.11.1
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
exec_prefix = $(prefix)
|
11
|
+
prefix = $(DESTDIR)/opt/local
|
12
|
+
sharedstatedir = $(prefix)/com
|
13
|
+
mandir = $(DESTDIR)/opt/local/share/man
|
14
|
+
psdir = $(docdir)
|
15
|
+
oldincludedir = $(DESTDIR)/usr/include
|
16
|
+
localedir = $(datarootdir)/locale
|
17
|
+
bindir = $(exec_prefix)/bin
|
18
|
+
libexecdir = $(exec_prefix)/libexec
|
19
|
+
sitedir = $(libdir)/ruby/site_ruby
|
20
|
+
htmldir = $(docdir)
|
21
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
22
|
+
includedir = $(prefix)/include
|
23
|
+
infodir = $(datarootdir)/info
|
24
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
25
|
+
sysconfdir = $(prefix)/etc
|
26
|
+
libdir = $(exec_prefix)/lib
|
27
|
+
sbindir = $(exec_prefix)/sbin
|
28
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
29
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
30
|
+
dvidir = $(docdir)
|
31
|
+
vendordir = $(DESTDIR)/opt/local/lib/ruby/vendor_ruby
|
32
|
+
datarootdir = $(prefix)/share
|
33
|
+
pdfdir = $(docdir)
|
34
|
+
archdir = $(rubylibdir)/$(arch)
|
35
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
36
|
+
datadir = $(datarootdir)
|
37
|
+
localstatedir = $(prefix)/var
|
38
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
39
|
+
|
40
|
+
CC = /usr/bin/gcc-4.0
|
41
|
+
LIBRUBY = $(LIBRUBY_SO)
|
42
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
43
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
44
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
45
|
+
|
46
|
+
RUBY_EXTCONF_H =
|
47
|
+
CFLAGS = -fno-common -O2 -fno-common -pipe -fno-common $(cflags) -Wall
|
48
|
+
INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
|
49
|
+
DEFS =
|
50
|
+
CPPFLAGS = -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
|
51
|
+
CXXFLAGS = $(CFLAGS)
|
52
|
+
ldflags = -L. -L/opt/local/lib
|
53
|
+
dldflags =
|
54
|
+
archflag =
|
55
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
56
|
+
LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
|
57
|
+
AR = ar
|
58
|
+
EXEEXT =
|
59
|
+
|
60
|
+
RUBY_INSTALL_NAME = ruby
|
61
|
+
RUBY_SO_NAME = ruby
|
62
|
+
arch = i686-darwin8.11.1
|
63
|
+
sitearch = i686-darwin8.11.1
|
64
|
+
vendorarch = i686-darwin8.11.1
|
65
|
+
ruby_version = 1.8
|
66
|
+
ruby = /opt/local/bin/ruby
|
67
|
+
RUBY = $(ruby)
|
68
|
+
RM = rm -f
|
69
|
+
MAKEDIRS = mkdir -p
|
70
|
+
INSTALL = /usr/bin/install
|
71
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
72
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
73
|
+
COPY = cp
|
74
|
+
|
75
|
+
#### End of system configuration section. ####
|
76
|
+
|
77
|
+
preload =
|
78
|
+
|
79
|
+
libpath = . $(libdir)
|
80
|
+
LIBPATH = -L. -L$(libdir)
|
81
|
+
DEFFILE =
|
82
|
+
|
83
|
+
CLEANFILES = mkmf.log
|
84
|
+
DISTCLEANFILES =
|
85
|
+
|
86
|
+
extout =
|
87
|
+
extout_prefix =
|
88
|
+
target_prefix = /teeth
|
89
|
+
LOCAL_LIBS =
|
90
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
|
91
|
+
SRCS = scan_apache_logs.yy.c
|
92
|
+
OBJS = scan_apache_logs.yy.o
|
93
|
+
TARGET = scan_apache_logs
|
94
|
+
DLLIB = $(TARGET).bundle
|
95
|
+
EXTSTATIC =
|
96
|
+
STATIC_LIB =
|
97
|
+
|
98
|
+
BINDIR = $(bindir)
|
99
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
100
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
101
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
102
|
+
|
103
|
+
TARGET_SO = $(DLLIB)
|
104
|
+
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
|
105
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
106
|
+
|
107
|
+
all: $(DLLIB)
|
108
|
+
static: $(STATIC_LIB)
|
109
|
+
|
110
|
+
clean:
|
111
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
112
|
+
|
113
|
+
distclean: clean
|
114
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
115
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
116
|
+
|
117
|
+
realclean: distclean
|
118
|
+
install: install-so install-rb
|
119
|
+
|
120
|
+
install-so: $(RUBYARCHDIR)
|
121
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
122
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
123
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
124
|
+
install-rb: pre-install-rb install-rb-default
|
125
|
+
install-rb-default: pre-install-rb-default
|
126
|
+
pre-install-rb: Makefile
|
127
|
+
pre-install-rb-default: Makefile
|
128
|
+
$(RUBYARCHDIR):
|
129
|
+
$(MAKEDIRS) $@
|
130
|
+
|
131
|
+
site-install: site-install-so site-install-rb
|
132
|
+
site-install-so: install-so
|
133
|
+
site-install-rb: install-rb
|
134
|
+
|
135
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
136
|
+
|
137
|
+
.cc.o:
|
138
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
139
|
+
|
140
|
+
.cxx.o:
|
141
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
142
|
+
|
143
|
+
.cpp.o:
|
144
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
145
|
+
|
146
|
+
.C.o:
|
147
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
148
|
+
|
149
|
+
.c.o:
|
150
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
151
|
+
|
152
|
+
$(DLLIB): $(OBJS)
|
153
|
+
@-$(RM) $@
|
154
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
$(OBJS): ruby.h defines.h
|
@@ -0,0 +1,267 @@
|
|
1
|
+
%option prefix="apache_logs_yy"
|
2
|
+
%option full
|
3
|
+
%option never-interactive
|
4
|
+
%option read
|
5
|
+
%option nounput
|
6
|
+
%option noyywrap noreject noyymore nodefault
|
7
|
+
%{
|
8
|
+
#include <ruby.h>
#include <string.h>
#include <uuid/uuid.h>
|
10
|
+
/* Data types */
|
11
|
+
typedef struct {
|
12
|
+
char *key;
|
13
|
+
char *value;
|
14
|
+
} KVPAIR;
|
15
|
+
const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
|
16
|
+
/* prototypes */
|
17
|
+
char *strip_ends(char *);
|
18
|
+
VALUE t_scan_apache_logs(VALUE);
|
19
|
+
void new_uuid(char *str_ptr);
|
20
|
+
void raise_error_for_string_too_long(VALUE string);
|
21
|
+
void include_message_in_token_hash(VALUE message, VALUE token_hash);
|
22
|
+
void add_uuid_to_token_hash(VALUE token_hash);
|
23
|
+
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
|
24
|
+
void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
|
25
|
+
/* Set the scanner name, and return type */
|
26
|
+
#define YY_DECL KVPAIR scan_apache_logs(void)
|
27
|
+
#define yyterminate() return EOF_KVPAIR
|
28
|
+
/* Ruby 1.8 and 1.9 compatibility */
|
29
|
+
#if !defined(RSTRING_LEN)
|
30
|
+
# define RSTRING_LEN(x) (RSTRING(x)->len)
|
31
|
+
# define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
%}
|
35
|
+
|
36
|
+
/* Definitions */
|
37
|
+
|
38
|
+
CATCHALL (.|"\n")
|
39
|
+
|
40
|
+
|
41
|
+
WS [[:space:]]
|
42
|
+
|
43
|
+
NON_WS ([a-z]|[0-9]|[:punct:])
|
44
|
+
|
45
|
+
IP4_OCT [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
46
|
+
|
47
|
+
HOST [a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?
|
48
|
+
|
49
|
+
WDAY mon|tue|wed|thu|fri|sat|sun
|
50
|
+
|
51
|
+
MON jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec
|
52
|
+
|
53
|
+
MONTH_NUM 0[1-9]|1[0-2]
|
54
|
+
|
55
|
+
MDAY 3[0-1]|[1-2][0-9]|0[1-9]
|
56
|
+
|
57
|
+
HOUR 2[0-3]|[0-1][0-9]
|
58
|
+
|
59
|
+
MINSEC [0-5][0-9]|60
|
60
|
+
|
61
|
+
YEAR [0-9][0-9][0-9][0-9]
|
62
|
+
|
63
|
+
PLUSMINUS (\+|\-)
|
64
|
+
|
65
|
+
REL_URL (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*
|
66
|
+
|
67
|
+
PROTO (http:|https:)
|
68
|
+
|
69
|
+
ERR_LVL (emerg|alert|crit|err|error|warn|warning|notice|info|debug)
|
70
|
+
|
71
|
+
/* HTTP protocol version token; the dots are escaped so only a literal '.' matches */
HTTP_VERS HTTP\/(1\.0|1\.1)
|
72
|
+
|
73
|
+
HTTP_VERB (get|head|put|post|delete|trace|connect)
|
74
|
+
|
75
|
+
HTTPCODE (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])
|
76
|
+
|
77
|
+
BROWSER_STR \"(moz|msie|lynx).+\"
|
78
|
+
|
79
|
+
|
80
|
+
%%
|
81
|
+
/*
|
82
|
+
Actions
|
83
|
+
*/
|
84
|
+
|
85
|
+
|
86
|
+
{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
|
87
|
+
KVPAIR ipv4_addr = {"ipv4_addr", yytext};
|
88
|
+
return ipv4_addr;
|
89
|
+
}
|
90
|
+
|
91
|
+
{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR} {
|
92
|
+
KVPAIR apache_err_datetime = {"apache_err_datetime", yytext};
|
93
|
+
return apache_err_datetime;
|
94
|
+
}
|
95
|
+
|
96
|
+
{MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR} {
|
97
|
+
KVPAIR apache_access_datetime = {"apache_access_datetime", yytext};
|
98
|
+
return apache_access_datetime;
|
99
|
+
}
|
100
|
+
|
101
|
+
{HTTP_VERS} {
|
102
|
+
KVPAIR http_version = {"http_version", yytext};
|
103
|
+
return http_version;
|
104
|
+
}
|
105
|
+
|
106
|
+
{BROWSER_STR} {
|
107
|
+
KVPAIR browser_string = {"browser_string", strip_ends(yytext)};
|
108
|
+
return browser_string;
|
109
|
+
}
|
110
|
+
|
111
|
+
{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
|
112
|
+
KVPAIR absolute_url = {"absolute_url", yytext};
|
113
|
+
return absolute_url;
|
114
|
+
}
|
115
|
+
|
116
|
+
{HOST} {
|
117
|
+
KVPAIR host = {"host", yytext};
|
118
|
+
return host;
|
119
|
+
}
|
120
|
+
|
121
|
+
{REL_URL} {
|
122
|
+
KVPAIR relative_url = {"relative_url", yytext};
|
123
|
+
return relative_url;
|
124
|
+
}
|
125
|
+
|
126
|
+
{ERR_LVL} {
|
127
|
+
KVPAIR error_level = {"error_level", yytext};
|
128
|
+
return error_level;
|
129
|
+
}
|
130
|
+
|
131
|
+
{HTTPCODE} {
|
132
|
+
KVPAIR http_response = {"http_response", yytext};
|
133
|
+
return http_response;
|
134
|
+
}
|
135
|
+
|
136
|
+
{HTTP_VERB} {
|
137
|
+
KVPAIR http_method = {"http_method", yytext};
|
138
|
+
return http_method;
|
139
|
+
}
|
140
|
+
|
141
|
+
{NON_WS}{NON_WS}* {
|
142
|
+
KVPAIR strings = {"strings", yytext};
|
143
|
+
return strings;
|
144
|
+
}
|
145
|
+
|
146
|
+
{CATCHALL} /* ignore */
|
147
|
+
%%
|
148
|
+
|
149
|
+
char *strip_ends(char *string) {
|
150
|
+
string[yyleng-1] = '\0';
|
151
|
+
++string;
|
152
|
+
return string;
|
153
|
+
}
|
154
|
+
|
155
|
+
void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
|
156
|
+
{
|
157
|
+
sprintf(out,
|
158
|
+
"%02X%02X%02X%02X"
|
159
|
+
"%02X%02X"
|
160
|
+
"%02X%02X"
|
161
|
+
"%02X%02X"
|
162
|
+
"%02X%02X%02X%02X%02X%02X",
|
163
|
+
uu[0], uu[1], uu[2], uu[3],
|
164
|
+
uu[4], uu[5],
|
165
|
+
uu[6], uu[7],
|
166
|
+
uu[8], uu[9],
|
167
|
+
uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
|
168
|
+
}
|
169
|
+
|
170
|
+
void new_uuid(char *str_ptr){
|
171
|
+
uuid_t new_uuid;
|
172
|
+
uuid_generate_time(new_uuid);
|
173
|
+
uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
|
174
|
+
}
|
175
|
+
|
176
|
+
/* Guards the scanner against oversized input.
 * Raises ArgumentError when RSTRING_LEN(string) is greater than 1,000,000;
 * otherwise returns without side effects. */
void raise_error_for_string_too_long(VALUE string){
  const long max_scannable_length = 1000000;

  if (RSTRING_LEN(string) <= max_scannable_length) {
    return;
  }
  rb_raise(rb_eArgError, "string too long for scan_apache_logs! max length is 1,000,000 chars");
}
|
181
|
+
|
182
|
+
/* Scans self, which is expected to be a single line from an Apache error or
|
183
|
+
* access log, and returns a Hash of the components of the log message. The
|
184
|
+
* following parts of the log message are returned if they are present:
|
185
|
+
* IPv4 address, datetime, HTTP Version used, the browser string given by the
|
186
|
+
* client, any absolute or relative URLs, the error level, HTTP response code,
|
187
|
+
* HTTP Method (verb), and any other uncategorized strings present. */
|
188
|
+
VALUE t_scan_apache_logs(VALUE self) {
|
189
|
+
KVPAIR kv_result;
|
190
|
+
int scan_complete = 0;
|
191
|
+
int building_words_to_string = 0;
|
192
|
+
VALUE token_hash = rb_hash_new();
|
193
|
+
|
194
|
+
BEGIN(INITIAL);
|
195
|
+
|
196
|
+
/* error out on absurdly large strings */
|
197
|
+
raise_error_for_string_too_long(self);
|
198
|
+
/* {:message => self()} */
|
199
|
+
include_message_in_token_hash(self, token_hash);
|
200
|
+
/* {:id => UUID} */
|
201
|
+
add_uuid_to_token_hash(token_hash);
|
202
|
+
yy_scan_string(RSTRING_PTR(self));
|
203
|
+
while (scan_complete == 0) {
|
204
|
+
kv_result = scan_apache_logs();
|
205
|
+
if (kv_result.key == "EOF"){
|
206
|
+
scan_complete = 1;
|
207
|
+
}
|
208
|
+
else if (kv_result.key == "strings"){
|
209
|
+
/* build a string until we get a non-word */
|
210
|
+
if (building_words_to_string == 0){
|
211
|
+
building_words_to_string = 1;
|
212
|
+
push_kv_pair_to_hash(kv_result, token_hash);
|
213
|
+
}
|
214
|
+
else{
|
215
|
+
concat_word_to_string(kv_result, token_hash);
|
216
|
+
}
|
217
|
+
}
|
218
|
+
else {
|
219
|
+
building_words_to_string = 0;
|
220
|
+
push_kv_pair_to_hash(kv_result, token_hash);
|
221
|
+
}
|
222
|
+
}
|
223
|
+
yy_delete_buffer(YY_CURRENT_BUFFER);
|
224
|
+
return rb_obj_dup(token_hash);
|
225
|
+
}
|
226
|
+
|
227
|
+
/* Stores a freshly generated UUID string in `token_hash` under the :id key. */
void add_uuid_to_token_hash(VALUE token_hash) {
  char uuid_text[33];  /* buffer handed to new_uuid() */
  VALUE id_key;
  VALUE id_value;

  new_uuid(uuid_text);
  id_key = ID2SYM(rb_intern("id"));
  id_value = rb_tainted_str_new2(uuid_text);
  rb_hash_aset(token_hash, id_key, id_value);
}
|
234
|
+
|
235
|
+
/* Stores `message` in `token_hash` under the :message key, i.e.
 * token_hash[:message] = message. The value is stored as given, not copied. */
void include_message_in_token_hash(VALUE message, VALUE token_hash) {
  rb_hash_aset(token_hash, ID2SYM(rb_intern("message")), message);
}
|
240
|
+
|
241
|
+
/* Appends " " + key_value.value to the last String stored under
 * key_value.key in `token_hash`, so that a run of consecutive word tokens
 * ends up as one space-separated entry.
 *
 * key_value.value must be NUL-terminated; its length is taken from the
 * string itself rather than from the scanner's global yyleng, so the result
 * does not depend on scanner state at call time.
 *
 * If the key has no Array yet, or the Array's last entry is not a String,
 * the word is handed to push_kv_pair_to_hash() instead of being appended. */
void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
  VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
  VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
  VALUE string;

  if (TYPE(hsh_value) != T_ARRAY) {
    push_kv_pair_to_hash(key_value, token_hash);
    return;
  }

  /* index -1 is the most recently pushed entry */
  string = rb_ary_entry(hsh_value, -1);
  if (TYPE(string) != T_STRING) {
    push_kv_pair_to_hash(key_value, token_hash);
    return;
  }

  rb_str_cat(string, " ", 1);
  rb_str_cat2(string, key_value.value);
}
|
249
|
+
|
250
|
+
/* Records one scanned token in `token_hash`.
 * The key is key_value.key as a Symbol; the value is an Array of Strings.
 * - key absent (nil): a new one-element Array is created and stored;
 * - key holds an Array: key_value.value is appended to it;
 * - key holds anything else: the hash is left untouched.
 * The Array is only allocated when it is actually going to be stored. */
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
  VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
  VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
  VALUE ary_for_token_type;

  switch (TYPE(hsh_value)) {
    case T_NIL:
      ary_for_token_type = rb_ary_new();
      rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
      rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
      break;
    case T_ARRAY:
      rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
      break;
    default:
      /* a non-Array value is already stored under this key; keep it */
      break;
  }
}
|
264
|
+
|
265
|
+
void Init_scan_apache_logs() {
|
266
|
+
rb_define_method(rb_cString, "scan_apache_logs", t_scan_apache_logs, 0);
|
267
|
+
}
|