teeth 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,11 @@
1
+ Copyright (c) 2009, Daniel Stephen DeLeo
2
+ Portions Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
+
7
+ Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8
+ Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
+ Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10
+
11
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,123 @@
1
+ = Teeth
2
+ Teeth is a library for fast parsing of log files such as Apache access and error logs. It uses C extensions generated by flex[http://flex.sourceforge.net/index.html] (as in Flex and Bison). If you only want to use the built-in scanners, you don't need flex. If you want to add support for new/different log formats, you'll need to have flex installed.
3
+
4
+ = Example
5
+ require "teeth"
6
+
7
+ access_log = %q{myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] "GET /favicon.ico HTTP/1.1" 404 241 "http://172.16.115.130/" "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"}
8
+ access_log.scan_apache_logs
9
+ => {:strings=>["241"],
10
+ :apache_access_datetime=>["13/Dec/2008:19:26:11 -0500"],
11
+ :absolute_url=>["http://172.16.115.130/"],
12
+ :message=>"myhost.localdomain:80 172.16.115.1 - - [13/Dec/2008:19:26:11 -0500] \"GET /favicon.ico HTTP/1.1\" 404 241 \"http://172.16.115.130/\" \"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1\"",
13
+ :http_method=>["GET"],
14
+ :browser_string=>["Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1"],
15
+ :relative_url=>["/favicon.ico"],
16
+ :http_version=>["HTTP/1.1"],
17
+ :host=>["myhost.localdomain:80"],
18
+ :id=>"8AD5CBCC1CB011DE8CE10017F22FF48F",
19
+ :http_response=>["404"],
20
+ :ipv4_addr=>["172.16.115.1"]}
21
+
22
+ = Supported Log Formats
23
+ * Apache (access and error logs)
24
+ * Rails
25
+
26
+ Support for other web servers, app servers, and applications as well as other types of servers (e.g., SMTP, etc.) and generic syslog logs is planned for the future.
27
+
28
+ == Creating Your Own Scanners
29
+ Teeth includes a library that can generate a flex scanner definition using a simplified definition written in ruby. This cuts down on the repetition involved in writing all the C code by hand. The included scanners for Apache and Rails logs are defined this way. You can find them in the scanners directory.
30
+
31
+ Here's an example based on the definition for the Rails log scanner:
32
+
33
+ require File.dirname(__FILE__) + "/../lib/teeth"
34
+ scanner = Teeth::Scanner.new(:rails_logs, File.dirname(__FILE__) + '/../ext/scan_rails_logs/')
35
+
36
+ Flex definitions are somewhat like macros for regular expressions.
37
+ We include some of the available defaults here to make writing
38
+ the scanner easier.
39
+
40
+ scanner.load_default_definitions_for(:whitespace, :ip, :time, :web)
41
+
42
+ Add some more definitions
43
+ scanner.definitions do |define|
44
+ define.RAILS_TEASER '(processing|filter\ chain\ halted|rendered)'
45
+ define.CONTROLLER_ACTION '[a-z0-9]+#[a-z0-9]+'
46
+
47
+ Scanner is case insensitive
48
+ define.RAILS_ERROR_CLASS '([a-z]+\:\:)*[a-z]+error'
49
+
50
+ "start conditions" are a feature of flex that allows
51
+ us to have some regular expressions that are only active
52
+ when we tell the scanner to enter a certain state. Here
53
+ we define the ``REQUEST_COMPLETED'' state, and specify
54
+ that it is exclusive. This means that if the scanner is
55
+ in this state, it only matches rules written for this state
56
+ define.REQUEST_COMPLETED :start_condition => :exclusive
57
+ end
58
+
59
+ Define rules. These are the actions that the scanner executes when
60
+ it sees text that matches a regular expression. The default action
61
+ is to add :action_name => [matched_text] to the results Hash, or push
62
+ the matched text on the end of the array if it already exists.
63
+ scanner.rules do |r|
64
+
65
+ This will add something like :teaser => ["Processing"] to the results
66
+ r.teaser '{RAILS_TEASER}'
67
+ r.controller_action '{CONTROLLER_ACTION}'
68
+
69
+ Use some of the default definitions we added above.
70
+ r.datetime '{YEAR}"-"{MONTH_NUM}"-"{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}'
71
+ r.http_method '{HTTP_VERB}'
72
+
73
+ With :skip_line => true, scanner stops processing the line immediately
74
+ r.skip_lines '{RAILS_SKIP_LINES}', :skip_line => true
75
+ r.error '{RAILS_ERROR_CLASS}'
76
+
77
+ With :strip_ends => true, the scanner removes the first and last characters from the matched text
78
+ r.error_message '\(({WS}|{NON_WS})+\)', :strip_ends => true
79
+
80
+ Puts scanner in the ``REQUEST_COMPLETED'' state we defined above.
81
+ The scanner only matches rules beginning with ``<REQUEST_COMPLETED>''
82
+ now
83
+ r.teaser 'completed\ in', :begin => "REQUEST_COMPLETED"
84
+
85
+ These rules only apply to the ``REQUEST_COMPLETED'' State
86
+ r.duration_s '<REQUEST_COMPLETED>[0-9]+\.[0-9]+'
87
+ r.duration_ms '<REQUEST_COMPLETED>[0-9]+/ms'
88
+ r.http_response '<REQUEST_COMPLETED>{HTTPCODE}'
89
+
90
+ Need a "catchall" rule -- flex scanner "jams" if there isn't a default rule (the
91
+ catchall rule for the default/INITIAL state is automatically included).
92
+ Note that :ignore => true makes the scanner ignore what it matches but doesn't
93
+ stop processing of the line.
94
+ r.ignore_others '<REQUEST_COMPLETED>{CATCHALL}', :ignore => true
95
+
96
+ The "strings" action is special. It keeps track of whether the last token
97
+ was also a string, and if it was, the new string is appended to the
98
+ last string instead of being pushed to the array. For example, when scanning
99
+ an apache error log, ``Invalid URI in request'' will be extracted as a complete
100
+ string (instead of ["Invalid", "URI", "in", "request"])
101
+ r.strings '{NON_WS}{NON_WS}*'
102
+ end
103
+
104
+ Writes the generated scanner and an extconf.rb for it to the directory
105
+ we specified when we initialized the scanner.
106
+ scanner.write!
107
+
108
+ There's not much in the way of documentation for the scanner generator, but you can
109
+ refer to the specs and the definitions for Apache and Rails logs to get a sense
110
+ of how it works. It would probably help to learn about flex's regex syntax
111
+ and other features.
112
+
113
+ = Ruby 1.9
114
+ Ruby 1.9 is supported on the master branch. Don't use the ruby1.9 branch; it is orphaned.
115
+
116
+ = Shortcomings and Known Issues
117
+ In addition to the lack of support for formats other than Apache and Rails described above:
118
+ * It's a new project, lots of API changes
119
+ * Does not convert datetimes to Ruby Time objects
120
+ * Does not always use context or knowledge of the log format to its advantage. This is improving now that the scanner can utilize start conditions.
121
+
122
+ = Performance
123
+ On my laptop, a white MacBook 2.0 GHz Intel Core Duo, teeth can process more than 30k lines of Apache access logs per second. So it's pretty fast. If modified to not create a UUID or keep the full message, this can be increased to around 45k lines/sec. One could potentially do pretty well on the wide_finder2[http://www.tbray.org/ongoing/When/200x/2008/05/01/Wide-Finder-2]...
@@ -0,0 +1,112 @@
1
+ require "spec/rake/spectask"
2
+ require "rake/clean"
3
+ require "rake/rdoctask"
4
+
5
# Default spec run, using the options stored in spec/spec.opts.
# fail_on_error is switched off, so the task is configured not to fail
# when examples fail.
desc "Run all of the specs"
Spec::Rake::SpecTask.new do |spec_task|
  spec_task.fail_on_error = false
  spec_task.spec_opts = ['--options', "\"spec/spec.opts\""]
end

task :default => :spec

namespace :spec do
  # spec:report runs the examples under failing_examples/ and writes the
  # result as HTML to doc/tools/reports/failing_examples.html.
  desc "Generate HTML report for failing examples"
  Spec::Rake::SpecTask.new('report') do |report_task|
    report_task.fail_on_error = false
    report_task.spec_files = FileList['failing_examples/**/*.rb']
    report_task.spec_opts = [
      "--format", "html:doc/tools/reports/failing_examples.html",
      "--diff",
      '--options', '"spec/spec.opts"'
    ]
  end
end
23
+
24
# Builds the RDoc output into doc/, using README.rdoc as the main page and
# pulling in the library files plus the flex-generated C sources.
Rake::RDocTask.new do |rdoc|
  rdoc.main = "README.rdoc"
  rdoc.rdoc_dir = "doc"
  rdoc.rdoc_files.include("README.rdoc", "lib/*", "ext/*/*.yy.c")
end
29
+
30
# Gem packaging tasks are provided by Jeweler. Jeweler is optional: if it
# cannot be loaded we print an install hint and carry on, so the remaining
# tasks in this Rakefile stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = 'teeth'
    s.summary = 'Fast log file parsing in Ruby'
    s.description = s.summary
    s.email = 'ddeleo@basecommander.net'
    s.homepage = "http://github.com/danielsdeleo/teeth"
    s.platform = Gem::Platform::RUBY
    s.has_rdoc = true
    s.extra_rdoc_files = ["README.rdoc"]
    # require_paths (plural) is the attribute that takes an Array. The
    # singular require_path= is meant for a single String, and handing it an
    # Array can produce a nested list on older RubyGems.
    s.require_paths = ["lib"]
    s.authors = ["Daniel DeLeo"]
    s.extensions = ["ext/scan_apache_logs/extconf.rb", "ext/scan_rails_logs/extconf.rb"]

    # Everything tracked by git, minus any path that contains "git"
    # (.gitignore and friends). To preview the list:
    # ruby -rpp -e' pp `git ls-files`.split("\n") '
    s.files = `git ls-files`.split("\n").reject {|f| f =~ /git/}
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
51
+
52
# Prints the same file list the gemspec uses (git-tracked files whose path
# does not contain "git"), pretty-printed by a child ruby process.
desc "outputs a list of files suitable for use with the gemspec"
task :list_files do
  sh(%q{ruby -rpp -e' pp `git ls-files`.split("\n").reject {|f| f =~ /git/} '})
end


# Patterns registered with rake/clean: CLEAN gets the compiled extension
# artifacts, CLOBBER additionally gets the Makefiles and C sources under ext/.
CLEAN.include("ext/*/*.bundle", "ext/*/*.so", "ext/*/*.o")
CLOBBER.include("ext/*/Makefile", "ext/*/*.c")
60
+
61
# Tasks for generating, configuring, compiling and installing the C
# extensions that live under ext/<name>/.
namespace :ext do
  desc "Installs the C extensions. Usually requires root."
  task :install => :build do
    Dir.glob("ext/*/").each do |ext_dir|
      Dir.chdir(ext_dir) { sh "make install" }
    end
  end

  desc "Compiles the C extensions"
  task :build do |t|
    Dir.glob("ext/*/").each do |ext_dir|
      cd(ext_dir) { sh "make" }
    end
  end

  # The Makefile file tasks are attached to this task further down, so
  # `rake ext:makefiles` actually generates them (it used to be an empty task).
  desc "Generates Makefiles with extconf/mkmf"
  task :makefiles

  # One file task per flex definition: ext/foo/foo.yy => ext/foo/foo.yy.c.
  # Each generated C file is also made a prerequisite of ext:build.
  FileList["ext/*/*.yy"].each do |flex_file|
    flex_generated_c = flex_file.ext("yy.c")
    file flex_generated_c => flex_file do |t|
      # -i: case-insensitive scanner, -s: suppress the default rule,
      # -o: output file name
      sh "flex -i -s -o #{flex_generated_c} #{flex_file}"
    end
    task :build => flex_generated_c
    file flex_file
  end

  # One file task per extconf.rb: running it inside its own directory
  # produces that directory's Makefile.
  FileList["ext/*/extconf.rb"].each do |extconf_file|
    extension_dir = extconf_file.sub("extconf.rb", '')
    makefile = extension_dir + "Makefile"
    file makefile => extconf_file do |t|
      Dir.chdir(extension_dir) { ruby "./extconf.rb" }
    end
    file extconf_file
    task :makefiles => makefile
    task :build => makefile
  end

  desc "Compiles Teeth::Scanner scanner definitions into flex scanner definition"
  task :scanners do
    FileList["scanners/*"].each do |scanner|
      ruby scanner
    end
  end
end
107
+
108
# Removes every file matching pkg/*gem; hooked into `rake clobber` below.
desc "deletes generated gem"
task :clobber_gem do
  FileList['pkg/*gem'].each do |gem_file|
    rm gem_file
  end
end
task :clobber => :clobber_gem
@@ -0,0 +1,5 @@
1
+ ---
2
+ :minor: 2
3
+ :build:
4
+ :patch: 0
5
+ :major: 0
@@ -0,0 +1,4 @@
1
require "mkmf"

# Compile the extension with all warnings enabled.
$CFLAGS += " -Wall"

# Probe for libuuid and link against it when it is found. The result is not
# checked here, so a missing libuuid does not abort Makefile generation.
have_library("uuid", "uuid_generate_time")

# Second argument is the source directory prefix (this directory).
create_makefile("teeth/scanners/scan_apache_logs", "./")
@@ -0,0 +1,274 @@
1
+ %option prefix="apache_logs_yy"
2
+ %option full
3
+ %option never-interactive
4
+ %option read
5
+ %option nounput
6
+ %option noyywrap noreject noyymore nodefault
7
+ %{
8
#include <ruby.h>
#include <string.h>
#include <uuid/uuid.h>
10
+ /* Data types */
11
+ typedef struct {
12
+ char *key;
13
+ char *value;
14
+ } KVPAIR;
15
+ const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
16
+ /* prototypes */
17
+ char *strip_ends(char *);
18
+ VALUE t_scan_apache_logs(VALUE);
19
+ void new_uuid(char *str_ptr);
20
+ void raise_error_for_string_too_long(VALUE string);
21
+ void include_message_in_token_hash(VALUE message, VALUE token_hash);
22
+ void add_uuid_to_token_hash(VALUE token_hash);
23
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
24
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
25
+ /* Set the scanner name, and return type */
26
+ #define YY_DECL KVPAIR scan_apache_logs(void)
27
+ #define yyterminate() return EOF_KVPAIR
28
+ /* Ruby 1.8 and 1.9 compatibility */
29
+ #if !defined(RSTRING_LEN)
30
+ # define RSTRING_LEN(x) (RSTRING(x)->len)
31
+ # define RSTRING_PTR(x) (RSTRING(x)->ptr)
32
+ #endif
33
+
34
+ %}
35
+
36
/* Definitions
 *
 * Named sub-patterns used by the rules below. The scanner is generated with
 * `flex -i`, so every pattern is matched case-insensitively.
 */

CATCHALL    (.|"\n")

WS          [[:space:]]

/* NOTE(review): `[:punct:]` is not wrapped in an outer bracket expression, so
 * flex reads it as the literal character set ":punct" rather than the POSIX
 * punctuation class. Left as-is because the sample output in the README (no
 * punctuation collected under :strings) matches the current behavior --
 * confirm the intent before changing it. */
NON_WS      ([a-z]|[0-9]|[:punct:])

IP4_OCT     [0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]

/* NOTE(review): the separator before the third label is an unescaped `.`
 * (any character). Two-label names such as myhost.localdomain appear to match
 * only because of that, so escaping it would change what counts as a host --
 * confirm before tightening. */
HOST        ([a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?)|localhost

WDAY        mon|tue|wed|thu|fri|sat|sun

MON         jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec

MONTH_NUM   0[1-9]|1[0-2]

MDAY        3[0-1]|[1-2][0-9]|0[1-9]

HOUR        2[0-3]|[0-1][0-9]

/* 60 is accepted as well as 00-59. */
MINSEC      [0-5][0-9]|60

YEAR        [0-9][0-9][0-9][0-9]

PLUSMINUS   (\+|\-)

TIMING      [0-9]+\.[0-9]+

REL_URL     (\/|\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\]*

PROTO       (http:|https:)

ERR_LVL     (emerg|alert|crit|err|error|warn|warning|notice|info|debug)

/* The dots are escaped so that only a literal "1.0" / "1.1" is accepted;
 * unescaped they would match any character (e.g. "HTTP/1x0"). */
HTTP_VERS   HTTP\/(1\.0|1\.1)

HTTP_VERB   (get|head|put|post|delete|trace|connect)

HTTPCODE    (100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])

/* A double-quoted string whose content starts with one of the listed
 * client name prefixes. */
BROWSER_STR \"(moz|msie|lynx|reconnoiter|pingdom)[^"]+\"
80
+
81
+
82
+ %%
83
+ /*
84
+ Actions
85
+ */
86
+
87
+
88
+ {TIMING} {
89
+ KVPAIR timing = {"timing", yytext};
90
+ return timing;
91
+ }
92
+
93
+ {IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT} {
94
+ KVPAIR ipv4_addr = {"ipv4_addr", yytext};
95
+ return ipv4_addr;
96
+ }
97
+
98
+ {WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR} {
99
+ KVPAIR apache_err_datetime = {"apache_err_datetime", yytext};
100
+ return apache_err_datetime;
101
+ }
102
+
103
+ {MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR} {
104
+ KVPAIR apache_access_datetime = {"apache_access_datetime", yytext};
105
+ return apache_access_datetime;
106
+ }
107
+
108
+ {HTTP_VERS} {
109
+ KVPAIR http_version = {"http_version", yytext};
110
+ return http_version;
111
+ }
112
+
113
+ {BROWSER_STR} {
114
+ KVPAIR browser_string = {"browser_string", strip_ends(yytext)};
115
+ return browser_string;
116
+ }
117
+
118
+ {PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")? {
119
+ KVPAIR absolute_url = {"absolute_url", yytext};
120
+ return absolute_url;
121
+ }
122
+
123
+ {HOST} {
124
+ KVPAIR host = {"host", yytext};
125
+ return host;
126
+ }
127
+
128
+ {REL_URL} {
129
+ KVPAIR relative_url = {"relative_url", yytext};
130
+ return relative_url;
131
+ }
132
+
133
+ {ERR_LVL} {
134
+ KVPAIR error_level = {"error_level", yytext};
135
+ return error_level;
136
+ }
137
+
138
+ {HTTPCODE} {
139
+ KVPAIR http_response = {"http_response", yytext};
140
+ return http_response;
141
+ }
142
+
143
+ {HTTP_VERB} {
144
+ KVPAIR http_method = {"http_method", yytext};
145
+ return http_method;
146
+ }
147
+
148
+ {NON_WS}{NON_WS}* {
149
+ KVPAIR strings = {"strings", yytext};
150
+ return strings;
151
+ }
152
+
153
+ {CATCHALL} /* ignore */
154
+ %%
155
+
156
+ char *strip_ends(char *string) {
157
+ string[yyleng-1] = '\0';
158
+ ++string;
159
+ return string;
160
+ }
161
+
162
+ void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
163
+ {
164
+ sprintf(out,
165
+ "%02X%02X%02X%02X"
166
+ "%02X%02X"
167
+ "%02X%02X"
168
+ "%02X%02X"
169
+ "%02X%02X%02X%02X%02X%02X",
170
+ uu[0], uu[1], uu[2], uu[3],
171
+ uu[4], uu[5],
172
+ uu[6], uu[7],
173
+ uu[8], uu[9],
174
+ uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
175
+ }
176
+
177
+ void new_uuid(char *str_ptr){
178
+ uuid_t new_uuid;
179
+ uuid_generate_time(new_uuid);
180
+ uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
181
+ }
182
+
183
+ void raise_error_for_string_too_long(VALUE string){
184
+ if( RSTRING_LEN(string) > 1000000){
185
+ rb_raise(rb_eArgError, "string too long for scan_apache_logs! max length is 1,000,000 chars");
186
+ }
187
+ }
188
+
189
+ /* Scans self, which is expected to be a single line from an Apache error or
190
+ * access log, and returns a Hash of the components of the log message. The
191
+ * following parts of the log message are returned if they are present:
192
+ * IPv4 address, datetime, HTTP Version used, the browser string given by the
193
+ * client, any absolute or relative URLs, the error level, HTTP response code,
194
+ * HTTP Method (verb), and any other uncategorized strings present. */
195
+ VALUE t_scan_apache_logs(VALUE self) {
196
+ KVPAIR kv_result;
197
+ int scan_complete = 0;
198
+ int building_words_to_string = 0;
199
+ VALUE token_hash = rb_hash_new();
200
+
201
+ BEGIN(INITIAL);
202
+
203
+ /* error out on absurdly large strings */
204
+ raise_error_for_string_too_long(self);
205
+ /* {:message => self()} */
206
+ include_message_in_token_hash(self, token_hash);
207
+ /* {:id => UUID} */
208
+ add_uuid_to_token_hash(token_hash);
209
+ yy_scan_string(RSTRING_PTR(self));
210
+ while (scan_complete == 0) {
211
+ kv_result = scan_apache_logs();
212
+ if (kv_result.key == "EOF"){
213
+ scan_complete = 1;
214
+ }
215
+ else if (kv_result.key == "strings"){
216
+ /* build a string until we get a non-word */
217
+ if (building_words_to_string == 0){
218
+ building_words_to_string = 1;
219
+ push_kv_pair_to_hash(kv_result, token_hash);
220
+ }
221
+ else{
222
+ concat_word_to_string(kv_result, token_hash);
223
+ }
224
+ }
225
+ else {
226
+ building_words_to_string = 0;
227
+ push_kv_pair_to_hash(kv_result, token_hash);
228
+ }
229
+ }
230
+ yy_delete_buffer(YY_CURRENT_BUFFER);
231
+ return rb_obj_dup(token_hash);
232
+ }
233
+
234
+ void add_uuid_to_token_hash(VALUE token_hash) {
235
+ char new_uuid_str[33];
236
+ new_uuid(new_uuid_str);
237
+ VALUE hsh_key_id = ID2SYM(rb_intern("id"));
238
+ VALUE hsh_val_id = rb_tainted_str_new2(new_uuid_str);
239
+ rb_hash_aset(token_hash, hsh_key_id, hsh_val_id);
240
+ }
241
+
242
+ void include_message_in_token_hash(VALUE message, VALUE token_hash) {
243
+ /* {:message => self()} */
244
+ VALUE hsh_key_msg = ID2SYM(rb_intern("message"));
245
+ rb_hash_aset(token_hash, hsh_key_msg, message);
246
+ }
247
+
248
+ void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
249
+ char * space = " ";
250
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
251
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
252
+ VALUE string = rb_ary_entry(hsh_value, -1);
253
+ rb_str_cat(string, space, 1);
254
+ rb_str_cat(string, key_value.value, yyleng);
255
+ }
256
+
257
+ void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
258
+ VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
259
+ VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);
260
+ VALUE ary_for_token_type = rb_ary_new();
261
+ switch (TYPE(hsh_value)) {
262
+ case T_NIL:
263
+ rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
264
+ rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
265
+ break;
266
+ case T_ARRAY:
267
+ rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
268
+ break;
269
+ }
270
+ }
271
+
272
+ void Init_scan_apache_logs() {
273
+ rb_define_method(rb_cString, "scan_apache_logs", t_scan_apache_logs, 0);
274
+ }