teeth 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +11 -0
- data/README.rdoc +123 -0
- data/Rakefile +112 -0
- data/VERSION.yml +5 -0
- data/ext/scan_apache_logs/extconf.rb +4 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy +274 -0
- data/ext/scan_apache_logs/scan_apache_logs.yy.c +9345 -0
- data/ext/scan_rails_logs/extconf.rb +4 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy +378 -0
- data/ext/scan_rails_logs/scan_rails_logs.yy.c +11528 -0
- data/lib/teeth.rb +14 -0
- data/lib/teeth/rule_statement.rb +61 -0
- data/lib/teeth/scanner.rb +101 -0
- data/lib/teeth/scanner_definition.rb +117 -0
- data/lib/teeth/scanner_definitions/scan_apache_logs.rb +28 -0
- data/lib/teeth/scanner_definitions/scan_rails_logs.rb +70 -0
- data/lib/teeth/templates/tokenizer.yy.erb +168 -0
- data/spec/fixtures/rails_1x.log +59 -0
- data/spec/fixtures/rails_22.log +12 -0
- data/spec/fixtures/rails_22_cached.log +10 -0
- data/spec/fixtures/rails_unordered.log +24 -0
- data/spec/playground/scan_rails_logs.rb +56 -0
- data/spec/playground/show_apache_processing.rb +13 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/unit/rule_statement_spec.rb +60 -0
- data/spec/unit/scan_apache_spec.rb +110 -0
- data/spec/unit/scan_rails_logs_spec.rb +100 -0
- data/spec/unit/scaner_definition_spec.rb +65 -0
- data/spec/unit/scanner_spec.rb +108 -0
- data/teeth.gemspec +78 -0
- metadata +100 -0
data/lib/teeth.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Absolute path of the ext/ directory that sits next to this lib/ directory.
TEETH_EXT_DIR = File.expand_path(File.join(File.dirname(__FILE__), '..', 'ext'))

# Pure-Ruby parts of the library.
%w[scanner scanner_definition rule_statement].each do |component|
  require "teeth/#{component}"
end

# The scanner extensions may be absent (the warning text below explains when
# that is expected), so a LoadError is reported on STDERR instead of aborting.
begin
  %w[scan_apache_logs scan_rails_logs].each do |extension|
    require "teeth/scanners/#{extension}"
  end
rescue LoadError
  STDERR.puts "WARNING: could not load extensions. This is okay if you are creating them from source for the first time."
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Teeth
|
2
|
+
|
3
|
+
# Error type for a rule that is declared more than once.
# NOTE(review): nothing in the code visible here raises it — confirm whether
# it is still used.
class DuplicateRuleError < ScannerError; end
|
5
|
+
|
6
|
+
# One rule of the generated scanner: a pattern plus the C action that is
# emitted for it.
class RuleStatement
  attr_reader :name, :regex, :strip_ends, :skip_line, :begin

  # name    - rule name; also used as the key of the emitted KVPAIR.
  # regex   - the pattern text, copied verbatim into the generated source.
  # options - :strip_ends, :skip_line, :begin and :ignore (see #function_body
  #           and #scanner_code for what each one changes).
  def initialize(name, regex, options={})
    @name = name
    @regex = regex
    @strip_ends = options[:strip_ends]
    @skip_line = options[:skip_line]
    @begin = options[:begin]
    @ignore = options[:ignore]
  end

  # Two rules are equal when both name and regex match; options are not
  # compared.
  def ==(other)
    return false unless other.kind_of?(RuleStatement)
    other.name == name && other.regex == regex
  end

  # Text emitted for this rule: the bare pattern for an :ignore rule,
  # otherwise the pattern followed by a braced action.
  def scanner_code
    return regex if @ignore
    "#{regex} {\n#{function_body}}"
  end

  # Body of the action, one statement per line.
  def function_body
    statements = []
    statements << " BEGIN(#{@begin});\n" if @begin
    if skip_line
      statements << " return EOF_KVPAIR;\n"
    else
      statements << " KVPAIR #{name} = {\"#{name}\", #{yytext_statement}};\n"
      statements << " return #{name};\n"
    end
    statements.join
  end

  # Expression used as the KVPAIR value.
  def yytext_statement
    if strip_ends
      "strip_ends(yytext)"
    else
      "yytext"
    end
  end

end
|
44
|
+
|
45
|
+
# Ordered list of RuleStatement objects with a small DSL: calling any
# otherwise-undefined method with a pattern appends a rule named after that
# method, e.g. `rules.timing '{TIMING}'`.
class RuleStatementGroup < Array

  # Appends a new RuleStatement built from the arguments and returns self
  # (the result of Array#push). Duplicate names are allowed.
  def add(name, regex, options={})
    push RuleStatement.new(name, regex, options)
  end

  # Names of all rules, as strings, in insertion order.
  def rule_names
    map { |rule_statement| rule_statement.name.to_s }
  end

  # DSL entry point: `group.some_name(regex, options = {})` adds a rule called
  # :some_name.
  #
  # A call without any argument cannot be a rule declaration (there is no
  # pattern), so it is passed to super and raises NoMethodError. Previously a
  # mistyped zero-argument method name silently appended a rule with a nil
  # regex.
  def method_missing(called_method_name, *args, &block)
    return super if args.empty?
    regex, options = args
    add(called_method_name, regex, options || {})
  end

end
|
61
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require "erb"
|
2
|
+
|
3
|
+
module Teeth
|
4
|
+
# Common ancestor of the error classes declared by this library.
class ScannerError < StandardError; end
|
6
|
+
|
7
|
+
# Raised by Scanner#write! when the scanner was created without an extension
# directory.
class InvalidExtensionDirectory < ScannerError; end
|
9
|
+
|
10
|
+
# Collects named definitions and rules for one tokenizer and renders them,
# through the ERB template at TEMPLATE, into a flex source file plus an
# extconf.rb.
class Scanner
  TEMPLATE = File.dirname(__FILE__) + "/templates/tokenizer.yy.erb"
  attr_reader :scanner_defns, :scanner_rules, :rdoc

  # name    - base name of the scanner, e.g. :apache_logs.
  # ext_dir - optional directory the generated files are written to; it is
  #           created when it does not exist yet.
  def initialize(name, ext_dir=nil)
    @scanner_base_name, @ext_dir = name, ext_dir
    @scanner_defns, @scanner_rules = ScannerDefinitionGroup.new, RuleStatementGroup.new
    ensure_ext_dir_exists if ext_dir
  end

  # "scan_<name>": used for the output file name, the makefile target and the
  # Ruby method name registered by the template.
  def scanner_name
    "scan_" + @scanner_base_name.to_s
  end

  # Name of the C function that implements the Ruby method.
  def main_function_name
    "t_" + scanner_name
  end

  # Name of the extension's Init_ function.
  def init_function_name
    "Init_" + scanner_name
  end

  # Value for flex's `%option prefix`.
  def function_prefix
    @scanner_base_name.to_s + "_yy"
  end

  # Name given to the generated lexer function (YY_DECL in the template).
  # It is the same string as scanner_name.
  def entry_point
    scanner_name
  end

  # Source text of the extconf.rb for this scanner.
  def extconf
    [
      'require "mkmf"',
      '$CFLAGS += " -Wall"',
      'have_library("uuid", "uuid_generate_time")',
      %Q|create_makefile "teeth/#{scanner_name}", "./"|
    ].join("\n") + "\n"
  end

  # Stores rdoc_text wrapped as a C block comment: every line is stripped and
  # prefixed with " * ", the first line opens with "/*" and the last one is
  # closed with " */".
  #
  # Empty or nil text clears the stored comment. Previously this raised
  # NoMethodError because there was no first line to rewrite.
  def rdoc=(rdoc_text)
    comment_lines = rdoc_text.to_s.split("\n").map { |line| " * " + line.strip }
    if comment_lines.empty?
      @rdoc = nil
      return
    end
    comment_lines.first[0] = "/"
    comment_lines[-1] = comment_lines.last + " */"
    @rdoc = comment_lines.join("\n")
  end

  # Adds a single definition; arguments are forwarded to
  # ScannerDefinitionGroup#add.
  def define(*args)
    @scanner_defns.add(*args)
  end

  # Yields the definition group so definitions can be declared in a block.
  def definitions
    yield @scanner_defns
  end

  # Loads the named groups of built-in definitions.
  def load_default_definitions_for(*defn_types)
    @scanner_defns.defaults_for(*defn_types)
  end

  # Adds a single rule; arguments are forwarded to RuleStatementGroup#add.
  def rule(*args)
    scanner_rules.add(*args)
  end

  # Yields the rule group so rules can be declared in a block.
  def rules
    yield scanner_rules
  end

  # Renders the template and returns the flex source as a string.
  def generate
    template = ERB.new(File.read(TEMPLATE))
    # The template refers to a local variable named `scanner`, so it has to
    # exist in the binding handed to ERB.
    scanner = self
    template.result(binding)
  end

  # Writes extconf.rb and <scanner_name>.yy into the extension directory.
  # Raises InvalidExtensionDirectory when no directory was configured.
  def write!
    raise InvalidExtensionDirectory, "no extension directory specified" unless @ext_dir
    File.open(File.join(@ext_dir, "extconf.rb"), "w") do |extconf_rb|
      extconf_rb.write extconf
    end
    File.open(File.join(@ext_dir, scanner_name + ".yy"), "w") do |scanner_file|
      scanner_file.write generate
    end
  end

  private

  # Creates the extension directory if it is missing (one level only).
  def ensure_ext_dir_exists
    Dir.mkdir @ext_dir unless File.exist?(@ext_dir)
  end

end
|
100
|
+
|
101
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
module Teeth
|
2
|
+
# Raised by ScannerDefinitionGroup when a definition name is added twice.
class DuplicateDefinitionError < ScannerError; end
|
4
|
+
|
5
|
+
# Raised by ScannerDefinitionGroup#defaults_for for an unknown group of
# default definitions.
class InvalidDefaultDefinitionName < ScannerError; end
|
7
|
+
|
8
|
+
# Raised by ScannerDefinition when both a regex and a start condition are
# supplied.
class ScannerDefinitionArgumentError < ScannerError; end
|
10
|
+
|
11
|
+
# One line of the scanner's definitions section: either a named pattern
# ("NAME regex") or a start-condition declaration ("%s NAME" / "%x NAME").
class ScannerDefinition
  attr_reader :name, :regex

  # name  - definition name.
  # regex - pattern text; may be replaced by the options hash when declaring a
  #         start condition, e.g. new(:REQUEST_COMPLETED, :start_condition => :exclusive).
  # opts  - :start_condition, a value whose string form starts with "inc" or "exc".
  #
  # Raises ScannerDefinitionArgumentError when both a regex and a start
  # condition are given.
  def initialize(name, regex, opts={})
    regex, opts = nil, regex if regex.kind_of?(Hash)
    @name = name
    @regex = regex
    @start_condition = opts[:start_condition]
    assert_valid_argument_combination
  end

  # The definition line as written into the generated source.
  def scanner_code
    "#{start_condition_string}#{@name}#{regex_to_s}"
  end

  # The pattern preceded by a space, or "" when there is no pattern.
  def regex_to_s
    pattern = @regex.to_s
    pattern == "" ? "" : " " + pattern
  end

  # Prefix for the start-condition kind; "" for an ordinary definition.
  def start_condition_string
    condition = @start_condition.to_s
    return "%s " if condition =~ /^inc/
    return "%x " if condition =~ /^exc/
    ""
  end

  private

  def assert_valid_argument_combination
    return unless @start_condition
    return if @regex.to_s == "" # nil and "" both count as "no regex"
    raise ScannerDefinitionArgumentError, "a scanner definition cannot define both a regex and start condition"
  end

end
|
56
|
+
|
57
|
+
# Ordered list of ScannerDefinition objects with unique names, plus a small
# DSL: calling any otherwise-undefined method with arguments adds a definition
# named after that method, e.g. `defs.CONTROLLER_ACTION '[a-z0-9]+#[a-z0-9]+'`.
class ScannerDefinitionGroup < Array

  # Built-in definitions, grouped by topic; see #defaults_for.
  DEFAULT_DEFINITIONS = {}
  # NOTE(review): NON_WS uses `[:punct:]` without an enclosing bracket
  # expression, unlike WS's `[[:space:]]` — confirm whether `[[:punct:]]` was
  # intended. Left unchanged because generated scanners depend on the current
  # pattern text.
  DEFAULT_DEFINITIONS[:whitespace] = [["WS", '[[:space:]]'],
                                      ["NON_WS", "([a-z]|[0-9]|[:punct:])"]]
  DEFAULT_DEFINITIONS[:ip] = [ ["IP4_OCT", "[0-9]|[0-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]"],
                               ["HOST", '([a-z0-9][a-z0-9\-]*\.[a-z0-9][a-z0-9\-]*.[a-z0-9][a-z0-9\-\.]*[a-z]+(\:[0-9]+)?)|localhost']]
  DEFAULT_DEFINITIONS[:time] = [ ["WDAY", "mon|tue|wed|thu|fri|sat|sun"],
                                 ["MON", "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec"],
                                 ["MONTH_NUM", "0[1-9]|1[0-2]"],
                                 ["MDAY", "3[0-1]|[1-2][0-9]|0[1-9]"],
                                 ["HOUR", "2[0-3]|[0-1][0-9]"],
                                 ["MINSEC", "[0-5][0-9]|60"],
                                 ["YEAR", "[0-9][0-9][0-9][0-9]"],
                                 ["PLUSMINUS", '(\+|\-)']]
  DEFAULT_DEFINITIONS[:web] = [ ["TIMING", %q{[0-9]+\.[0-9]+}],
                                ["REL_URL", %q{(\/|\\\\|\.)[a-z0-9\._\~\-\/\?&;#=\%\:\+\[\]\\\\]*}],
                                ["PROTO", "(http:|https:)"],
                                ["ERR_LVL", "(emerg|alert|crit|err|error|warn|warning|notice|info|debug)"],
                                ["HTTP_VERS", 'HTTP\/(1.0|1.1)'],
                                ["HTTP_VERB", "(get|head|put|post|delete|trace|connect)"],
                                ["HTTPCODE", "(100|101|20[0-6]|30[0-5]|307|40[0-9]|41[0-7]|50[0-5])"],
                                ["BROWSER_STR", '\"(moz|msie|lynx|reconnoiter|pingdom)[^"]+\"']]

  # Appends a new ScannerDefinition and returns self (the result of
  # Array#push). Raises DuplicateDefinitionError when the name is taken.
  def add(name, regex, options={})
    assert_defn_has_unique_name(name)
    push ScannerDefinition.new(name, regex, options)
  end

  # Raises DuplicateDefinitionError if a definition called `name` (compared as
  # a string) is already present.
  def assert_defn_has_unique_name(name)
    if defn_names.include?(name.to_s)
      raise DuplicateDefinitionError, "a definition for #{name.to_s} has already been defined"
    end
  end

  # Names of all definitions, as strings, in insertion order.
  def defn_names
    map { |defn_statement| defn_statement.name.to_s }
  end

  # DSL entry point: `group.NAME(regex_or_options, options = {})` adds a
  # definition called :NAME.
  #
  # A call without any argument carries neither a pattern nor a start
  # condition, so it is passed to super and raises NoMethodError. Previously a
  # mistyped zero-argument method name silently added a definition with a nil
  # regex.
  def method_missing(called_method_name, *args, &block)
    return super if args.empty?
    regex, options = args
    add(called_method_name, regex, options || {})
  end

  # Adds every built-in definition of the given groups (keys of
  # DEFAULT_DEFINITIONS). A default whose name is already defined is skipped,
  # so an existing definition wins.
  #
  # Raises InvalidDefaultDefinitionName for an unknown group; groups listed
  # before the unknown one have already been loaded at that point.
  def defaults_for(*default_types)
    default_types.each do |default_type|
      default_definitions = DEFAULT_DEFINITIONS[default_type]
      unless default_definitions
        raise InvalidDefaultDefinitionName, "no default definitions found for #{default_type.to_s}"
      end
      default_definitions.each do |defn_name, defn_regex|
        next if defn_names.include?(defn_name.to_s)
        add(defn_name, defn_regex)
      end
    end
  end

end
|
117
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "teeth"
|
2
|
+
# Definition of the Apache log scanner: running this file writes extconf.rb
# and scan_apache_logs.yy into ext/scan_apache_logs/.
apache_scanner = Teeth::Scanner.new(:apache_logs, TEETH_EXT_DIR + '/scan_apache_logs/')
apache_scanner.load_default_definitions_for(:whitespace, :ip, :time, :web)

# Becomes the comment placed above the generated C method.
apache_scanner.rdoc = <<-RDOC
Scans self, which is expected to be a single line from an Apache error or
access log, and returns a Hash of the components of the log message. The
following parts of the log message are returned if they are present:
IPv4 address, datetime, HTTP Version used, the browser string given by the
client, any absolute or relative URLs, the error level, HTTP response code,
HTTP Method (verb), and any other uncategorized strings present.
RDOC

# Rules are emitted in this order: [name, pattern] or [name, pattern, options].
[
  [:timing, '{TIMING}'],
  [:ipv4_addr, '{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}'],
  [:apache_err_datetime, '{WDAY}{WS}{MON}{WS}{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}{WS}{YEAR}'],
  [:apache_access_datetime, '{MDAY}\/{MON}\/{YEAR}":"{HOUR}":"{MINSEC}":"{MINSEC}{WS}{PLUSMINUS}{YEAR}'],
  [:http_version, '{HTTP_VERS}'],
  [:browser_string, '{BROWSER_STR}', { :strip_ends => true }],
  [:absolute_url, '{PROTO}"\/\/"({HOST}|{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT})({REL_URL}|"\/")?'],
  [:host, '{HOST}'],
  [:relative_url, '{REL_URL}'],
  [:error_level, '{ERR_LVL}'],
  [:http_response, '{HTTPCODE}'],
  [:http_method, '{HTTP_VERB}'],
  [:strings, '{NON_WS}{NON_WS}*']
].each do |rule_arguments|
  apache_scanner.rule(*rule_arguments)
end

apache_scanner.write!
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require "teeth"
|
2
|
+
# Definition of the Rails log scanner: running this file writes extconf.rb
# and scan_rails_logs.yy into ext/scan_rails_logs/.
rails_scanner = Teeth::Scanner.new(:rails_logs, TEETH_EXT_DIR + '/scan_rails_logs/')
rails_scanner.load_default_definitions_for(:whitespace, :ip, :time, :web)

# Becomes the comment placed above the generated C method.
rails_scanner.rdoc = <<-RDOC
Scans self, which is expected to be a line from a Rails production or dev log,
and returns a Hash of the significant features in the log message, including
the IP address of the client, the Controller and Action, any partials rendered,
and the time spent rendering them, the duration of the DB request(s), the HTTP
verb, etc.
RDOC

# Named patterns used by the rules below.
rails_scanner.define(:RAILS_TEASER, '(processing|filter\ chain\ halted|rendered)')
rails_scanner.define(:CONTROLLER_ACTION, '[a-z0-9]+#[a-z0-9]+')
rails_scanner.define(:RAILS_SKIP_LINES, '(session\ id)')
rails_scanner.define(:CACHE_HIT, 'actioncontroller"::"caching"::"actions"::"actioncachefilter":"0x[0-9a-f]+')
rails_scanner.define(:PARTIAL_SESSION_ID, '^([a-z0-9]+"="*"-"+[a-z0-9]+)')
rails_scanner.define(:RAILS_ERROR_CLASS, '([a-z]+\:\:)*[a-z]+error')

# Exclusive start conditions used while scanning a "Completed in ..." line.
rails_scanner.define(:REQUEST_COMPLETED, :start_condition => :exclusive)
rails_scanner.define(:COMPLETED_REQ_VIEW_STATS, :start_condition => :exclusive)
rails_scanner.define(:COMPLETED_REQ_DB_STATS, :start_condition => :exclusive)

# Processing DashboardController#index (for 1.1.1.1 at 2008-08-14 21:16:25) [GET]
rails_scanner.rule(:teaser, '{RAILS_TEASER}')
rails_scanner.rule(:controller_action, '{CONTROLLER_ACTION}')
rails_scanner.rule(:ipv4_addr, '{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}')
rails_scanner.rule(:datetime, '{YEAR}"-"{MONTH_NUM}"-"{MDAY}{WS}{HOUR}":"{MINSEC}":"{MINSEC}')
rails_scanner.rule(:http_method, '{HTTP_VERB}')

# Session ID: BAh7CToMcmVmZXJlciIbL3ByaXNjaWxsYS9wZW9wbGUvMjM1MCIKZmxhc2hJ
# QzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNoSGFzaHsABjoKQHVz ...
rails_scanner.rule(:skip_lines, '{RAILS_SKIP_LINES}', :skip_line => true)
rails_scanner.rule(:end_session_id, '{PARTIAL_SESSION_ID}')

# RuntimeError (Cannot destroy employee): /app/models/employee.rb:198:in `before_destroy'
# ActionController::RoutingError (no route found to match "/favicon.ico" with {:method=>:get}):
# ActionView::TemplateError (No rhtml, rxml, rjs or delegate template found for /shared/_ids_modal_selection_panel in script/../config/../app/views) on line #2 of app/views/events/index.rhtml:
# ActionView::TemplateError (You have a nil object when you didn't expect it!
# NoMethodError (undefined method `find' for ActionController::Filters::Filter:Class):
rails_scanner.rule(:error, '{RAILS_ERROR_CLASS}')
rails_scanner.rule(:error_message, '\(({WS}|{NON_WS})+\)', :strip_ends => true)
rails_scanner.rule(:line_number, '"#"[0-9]+{WS}', :strip_ends => true)
rails_scanner.rule(:file_and_line, '{WS}{REL_URL}":"', :strip_ends => true)

# Filter chain halted as [#<ActionController::Caching::Actions::ActionCacheFilter:0x2a999ad620 @check=nil, @options={:store_options=>{}, :layout=>nil, :cache_path=>#<Proc:0x0000002a999b8890@/app/controllers/cached_controller.rb:8>}>] rendered_or_redirected.
rails_scanner.rule(:cache_hit, '{CACHE_HIT}')

# Rendered shared/_analytics (0.2ms)
# Rendered layouts/_doc_type (0.00001)
rails_scanner.rule(:partial, '[a-z0-9]+{REL_URL}/\ \(')
rails_scanner.rule(:render_duration_ms, '[0-9\.]+/ms\)')
rails_scanner.rule(:render_duration_s, '\([0-9\.]+\)', :strip_ends => true)

# Completed in 0.21665 (4 reqs/sec) | Rendering: 0.00926 (4%) | DB: 0.00000 (0%) | 200 OK [http://demo.nu/employees]
# Completed in 614ms (View: 120, DB: 31) | 200 OK [http://floorplanner.local/demo]
rails_scanner.rule(:teaser, 'completed\ in', :begin => "REQUEST_COMPLETED")
rails_scanner.rule(:duration_s, '<REQUEST_COMPLETED>[0-9]+\.[0-9]+')
rails_scanner.rule(:duration_ms, '<REQUEST_COMPLETED>[0-9]+/ms')
rails_scanner.rule(:start_view_stats, '<REQUEST_COMPLETED>(View":"|Rendering":")', :begin => "COMPLETED_REQ_VIEW_STATS")
rails_scanner.rule(:view_s, '<COMPLETED_REQ_VIEW_STATS>([0-9]+\.[0-9]+)', :begin => "REQUEST_COMPLETED")
rails_scanner.rule(:view_ms, '<COMPLETED_REQ_VIEW_STATS>[0-9]+', :begin => "REQUEST_COMPLETED")
rails_scanner.rule(:view_throwaway_tokens, '<COMPLETED_REQ_VIEW_STATS>{CATCHALL}', :ignore => true)
rails_scanner.rule(:start_db_stats, '<REQUEST_COMPLETED>DB":"', :begin => "COMPLETED_REQ_DB_STATS")
rails_scanner.rule(:db_s, '<COMPLETED_REQ_DB_STATS>[0-9]+\.[0-9]+', :begin => "REQUEST_COMPLETED")
rails_scanner.rule(:db_ms, '<COMPLETED_REQ_DB_STATS>[0-9]+', :begin => "REQUEST_COMPLETED")
rails_scanner.rule(:db_throwaway_tokens, '<COMPLETED_REQ_DB_STATS>{CATCHALL}', :ignore => true)
rails_scanner.rule(:url, '<REQUEST_COMPLETED>\[{PROTO}"\/\/"({HOST}|({IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}"."{IP4_OCT}))({REL_URL}|"/"|"\\\\")?\]', :strip_ends => true)
rails_scanner.rule(:http_response, '<REQUEST_COMPLETED>{HTTPCODE}')
rails_scanner.rule(:strings, '<REQUEST_COMPLETED>{NON_WS}{NON_WS}*')
rails_scanner.rule(:ignore_others, '<REQUEST_COMPLETED>{CATCHALL}', :ignore => true)

# fallback to collecting strings
rails_scanner.rule(:strings, '{NON_WS}{NON_WS}*')

rails_scanner.write!
|
@@ -0,0 +1,168 @@
|
|
1
|
+
%option prefix="<%= scanner.function_prefix %>"
|
2
|
+
%option full
|
3
|
+
%option never-interactive
|
4
|
+
%option read
|
5
|
+
%option nounput
|
6
|
+
%option noyywrap noreject noyymore nodefault
|
7
|
+
%{
|
8
|
+
#include <ruby.h>
|
9
|
+
#include <uuid/uuid.h>
|
10
|
+
/* Data types */
|
11
|
+
typedef struct {
|
12
|
+
char *key;
|
13
|
+
char *value;
|
14
|
+
} KVPAIR;
|
15
|
+
const KVPAIR EOF_KVPAIR = {"EOF", "EOF"};
|
16
|
+
/* prototypes */
|
17
|
+
char *strip_ends(char *);
|
18
|
+
VALUE <%= scanner.main_function_name %>(VALUE);
|
19
|
+
void new_uuid(char *str_ptr);
|
20
|
+
void raise_error_for_string_too_long(VALUE string);
|
21
|
+
void include_message_in_token_hash(VALUE message, VALUE token_hash);
|
22
|
+
void add_uuid_to_token_hash(VALUE token_hash);
|
23
|
+
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash);
|
24
|
+
void concat_word_to_string(KVPAIR key_value, VALUE token_hash);
|
25
|
+
/* Set the scanner name, and return type */
|
26
|
+
#define YY_DECL KVPAIR <%= scanner.entry_point %>(void)
|
27
|
+
#define yyterminate() return EOF_KVPAIR
|
28
|
+
/* Ruby 1.8 and 1.9 compatibility */
|
29
|
+
#if !defined(RSTRING_LEN)
|
30
|
+
# define RSTRING_LEN(x) (RSTRING(x)->len)
|
31
|
+
# define RSTRING_PTR(x) (RSTRING(x)->ptr)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
%}
|
35
|
+
|
36
|
+
/* Definitions */
|
37
|
+
|
38
|
+
CATCHALL (.|"\n")
|
39
|
+
|
40
|
+
<% scanner.scanner_defns.each do |scanner_defn| %>
|
41
|
+
<%= scanner_defn.scanner_code %>
|
42
|
+
<% end %>
|
43
|
+
|
44
|
+
%%
|
45
|
+
/*
|
46
|
+
Actions
|
47
|
+
*/
|
48
|
+
|
49
|
+
<% scanner.scanner_rules.each do |scanner_rule| %>
|
50
|
+
<%= scanner_rule.scanner_code %>
|
51
|
+
<% end %>
|
52
|
+
{CATCHALL} /* ignore */
|
53
|
+
%%
|
54
|
+
|
55
|
+
char *strip_ends(char *string) {
|
56
|
+
string[yyleng-1] = '\0';
|
57
|
+
++string;
|
58
|
+
return string;
|
59
|
+
}
|
60
|
+
|
61
|
+
void uuid_unparse_upper_sans_dash(const uuid_t uu, char *out)
|
62
|
+
{
|
63
|
+
sprintf(out,
|
64
|
+
"%02X%02X%02X%02X"
|
65
|
+
"%02X%02X"
|
66
|
+
"%02X%02X"
|
67
|
+
"%02X%02X"
|
68
|
+
"%02X%02X%02X%02X%02X%02X",
|
69
|
+
uu[0], uu[1], uu[2], uu[3],
|
70
|
+
uu[4], uu[5],
|
71
|
+
uu[6], uu[7],
|
72
|
+
uu[8], uu[9],
|
73
|
+
uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
|
74
|
+
}
|
75
|
+
|
76
|
+
void new_uuid(char *str_ptr){
|
77
|
+
uuid_t new_uuid;
|
78
|
+
uuid_generate_time(new_uuid);
|
79
|
+
uuid_unparse_upper_sans_dash(new_uuid, str_ptr);
|
80
|
+
}
|
81
|
+
|
82
|
+
/* Raises ArgumentError when the Ruby string is longer than 1,000,000 bytes;
 * returns normally otherwise. */
void raise_error_for_string_too_long(VALUE string){
  if( RSTRING_LEN(string) <= 1000000){
    return;
  }
  rb_raise(rb_eArgError, "string too long for <%=scanner.scanner_name %>! max length is 1,000,000 chars");
}
|
87
|
+
|
88
|
+
<%= scanner.rdoc %>
|
89
|
+
VALUE <%= scanner.main_function_name %>(VALUE self) {
  /* Tokenizes self and returns a Hash: :message => self, :id => a fresh
   * UUID, and one Array of strings per rule name that matched. */
  KVPAIR kv_result;
  int scan_complete = 0;
  int building_words_to_string = 0;
  VALUE token_hash = rb_hash_new();

  /* always start from the initial start condition */
  BEGIN(INITIAL);

  /* error out on absurdly large strings */
  raise_error_for_string_too_long(self);
  /* {:message => self()} */
  include_message_in_token_hash(self, token_hash);
  /* {:id => UUID} */
  add_uuid_to_token_hash(token_hash);
  yy_scan_string(RSTRING_PTR(self));
  while (scan_complete == 0) {
    kv_result = <%= scanner.entry_point %>();
    /* Keys are compared by content. Comparing the pointers only works when
     * the compiler happens to merge identical string literals. */
    if (strcmp(kv_result.key, "EOF") == 0){
      scan_complete = 1;
    }
    else if (strcmp(kv_result.key, "strings") == 0){
      /* build a string until we get a non-word */
      if (building_words_to_string == 0){
        building_words_to_string = 1;
        push_kv_pair_to_hash(kv_result, token_hash);
      }
      else{
        concat_word_to_string(kv_result, token_hash);
      }
    }
    else {
      building_words_to_string = 0;
      push_kv_pair_to_hash(kv_result, token_hash);
    }
  }
  yy_delete_buffer(YY_CURRENT_BUFFER);
  return rb_obj_dup(token_hash);
}
|
127
|
+
|
128
|
+
/* Stores a freshly generated UUID string under the :id key of token_hash. */
void add_uuid_to_token_hash(VALUE token_hash) {
  char uuid_text[33]; /* 32 hex digits + NUL */
  VALUE id_key;
  VALUE id_value;

  new_uuid(uuid_text);
  id_key = ID2SYM(rb_intern("id"));
  id_value = rb_tainted_str_new2(uuid_text);
  rb_hash_aset(token_hash, id_key, id_value);
}
|
135
|
+
|
136
|
+
/* Stores message under the :message key of token_hash:
 * {:message => self()} */
void include_message_in_token_hash(VALUE message, VALUE token_hash) {
  rb_hash_aset(token_hash, ID2SYM(rb_intern("message")), message);
}
|
141
|
+
|
142
|
+
/* Appends a space and the first yyleng bytes of key_value.value to the last
 * string in the array stored under key_value.key. The caller must already
 * have pushed an entry for that key. */
void concat_word_to_string(KVPAIR key_value, VALUE token_hash) {
  VALUE key_symbol = ID2SYM(rb_intern(key_value.key));
  VALUE strings_for_key = rb_hash_aref(token_hash, key_symbol);
  VALUE last_string = rb_ary_entry(strings_for_key, -1);

  rb_str_cat(last_string, " ", 1);
  rb_str_cat(last_string, key_value.value, yyleng);
}
|
150
|
+
|
151
|
+
/* Appends key_value.value to the array stored under key_value.key (as a
 * Symbol) in token_hash, creating the array for the first value of a key. */
void push_kv_pair_to_hash(KVPAIR key_value, VALUE token_hash) {
  VALUE hsh_key = ID2SYM(rb_intern(key_value.key));
  VALUE hsh_value = rb_hash_aref(token_hash, hsh_key);

  switch (TYPE(hsh_value)) {
    case T_NIL: {
      /* first value for this key: the array is only allocated here, not on
       * every call */
      VALUE ary_for_token_type = rb_ary_new();
      rb_ary_push(ary_for_token_type, rb_tainted_str_new2(key_value.value));
      rb_hash_aset(token_hash, hsh_key, ary_for_token_type);
      break;
    }
    case T_ARRAY:
      rb_ary_push(hsh_value, rb_tainted_str_new2(key_value.value));
      break;
    default:
      /* the key holds something other than an array (the :message and :id
       * entries are plain strings); the value is dropped, as before */
      break;
  }
}
|
165
|
+
|
166
|
+
/* Extension entry point: registers the scanner as a zero-argument instance
 * method on String. */
void <%=scanner.init_function_name %>() {
  rb_define_method(
    rb_cString,
    "<%= scanner.scanner_name %>",
    <%= scanner.main_function_name %>,
    0);
}
|