mongrel_experimental 1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,73 @@
1
+
2
+ #include "tst.h"
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <assert.h>
6
+
7
+
8
+ void *tst_search(const unsigned char *key, struct tst *tst, int option,
9
+ unsigned int *match_len)
10
+ {
11
+ struct node *current_node;
12
+ struct node *longest_match = NULL;
13
+ unsigned int longest_match_len = 0;
14
+ int key_index;
15
+
16
+ assert(key != NULL && "key can't be NULL");
17
+ assert(tst != NULL && "tst can't be NULL");
18
+
19
+ if (key[0] == 0)
20
+ return NULL;
21
+
22
+ if (tst->head[(int) key[0]] == NULL)
23
+ return NULL;
24
+
25
+ if (match_len)
26
+ *match_len = 0;
27
+
28
+ current_node = tst->head[(int) key[0]];
29
+ key_index = 1;
30
+
31
+ while (current_node != NULL) {
32
+ if (key[key_index] == current_node->value) {
33
+ if (current_node->value == 0) {
34
+ if (match_len)
35
+ *match_len = key_index;
36
+ return current_node->middle;
37
+ } else {
38
+ current_node = current_node->middle;
39
+ key_index++;
40
+ continue;
41
+ }
42
+ } else {
43
+ if (current_node->value == 0) {
44
+ if (option & TST_LONGEST_MATCH) {
45
+ longest_match = current_node->middle;
46
+ longest_match_len = key_index;
47
+ }
48
+
49
+ if (key[key_index] < 64) {
50
+ current_node = current_node->left;
51
+ continue;
52
+ } else {
53
+ current_node = current_node->right;
54
+ continue;
55
+ }
56
+ } else {
57
+ if (key[key_index] < current_node->value) {
58
+ current_node = current_node->left;
59
+ continue;
60
+ } else {
61
+ current_node = current_node->right;
62
+ continue;
63
+ }
64
+ }
65
+ }
66
+ }
67
+
68
+ if (match_len)
69
+ *match_len = longest_match_len;
70
+
71
+ return longest_match;
72
+
73
+ }
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Copyright (c) 2005 Zed A. Shaw
3
+ * You can redistribute it and/or modify it under the same terms as Ruby.
4
+ */
5
+ #include "ruby.h"
6
+ #include "ext_help.h"
7
+ #include <assert.h>
8
+ #include <string.h>
9
+ #include <ctype.h>
10
+ #include "tst.h"
11
+
12
+ static VALUE mMongrel;
13
+ static VALUE cURIClassifier;
14
+
15
+ #define id_handler_map rb_intern("@handler_map")
16
+
17
+ #define TRIE_INCREASE 30
18
+
19
/* Free callback given to Data_Wrap_Struct: tears down the wrapped trie, if any. */
void URIClassifier_free(void *data)
{
    struct tst *trie = (struct tst *)data;

    TRACE();

    if(trie == NULL) {
        return;
    }

    tst_cleanup(trie);
}
27
+
28
+ VALUE URIClassifier_alloc(VALUE klass)
29
+ {
30
+ VALUE obj;
31
+ struct tst *tst = tst_init(TRIE_INCREASE);
32
+ TRACE();
33
+ assert(tst && "failed to initialize trie structure");
34
+
35
+ obj = Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
36
+
37
+ return obj;
38
+ }
39
+
40
+ /**
41
+ * call-seq:
42
+ * URIClassifier.new -> URIClassifier
43
+ *
44
+ * Initializes a new URIClassifier object that you can use to associate URI sequences
45
+ * with objects. You can actually use it with any string sequence and any objects,
46
+ * but it's mostly used with URIs.
47
+ *
48
+ * It uses TST from http://www.octavian.org/cs/software.html to build an ternary search
49
+ * trie to hold all of the URIs. It uses this to do an initial search for the a URI
50
+ * prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO portions. It actually
51
+ * will do two searches most of the time in order to find the right handler for the
52
+ * registered prefix portion.
53
+ *
54
+ */
55
+ VALUE URIClassifier_init(VALUE self)
56
+ {
57
+ VALUE hash;
58
+
59
+ /* we create an internal hash to protect stuff from the GC */
60
+ hash = rb_hash_new();
61
+ rb_ivar_set(self, id_handler_map, hash);
62
+
63
+ return self;
64
+ }
65
+
66
+
67
+ /**
68
+ * call-seq:
69
+ * uc.register("/someuri", SampleHandler.new) -> nil
70
+ *
71
+ * Registers the SampleHandler (one for all requests) with the "/someuri".
72
+ * When URIClassifier::resolve is called with "/someuri" it'll return
73
+ * SampleHandler immediately. When called with "/someuri/iwant" it'll also
74
+ * return SomeHandler immediatly, with no additional searches, but it will
75
+ * return path info with "/iwant".
76
+ *
77
+ * You actually can reuse this class to register nearly anything and
78
+ * quickly resolve it. This could be used for caching, fast mapping, etc.
79
+ * The downside is it uses much more memory than a Hash, but it can be
80
+ * a lot faster. It's main advantage is that it works on prefixes, which
81
+ * is damn hard to get right with a Hash.
82
+ */
83
+ VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
84
+ {
85
+ int rc = 0;
86
+ void *ptr = NULL;
87
+ struct tst *tst = NULL;
88
+ DATA_GET(self, struct tst, tst);
89
+
90
+ rc = tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler , tst, 0, &ptr);
91
+
92
+ if(rc == TST_DUPLICATE_KEY) {
93
+ rb_raise(rb_eStandardError, "Handler already registered with that name");
94
+ } else if(rc == TST_ERROR) {
95
+ rb_raise(rb_eStandardError, "Memory error registering handler");
96
+ } else if(rc == TST_NULL_KEY) {
97
+ rb_raise(rb_eStandardError, "URI was empty");
98
+ }
99
+
100
+ rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
101
+
102
+ return Qnil;
103
+ }
104
+
105
+
106
+ /**
107
+ * call-seq:
108
+ * uc.unregister("/someuri")
109
+ *
110
+ * Yep, just removes this uri and it's handler from the trie.
111
+ */
112
+ VALUE URIClassifier_unregister(VALUE self, VALUE uri)
113
+ {
114
+ void *handler = NULL;
115
+ struct tst *tst = NULL;
116
+ DATA_GET(self, struct tst, tst);
117
+
118
+ handler = tst_delete((unsigned char *)StringValueCStr(uri), tst);
119
+
120
+ if(handler) {
121
+ rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
122
+
123
+ return (VALUE)handler;
124
+ } else {
125
+ return Qnil;
126
+ }
127
+ }
128
+
129
+
130
+ /**
131
+ * call-seq:
132
+ * uc.resolve("/someuri") -> "/someuri", "", handler
133
+ * uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
134
+ * uc.resolve("/notfound/orhere") -> nil, nil, nil
135
+ * uc.resolve("/") -> "/", "/", handler # if uc.register("/", handler)
136
+ * uc.resolve("/path/from/root") -> "/", "/path/from/root", handler # if uc.register("/", handler)
137
+ *
138
+ * Attempts to resolve either the whole URI or at the longest prefix, returning
139
+ * the prefix (as script_info), path (as path_info), and registered handler
140
+ * (usually an HttpHandler). If it doesn't find a handler registered at the longest
141
+ * match then it returns nil,nil,nil.
142
+ *
143
+ * Because the resolver uses a trie you are able to register a handler at *any* character
144
+ * in the URI and it will be handled as long as it's the longest prefix. So, if you
145
+ * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a
146
+ * a search for "/something/like" would give you #1. A search for "/something/like/that/too"
147
+ * would give you #2.
148
+ *
149
+ * This is very powerful since it means you can also attach handlers to parts of the ;
150
+ * (semi-colon) separated path params, any part of the path, use off chars, anything really.
151
+ * It also means that it's very efficient to do this only taking as long as the URI has
152
+ * characters.
153
+ *
154
+ * A slight modification to the CGI 1.2 standard is given for handlers registered to "/".
155
+ * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
156
+ * about a script that handles the root. To make this work, the resolver will detect that
157
+ * the requested handler is at "/", and return that for script_name, and then simply return
158
+ * the full URI back as path_info.
159
+ *
160
+ * It expects strings with no embedded '\0' characters. Don't try other string-like stuff yet.
161
+ */
162
+ VALUE URIClassifier_resolve(VALUE self, VALUE uri)
163
+ {
164
+ void *handler = NULL;
165
+ int pref_len = 0;
166
+ struct tst *tst = NULL;
167
+ VALUE result;
168
+ unsigned char *uri_str = NULL;
169
+
170
+ DATA_GET(self, struct tst, tst);
171
+ uri_str = (unsigned char *)StringValueCStr(uri);
172
+
173
+ handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
174
+
175
+ /* setup for multiple return values */
176
+ result = rb_ary_new();
177
+
178
+ if(handler) {
179
+ rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
180
+ /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
181
+ if(pref_len == 1 && uri_str[0] == '/') {
182
+ /* matches the root URI so we have to use the whole URI as the path_info */
183
+ rb_ary_push(result, uri);
184
+ } else {
185
+ /* matches a script so process like normal */
186
+ rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
187
+ }
188
+
189
+ rb_ary_push(result, (VALUE)handler);
190
+ } else {
191
+ /* not found so push back nothing */
192
+ rb_ary_push(result, Qnil);
193
+ rb_ary_push(result, Qnil);
194
+ rb_ary_push(result, Qnil);
195
+ }
196
+
197
+ return result;
198
+ }
199
+
200
+ VALUE URIClassifier_uris(VALUE self) {
201
+ return rb_funcall(rb_ivar_get(self, id_handler_map), rb_intern("keys"), 0);
202
+ }
203
+
204
+ void Init_uri_classifier()
205
+ {
206
+
207
+ mMongrel = rb_define_module("Mongrel");
208
+
209
+ cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject);
210
+ rb_define_alloc_func(cURIClassifier, URIClassifier_alloc);
211
+ rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0);
212
+ rb_define_method(cURIClassifier, "register", URIClassifier_register, 2);
213
+ rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1);
214
+ rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1);
215
+ rb_define_method(cURIClassifier, "uris", URIClassifier_uris, 0);
216
+ }
@@ -0,0 +1,3 @@
1
+
2
+ require 'uri_classifier'
3
+ STDERR.puts "** Mongrel_experimental loaded"
@@ -0,0 +1,44 @@
1
+
2
+ # Gem::Specification for Mongrel_experimental-1.1
3
+ # Originally generated by Echoe
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = %q{mongrel_experimental}
7
+ s.version = "1.1"
8
+
9
+ s.specification_version = 2 if s.respond_to? :specification_version=
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.authors = ["The Mongrel Team"]
13
+ s.date = %q{2007-11-01}
14
+ s.description = %q{Backports and experimental features for Mongrel.}
15
+ s.email = %q{}
16
+ s.extensions = ["ext/uri_classifier/extconf.rb"]
17
+ s.files = ["CHANGELOG", "COPYING", "ext/uri_classifier/ext_help.h", "ext/uri_classifier/extconf.rb", "ext/uri_classifier/tst.h", "ext/uri_classifier/tst_cleanup.c", "ext/uri_classifier/tst_delete.c", "ext/uri_classifier/tst_grow_node_free_list.c", "ext/uri_classifier/tst_init.c", "ext/uri_classifier/tst_insert.c", "ext/uri_classifier/tst_search.c", "ext/uri_classifier/uri_classifier.c", "lib/mongrel_experimental.rb", "LICENSE", "Manifest", "README", "test/test_uriclassifier.rb", "mongrel_experimental.gemspec"]
18
+ s.has_rdoc = true
19
+ s.homepage = %q{}
20
+ s.require_paths = ["lib", "ext"]
21
+ s.rubyforge_project = %q{mongrel}
22
+ s.rubygems_version = %q{0.9.4.6}
23
+ s.summary = %q{Backports and experimental features for Mongrel.}
24
+ s.test_files = ["test/test_uriclassifier.rb"]
25
+
26
+ s.add_dependency(%q<mongrel>, ["= 1.1"])
27
+ end
28
+
29
+
30
+ # # Original Rakefile source (requires the Echoe gem):
31
+ #
32
+ #
33
+ # require 'echoe'
34
+ #
35
+ # Echoe.new("mongrel_experimental") do |p|
36
+ # p.summary = "Backports and experimental features for Mongrel."
37
+ # p.project = "mongrel"
38
+ # p.author="The Mongrel Team"
39
+ # p.dependencies = ['mongrel =1.1']
40
+ #
41
+ # p.need_tar_gz = false
42
+ # p.need_tgz = true
43
+ # p.require_signed = true
44
+ # end
@@ -0,0 +1,262 @@
1
+ # Copyright (c) 2005 Zed A. Shaw
2
+ # You can redistribute it and/or modify it under the same terms as Ruby.
3
+ #
4
+ # Additional work donated by contributors. See http://mongrel.rubyforge.org/attributions.html
5
+ # for more information.
6
+
7
+ require 'lib/uri_classifier'
8
+ require 'test/unit'
9
+
10
+ include Mongrel
11
+
12
+ class URIClassifierTest < Test::Unit::TestCase
13
+
14
+ def test_uri_finding
15
+ uri_classifier = URIClassifier.new
16
+ uri_classifier.register("/test", 1)
17
+
18
+ script_name, path_info, value = uri_classifier.resolve("/test")
19
+ assert_equal 1, value
20
+ assert_equal "/test", script_name
21
+ end
22
+
23
+ def test_root_handler_only
24
+ uri_classifier = URIClassifier.new
25
+ uri_classifier.register("/", 1)
26
+
27
+ script_name, path_info, value = uri_classifier.resolve("/test")
28
+ assert_equal 1, value
29
+ assert_equal "/", script_name
30
+ assert_equal "/test", path_info
31
+ end
32
+
33
+ def test_uri_prefix_ops
34
+ test = "/pre/fix/test"
35
+ prefix = "/pre"
36
+
37
+ uri_classifier = URIClassifier.new
38
+ uri_classifier.register(prefix,1)
39
+
40
+ script_name, path_info, value = uri_classifier.resolve(prefix)
41
+ script_name, path_info, value = uri_classifier.resolve(test)
42
+ assert_equal 1, value
43
+ assert_equal prefix, script_name
44
+ assert_equal test[script_name.length .. -1], path_info
45
+
46
+ assert uri_classifier.inspect
47
+ assert_equal prefix, uri_classifier.uris[0]
48
+ end
49
+
50
+ def test_not_finding
51
+ test = "/cant/find/me"
52
+ uri_classifier = URIClassifier.new
53
+ uri_classifier.register(test, 1)
54
+
55
+ script_name, path_info, value = uri_classifier.resolve("/nope/not/here")
56
+ assert_nil script_name
57
+ assert_nil path_info
58
+ assert_nil value
59
+ end
60
+
61
+ def test_exceptions
62
+ uri_classifier = URIClassifier.new
63
+
64
+ uri_classifier.register("/test", 1)
65
+
66
+ failed = false
67
+ begin
68
+ uri_classifier.register("/test", 1)
69
+ rescue => e
70
+ failed = true
71
+ end
72
+
73
+ assert failed
74
+
75
+ failed = false
76
+ begin
77
+ uri_classifier.register("", 1)
78
+ rescue => e
79
+ failed = true
80
+ end
81
+
82
+ assert failed
83
+ end
84
+
85
+
86
+ def test_register_unregister
87
+ uri_classifier = URIClassifier.new
88
+
89
+ 100.times do
90
+ uri_classifier.register("/stuff", 1)
91
+ value = uri_classifier.unregister("/stuff")
92
+ assert_equal 1, value
93
+ end
94
+
95
+ uri_classifier.register("/things",1)
96
+ script_name, path_info, value = uri_classifier.resolve("/things")
97
+ assert_equal 1, value
98
+
99
+ uri_classifier.unregister("/things")
100
+ script_name, path_info, value = uri_classifier.resolve("/things")
101
+ assert_nil value
102
+
103
+ end
104
+
105
+
106
+ def test_uri_branching
107
+ uri_classifier = URIClassifier.new
108
+ uri_classifier.register("/test", 1)
109
+ uri_classifier.register("/test/this",2)
110
+
111
+ script_name, path_info, handler = uri_classifier.resolve("/test")
112
+ script_name, path_info, handler = uri_classifier.resolve("/test/that")
113
+ assert_equal "/test", script_name, "failed to properly find script off branch portion of uri"
114
+ assert_equal "/that", path_info
115
+ assert_equal 1, handler, "wrong result for branching uri"
116
+ end
117
+
118
+ def test_all_prefixing
119
+ tests = ["/test","/test/that","/test/this"]
120
+ uri = "/test/this/that"
121
+ uri_classifier = URIClassifier.new
122
+
123
+ current = ""
124
+ uri.each_byte do |c|
125
+ current << c.chr
126
+ uri_classifier.register(current, c)
127
+ end
128
+
129
+
130
+ # Try to resolve everything with no asserts as a fuzzing
131
+ tests.each do |prefix|
132
+ current = ""
133
+ prefix.each_byte do |c|
134
+ current << c.chr
135
+ script_name, path_info, handler = uri_classifier.resolve(current)
136
+ assert script_name
137
+ assert path_info
138
+ assert handler
139
+ end
140
+ end
141
+
142
+ # Assert that we find stuff
143
+ tests.each do |t|
144
+ script_name, path_info, handler = uri_classifier.resolve(t)
145
+ assert handler
146
+ end
147
+
148
+ # Assert we don't find stuff
149
+ script_name, path_info, handler = uri_classifier.resolve("chicken")
150
+ assert_nil handler
151
+ assert_nil script_name
152
+ assert_nil path_info
153
+ end
154
+
155
+
156
+ # Verifies that a root mounted ("/") handler resolves
157
+ # such that path info matches the original URI.
158
+ # This is needed to accommodate real usage of handlers.
159
+ def test_root_mounted
160
+ uri_classifier = URIClassifier.new
161
+ root = "/"
162
+ path = "/this/is/a/test"
163
+
164
+ uri_classifier.register(root, 1)
165
+
166
+ script_name, path_info, handler = uri_classifier.resolve(root)
167
+ assert_equal 1, handler
168
+ assert_equal root, path_info
169
+ assert_equal root, script_name
170
+
171
+ script_name, path_info, handler = uri_classifier.resolve(path)
172
+ assert_equal path, path_info
173
+ assert_equal root, script_name
174
+ assert_equal 1, handler
175
+ end
176
+
177
+ # Verifies that a root mounted ("/") handler
178
+ # is the default point, doesn't matter the order we use
179
+ # to register the URIs
180
+ def test_classifier_order
181
+ tests = ["/before", "/way_past"]
182
+ root = "/"
183
+ path = "/path"
184
+
185
+ uri_classifier = URIClassifier.new
186
+ uri_classifier.register(path, 1)
187
+ uri_classifier.register(root, 2)
188
+
189
+ tests.each do |uri|
190
+ script_name, path_info, handler = uri_classifier.resolve(uri)
191
+ assert_equal root, script_name, "#{uri} did not resolve to #{root}"
192
+ assert_equal uri, path_info
193
+ assert_equal 2, handler
194
+ end
195
+ end
196
+
197
+ if ENV['BENCHMARK']
198
+ # Eventually we will have a suite of benchmarks instead of lamely installing a test
199
+
200
def test_benchmark
  # Benchmark.bm comes from the stdlib benchmark library; require it here so
  # this test does not rely on something else having loaded it.
  require 'benchmark'

  # This URI set should favor a TST. Both versions increase linearly until you hit 14
  # URIs, then the TST flattens out.
  #
  # The disabled URI sets below are kept as real comments. %w() has no comment
  # syntax, so leaving them inside the literal turns "#" into a URI word, and
  # registering it a second time raises the duplicate-handler error.
  #
  #   /eag /eig /eigbark /eog /eogbark /eog/bark /eug /eugbarking /iuppy
  #   /f /fat /fat/tree /fat/tree/mulberry /fats /fot /fot/tree/mulberry /jitty /jittyfat
  #   /gag /gig /gigbark /gog /gogbark /gog/bark /gug /gugbarking /kuppy
  #   /h /hat /hat/tree /hat/tree/mulberry /hats /hot /hot/tree/mulberry /litty /littyhat
  #   /ceag /ceig /ceigbark /ceog /ceogbark /ceog/cbark /ceug /ceugbarking /ciuppy
  #   /cf /cfat /cfat/ctree /cfat/ctree/cmulberry /cfats /cfot /cfot/ctree/cmulberry /cjitty /cjittyfat
  #   /cgag /cgig /cgigbark /cgog /cgogbark /cgog/cbark /cgug /cgugbarking /ckuppy
  #   /ch /chat /chat/ctree /chat/ctree/cmulberry /chats /chot /chot/ctree/cmulberry /citty /cittyhat
  @uris = %w(
    /
    /dag /dig /digbark /dog /dogbark /dog/bark /dug /dugbarking /puppy
    /c /cat /cat/tree /cat/tree/mulberry /cats /cot /cot/tree/mulberry /kitty /kittycat
  )

  @requests = %w(
    /
    /dig
    /digging
    /dogging
    /dogbarking/
    /puppy/barking
    /c
    /cat
    /cat/shrub
    /cat/tree
    /cat/tree/maple
    /cat/tree/mulberry/tree
    /cat/tree/oak
    /cats/
    /cats/tree
    /cod
    /zebra
  )

  @classifier = URIClassifier.new
  @uris.each do |uri|
    @classifier.register(uri, 1)
  end

  puts "#{@uris.size} URIs / #{@requests.size * 10000} requests"

  Benchmark.bm do |x|
    x.report do
      # require 'ruby-prof'
      # profile = RubyProf.profile do
      10000.times do
        @requests.each do |request|
          @classifier.resolve(request)
        end
      end
      # end
      # File.open("profile.html", 'w') { |file| RubyProf::GraphHtmlPrinter.new(profile).print(file, 0) }
    end
  end
end
259
+ end
260
+
261
+ end
262
+