mongrel_experimental 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+
2
+ #include "tst.h"
3
+ #include <stdio.h>
4
+ #include <stdlib.h>
5
+ #include <assert.h>
6
+
7
+
8
+ /**
9
+  * Searches tst for key and returns the data pointer stored with it.
10
+  *
11
+  * key       - NUL-terminated key to look up; must not be NULL.
12
+  * tst       - trie to search; must not be NULL.
13
+  * option    - with TST_LONGEST_MATCH set, a failed exact search falls back
14
+  *             to the longest stored key that is a prefix of key.
15
+  * match_len - optional (may be NULL); receives the number of key bytes
16
+  *             matched, and is set to 0 on every path that finds nothing.
17
+  *
18
+  * Returns the stored data pointer, or NULL when nothing matches.
19
+  */
20
+ void *tst_search(const unsigned char *key, struct tst *tst, int option,
21
+                  unsigned int *match_len)
22
+ {
23
+    struct node *current_node;
24
+    struct node *longest_match = NULL;
25
+    unsigned int longest_match_len = 0;
26
+    int key_index = 1;
27
+
28
+    assert(key != NULL && "key can't be NULL");
29
+    assert(tst != NULL && "tst can't be NULL");
30
+
31
+    /* Clear the out-parameter before any early return so callers never
32
+     * read a stale or uninitialised length. */
33
+    if (match_len)
34
+       *match_len = 0;
35
+
36
+    if (key[0] == 0)
37
+       return NULL;
38
+
39
+    /* head[] is indexed by the first key byte, so the walk starts at byte 1. */
40
+    current_node = tst->head[key[0]];
41
+
42
+    while (current_node != NULL) {
43
+       if (key[key_index] == current_node->value) {
44
+          if (current_node->value == 0) {
45
+             /* Exact match: an end-of-key node keeps its data in middle. */
46
+             if (match_len)
47
+                *match_len = key_index;
48
+             return current_node->middle;
49
+          }
50
+          current_node = current_node->middle;
51
+          key_index++;
52
+       } else if (current_node->value == 0) {
53
+          /* A stored key ends here, so remember it as a prefix candidate. */
54
+          if (option & TST_LONGEST_MATCH) {
55
+             longest_match = current_node->middle;
56
+             longest_match_len = key_index;
57
+          }
58
+          /* End-of-key nodes are ordered as if their value were 64. */
59
+          current_node = (key[key_index] < 64) ? current_node->left
60
+                                               : current_node->right;
61
+       } else {
62
+          current_node = (key[key_index] < current_node->value)
63
+                            ? current_node->left
64
+                            : current_node->right;
65
+       }
66
+    }
67
+
68
+    /* No exact match: report the longest prefix seen (NULL/0 if none). */
69
+    if (match_len)
70
+       *match_len = longest_match_len;
71
+
72
+    return longest_match;
73
+ }
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Copyright (c) 2005 Zed A. Shaw
3
+ * You can redistribute it and/or modify it under the same terms as Ruby.
4
+ */
5
+ #include "ruby.h"
6
+ #include "ext_help.h"
7
+ #include <assert.h>
8
+ #include <string.h>
9
+ #include <ctype.h>
10
+ #include "tst.h"
11
+
12
+ static VALUE mMongrel;
13
+ static VALUE cURIClassifier;
14
+
15
+ #define id_handler_map rb_intern("@handler_map")
16
+
17
+ #define TRIE_INCREASE 30
18
+
19
+ /* Free callback given to Data_Wrap_Struct: releases the wrapped trie, if any. */
20
+ void URIClassifier_free(void *data)
21
+ {
22
+   struct tst *trie = (struct tst *)data;
23
+
24
+   TRACE();
25
+   if(trie != NULL) tst_cleanup(trie);
26
+ }
27
+
28
+ /* Allocator for URIClassifier: creates the trie and wraps it in a Ruby data object. */
29
+ VALUE URIClassifier_alloc(VALUE klass)
30
+ {
31
+   struct tst *tst = tst_init(TRIE_INCREASE);
32
+   TRACE();
33
+   /* Raise rather than assert(): assert() is compiled out under NDEBUG. */
34
+   if(tst == NULL) {
35
+     rb_raise(rb_eStandardError, "failed to initialize trie structure");
36
+   }
37
+   return Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
38
+ }
39
+
40
+ /**
41
+  * call-seq:
42
+  *    URIClassifier.new -> URIClassifier
43
+  *
44
+  * Initializes a new URIClassifier object that you can use to associate URI
45
+  * sequences with objects.  Any string sequence and any objects will work,
46
+  * but it is mostly used with URIs.
47
+  *
48
+  * It uses TST from http://www.octavian.org/cs/software.html to build a
49
+  * ternary search trie holding all of the URIs.  The trie is used for an
50
+  * initial search for a URI prefix, and then to break the URI into
51
+  * SCRIPT_NAME and PATH_INFO portions.  Most of the time it will actually do
52
+  * two searches in order to find the right handler for the registered prefix
53
+  * portion.
54
+  */
55
+ VALUE URIClassifier_init(VALUE self)
56
+ {
57
+   /* An internal hash, stored in an instance variable, protects the
58
+    * registered objects from the GC. */
59
+   VALUE handler_map = rb_hash_new();
60
+
61
+   rb_ivar_set(self, id_handler_map, handler_map);
62
+
63
+   return self;
64
+ }
65
+
66
+
67
+ /**
68
+  * call-seq:
69
+  *    uc.register("/someuri", SampleHandler.new) -> nil
70
+  *
71
+  * Registers the SampleHandler (one for all requests) with the "/someuri".
72
+  * When URIClassifier::resolve is called with "/someuri" it'll return
73
+  * SampleHandler immediately.  When called with "/someuri/iwant" it'll also
74
+  * return SampleHandler immediately, with no additional searches, but it will
75
+  * return path info with "/iwant".
76
+  *
77
+  * You actually can reuse this class to register nearly anything and
78
+  * quickly resolve it.  This could be used for caching, fast mapping, etc.
79
+  * The downside is it uses much more memory than a Hash, but it can be
80
+  * a lot faster.  Its main advantage is that it works on prefixes, which
81
+  * is damn hard to get right with a Hash.
82
+  */
83
+ VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
84
+ {
85
+   struct tst *tst = NULL;
86
+   void *exist_ptr = NULL;
87
+   const char *failure = NULL;
88
+
89
+   DATA_GET(self, struct tst, tst);
90
+   /* Every failure code from tst_insert maps onto a StandardError message. */
91
+   switch(tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler, tst, 0, &exist_ptr)) {
92
+     case TST_DUPLICATE_KEY: failure = "Handler already registered with that name"; break;
93
+     case TST_ERROR:         failure = "Memory error registering handler"; break;
94
+     case TST_NULL_KEY:      failure = "URI was empty"; break;
95
+     default:                break;
96
+   }
97
+   if(failure != NULL) {
98
+     rb_raise(rb_eStandardError, "%s", failure);
99
+   }
100
+   /* Mirror the pair into the handler map hash kept on the object. */
101
+   rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
102
+   return Qnil;
103
+ }
104
+
105
+
106
+ /**
107
+  * call-seq:
108
+  *    uc.unregister("/someuri") -> handler or nil
109
+  *
110
+  * Yep, just removes this uri and its handler from the trie.  Returns the
111
+  * handler that was removed, or nil when the trie had nothing to delete.
112
+  */
113
+ VALUE URIClassifier_unregister(VALUE self, VALUE uri)
114
+ {
115
+   struct tst *tst = NULL;
116
+   void *removed = NULL;
117
+
118
+   DATA_GET(self, struct tst, tst);
119
+   removed = tst_delete((unsigned char *)StringValueCStr(uri), tst);
120
+
121
+   if(removed == NULL) {
122
+     return Qnil;
123
+   }
124
+   /* Drop the matching entry from the handler map hash as well. */
125
+   rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
126
+   return (VALUE)removed;
127
+ }
128
+
129
+
130
+ /**
131
+  * call-seq:
132
+  *    uc.resolve("/someuri") -> "/someuri", "", handler
133
+  *    uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
134
+  *    uc.resolve("/notfound/orhere") -> nil, nil, nil
135
+  *    uc.resolve("/") -> "/", "/", handler  # if uc.register("/", handler)
136
+  *    uc.resolve("/path/from/root") -> "/", "/path/from/root", handler  # if uc.register("/", handler)
137
+  *
138
+  * Attempts to resolve either the whole URI or its longest registered prefix,
139
+  * returning the prefix (as script_name), the remaining path (as path_info),
140
+  * and the registered handler (usually an HttpHandler).  If no handler is
141
+  * registered at any prefix of the URI then it returns nil, nil, nil.
142
+  *
143
+  * Because the resolver uses a trie you are able to register a handler at *any*
144
+  * character in the URI and it will be handled as long as it's the longest
145
+  * prefix.  So, if you registered handler #1 at "/something/lik", and #2 at
146
+  * "/something/like/that", then a search for "/something/like" would give you
147
+  * #1.  A search for "/something/like/that/too" would give you #2.
148
+  *
149
+  * This is very powerful since it means you can also attach handlers to parts
150
+  * of the ; (semi-colon) separated path params, any part of the path, use odd
151
+  * chars, anything really.  It is also very efficient, since a lookup only
152
+  * takes as long as the URI has characters.
153
+  *
154
+  * A slight modification to the CGI 1.2 standard is given for handlers registered to "/".
155
+  * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
156
+  * about a script that handles the root.  To make this work, the resolver will detect that
157
+  * the requested handler is at "/", and return that for script_name, and then simply return
158
+  * the full URI back as path_info.
159
+  *
160
+  * It expects strings with no embedded '\0' characters.  Don't try other string-like stuff yet.
161
+  */
162
+ VALUE URIClassifier_resolve(VALUE self, VALUE uri)
163
+ {
164
+   void *handler = NULL;
165
+   unsigned int pref_len = 0;  /* unsigned: tst_search() takes an unsigned int *match_len */
166
+   struct tst *tst = NULL;
167
+   VALUE result;
168
+   unsigned char *uri_str = NULL;
169
+
170
+   DATA_GET(self, struct tst, tst);
171
+   uri_str = (unsigned char *)StringValueCStr(uri);
172
+
173
+   handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
174
+
175
+   /* setup for multiple return values */
176
+   result = rb_ary_new();
177
+
178
+   if(handler) {
179
+     rb_ary_push(result, rb_str_substr(uri, 0, pref_len));
180
+     /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
181
+     if(pref_len == 1 && uri_str[0] == '/') {
182
+       /* matches the root URI so we have to use the whole URI as the path_info */
183
+       rb_ary_push(result, uri);
184
+     } else {
185
+       /* matches a script so process like normal */
186
+       rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
187
+     }
188
+
189
+     rb_ary_push(result, (VALUE)handler);
190
+   } else {
191
+     /* not found so push back nothing */
192
+     rb_ary_push(result, Qnil);
193
+     rb_ary_push(result, Qnil);
194
+     rb_ary_push(result, Qnil);
195
+   }
196
+
197
+   return result;
198
+ }
199
+
200
+ VALUE URIClassifier_uris(VALUE self) { /* Returns the result of calling "keys" on the object's @handler_map. */
201
+ return rb_funcall(rb_ivar_get(self, id_handler_map), rb_intern("keys"), 0);
202
+ }
203
+
204
+ void Init_uri_classifier()
205
+ {
206
+ /* Extension setup: defines the Mongrel module and the Mongrel::URIClassifier class. */
207
+ mMongrel = rb_define_module("Mongrel");
208
+ /* Register the allocator, then each method; the last argument is the method's argument count. */
209
+ cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject);
210
+ rb_define_alloc_func(cURIClassifier, URIClassifier_alloc);
211
+ rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0);
212
+ rb_define_method(cURIClassifier, "register", URIClassifier_register, 2);
213
+ rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1);
214
+ rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1);
215
+ rb_define_method(cURIClassifier, "uris", URIClassifier_uris, 0);
216
+ }
@@ -0,0 +1,3 @@
1
+ # Requires the uri_classifier library and prints a notice on STDERR once it is loaded.
2
+ require 'uri_classifier'
3
+ STDERR.puts "** Mongrel_experimental loaded"
@@ -0,0 +1,44 @@
1
+
2
+ # Gem::Specification for Mongrel_experimental-1.1
3
+ # Originally generated by Echoe
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = %q{mongrel_experimental}
7
+ s.version = "1.1"
8
+ # The next two attributes are only set when the spec object responds to their setters.
9
+ s.specification_version = 2 if s.respond_to? :specification_version=
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.authors = ["The Mongrel Team"]
13
+ s.date = %q{2007-11-01}
14
+ s.description = %q{Backports and experimental features for Mongrel.}
15
+ s.email = %q{}
16
+ s.extensions = ["ext/uri_classifier/extconf.rb"]
17
+ s.files = ["CHANGELOG", "COPYING", "ext/uri_classifier/ext_help.h", "ext/uri_classifier/extconf.rb", "ext/uri_classifier/tst.h", "ext/uri_classifier/tst_cleanup.c", "ext/uri_classifier/tst_delete.c", "ext/uri_classifier/tst_grow_node_free_list.c", "ext/uri_classifier/tst_init.c", "ext/uri_classifier/tst_insert.c", "ext/uri_classifier/tst_search.c", "ext/uri_classifier/uri_classifier.c", "lib/mongrel_experimental.rb", "LICENSE", "Manifest", "README", "test/test_uriclassifier.rb", "mongrel_experimental.gemspec"]
18
+ s.has_rdoc = true
19
+ s.homepage = %q{}
20
+ s.require_paths = ["lib", "ext"]
21
+ s.rubyforge_project = %q{mongrel}
22
+ s.rubygems_version = %q{0.9.4.6}
23
+ s.summary = %q{Backports and experimental features for Mongrel.}
24
+ s.test_files = ["test/test_uriclassifier.rb"]
25
+ # Depends on exactly version 1.1 of the mongrel gem.
26
+ s.add_dependency(%q<mongrel>, ["= 1.1"])
27
+ end
28
+
29
+
30
+ # # Original Rakefile source (requires the Echoe gem):
31
+ #
32
+ #
33
+ # require 'echoe'
34
+ #
35
+ # Echoe.new("mongrel_experimental") do |p|
36
+ # p.summary = "Backports and experimental features for Mongrel."
37
+ # p.project = "mongrel"
38
+ # p.author="The Mongrel Team"
39
+ # p.dependencies = ['mongrel =1.1']
40
+ #
41
+ # p.need_tar_gz = false
42
+ # p.need_tgz = true
43
+ # p.require_signed = true
44
+ # end
@@ -0,0 +1,262 @@
1
+ # Copyright (c) 2005 Zed A. Shaw
2
+ # You can redistribute it and/or modify it under the same terms as Ruby.
3
+ #
4
+ # Additional work donated by contributors. See http://mongrel.rubyforge.org/attributions.html
5
+ # for more information.
6
+
7
+ require 'lib/uri_classifier'
8
+ require 'test/unit'
9
+
10
+ include Mongrel
11
+
12
+ class URIClassifierTest < Test::Unit::TestCase
13
+
14
+ def test_uri_finding
15
+ uri_classifier = URIClassifier.new
16
+ uri_classifier.register("/test", 1)
17
+
18
+ script_name, path_info, value = uri_classifier.resolve("/test")
19
+ assert_equal 1, value
20
+ assert_equal "/test", script_name
21
+ end
22
+
23
+ def test_root_handler_only
24
+ uri_classifier = URIClassifier.new
25
+ uri_classifier.register("/", 1)
26
+
27
+ script_name, path_info, value = uri_classifier.resolve("/test")
28
+ assert_equal 1, value
29
+ assert_equal "/", script_name
30
+ assert_equal "/test", path_info
31
+ end
32
+
33
+ def test_uri_prefix_ops
34
+ test = "/pre/fix/test"
35
+ prefix = "/pre"
36
+
37
+ uri_classifier = URIClassifier.new
38
+ uri_classifier.register(prefix,1)
39
+
40
+ script_name, path_info, value = uri_classifier.resolve(prefix)
41
+ script_name, path_info, value = uri_classifier.resolve(test)
42
+ assert_equal 1, value
43
+ assert_equal prefix, script_name
44
+ assert_equal test[script_name.length .. -1], path_info
45
+
46
+ assert uri_classifier.inspect
47
+ assert_equal prefix, uri_classifier.uris[0]
48
+ end
49
+
50
+ def test_not_finding
51
+ test = "/cant/find/me"
52
+ uri_classifier = URIClassifier.new
53
+ uri_classifier.register(test, 1)
54
+
55
+ script_name, path_info, value = uri_classifier.resolve("/nope/not/here")
56
+ assert_nil script_name
57
+ assert_nil path_info
58
+ assert_nil value
59
+ end
60
+
61
+ def test_exceptions
62
+ uri_classifier = URIClassifier.new
63
+
64
+ uri_classifier.register("/test", 1)
65
+
66
+ failed = false
67
+ begin
68
+ uri_classifier.register("/test", 1)
69
+ rescue => e
70
+ failed = true
71
+ end
72
+
73
+ assert failed
74
+
75
+ failed = false
76
+ begin
77
+ uri_classifier.register("", 1)
78
+ rescue => e
79
+ failed = true
80
+ end
81
+
82
+ assert failed
83
+ end
84
+
85
+
86
+ def test_register_unregister
87
+ uri_classifier = URIClassifier.new
88
+
89
+ 100.times do
90
+ uri_classifier.register("/stuff", 1)
91
+ value = uri_classifier.unregister("/stuff")
92
+ assert_equal 1, value
93
+ end
94
+
95
+ uri_classifier.register("/things",1)
96
+ script_name, path_info, value = uri_classifier.resolve("/things")
97
+ assert_equal 1, value
98
+
99
+ uri_classifier.unregister("/things")
100
+ script_name, path_info, value = uri_classifier.resolve("/things")
101
+ assert_nil value
102
+
103
+ end
104
+
105
+
106
+ def test_uri_branching
107
+ uri_classifier = URIClassifier.new
108
+ uri_classifier.register("/test", 1)
109
+ uri_classifier.register("/test/this",2)
110
+
111
+ script_name, path_info, handler = uri_classifier.resolve("/test")
112
+ script_name, path_info, handler = uri_classifier.resolve("/test/that")
113
+ assert_equal "/test", script_name, "failed to properly find script off branch portion of uri"
114
+ assert_equal "/that", path_info
115
+ assert_equal 1, handler, "wrong result for branching uri"
116
+ end
117
+
118
+ def test_all_prefixing
119
+ tests = ["/test","/test/that","/test/this"]
120
+ uri = "/test/this/that"
121
+ uri_classifier = URIClassifier.new
122
+
123
+ current = ""
124
+ uri.each_byte do |c|
125
+ current << c.chr
126
+ uri_classifier.register(current, c)
127
+ end
128
+
129
+
130
+ # Try to resolve everything with no asserts as a fuzzing
131
+ tests.each do |prefix|
132
+ current = ""
133
+ prefix.each_byte do |c|
134
+ current << c.chr
135
+ script_name, path_info, handler = uri_classifier.resolve(current)
136
+ assert script_name
137
+ assert path_info
138
+ assert handler
139
+ end
140
+ end
141
+
142
+ # Assert that we find stuff
143
+ tests.each do |t|
144
+ script_name, path_info, handler = uri_classifier.resolve(t)
145
+ assert handler
146
+ end
147
+
148
+ # Assert we don't find stuff
149
+ script_name, path_info, handler = uri_classifier.resolve("chicken")
150
+ assert_nil handler
151
+ assert_nil script_name
152
+ assert_nil path_info
153
+ end
154
+
155
+
156
+ # Verifies that a root mounted ("/") handler resolves
157
+ # such that path info matches the original URI.
158
+ # This is needed to accommodate real usage of handlers.
159
+ def test_root_mounted
160
+ uri_classifier = URIClassifier.new
161
+ root = "/"
162
+ path = "/this/is/a/test"
163
+
164
+ uri_classifier.register(root, 1)
165
+
166
+ script_name, path_info, handler = uri_classifier.resolve(root)
167
+ assert_equal 1, handler
168
+ assert_equal root, path_info
169
+ assert_equal root, script_name
170
+
171
+ script_name, path_info, handler = uri_classifier.resolve(path)
172
+ assert_equal path, path_info
173
+ assert_equal root, script_name
174
+ assert_equal 1, handler
175
+ end
176
+
177
+ # Verifies that a root mounted ("/") handler
178
+ # is the default point, doesn't matter the order we use
179
+ # to register the URIs
180
+ def test_classifier_order
181
+ tests = ["/before", "/way_past"]
182
+ root = "/"
183
+ path = "/path"
184
+
185
+ uri_classifier = URIClassifier.new
186
+ uri_classifier.register(path, 1)
187
+ uri_classifier.register(root, 2)
188
+
189
+ tests.each do |uri|
190
+ script_name, path_info, handler = uri_classifier.resolve(uri)
191
+ assert_equal root, script_name, "#{uri} did not resolve to #{root}"
192
+ assert_equal uri, path_info
193
+ assert_equal 2, handler
194
+ end
195
+ end
196
+
197
+ if ENV['BENCHMARK']
198
+ # Eventually we will have a suite of benchmarks instead of lamely installing a test
199
+
200
+ def test_benchmark
201
+
202
+ # This URI set should favor a TST. Both versions increase linearly until you hit 14
203
+ # URIs, then the TST flattens out.
204
+ @uris = %w(
205
+ /
206
+ /dag /dig /digbark /dog /dogbark /dog/bark /dug /dugbarking /puppy
207
+ /c /cat /cat/tree /cat/tree/mulberry /cats /cot /cot/tree/mulberry /kitty /kittycat
208
+ # /eag /eig /eigbark /eog /eogbark /eog/bark /eug /eugbarking /iuppy
209
+ # /f /fat /fat/tree /fat/tree/mulberry /fats /fot /fot/tree/mulberry /jitty /jittyfat
210
+ # /gag /gig /gigbark /gog /gogbark /gog/bark /gug /gugbarking /kuppy
211
+ # /h /hat /hat/tree /hat/tree/mulberry /hats /hot /hot/tree/mulberry /litty /littyhat
212
+ # /ceag /ceig /ceigbark /ceog /ceogbark /ceog/cbark /ceug /ceugbarking /ciuppy
213
+ # /cf /cfat /cfat/ctree /cfat/ctree/cmulberry /cfats /cfot /cfot/ctree/cmulberry /cjitty /cjittyfat
214
+ # /cgag /cgig /cgigbark /cgog /cgogbark /cgog/cbark /cgug /cgugbarking /ckuppy
215
+ # /ch /chat /chat/ctree /chat/ctree/cmulberry /chats /chot /chot/ctree/cmulberry /citty /cittyhat
216
+ )
217
+
218
+ @requests = %w(
219
+ /
220
+ /dig
221
+ /digging
222
+ /dogging
223
+ /dogbarking/
224
+ /puppy/barking
225
+ /c
226
+ /cat
227
+ /cat/shrub
228
+ /cat/tree
229
+ /cat/tree/maple
230
+ /cat/tree/mulberry/tree
231
+ /cat/tree/oak
232
+ /cats/
233
+ /cats/tree
234
+ /cod
235
+ /zebra
236
+ )
237
+
238
+ @classifier = URIClassifier.new
239
+ @uris.each do |uri|
240
+ @classifier.register(uri, 1)
241
+ end
242
+
243
+ puts "#{@uris.size} URIs / #{@requests.size * 10000} requests"
244
+
245
+ Benchmark.bm do |x|
246
+ x.report do
247
+ # require 'ruby-prof'
248
+ # profile = RubyProf.profile do
249
+ 10000.times do
250
+ @requests.each do |request|
251
+ @classifier.resolve(request)
252
+ end
253
+ end
254
+ # end
255
+ # File.open("profile.html", 'w') { |file| RubyProf::GraphHtmlPrinter.new(profile).print(file, 0) }
256
+ end
257
+ end
258
+ end
259
+ end
260
+
261
+ end
262
+