RubyGems - mongrel_experimental - Versions diffs - 1.1 - Mend

mongrel_experimental 1.1

Files changed (21) hide show

data.tar.gz.sig +0 -0
data/CHANGELOG +2 -0
data/COPYING +504 -0
data/LICENSE +55 -0
data/Manifest +17 -0
data/README +3 -0
data/ext/uri_classifier/ext_help.h +14 -0
data/ext/uri_classifier/extconf.rb +5 -0
data/ext/uri_classifier/tst.h +40 -0
data/ext/uri_classifier/tst_cleanup.c +23 -0
data/ext/uri_classifier/tst_delete.c +146 -0
data/ext/uri_classifier/tst_grow_node_free_list.c +38 -0
data/ext/uri_classifier/tst_init.c +41 -0
data/ext/uri_classifier/tst_insert.c +218 -0
data/ext/uri_classifier/tst_search.c +73 -0
data/ext/uri_classifier/uri_classifier.c +216 -0
data/lib/mongrel_experimental.rb +3 -0
data/mongrel_experimental.gemspec +44 -0
data/test/test_uriclassifier.rb +262 -0
metadata +122 -0
metadata.gz.sig +0 -0

data/ext/uri_classifier/tst_search.c ADDED

@@ -0,0 +1,73 @@
+#include "tst.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+/**
+ * Looks up key in the ternary search trie.
+ *
+ * key        NUL-terminated byte string to look up; must not be NULL.
+ * tst        trie to search; must not be NULL.
+ * option     when TST_LONGEST_MATCH is set, a failed exact lookup falls back
+ *            to the last terminator node passed on the way down, i.e. the
+ *            longest stored key that is a prefix of key.
+ * match_len  optional (may be NULL); receives the number of key bytes covered
+ *            by the returned match, or 0 when NULL is returned.
+ *
+ * Returns the pointer held in the middle link of the matching terminator
+ * node, or NULL when there is no match.
+ */
+void *tst_search(const unsigned char *key, struct tst *tst, int option,
+                 unsigned int *match_len)
+{
+    struct node *current_node;
+    struct node *longest_match = NULL;
+    unsigned int longest_match_len = 0;
+    int key_index;
+
+    assert(key != NULL && "key can't be NULL");
+    assert(tst != NULL && "tst can't be NULL");
+
+    /* Reset the out-parameter before any early return so that a miss never
+     * leaves a stale value in the caller's variable. */
+    if (match_len)
+        *match_len = 0;
+
+    if (key[0] == 0)
+        return NULL;
+    if (tst->head[(int) key[0]] == NULL)
+        return NULL;
+
+    /* The first key byte selects the head bucket; the walk starts at byte 1. */
+    current_node = tst->head[(int) key[0]];
+    key_index = 1;
+
+    while (current_node != NULL) {
+        if (key[key_index] == current_node->value) {
+            if (current_node->value == 0) {
+                /* Key and stored string end together: exact match. */
+                if (match_len)
+                    *match_len = key_index;
+                return current_node->middle;
+            }
+            /* Byte matched: consume it and descend. */
+            current_node = current_node->middle;
+            key_index++;
+        } else if (current_node->value == 0) {
+            /* A stored key ends here while the search key continues. */
+            if (option & TST_LONGEST_MATCH) {
+                longest_match = current_node->middle;
+                longest_match_len = key_index;
+            }
+            /* Terminator nodes branch on a fixed pivot of 64 instead of their
+             * own value. NOTE(review): presumably mirrors the placement rule
+             * used by tst_insert -- confirm before changing. */
+            current_node = (key[key_index] < 64) ? current_node->left
+                                                 : current_node->right;
+        } else {
+            current_node = (key[key_index] < current_node->value)
+                               ? current_node->left
+                               : current_node->right;
+        }
+    }
+
+    /* No exact match: report the longest prefix seen (NULL / 0 if none). */
+    if (match_len)
+        *match_len = longest_match_len;
+    return longest_match;
+}

data/ext/uri_classifier/uri_classifier.c ADDED

@@ -0,0 +1,216 @@
+/**
+ * Copyright (c) 2005 Zed A. Shaw
+ * You can redistribute it and/or modify it under the same terms as Ruby.
+ */
+#include "ruby.h"
+#include "ext_help.h"
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include "tst.h"
+/* Ruby module Mongrel; assigned in Init_uri_classifier. */
+static VALUE mMongrel;
+/* Ruby class Mongrel::URIClassifier; assigned in Init_uri_classifier. */
+static VALUE cURIClassifier;
+/* ID of the @handler_map instance variable; expands to an rb_intern call at every use. */
+#define id_handler_map rb_intern("@handler_map")
+/* Argument handed to tst_init when a classifier's trie is created.
+ * NOTE(review): presumably the node pool growth increment -- confirm in tst_init.c. */
+#define TRIE_INCREASE 30
+/**
+ * Free function attached to wrapped classifier objects: releases the trie
+ * passed in as data. A NULL pointer is ignored.
+ */
+void URIClassifier_free(void *data)
+{
+  TRACE();
+  if(data == NULL) {
+    return;
+  }
+  tst_cleanup((struct tst *)data);
+}
+/**
+ * Allocation function for URIClassifier: creates the backing trie and wraps
+ * it in a Ruby data object whose free function is URIClassifier_free.
+ *
+ * Raises NoMemoryError when tst_init returns NULL.
+ */
+VALUE URIClassifier_alloc(VALUE klass)
+{
+  struct tst *tst = tst_init(TRIE_INCREASE);
+  TRACE();
+  /* An assert() is compiled out under NDEBUG and would let a NULL trie be
+   * wrapped, so the failure is reported as a Ruby exception instead.
+   * NOTE(review): a NULL result is presumed to mean allocation failure. */
+  if(tst == NULL) {
+    rb_raise(rb_eNoMemError, "failed to initialize trie structure");
+  }
+  return Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
+}
+/**
+ * call-seq:
+ *    URIClassifier.new -> URIClassifier
+ *
+ * Creates a new URIClassifier that associates URI sequences with objects.
+ * Any string sequence and any objects will work, but it is mostly used
+ * with URIs.
+ *
+ * It uses TST from http://www.octavian.org/cs/software.html to build a ternary
+ * search trie holding all of the URIs.  The trie is used to do an initial search
+ * for a URI prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO
+ * portions.  Most of the time it will actually do two searches in order to find
+ * the right handler for the registered prefix portion.
+ *
+ */
+VALUE URIClassifier_init(VALUE self)
+{
+  /* handlers are also kept in a Ruby hash so the GC does not collect them */
+  VALUE handler_map = rb_hash_new();
+  rb_ivar_set(self, id_handler_map, handler_map);
+  return self;
+}
+/**
+ * call-seq:
+ *    uc.register("/someuri", SampleHandler.new) -> nil
+ *
+ * Registers the SampleHandler (one for all requests) under "/someuri".
+ * When URIClassifier::resolve is called with "/someuri" it returns
+ * SampleHandler immediately.  When called with "/someuri/iwant" it also
+ * returns SampleHandler immediately, with no additional searches, but the
+ * path info it returns is "/iwant".
+ *
+ * The class can be reused to register nearly anything and resolve it
+ * quickly.  This could be used for caching, fast mapping, etc.
+ * The downside is that it uses much more memory than a Hash, but it can be
+ * a lot faster.  Its main advantage is that it works on prefixes, which
+ * is hard to get right with a Hash.
+ *
+ * Raises StandardError when the URI is already registered, when the URI is
+ * empty, or when the trie reports an error while inserting.
+ */
+VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
+{
+  struct tst *tst = NULL;
+  void *existing = NULL;
+  unsigned char *key = NULL;
+  int status = 0;
+
+  DATA_GET(self, struct tst, tst);
+  key = (unsigned char *)StringValueCStr(uri);
+  status = tst_insert(key, (void *)handler , tst, 0, &existing);
+
+  /* rb_raise does not return, so the checks can stand on their own */
+  if(status == TST_DUPLICATE_KEY) {
+    rb_raise(rb_eStandardError, "Handler already registered with that name");
+  }
+  if(status == TST_ERROR) {
+    rb_raise(rb_eStandardError, "Memory error registering handler");
+  }
+  if(status == TST_NULL_KEY) {
+    rb_raise(rb_eStandardError, "URI was empty");
+  }
+
+  /* mirror the registration in the Ruby-side hash */
+  rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
+  return Qnil;
+}
+/**
+ * call-seq:
+ *    uc.unregister("/someuri")
+ *
+ * Removes this URI and its handler from the trie.  Returns the handler that
+ * was removed, or nil when the trie had nothing to delete for the URI.
+ */
+VALUE URIClassifier_unregister(VALUE self, VALUE uri)
+{
+  struct tst *tst = NULL;
+  void *removed = NULL;
+
+  DATA_GET(self, struct tst, tst);
+  removed = tst_delete((unsigned char *)StringValueCStr(uri), tst);
+  if(removed == NULL) {
+    return Qnil;
+  }
+  /* drop the Ruby-side entry as well */
+  rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
+  return (VALUE)removed;
+}
+/**
+ * call-seq:
+ *    uc.resolve("/someuri") -> "/someuri", "", handler
+ *    uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
+ *    uc.resolve("/notfound/orhere") -> nil, nil, nil
+ *    uc.resolve("/") -> "/", "/", handler  # if uc.register("/", handler)
+ *    uc.resolve("/path/from/root") -> "/", "/path/from/root", handler  # if uc.register("/", handler)
+ *
+ * Resolves the URI against the registered prefixes, matching either the whole
+ * URI or its longest registered prefix.  The result is a three element array:
+ * the matched prefix (script_name), the remainder of the URI (path_info), and
+ * the registered handler (usually an HttpHandler).  When nothing matches, all
+ * three elements are nil.
+ *
+ * Because the resolver uses a trie you can register a handler at *any* character
+ * in the URI and it will be used as long as it is the longest prefix.  So, if you
+ * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then
+ * a search for "/something/like" would give you #1.  A search for
+ * "/something/like/that/too" would give you #2.
+ *
+ * This is very powerful since it means you can also attach handlers to parts of the ;
+ * (semi-colon) separated path params, any part of the path, unusual characters,
+ * anything really.  It is also efficient: a lookup only takes as long as the URI
+ * has characters.
+ *
+ * A slight modification to the CGI 1.2 standard is made for handlers registered to "/".
+ * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
+ * about a script that handles the root.  To make this work, the resolver detects that
+ * the matched handler is at "/", returns that for script_name, and returns the full
+ * URI as path_info.
+ *
+ * It expects strings with no embedded '\0' characters.  Don't try other string-like stuff yet.
+ */
+VALUE URIClassifier_resolve(VALUE self, VALUE uri)
+{
+  void *handler = NULL;
+  /* unsigned to match the unsigned int * out-parameter of tst_search */
+  unsigned int pref_len = 0;
+  struct tst *tst = NULL;
+  VALUE result;
+  unsigned char *uri_str = NULL;
+  DATA_GET(self, struct tst, tst);
+  uri_str = (unsigned char *)StringValueCStr(uri);
+  handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
+  /* setup for multiple return values */
+  result = rb_ary_new();
+  if(handler) {
+    /* script_name is the matched prefix */
+    rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
+    /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
+    if(pref_len == 1 && uri_str[0] == '/') {
+      /* matches the root URI so we have to use the whole URI as the path_info */
+      rb_ary_push(result, uri);
+    } else {
+      /* matches a script so path_info is whatever follows the prefix */
+      rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
+    }
+    rb_ary_push(result, (VALUE)handler);
+  } else {
+    /* not found so push back nothing */
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
+  }
+  return result;
+}
+/**
+ * Returns the keys of the internal handler hash, i.e. the URIs passed to
+ * register that have not been unregistered.
+ */
+VALUE URIClassifier_uris(VALUE self) {
+  VALUE handler_map = rb_ivar_get(self, id_handler_map);
+  return rb_funcall(handler_map, rb_intern("keys"), 0);
+}
+/**
+ * Extension entry point: defines Mongrel::URIClassifier and binds its
+ * allocator and instance methods to the C functions in this file.
+ */
+void Init_uri_classifier()
+{
+  VALUE mod = rb_define_module("Mongrel");
+  VALUE klass = rb_define_class_under(mod, "URIClassifier", rb_cObject);
+
+  mMongrel = mod;
+  cURIClassifier = klass;
+
+  rb_define_alloc_func(klass, URIClassifier_alloc);
+  rb_define_method(klass, "initialize", URIClassifier_init, 0);
+  rb_define_method(klass, "register", URIClassifier_register, 2);
+  rb_define_method(klass, "unregister", URIClassifier_unregister, 1);
+  rb_define_method(klass, "resolve", URIClassifier_resolve, 1);
+  rb_define_method(klass, "uris", URIClassifier_uris, 0);
+}

data/lib/mongrel_experimental.rb ADDED

@@ -0,0 +1,3 @@
+require 'uri_classifier'
+STDERR.puts "** Mongrel_experimental loaded"

data/mongrel_experimental.gemspec ADDED

@@ -0,0 +1,44 @@
+# Gem::Specification for Mongrel_experimental-1.1
+# Originally generated by Echoe
+# Gem metadata for mongrel_experimental 1.1.
+Gem::Specification.new do |s|
+  s.name = %q{mongrel_experimental}
+  s.version = "1.1"
+  # Guarded with respond_to? so the assignment is skipped where the setter does not exist.
+  s.specification_version = 2 if s.respond_to? :specification_version=
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["The Mongrel Team"]
+  s.date = %q{2007-11-01}
+  s.description = %q{Backports and experimental features for Mongrel.}
+  s.email = %q{}
+  # The uri_classifier C extension is built through its extconf.rb.
+  s.extensions = ["ext/uri_classifier/extconf.rb"]
+  s.files = ["CHANGELOG", "COPYING", "ext/uri_classifier/ext_help.h", "ext/uri_classifier/extconf.rb", "ext/uri_classifier/tst.h", "ext/uri_classifier/tst_cleanup.c", "ext/uri_classifier/tst_delete.c", "ext/uri_classifier/tst_grow_node_free_list.c", "ext/uri_classifier/tst_init.c", "ext/uri_classifier/tst_insert.c", "ext/uri_classifier/tst_search.c", "ext/uri_classifier/uri_classifier.c", "lib/mongrel_experimental.rb", "LICENSE", "Manifest", "README", "test/test_uriclassifier.rb", "mongrel_experimental.gemspec"]
+  s.has_rdoc = true
+  s.homepage = %q{}
+  s.require_paths = ["lib", "ext"]
+  s.rubyforge_project = %q{mongrel}
+  s.rubygems_version = %q{0.9.4.6}
+  s.summary = %q{Backports and experimental features for Mongrel.}
+  s.test_files = ["test/test_uriclassifier.rb"]
+  # Pinned to exactly mongrel 1.1.
+  s.add_dependency(%q<mongrel>, ["= 1.1"])
+end
+# # Original Rakefile source (requires the Echoe gem):
+#
+#
+# require 'echoe'
+#
+# Echoe.new("mongrel_experimental") do |p|
+#   p.summary = "Backports and experimental features for Mongrel."
+#   p.project = "mongrel"
+#   p.author="The Mongrel Team"
+#   p.dependencies = ['mongrel =1.1']
+#
+#   p.need_tar_gz = false
+#   p.need_tgz = true
+#   p.require_signed = true
+# end

data/test/test_uriclassifier.rb ADDED

@@ -0,0 +1,262 @@
+# Copyright (c) 2005 Zed A. Shaw
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+# Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html
+# for more information.
+require 'lib/uri_classifier'
+require 'test/unit'
+include Mongrel
+class URIClassifierTest < Test::Unit::TestCase
+  def test_uri_finding
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    script_name, path_info, value = uri_classifier.resolve("/test")
+    assert_equal 1, value
+    assert_equal "/test", script_name
+  end
+  def test_root_handler_only
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/", 1)
+    script_name, path_info, value = uri_classifier.resolve("/test")
+    assert_equal 1, value
+    assert_equal "/", script_name
+    assert_equal "/test", path_info
+  end
+  def test_uri_prefix_ops
+    test = "/pre/fix/test"
+    prefix = "/pre"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(prefix,1)
+    script_name, path_info, value = uri_classifier.resolve(prefix)
+    script_name, path_info, value = uri_classifier.resolve(test)
+    assert_equal 1, value
+    assert_equal prefix, script_name
+    assert_equal test[script_name.length .. -1], path_info
+    assert uri_classifier.inspect
+    assert_equal prefix, uri_classifier.uris[0]
+  end
+  def test_not_finding
+    test = "/cant/find/me"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(test, 1)
+    script_name, path_info, value = uri_classifier.resolve("/nope/not/here")
+    assert_nil script_name
+    assert_nil path_info
+    assert_nil value
+  end
+  def test_exceptions
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    failed = false
+    begin
+      uri_classifier.register("/test", 1)
+    rescue => e
+      failed = true
+    end
+    assert failed
+    failed = false
+    begin
+      uri_classifier.register("", 1)
+    rescue => e
+      failed = true
+    end
+    assert failed
+  end
+  def test_register_unregister
+    uri_classifier = URIClassifier.new
+    100.times do
+      uri_classifier.register("/stuff", 1)
+      value = uri_classifier.unregister("/stuff")
+      assert_equal 1, value
+    end
+    uri_classifier.register("/things",1)
+    script_name, path_info, value = uri_classifier.resolve("/things")
+    assert_equal 1, value
+    uri_classifier.unregister("/things")
+    script_name, path_info, value = uri_classifier.resolve("/things")
+    assert_nil value
+  end
+  def test_uri_branching
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    uri_classifier.register("/test/this",2)
+    script_name, path_info, handler = uri_classifier.resolve("/test")
+    script_name, path_info, handler = uri_classifier.resolve("/test/that")
+    assert_equal "/test", script_name, "failed to properly find script off branch portion of uri"
+    assert_equal "/that", path_info
+    assert_equal 1, handler, "wrong result for branching uri"
+  end
+  def test_all_prefixing
+    tests = ["/test","/test/that","/test/this"]
+    uri = "/test/this/that"
+    uri_classifier = URIClassifier.new
+    current = ""
+    uri.each_byte do |c|
+      current << c.chr
+      uri_classifier.register(current, c)
+    end
+    # Try to resolve everything with no asserts as a fuzzing
+    tests.each do |prefix|
+      current = ""
+      prefix.each_byte do |c|
+        current << c.chr
+        script_name, path_info, handler = uri_classifier.resolve(current)
+        assert script_name
+        assert path_info
+        assert handler
+      end
+    end
+    # Assert that we find stuff
+    tests.each do |t|
+      script_name, path_info, handler = uri_classifier.resolve(t)
+      assert handler
+    end
+    # Assert we don't find stuff
+    script_name, path_info, handler = uri_classifier.resolve("chicken")
+    assert_nil handler
+    assert_nil script_name
+    assert_nil path_info
+  end
+  # Verifies that a root mounted ("/") handler resolves
+  # such that path info matches the original URI.
+  # This is needed to accommodate real usage of handlers.
+  def test_root_mounted
+    uri_classifier = URIClassifier.new
+    root = "/"
+    path = "/this/is/a/test"
+    uri_classifier.register(root, 1)
+    script_name, path_info, handler = uri_classifier.resolve(root)
+    assert_equal 1, handler
+    assert_equal root, path_info
+    assert_equal root, script_name
+    script_name, path_info, handler = uri_classifier.resolve(path)
+    assert_equal path, path_info
+    assert_equal root, script_name
+    assert_equal 1, handler
+  end
+  # Verifies that a root mounted ("/") handler
+  # is the default point, doesn't matter the order we use
+  # to register the URIs
+  def test_classifier_order
+    tests = ["/before", "/way_past"]
+    root = "/"
+    path = "/path"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(path, 1)
+    uri_classifier.register(root, 2)
+    tests.each do |uri|
+      script_name, path_info, handler = uri_classifier.resolve(uri)
+      assert_equal root, script_name, "#{uri} did not resolve to #{root}"
+      assert_equal uri, path_info
+      assert_equal 2, handler
+    end
+  end
+  if ENV['BENCHMARK']
+    # Eventually we will have a suite of benchmarks instead of lamely installing a test
+    def test_benchmark
+      # This URI set should favor a TST. Both versions increase linearly until you hit 14
+      # URIs, then the TST flattens out.
+      @uris = %w(
+        /
+        /dag /dig /digbark /dog /dogbark /dog/bark /dug /dugbarking /puppy
+        /c /cat /cat/tree /cat/tree/mulberry /cats /cot /cot/tree/mulberry /kitty /kittycat
+#        /eag /eig /eigbark /eog /eogbark /eog/bark /eug /eugbarking /iuppy
+#        /f /fat /fat/tree /fat/tree/mulberry /fats /fot /fot/tree/mulberry /jitty /jittyfat
+#        /gag /gig /gigbark /gog /gogbark /gog/bark /gug /gugbarking /kuppy
+#        /h /hat /hat/tree /hat/tree/mulberry /hats /hot /hot/tree/mulberry /litty /littyhat
+#        /ceag /ceig /ceigbark /ceog /ceogbark /ceog/cbark /ceug /ceugbarking /ciuppy
+#        /cf /cfat /cfat/ctree /cfat/ctree/cmulberry /cfats /cfot /cfot/ctree/cmulberry /cjitty /cjittyfat
+#        /cgag /cgig /cgigbark /cgog /cgogbark /cgog/cbark /cgug /cgugbarking /ckuppy
+#        /ch /chat /chat/ctree /chat/ctree/cmulberry /chats /chot /chot/ctree/cmulberry /citty /cittyhat
+      )
+      @requests = %w(
+        /
+        /dig
+        /digging
+        /dogging
+        /dogbarking/
+        /puppy/barking
+        /c
+        /cat
+        /cat/shrub
+        /cat/tree
+        /cat/tree/maple
+        /cat/tree/mulberry/tree
+        /cat/tree/oak
+        /cats/
+        /cats/tree
+        /cod
+        /zebra
+      )
+      @classifier = URIClassifier.new
+      @uris.each do |uri|
+        @classifier.register(uri, 1)
+      end
+      puts "#{@uris.size} URIs / #{@requests.size * 10000} requests"
+      Benchmark.bm do |x|
+        x.report do
+  #        require 'ruby-prof'
+  #        profile = RubyProf.profile do
+            10000.times do
+              @requests.each do |request|
+                @classifier.resolve(request)
+              end
+            end
+  #        end
+  #        File.open("profile.html", 'w') { |file| RubyProf::GraphHtmlPrinter.new(profile).print(file, 0) }
+        end
+      end
+    end
+  end
+end