RubyGems - mongrel_experimental - Versions diffs - 1.1 - Mend

mongrel_experimental 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

data.tar.gz.sig +0 -0
data/CHANGELOG +2 -0
data/COPYING +504 -0
data/LICENSE +55 -0
data/Manifest +17 -0
data/README +3 -0
data/ext/uri_classifier/ext_help.h +14 -0
data/ext/uri_classifier/extconf.rb +5 -0
data/ext/uri_classifier/tst.h +40 -0
data/ext/uri_classifier/tst_cleanup.c +23 -0
data/ext/uri_classifier/tst_delete.c +146 -0
data/ext/uri_classifier/tst_grow_node_free_list.c +38 -0
data/ext/uri_classifier/tst_init.c +41 -0
data/ext/uri_classifier/tst_insert.c +218 -0
data/ext/uri_classifier/tst_search.c +73 -0
data/ext/uri_classifier/uri_classifier.c +216 -0
data/lib/mongrel_experimental.rb +3 -0
data/mongrel_experimental.gemspec +44 -0
data/test/test_uriclassifier.rb +262 -0
metadata +122 -0
metadata.gz.sig +0 -0

data/ext/uri_classifier/tst_search.c ADDED

@@ -0,0 +1,73 @@
+#include "tst.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+/**
+ * Looks up a key in the ternary search trie.
+ *
+ * key:       NUL-terminated byte string to look up; must not be NULL.
+ * tst:       trie to search; must not be NULL.
+ * option:    when TST_LONGEST_MATCH is set and the walk ends without an
+ *            exact match, the data of the last key-terminator node that
+ *            was passed on the way down is returned instead.
+ * match_len: optional out-parameter (may be NULL).  Receives the number of
+ *            key bytes covered by the returned match, or 0 when nothing
+ *            is found.
+ *
+ * Returns the data pointer stored with the matching key, or NULL when
+ * there is no match.
+ */
+void *tst_search(const unsigned char *key, struct tst *tst, int option,
+                 unsigned int *match_len)
+{
+    struct node *current_node;
+    struct node *longest_match = NULL;
+    unsigned int longest_match_len = 0;
+    int key_index;
+
+    assert(key != NULL && "key can't be NULL");
+    assert(tst != NULL && "tst can't be NULL");
+
+    /* Reset the out-parameter before any early return so callers never
+     * read a stale or uninitialised length on the "not found" paths. */
+    if (match_len) {
+        *match_len = 0;
+    }
+
+    if (key[0] == 0) {
+        return NULL;
+    }
+
+    /* NOTE(review): head[] is indexed by the raw first byte of the key;
+     * confirm in tst.h that the array is large enough for bytes >= 128. */
+    current_node = tst->head[(int) key[0]];
+    if (current_node == NULL) {
+        return NULL;
+    }
+
+    key_index = 1;
+    while (current_node != NULL) {
+        if (key[key_index] == current_node->value) {
+            if (current_node->value == 0) {
+                /* Both the key and the stored string end here: exact match.
+                 * A terminator node keeps its data in the middle pointer. */
+                if (match_len) {
+                    *match_len = key_index;
+                }
+                return current_node->middle;
+            }
+            /* Character matched: descend and advance in the key. */
+            current_node = current_node->middle;
+            key_index++;
+        } else if (current_node->value == 0) {
+            /* A stored key ends here but the search key continues, so this
+             * node is a candidate for the longest-prefix result. */
+            if (option & TST_LONGEST_MATCH) {
+                longest_match = current_node->middle;
+                longest_match_len = key_index;
+            }
+            /* NOTE(review): terminator nodes are ordered as if their value
+             * were 64 -- presumably this mirrors tst_insert; confirm. */
+            current_node = (key[key_index] < 64) ? current_node->left
+                                                 : current_node->right;
+        } else {
+            current_node = (key[key_index] < current_node->value)
+                               ? current_node->left
+                               : current_node->right;
+        }
+    }
+
+    /* No exact match: report the longest prefix seen (NULL / 0 if none). */
+    if (match_len) {
+        *match_len = longest_match_len;
+    }
+    return longest_match;
+}

data/ext/uri_classifier/uri_classifier.c ADDED

@@ -0,0 +1,216 @@
+/**
+ * Copyright (c) 2005 Zed A. Shaw
+ * You can redistribute it and/or modify it under the same terms as Ruby.
+ */
+#include "ruby.h"
+#include "ext_help.h"
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include "tst.h"
+static VALUE mMongrel;
+static VALUE cURIClassifier;
+#define id_handler_map rb_intern("@handler_map")
+#define TRIE_INCREASE 30
+/* Free hook handed to Data_Wrap_Struct: releases the wrapped trie, if any. */
+void URIClassifier_free(void *data)
+{
+  struct tst *trie = (struct tst *)data;
+  TRACE();
+  if(trie == NULL) {
+    return;
+  }
+  tst_cleanup(trie);
+}
+/*
+ * Allocator for Mongrel::URIClassifier: creates a fresh trie and wraps it in
+ * a Ruby object whose free hook is URIClassifier_free.
+ *
+ * Raises NoMemoryError when the trie cannot be created.  An assert() is not
+ * enough here: it is compiled out under NDEBUG, which would leave a NULL
+ * trie wrapped inside the object.
+ */
+VALUE URIClassifier_alloc(VALUE klass)
+{
+  struct tst *tst = tst_init(TRIE_INCREASE);
+  TRACE();
+  if(tst == NULL) {
+    rb_raise(rb_eNoMemError, "failed to initialize trie structure");
+  }
+  return Data_Wrap_Struct(klass, NULL, URIClassifier_free, tst);
+}
+/**
+ * call-seq:
+ *    URIClassifier.new -> URIClassifier
+ *
+ * Initializes a new URIClassifier object that you can use to associate URI sequences
+ * with objects.  You can actually use it with any string sequence and any objects,
+ * but it's mostly used with URIs.
+ *
+ * It uses TST from http://www.octavian.org/cs/software.html to build a ternary search
+ * trie to hold all of the URIs.  It uses this to do an initial search for a URI
+ * prefix, and then to break the URI into SCRIPT_NAME and PATH_INFO portions.  It actually
+ * will do two searches most of the time in order to find the right handler for the
+ * registered prefix portion.
+ *
+ */
+VALUE URIClassifier_init(VALUE self)
+{
+  /* The trie only holds raw pointers to the handlers, so every registration
+   * is mirrored in this hash to keep the handlers reachable for the GC. */
+  rb_ivar_set(self, id_handler_map, rb_hash_new());
+  return self;
+}
+/**
+ * call-seq:
+ *    uc.register("/someuri", SampleHandler.new) -> nil
+ *
+ * Registers the SampleHandler (one for all requests) with the "/someuri".
+ * When URIClassifier::resolve is called with "/someuri" it'll return
+ * SampleHandler immediately.  When called with "/someuri/iwant" it'll also
+ * return SampleHandler immediately, with no additional searches, but it will
+ * return path info with "/iwant".
+ *
+ * You actually can reuse this class to register nearly anything and
+ * quickly resolve it.  This could be used for caching, fast mapping, etc.
+ * The downside is it uses much more memory than a Hash, but it can be
+ * a lot faster.  Its main advantage is that it works on prefixes, which
+ * is damn hard to get right with a Hash.
+ *
+ * Raises StandardError when the URI is empty, is already registered, or
+ * the trie reports an error while inserting.
+ */
+VALUE URIClassifier_register(VALUE self, VALUE uri, VALUE handler)
+{
+  struct tst *tst = NULL;
+  void *out = NULL;   /* out-parameter required by tst_insert; not used here */
+  int status;
+
+  DATA_GET(self, struct tst, tst);
+
+  status = tst_insert((unsigned char *)StringValueCStr(uri), (void *)handler, tst, 0, &out);
+
+  /* rb_raise does not return, so each check below acts as a guard. */
+  if(status == TST_DUPLICATE_KEY) {
+    rb_raise(rb_eStandardError, "Handler already registered with that name");
+  }
+  if(status == TST_ERROR) {
+    rb_raise(rb_eStandardError, "Memory error registering handler");
+  }
+  if(status == TST_NULL_KEY) {
+    rb_raise(rb_eStandardError, "URI was empty");
+  }
+
+  /* Mirror the registration in the Ruby-side hash (also backs #uris). */
+  rb_hash_aset(rb_ivar_get(self, id_handler_map), uri, handler);
+  return Qnil;
+}
+/**
+ * call-seq:
+ *    uc.unregister("/someuri")
+ *
+ * Yep, just removes this uri and its handler from the trie.  Returns the
+ * handler that was removed, or nil when the trie had nothing to delete.
+ */
+VALUE URIClassifier_unregister(VALUE self, VALUE uri)
+{
+  struct tst *tst = NULL;
+  void *removed;
+
+  DATA_GET(self, struct tst, tst);
+
+  removed = tst_delete((unsigned char *)StringValueCStr(uri), tst);
+  if(removed == NULL) {
+    return Qnil;
+  }
+
+  /* Drop the Ruby-side entry as well so it stays in step with the trie. */
+  rb_hash_delete(rb_ivar_get(self, id_handler_map), uri);
+  return (VALUE)removed;
+}
+/**
+ * call-seq:
+ *    uc.resolve("/someuri") -> "/someuri", "", handler
+ *    uc.resolve("/someuri/pathinfo") -> "/someuri", "/pathinfo", handler
+ *    uc.resolve("/notfound/orhere") -> nil, nil, nil
+ *    uc.resolve("/") -> "/", "/", handler  # if uc.register("/", handler)
+ *    uc.resolve("/path/from/root") -> "/", "/path/from/root", handler  # if uc.register("/", handler)
+ *
+ * Attempts to resolve either the whole URI or at the longest prefix, returning
+ * the prefix (as script_info), path (as path_info), and registered handler
+ * (usually an HttpHandler).  If it doesn't find a handler registered at the longest
+ * match then it returns nil,nil,nil.
+ *
+ * Because the resolver uses a trie you are able to register a handler at *any* character
+ * in the URI and it will be handled as long as it's the longest prefix.  So, if you
+ * registered handler #1 at "/something/lik", and #2 at "/something/like/that", then a
+ * a search for "/something/like" would give you #1.  A search for "/something/like/that/too"
+ * would give you #2.
+ *
+ * This is very powerful since it means you can also attach handlers to parts of the ;
+ * (semi-colon) separated path params, any part of the path, use off chars, anything really.
+ * It also means that it's very efficient to do this only taking as long as the URI has
+ * characters.
+ *
+ * A slight modification to the CGI 1.2 standard is given for handlers registered to "/".
+ * CGI expects all CGI scripts to be at some script path, so it doesn't really say anything
+ * about a script that handles the root.  To make this work, the resolver will detect that
+ * the requested handler is at "/", and return that for script_name, and then simply return
+ * the full URI back as path_info.
+ *
+ * It expects strings with no embedded '\0' characters.  Don't try other string-like stuff yet.
+ */
+VALUE URIClassifier_resolve(VALUE self, VALUE uri)
+{
+  void *handler = NULL;
+  int pref_len = 0;
+  struct tst *tst = NULL;
+  VALUE result;
+  unsigned char *uri_str = NULL;
+  DATA_GET(self, struct tst, tst);
+  uri_str = (unsigned char *)StringValueCStr(uri);
+  handler = tst_search(uri_str, tst, TST_LONGEST_MATCH, &pref_len);
+  /* setup for multiple return values */
+  result = rb_ary_new();
+  if(handler) {
+    rb_ary_push(result, rb_str_substr (uri, 0, pref_len));
+    /* compensate for a script_name="/" where we need to add the "/" to path_info to keep it consistent */
+    if(pref_len == 1 && uri_str[0] == '/') {
+      /* matches the root URI so we have to use the whole URI as the path_info */
+      rb_ary_push(result, uri);
+    } else {
+      /* matches a script so process like normal */
+      rb_ary_push(result, rb_str_substr(uri, pref_len, RSTRING(uri)->len));
+    }
+    rb_ary_push(result, (VALUE)handler);
+  } else {
+    /* not found so push back nothing */
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
+    rb_ary_push(result, Qnil);
+  }
+  return result;
+}
+/* Returns the keys of the internal handler map, i.e. the URIs stored by register. */
+VALUE URIClassifier_uris(VALUE self) {
+  VALUE handler_map = rb_ivar_get(self, id_handler_map);
+  return rb_funcall(handler_map, rb_intern("keys"), 0);
+}
+/*
+ * Extension entry point: defines Mongrel::URIClassifier, installs its
+ * allocator and binds the C functions above to its instance methods.
+ */
+void Init_uri_classifier()
+{
+  mMongrel = rb_define_module("Mongrel");
+  cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject);
+  /* the allocator builds the trie; initialize then adds the handler map */
+  rb_define_alloc_func(cURIClassifier, URIClassifier_alloc);
+  /* the last argument is the number of Ruby-level arguments each method takes */
+  rb_define_method(cURIClassifier, "initialize", URIClassifier_init, 0);
+  rb_define_method(cURIClassifier, "register", URIClassifier_register, 2);
+  rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1);
+  rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1);
+  rb_define_method(cURIClassifier, "uris", URIClassifier_uris, 0);
+}

data/lib/mongrel_experimental.rb ADDED

@@ -0,0 +1,3 @@
+# Loads the uri_classifier extension (it defines Mongrel::URIClassifier).
+require 'uri_classifier'
+# Announce on standard error that the experimental code has been loaded.
+STDERR.puts "** Mongrel_experimental loaded"

data/mongrel_experimental.gemspec ADDED

@@ -0,0 +1,44 @@
+# Gem::Specification for Mongrel_experimental-1.1
+# Originally generated by Echoe
+Gem::Specification.new do |s|
+  s.name = %q{mongrel_experimental}
+  s.version = "1.1"
+  s.specification_version = 2 if s.respond_to? :specification_version=
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["The Mongrel Team"]
+  s.date = %q{2007-11-01}
+  s.description = %q{Backports and experimental features for Mongrel.}
+  s.email = %q{}
+  s.extensions = ["ext/uri_classifier/extconf.rb"]
+  s.files = ["CHANGELOG", "COPYING", "ext/uri_classifier/ext_help.h", "ext/uri_classifier/extconf.rb", "ext/uri_classifier/tst.h", "ext/uri_classifier/tst_cleanup.c", "ext/uri_classifier/tst_delete.c", "ext/uri_classifier/tst_grow_node_free_list.c", "ext/uri_classifier/tst_init.c", "ext/uri_classifier/tst_insert.c", "ext/uri_classifier/tst_search.c", "ext/uri_classifier/uri_classifier.c", "lib/mongrel_experimental.rb", "LICENSE", "Manifest", "README", "test/test_uriclassifier.rb", "mongrel_experimental.gemspec"]
+  s.has_rdoc = true
+  s.homepage = %q{}
+  s.require_paths = ["lib", "ext"]
+  s.rubyforge_project = %q{mongrel}
+  s.rubygems_version = %q{0.9.4.6}
+  s.summary = %q{Backports and experimental features for Mongrel.}
+  s.test_files = ["test/test_uriclassifier.rb"]
+  s.add_dependency(%q<mongrel>, ["= 1.1"])
+end
+# # Original Rakefile source (requires the Echoe gem):
+#
+#
+# require 'echoe'
+#
+# Echoe.new("mongrel_experimental") do |p|
+#   p.summary = "Backports and experimental features for Mongrel."
+#   p.project = "mongrel"
+#   p.author="The Mongrel Team"
+#   p.dependencies = ['mongrel =1.1']
+#
+#   p.need_tar_gz = false
+#   p.need_tgz = true
+#   p.require_signed = true
+# end

data/test/test_uriclassifier.rb ADDED

@@ -0,0 +1,262 @@
+# Copyright (c) 2005 Zed A. Shaw
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+# Additional work donated by contributors.  See http://mongrel.rubyforge.org/attributions.html
+# for more information.
+require 'lib/uri_classifier'
+require 'test/unit'
+include Mongrel
+class URIClassifierTest < Test::Unit::TestCase
+  def test_uri_finding
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    script_name, path_info, value = uri_classifier.resolve("/test")
+    assert_equal 1, value
+    assert_equal "/test", script_name
+  end
+  def test_root_handler_only
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/", 1)
+    script_name, path_info, value = uri_classifier.resolve("/test")
+    assert_equal 1, value
+    assert_equal "/", script_name
+    assert_equal "/test", path_info
+  end
+  def test_uri_prefix_ops
+    test = "/pre/fix/test"
+    prefix = "/pre"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(prefix,1)
+    script_name, path_info, value = uri_classifier.resolve(prefix)
+    script_name, path_info, value = uri_classifier.resolve(test)
+    assert_equal 1, value
+    assert_equal prefix, script_name
+    assert_equal test[script_name.length .. -1], path_info
+    assert uri_classifier.inspect
+    assert_equal prefix, uri_classifier.uris[0]
+  end
+  def test_not_finding
+    test = "/cant/find/me"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(test, 1)
+    script_name, path_info, value = uri_classifier.resolve("/nope/not/here")
+    assert_nil script_name
+    assert_nil path_info
+    assert_nil value
+  end
+  def test_exceptions
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    failed = false
+    begin
+      uri_classifier.register("/test", 1)
+    rescue => e
+      failed = true
+    end
+    assert failed
+    failed = false
+    begin
+      uri_classifier.register("", 1)
+    rescue => e
+      failed = true
+    end
+    assert failed
+  end
+  def test_register_unregister
+    uri_classifier = URIClassifier.new
+    100.times do
+      uri_classifier.register("/stuff", 1)
+      value = uri_classifier.unregister("/stuff")
+      assert_equal 1, value
+    end
+    uri_classifier.register("/things",1)
+    script_name, path_info, value = uri_classifier.resolve("/things")
+    assert_equal 1, value
+    uri_classifier.unregister("/things")
+    script_name, path_info, value = uri_classifier.resolve("/things")
+    assert_nil value
+  end
+  def test_uri_branching
+    uri_classifier = URIClassifier.new
+    uri_classifier.register("/test", 1)
+    uri_classifier.register("/test/this",2)
+    script_name, path_info, handler = uri_classifier.resolve("/test")
+    script_name, path_info, handler = uri_classifier.resolve("/test/that")
+    assert_equal "/test", script_name, "failed to properly find script off branch portion of uri"
+    assert_equal "/that", path_info
+    assert_equal 1, handler, "wrong result for branching uri"
+  end
+  def test_all_prefixing
+    tests = ["/test","/test/that","/test/this"]
+    uri = "/test/this/that"
+    uri_classifier = URIClassifier.new
+    current = ""
+    uri.each_byte do |c|
+      current << c.chr
+      uri_classifier.register(current, c)
+    end
+    # Try to resolve everything with no asserts as a fuzzing
+    tests.each do |prefix|
+      current = ""
+      prefix.each_byte do |c|
+        current << c.chr
+        script_name, path_info, handler = uri_classifier.resolve(current)
+        assert script_name
+        assert path_info
+        assert handler
+      end
+    end
+    # Assert that we find stuff
+    tests.each do |t|
+      script_name, path_info, handler = uri_classifier.resolve(t)
+      assert handler
+    end
+    # Assert we don't find stuff
+    script_name, path_info, handler = uri_classifier.resolve("chicken")
+    assert_nil handler
+    assert_nil script_name
+    assert_nil path_info
+  end
+  # Verifies that a root mounted ("/") handler resolves
+  # such that path info matches the original URI.
+  # This is needed to accommodate real usage of handlers.
+  def test_root_mounted
+    uri_classifier = URIClassifier.new
+    root = "/"
+    path = "/this/is/a/test"
+    uri_classifier.register(root, 1)
+    script_name, path_info, handler = uri_classifier.resolve(root)
+    assert_equal 1, handler
+    assert_equal root, path_info
+    assert_equal root, script_name
+    script_name, path_info, handler = uri_classifier.resolve(path)
+    assert_equal path, path_info
+    assert_equal root, script_name
+    assert_equal 1, handler
+  end
+  # Verifies that a root mounted ("/") handler
+  # is the default point, doesn't matter the order we use
+  # to register the URIs
+  def test_classifier_order
+    tests = ["/before", "/way_past"]
+    root = "/"
+    path = "/path"
+    uri_classifier = URIClassifier.new
+    uri_classifier.register(path, 1)
+    uri_classifier.register(root, 2)
+    tests.each do |uri|
+      script_name, path_info, handler = uri_classifier.resolve(uri)
+      assert_equal root, script_name, "#{uri} did not resolve to #{root}"
+      assert_equal uri, path_info
+      assert_equal 2, handler
+    end
+  end
+  if ENV['BENCHMARK']
+    # Eventually we will have a suite of benchmarks instead of lamely installing a test
+    def test_benchmark
+      # This URI set should favor a TST. Both versions increase linearly until you hit 14
+      # URIs, then the TST flattens out.
+      @uris = %w(
+        /
+        /dag /dig /digbark /dog /dogbark /dog/bark /dug /dugbarking /puppy
+        /c /cat /cat/tree /cat/tree/mulberry /cats /cot /cot/tree/mulberry /kitty /kittycat
+#        /eag /eig /eigbark /eog /eogbark /eog/bark /eug /eugbarking /iuppy
+#        /f /fat /fat/tree /fat/tree/mulberry /fats /fot /fot/tree/mulberry /jitty /jittyfat
+#        /gag /gig /gigbark /gog /gogbark /gog/bark /gug /gugbarking /kuppy
+#        /h /hat /hat/tree /hat/tree/mulberry /hats /hot /hot/tree/mulberry /litty /littyhat
+#        /ceag /ceig /ceigbark /ceog /ceogbark /ceog/cbark /ceug /ceugbarking /ciuppy
+#        /cf /cfat /cfat/ctree /cfat/ctree/cmulberry /cfats /cfot /cfot/ctree/cmulberry /cjitty /cjittyfat
+#        /cgag /cgig /cgigbark /cgog /cgogbark /cgog/cbark /cgug /cgugbarking /ckuppy
+#        /ch /chat /chat/ctree /chat/ctree/cmulberry /chats /chot /chot/ctree/cmulberry /citty /cittyhat
+      )
+      @requests = %w(
+        /
+        /dig
+        /digging
+        /dogging
+        /dogbarking/
+        /puppy/barking
+        /c
+        /cat
+        /cat/shrub
+        /cat/tree
+        /cat/tree/maple
+        /cat/tree/mulberry/tree
+        /cat/tree/oak
+        /cats/
+        /cats/tree
+        /cod
+        /zebra
+      )
+      @classifier = URIClassifier.new
+      @uris.each do |uri|
+        @classifier.register(uri, 1)
+      end
+      puts "#{@uris.size} URIs / #{@requests.size * 10000} requests"
+      Benchmark.bm do |x|
+        x.report do
+  #        require 'ruby-prof'
+  #        profile = RubyProf.profile do
+            10000.times do
+              @requests.each do |request|
+                @classifier.resolve(request)
+              end
+            end
+  #        end
+  #        File.open("profile.html", 'w') { |file| RubyProf::GraphHtmlPrinter.new(profile).print(file, 0) }
+        end
+      end
+    end
+  end
+end