RubyGems - amatch - Versions diffs - 0.1.5 → 0.2.0 - Mend

amatch 0.1.5 → 0.2.0

Files changed (23) hide show

data/CHANGES +5 -2
data/InstalledFiles +5 -0
data/README.en +1 -1
data/Rakefile +67 -58
data/VERSION +1 -1
data/bin/agrep.rb +65 -52
data/config.save +12 -0
data/ext/amatch.bundle +0 -0
data/ext/amatch.c +1301 -225
data/ext/extconf.rb +6 -1
data/ext/pair.c +78 -0
data/ext/pair.h +29 -0
data/ext/tags +24 -0
data/tests/runner.rb +26 -0
data/tests/test_hamming.rb +54 -0
data/tests/test_levenshtein.rb +74 -0
data/tests/test_longest_subsequence.rb +57 -0
data/tests/test_longest_substring.rb +57 -0
data/tests/test_pair_distance.rb +81 -0
data/tests/test_sellers.rb +94 -0
metadata +26 -8
data/amatch.txt.en +0 -117
data/tests/test.rb +0 -94

data/CHANGES CHANGED

@@ -1,5 +1,8 @@
-2005-01-20 (0.1.5)
-    * Bugfix for insertion/deletion weights. Sorry.
+2005-06-01 (0.2.0)
+    * Major changes in API and implementation:
+      Now the Levenshtein edit distance, Sellers edit distance, the Hamming
+      distance, the longest common subsequence length, the longest common
+      substring length, and the pair distance metric can be computed.
 2005-01-20 (0.1.4)
     * Better argument handling in initialization method
     * Minor changes in Rakefile and README.en

data/InstalledFiles ADDED

@@ -0,0 +1,5 @@
+/usr/local/stow/ruby/bin/
+/usr/local/stow/ruby/bin/CVS
+/usr/local/stow/ruby/bin/CVS
+/usr/local/stow/ruby/bin/CVS
+/usr/local/stow/ruby/lib/ruby/site_ruby/1.8/i686-linux/.

data/README.en CHANGED

@@ -27,5 +27,5 @@ Florian Frank <flori@ping.de>
 License
 =======
-GNU General Public License (GPL)
+GNU General Public License, Version 2 (GPLv2)

data/Rakefile CHANGED

@@ -3,101 +3,110 @@
 require 'rake/clean'
 require 'rake/testtask'
 require 'rake/gempackagetask'
+require 'rake/rdoctask'
 require 'rbconfig'
 include Config
-PKG_NAME = 'amatch'
 PKG_VERSION = File.read('VERSION').chomp
-PKG_FILES = Dir.glob("**/*").delete_if {|item|
-    item.include?("CVS") or item.include?("pkg")
-}
+# Files that go into the gem/tarball: everything below the project root
+# except version-control directories, previously built packages and
+# generated documentation.
+PKG_FILES   = FileList['**/*']
+PKG_FILES.exclude(/CVS/)
+PKG_FILES.exclude(/^pkg/)
+PKG_FILES.exclude(/^doc/)
+# Keep locally generated artifacts out of the package: compiled extension
+# binaries, setup.rb bookkeeping files and editor tag files are specific to
+# the machine that built them.
+PKG_FILES.exclude(/\.(o|so|bundle)$/)
+PKG_FILES.exclude(/^(InstalledFiles|config\.save)$/)
+PKG_FILES.exclude(/(^|\/)tags$/)
-task :default => [:test]
+task :default => :test
 desc "Run unit tests"
-task(:test => [ :clean, :compile ]) do
-    ruby %{-Iext tests/test.rb}
+task(:test => [:compile]) do
+  cd 'tests' do
+    ruby %{-I../ext runner.rb}
+  end
 end
 desc "Compiling library"
 task :compile do
-    cd 'ext'  do
-        ruby %{extconf.rb}
-        sh "make"
-    end
+  cd 'ext'  do
+    ruby %{extconf.rb}
+    sh "make"
+  end
 end
 desc "Installing library"
-task(:install => [:test]) do
-    src, = Dir['ext/amatch.*'].reject { |x| /\.[co]$/.match x }
-    filename = File.basename(src)
-    dst = File.join(CONFIG["sitelibdir"], filename)
-    install(src, dst, :verbose => true)
+task :install => :test do
+  src, = Dir['ext/amatch.*'].reject { |x| /\.[co]$/.match x }
+  filename = File.basename(src)
+  dst = File.join(CONFIG["sitelibdir"], filename)
+  install(src, dst, :verbose => true)
 end
+desc "Removing generated files"
 task :clean do
-    cd 'ext'  do
-        ruby %{extconf.rb}
-        sh "make distclean" if File.exist?('Makefile')
-    end
+  cd 'ext'  do
+    ruby 'extconf.rb'
+    sh "make distclean" if File.exist?('Makefile')
+  end
 end
-spec = Gem::Specification.new do |s|
-    #### Basic information.
+Rake::RDocTask.new do |rd|
+  rd.main = 'Amatch'
+  rd.rdoc_files.include("ext/amatch.c")
+  rd.rdoc_dir = 'doc'
+end
-    s.name = 'amatch'
-    s.version = PKG_VERSION
-    s.summary = "Approximate String Matching library"
-    s.description = <<EOF
-Amatch is a library for approximate string matching and searching using
-a dynamic programming algorithm to compute the Levenstein distance
-between strings.
+spec = Gem::Specification.new do |s|
+  #### Basic information.
+  s.name = 'amatch'
+  s.version = PKG_VERSION
+  s.summary = "Approximate String Matching library"
+  s.description = <<EOF
+Amatch is a library for approximate string matching and searching in strings.
+Several algorithms can be used to do this, and it's also possible to compute a
+similarity metric number between 0.0 and 1.0 for two given strings.
 EOF
-    #### Dependencies and requirements.
+  #### Dependencies and requirements.
-    #s.add_dependency('log4r', '> 1.0.4')
-    #s.requirements << ""
+  #s.add_dependency('log4r', '> 1.0.4')
+  #s.requirements << ""
-    s.files = PKG_FILES
+  s.files = PKG_FILES
-    #### C code extensions.
+  #### C code extensions.
-    s.extensions << "ext/extconf.rb"
+  s.extensions << "ext/extconf.rb"
-    #### Load-time details: library and application (you will need one or both).
+  #### Load-time details: library and application (you will need one or both).
-    s.require_path = 'lib'                         # Use these for libraries.
-    s.autorequire = 'amatch'
+  s.require_path = 'ext'                         # Use these for libraries.
+  s.autorequire = 'amatch'
-    s.bindir = "bin"                               # Use these for applications.
-    s.executables = ["agrep.rb"]
-    s.default_executable = "agrep.rb"
+  s.bindir = "bin"                               # Use these for applications.
+  s.executables = ["agrep.rb"]
+  s.default_executable = "agrep.rb"
-    #### Documentation and testing.
+  #### Documentation and testing.
-    s.has_rdoc = true
-    #s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
-    #s.rdoc_options <<
-    #  '--title' <<  'Rake -- Ruby Make' <<
-    #  '--main' << 'README' <<
-    #  '--line-numbers'
-    s.test_files << 'tests/test.rb'
+  s.has_rdoc = true
+  #s.extra_rdoc_files = FileList['ext/amatch.c']
+  s.rdoc_options <<
+    '--title' <<  'Amatch -- Approximate Matching' <<
+    '--main' << 'Amatch' <<
+    '--line-numbers'
+  s.test_files << 'tests/runner.rb'
-    #### Author and project details.
+  #### Author and project details.
-    s.author = "Florian Frank"
-    s.email = "flori@ping.de"
-    s.homepage = "http://amatch.rubyforge.org"
-    s.rubyforge_project = "amatch"
+  s.author = "Florian Frank"
+  s.email = "flori@ping.de"
+  s.homepage = "http://amatch.rubyforge.org"
+  s.rubyforge_project = "amatch"
 end
 Rake::GemPackageTask.new(spec) do |pkg|
-    pkg.need_tar = true
-    pkg.package_files += PKG_FILES
+  pkg.need_tar      = true
+  pkg.package_files += PKG_FILES
 end
 task :release => [ :clean, :package ]
-    # vim: set et sw=4 ts=4:
+  # vim: set et sw=2 ts=2:

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.5
1	+ 0.2.0

data/bin/agrep.rb CHANGED

@@ -1,74 +1,87 @@
 #! /usr/bin/env ruby
 #
-## $Id: agrep.rb,v 1.1.1.1 2004/09/27 19:23:42 flori Exp $
+## $Id: agrep.rb,v 1.3 2005/04/24 21:11:06 flori Exp $
 #
 require 'amatch'
 require 'getoptlong'
 def usage(msg, options)
-	print msg, "\nUsage: #{File.basename($0)} pattern [FILE ...]\n\n"
-	options.each { |o|
-		print "  " + o[1] + ", " + o[0] + " " +
-			(o[2] == GetoptLong::REQUIRED_ARGUMENT ? 'ARGUMENT' : '') + "\n"
-	}
-	print "\nReport bugs to <flori@ping.de>.\n"
-	exit 0
+  print msg, "\nUsage: #{File.basename($0)} pattern [FILE ...]\n\n"
+  options.each do |o|
+    puts "  " + o[1] + ", " + o[0] + " " +
+      (o[2] == GetoptLong::REQUIRED_ARGUMENT ? 'ARGUMENT' : '')
+  end
+  puts "\nReport bugs to <flori@ping.de>."
+  exit 0
+end
+class MyAmatch < Amatch
+  def l_search_relative(strings)
+    if strings.is_a? Array
+      l_search(strings).map { |x| x / pattern.size }
+    else
+      l_search(strings) / pattern.size
+    end
+  end
 end
 $distance = 1
+$mode = :l_search
 begin
-	parser = GetoptLong.new
-	options = [
-		[ '--distance',	'-d',	GetoptLong::REQUIRED_ARGUMENT ],
-		[ '--relative',	'-r',	GetoptLong::NO_ARGUMENT ],
-		[ '--verbose',	'-v',	GetoptLong::NO_ARGUMENT ],
-		[ '--help',		'-h',	GetoptLong::NO_ARGUMENT ],
-	]
-	parser.set_options(*options)
-	parser.each_option { |name, arg|
-		name = name.sub(/^--/, '')
-		case name
-		when 'distance'
-			$distance = arg.to_f
-		when 'relative'
-			$relative = 1
-		when 'verbose'
-			$verbose = 1
-		when 'help'
-			usage('You\'ve asked for it!', options)
-		end
-	}
+  parser = GetoptLong.new
+  options = [
+    [ '--distance',   '-d',  GetoptLong::REQUIRED_ARGUMENT ],
+    [ '--relative',   '-r',  GetoptLong::NO_ARGUMENT ],
+    [ '--verbose',    '-v',  GetoptLong::NO_ARGUMENT ],
+    [ '--help',       '-h',  GetoptLong::NO_ARGUMENT ],
+  ]
+  parser.set_options(*options)
+  parser.each_option do |name, arg|
+    name = name.sub(/^--/, '')
+    case name
+    when 'distance'
+      $distance = arg.to_f
+    when 'relative'
+      $mode = :l_search_relative
+    when 'verbose'
+      $verbose = 1
+    when 'help'
+      usage('You\'ve asked for it!', options)
+    end
+  end
 rescue
-	exit 1
+  exit 1
 end
-$pattern = ARGV.shift or usage('Pattern needed!', options)
+pattern = ARGV.shift or usage('Pattern needed!', options)
-matcher = Amatch.new($pattern)
+matcher = MyAmatch.new(pattern)
 size = 0
 start = Time.new
 if ARGV.size > 0 then
-	ARGV.each { |filename|
-		File.stat(filename).file? or next
-		size += File.size(filename)
-		begin
-			File.open(filename, 'r').each_line { |line|
-				print "#{filename}:#{line}" if
-					($relative ?	matcher.searchr(line) :
-									matcher.search(line)) <= $distance
-			}
-		rescue
-			$stderr.print "Failure at #{filename}: #{$!} => Skipping!\n"
-		end
-	}
+  ARGV.each do |filename|
+    File.stat(filename).file? or next
+    size += File.size(filename)
+    begin
+      File.open(filename, 'r').each_line do |line|
+        if matcher.__send__($mode, line) < $distance
+          puts "#{filename}:#{line}"
+        end
+      end
+    rescue
+      STDERR.print "Failure at #{filename}: #{$!} => Skipping!\n"
+    end
+  end
 else
-	$stdin.each_line { |line|
-		size += line.size
-		print line if ($relative ?	matcher.searchr(line) :
-									matcher.search(line)) <= $distance
-	}
+  STDIN.each_line do |line|
+    size += line.size
+    if matcher.__send__($mode, line) <= $distance
+      puts line
+    end
+  end
 end
 time = Time.new - start
-$verbose and $stderr.printf "%.3f secs running, scanned %.3f KB/s.\n",
-	time, size / time / 1024
+$verbose and STDERR.printf "%.3f secs running, scanned %.3f KB/s.\n",
+  time, size / time / 1024
 exit 0
+  # vim: set et sw=2 ts=2:

data/config.save ADDED

@@ -0,0 +1,12 @@
+bin-dir=$prefix/bin
+site-ruby=$prefix/lib/ruby/site_ruby/1.8
+prefix=/usr/local/stow/ruby
+ruby-path=/usr/local/stow/ruby/bin/ruby
+make-prog=make
+rb-dir=$site-ruby
+without-ext=no
+ruby-prog=/usr/local/stow/ruby/bin/ruby
+site-ruby-common=$prefix/lib/ruby/site_ruby
+std-ruby=$prefix/lib/ruby/1.8
+data-dir=$prefix/share
+so-dir=$prefix/lib/ruby/site_ruby/1.8/i686-linux

data/ext/amatch.bundle ADDED

Binary file

data/ext/amatch.c CHANGED

@@ -1,312 +1,1388 @@
 #include "ruby.h"
+#include "pair.h"
-static VALUE cAmatch;
+/*
+ * Document-method: pattern
+ *
+ * call-seq: pattern -> pattern string
+ *
+ * Returns the current pattern string of this instance.
+ */
 /*
- * Vector stuff
+ * Document-method: pattern=
+ *
+ * call-seq: pattern=(pattern)
+ *
+ * Sets the current pattern string of this instance to <code>pattern</code>.
  */
-typedef struct {
-    int *ptr;
-    int len;
-} vector;
-static vector *
-vector_new(len)
-    int len;
-{
-    vector *v;
-    v = ALLOC(vector);
-    if (v == NULL) rb_raise(rb_eNoMemError, "couldn't malloc vector");
-    v->ptr = ALLOC_N(int, len + 1);
-    if (v->ptr == NULL) rb_raise(rb_eNoMemError, "couldn't malloc vector data");
-    v->len = len;
-    return v;
-}
-static void
-vector_print(v)
-    vector *v;
+static VALUE rb_mAmatch, rb_cLevenshtein, rb_cSellers, rb_cHamming,
+             rb_cPairDistance, rb_cLongestSubsequence, rb_cLongestSubstring;
+static ID id_split, id_to_f;
+/*
+ * Declares a local pointer named amatch of type klass* and fills it from the
+ * Ruby object self via Data_Get_Struct. Must be used where declarations are
+ * allowed and where a variable called self is in scope.
+ */
+#define GET_STRUCT(klass)                 \
+    klass *amatch;                        \
+    Data_Get_Struct(self, klass, amatch);
+/*
+ * Defines type##_allocate(), which allocates one struct of the given type
+ * with ALLOC and zero-fills it before returning it.
+ */
+#define DEF_ALLOCATOR(type)                                             \
+static type *type##_allocate()                                          \
+{                                                                       \
+    type *obj = ALLOC(type);                                            \
+    MEMZERO(obj, type, 1);                                              \
+    return obj;                                                         \
+}
+/*
+ * Defines two functions for klass:
+ *   rb_<klass>_s_allocate(klass2) - wraps a freshly allocated, zeroed struct
+ *     in a Ruby object of class klass2, with no mark function and
+ *     rb_<klass>_free as the free function.
+ *   rb_<klass>_new(klass2, pattern) - allocates such an object and then calls
+ *     rb_<klass>_initialize(obj, pattern) on it.
+ * The expansion references rb_<klass>_free and rb_<klass>_initialize, so both
+ * have to be declared before the macro is used.
+ */
+#define DEF_CONSTRUCTOR(klass, type)                                    \
+static VALUE rb_##klass##_s_allocate(VALUE klass2)                      \
+{                                                                       \
+    type *amatch = type##_allocate();                                   \
+    return Data_Wrap_Struct(klass2, NULL, rb_##klass##_free, amatch);   \
+}                                                                       \
+VALUE rb_##klass##_new(VALUE klass2, VALUE pattern)                     \
+{                                                                       \
+    VALUE obj = rb_##klass##_s_allocate(klass2);                        \
+    rb_##klass##_initialize(obj, pattern);                              \
+    return obj;                                                         \
+}
+/*
+ * Defines rb_<klass>_free(), the free function handed to Data_Wrap_Struct.
+ * It overwrites the pattern buffer and the struct itself with zeros before
+ * releasing each of them, so only the pattern and the struct are freed here.
+ * NOTE(review): the memory comes from ALLOC/ALLOC_N but is released with
+ * plain free() - confirm this pairing is intended.
+ */
+#define DEF_RB_FREE(klass, type)                            \
+static void rb_##klass##_free(type *amatch)                 \
+{                                                           \
+    MEMZERO(amatch->pattern, char, amatch->pattern_len);    \
+    free(amatch->pattern);                                  \
+    MEMZERO(amatch, type, 1);                               \
+    free(amatch);                                           \
+}
+/*
+ * Defines the pattern accessors for a struct type:
+ *   type##_pattern_set(amatch, pattern) - checks that pattern is a String,
+ *     frees the old buffer and stores a private copy of the new bytes
+ *     (exactly pattern_len bytes, no terminating NUL is added).
+ *   rb_<type>_pattern(self)      - returns the pattern as a new Ruby String.
+ *   rb_<type>_pattern_set(self, pattern) - Ruby-level setter; returns nil.
+ */
+#define DEF_PATTERN_ACCESSOR(type)                              \
+static void type##_pattern_set(type *amatch, VALUE pattern)     \
+{                                                               \
+    Check_Type(pattern, T_STRING);                              \
+    free(amatch->pattern);                                      \
+    amatch->pattern_len = RSTRING(pattern)->len;                \
+    amatch->pattern = ALLOC_N(char, amatch->pattern_len);       \
+    MEMCPY(amatch->pattern, RSTRING(pattern)->ptr, char,        \
+        RSTRING(pattern)->len);                                 \
+}                                                               \
+static VALUE rb_##type##_pattern(VALUE self)                    \
+{                                                               \
+    GET_STRUCT(type)                                            \
+    return rb_str_new(amatch->pattern, amatch->pattern_len);    \
+}                                                               \
+static VALUE rb_##type##_pattern_set(VALUE self, VALUE pattern) \
+{                                                               \
+    GET_STRUCT(type)                                            \
+    type##_pattern_set(amatch, pattern);                        \
+    return Qnil;                                                \
+}
+/*
+ * Defines type##_iterate_strings(amatch, strings, match_function).
+ * A single String is passed straight to match_function and its result is
+ * returned. Otherwise strings must be an Array (TypeError if not) whose
+ * elements are all Strings (TypeError naming the offending class if not);
+ * match_function is applied to each element and the results are returned
+ * as a new Array in the same order.
+ * All locals are declared at the top of the function so the expansion also
+ * compiles with C89 compilers.
+ */
+#define DEF_ITERATE_STRINGS(type)                                   \
+static VALUE type##_iterate_strings(type *amatch, VALUE strings,     \
+    VALUE (*match_function) (type *amatch, VALUE strings))        \
+{                                                                   \
+    int i;                                                          \
+    VALUE result;                                                   \
+    if (TYPE(strings) == T_STRING) {                                \
+        return match_function(amatch, strings);                     \
+    }                                                               \
+    Check_Type(strings, T_ARRAY);                                   \
+    result = rb_ary_new2(RARRAY(strings)->len);                     \
+    for (i = 0; i < RARRAY(strings)->len; i++) {                    \
+        VALUE string = rb_ary_entry(strings, i);                    \
+        if (TYPE(string) != T_STRING) {                             \
+            rb_raise(rb_eTypeError,                                 \
+                "array has to contain only strings (%s given)",     \
+                NIL_P(string) ?                                     \
+                    "NilClass" :                                    \
+                    rb_class2name(CLASS_OF(string)));               \
+        }                                                           \
+        rb_ary_push(result, match_function(amatch, string));        \
+    }                                                               \
+    return result;                                                  \
+}
+/*
+ * Defines a Ruby method body called function that reads the struct member
+ * name from the wrapped type and returns it run through converter.
+ */
+#define DEF_RB_READER(type, function, name, converter)              \
+VALUE function(VALUE self)                                          \
+{                                                                   \
+    GET_STRUCT(type)                                                \
+    return converter(amatch->name);                                 \
+}
+/*
+ * Defines a Ruby method body called function that stores value into the
+ * struct member name. The value is first passed through caster (which may
+ * reassign it), converted to the C type vtype with converter, and then
+ * tested as "<converted value> check" (check is an operator plus operand,
+ * pasted textually). A failing check raises TypeError; on success the
+ * member is updated and nil is returned.
+ */
+#define DEF_RB_WRITER(type, function, name, vtype, caster, converter, check)\
+VALUE function(VALUE self, VALUE value)                                 \
+{                                                                       \
+    vtype value_ ## vtype;                                              \
+    GET_STRUCT(type)                                                    \
+    caster(value);                                                      \
+    value_ ## vtype = converter(value);                                 \
+    if (!(value_ ## vtype check))                                       \
+        rb_raise(rb_eTypeError, "check of value " #check " failed");    \
+    amatch->name = value_ ## vtype;                                     \
+    return Qnil;                                                        \
+}
+#define CAST2FLOAT(obj) \
+    if (TYPE(obj) != T_FLOAT && rb_respond_to(obj, id_to_f))    \
+            obj = rb_funcall(obj, id_to_f, 0, 0);               \
+        else                                                    \
+            Check_Type(obj, T_FLOAT)
+/* Reads the C double stored in a Ruby Float; obj must already be a Float. */
+#define FLOAT2C(obj) RFLOAT(obj)->value
+/*
+ * Binds a_ptr/a_len to the shorter of pattern and string and b_ptr/b_len to
+ * the other one (on equal lengths the string becomes a). Expects amatch,
+ * string, a_ptr, a_len, b_ptr and b_len to exist in the expanding function.
+ */
+#define OPTIMIZE_TIME                                   \
+    if (amatch->pattern_len < RSTRING(string)->len) {   \
+        a_ptr = amatch->pattern;                        \
+        a_len = amatch->pattern_len;                    \
+        b_ptr = RSTRING(string)->ptr;                   \
+        b_len = RSTRING(string)->len;                   \
+    } else {                                            \
+        a_ptr = RSTRING(string)->ptr;                   \
+        a_len = RSTRING(string)->len;                   \
+        b_ptr = amatch->pattern;                        \
+        b_len = amatch->pattern_len;                    \
+    }
+/*
+ * Binds a_ptr/a_len to the pattern and b_ptr/b_len to the string, always in
+ * that order. Expects amatch, string, a_ptr, a_len, b_ptr and b_len to exist
+ * in the expanding function. The final line deliberately carries no line
+ * continuation, so the macro ends here regardless of what follows it.
+ */
+#define DONT_OPTIMIZE                                   \
+        a_ptr = amatch->pattern;                        \
+        a_len = amatch->pattern_len;                    \
+        b_ptr = RSTRING(string)->ptr;                   \
+        b_len = RSTRING(string)->len;
+/*
+ * C structures of the Amatch classes
+ */
+/*
+ * State of the matchers that need nothing but the pattern (used by the
+ * Levenshtein and Hamming functions).
+ *   pattern     - private copy of the pattern bytes, not NUL-terminated
+ *   pattern_len - number of bytes in pattern; an int, because a char
+ *                 cannot represent lengths beyond 127/255 and would wrap
+ *                 for longer patterns
+ */
+typedef struct GeneralStruct {
+    char        *pattern;
+    int         pattern_len;
+} General;
+DEF_ALLOCATOR(General)
+DEF_PATTERN_ACCESSOR(General)
+DEF_ITERATE_STRINGS(General)
+/*
+ * State of the Sellers matcher: the pattern plus one weight per edit
+ * operation.
+ *   pattern      - private copy of the pattern bytes, not NUL-terminated
+ *   pattern_len  - number of bytes in pattern; an int, because a char
+ *                  cannot represent lengths beyond 127/255 and would wrap
+ *                  for longer patterns
+ *   substitution, deletion, insertion - cost charged for each operation
+ */
+typedef struct SellersStruct {
+    char        *pattern;
+    int         pattern_len;
+    double      substitution;
+    double      deletion;
+    double      insertion;
+} Sellers;
+DEF_ALLOCATOR(Sellers)
+DEF_PATTERN_ACCESSOR(Sellers)
+DEF_ITERATE_STRINGS(Sellers)
+static void Sellers_reset_weights(Sellers *self)
 {
-    int i;
-    for(i = 0; i < v->len; i++) printf("%d", v->ptr[i]);
-    puts("");
+    self->substitution = 1.0;
+    self->deletion     = 1.0;
+    self->insertion    = 1.0;
 }
-static void
-vector_destroy(v)
-    vector *v;
+/*
+ * State of the pair distance matcher.
+ *   pattern            - private copy of the pattern bytes, not
+ *                        NUL-terminated
+ *   pattern_len        - number of bytes in pattern; an int, because a char
+ *                        cannot represent lengths beyond 127/255 and would
+ *                        wrap for longer patterns
+ *   pattern_pair_array - pair array of the pattern; NULL until the first
+ *                        match builds it
+ */
+typedef struct PairDistanceStruct {
+    char        *pattern;
+    int         pattern_len;
+    PairArray   *pattern_pair_array;
+} PairDistance;
+DEF_ALLOCATOR(PairDistance)
+DEF_PATTERN_ACCESSOR(PairDistance)
+/*
+ * Levenshtein edit distances are computed here:
+ */
+/*
+ * Runs the unit-cost edit distance recurrence for a_ptr[0..a_len) against
+ * b_ptr[0..b_len) using the two rolling rows v[0] and v[1], which the
+ * expanding function must have allocated (b_len + 1 cells each) and
+ * initialised. Also expects weight, i, j, c and p to be declared there.
+ * After the loop p indexes the row that was written last (it stays 1 when
+ * a_len is 0), so results are read from v[p].
+ */
+#define COMPUTE_LEVENSHTEIN_DISTANCE                                        \
+    for (i = 1, c = 0, p = 1; i <= a_len; i++) {                            \
+        c = i % 2;                      /* current row */                   \
+        p = (i + 1) % 2;                /* previous row */                  \
+        v[c][0] = i;                    /* first column */                  \
+        for (j = 1; j <= b_len; j++) {                                      \
+            /* Bellman's principle of optimality: */                        \
+            weight = v[p][j - 1] + (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : 1);  \
+            if (weight > v[p][j] + 1) {                                     \
+                 weight = v[p][j] + 1;                                      \
+            }                                                               \
+            if (weight > v[c][j - 1] + 1) {                                 \
+                weight = v[c][j - 1] + 1;                                   \
+            }                                                               \
+            v[c][j] = weight;                                               \
+        }                                                                   \
+        p = c;                                                              \
+        c = (c + 1) % 2;                                                    \
+    }
+static VALUE Levenshtein_match(General *amatch, VALUE string)
 {
-    xfree(v->ptr);
-    xfree(v);
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int *v[2], weight;
+    int  i, j, c, p;
+    Check_Type(string, T_STRING);
+    DONT_OPTIMIZE
+    v[0] = ALLOC_N(int, b_len + 1);
+    v[1] = ALLOC_N(int, b_len + 1);
+    for (i = 0; i <= b_len; i++) {
+        v[0][i] = i;
+        v[1][i] = i;
+    }
+    COMPUTE_LEVENSHTEIN_DISTANCE
+    result = INT2FIX(v[p][b_len]);
+    free(v[0]);
+    free(v[1]);
+    return result;
 }
-static int
-vector_minimum(v)
-    vector *v;
+static VALUE Levenshtein_similar(General *amatch, VALUE string)
 {
-    int i;
-    int min;
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int *v[2], weight;
+    int  i, j, c, p;
-    if (v->len == 0) return -1;
-    min = v->ptr[0];
-    for (i = 1; i <= v->len; i++) {
-        if (min > v->ptr[i]) min = v->ptr[i];
+    Check_Type(string, T_STRING);
+    DONT_OPTIMIZE
+    if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
+    if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
+    v[0] = ALLOC_N(int, b_len + 1);
+    v[1] = ALLOC_N(int, b_len + 1);
+    for (i = 0; i <= b_len; i++) {
+        v[0][i] = i;
+        v[1][i] = i;
+    }
+    COMPUTE_LEVENSHTEIN_DISTANCE
+    if (b_len > a_len) {
+        result = rb_float_new(1.0 - ((double) v[p][b_len]) / b_len);
+    } else {
+        result = rb_float_new(1.0 - ((double) v[p][b_len]) / a_len);
     }
-    return min;
+    free(v[0]);
+    free(v[1]);
+    return result;
 }
-static int
-vector_last(v)
-    vector *v;
+static VALUE Levenshtein_search(General *amatch, VALUE string)
 {
-    return v->ptr[v->len];
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int *v[2], weight, min;
+    int  i, j, c, p;
+    Check_Type(string, T_STRING);
+    DONT_OPTIMIZE
+    v[0] = ALLOC_N(int, b_len + 1);
+    v[1] = ALLOC_N(int, b_len + 1);
+    MEMZERO(v[0], int, b_len + 1);
+    MEMZERO(v[1], int, b_len + 1);
+    COMPUTE_LEVENSHTEIN_DISTANCE
+    for (i = 0, min = a_len; i <= b_len; i++) {
+        if (v[p][i] < min) min = v[p][i];
+    }
+    result = INT2FIX(min);
+    free(v[0]);
+    free(v[1]);
+    return result;
 }
 /*
- * Edit distances are calculated here
+ * Sellers edit distances are computed here:
  */
-enum { MATCH = 1, MATCHR, SEARCH, SEARCHR, COMPARE, COMPARER };
+/*
+ * Weighted variant of the edit distance recurrence: a mismatch costs
+ * amatch->substitution, a step from the previous row costs
+ * amatch->insertion and a step from the previous column costs
+ * amatch->deletion. Works on the rolling rows v[0]/v[1] prepared by the
+ * expanding function, which must also declare weight, i, j, c and p.
+ * After the loop p indexes the row written last (1 when a_len is 0).
+ * NOTE(review): the first column is seeded with i * deletion although the
+ * step from the previous row is charged the insertion weight - confirm this
+ * asymmetry is intended.
+ */
+#define COMPUTE_SELLERS_DISTANCE                                            \
+    for (i = 1, c = 0, p = 1; i <= a_len; i++) {                            \
+        c = i % 2;                      /* current row */                   \
+        p = (i + 1) % 2;                /* previous row */                  \
+        v[c][0] = i * amatch->deletion; /* first column */                  \
+        for (j = 1; j <= b_len; j++) {                                      \
+            /* Bellman's principle of optimality: */                        \
+            weight = v[p][j - 1] +                                          \
+                (a_ptr[i - 1] == b_ptr[j - 1] ? 0 : amatch->substitution);  \
+            if (weight > v[p][j] + amatch->insertion) {                     \
+                 weight = v[p][j] + amatch->insertion;                      \
+            }                                                               \
+            if (weight > v[c][j - 1] + amatch->deletion) {                  \
+                weight = v[c][j - 1] + amatch->deletion;                    \
+            }                                                               \
+            v[c][j] = weight;                                               \
+        }                                                                   \
+        p = c;                                                              \
+        c = (c + 1) % 2;                                                    \
+    }
-static int weight2int(weight, name)
-    VALUE weight;
-    char *name;
+static VALUE Sellers_match(Sellers *amatch, VALUE string)
 {
-    if (TYPE(weight) != T_FIXNUM) {
-        rb_raise(rb_eTypeError,
-            "value of weight %s has to be of type Fixnum (%s given)",
-            "subw", NIL_P(weight) ? "NilClass" : rb_class2name(CLASS_OF(weight)));
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    double *v[2], weight;
+    int  i, j, c, p;
+    Check_Type(string, T_STRING);
+    DONT_OPTIMIZE
+    v[0] = ALLOC_N(double, b_len + 1);
+    v[1] = ALLOC_N(double, b_len + 1);
+    for (i = 0; i <= b_len; i++) {
+        v[0][i] = i * amatch->deletion;
+        v[1][i] = i * amatch->deletion;
     }
-    return FIX2INT(weight);
+    COMPUTE_SELLERS_DISTANCE
+    result = rb_float_new(v[p][b_len]);
+    free(v[0]);
+    free(v[1]);
+    return result;
 }
-static VALUE
-calculate_distance (self, string, mode)
-    VALUE self;
-    VALUE string;
-    char mode;
+static VALUE Sellers_similar(Sellers *amatch, VALUE string)
 {
-    VALUE pattern, tmp;
-    static VALUE result;
-    int pattern_len, string_len;
-    char *pattern_ptr, *string_ptr;
-    vector *v[2];
-    int weight, sw, dw, iw, i, j, tmpi;
-    int c = 0, p = 1;
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    double *v[2], weight, max_weight;
+    int  i, j, c, p;
+    if (amatch->insertion >= amatch->deletion) {
+        if (amatch->substitution >= amatch->insertion) {
+            max_weight = amatch->substitution;
+        } else {
+            max_weight = amatch->insertion;
+        }
+    } else {
+        if (amatch->substitution >= amatch->deletion) {
+            max_weight = amatch->substitution;
+        } else {
+            max_weight = amatch->deletion;
+        }
+    }
     Check_Type(string, T_STRING);
-    string_ptr = RSTRING(string)->ptr;
-    string_len = RSTRING(string)->len;
+    DONT_OPTIMIZE
+    if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
+    if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
+    v[0] = ALLOC_N(double, b_len + 1);
+    v[1] = ALLOC_N(double, b_len + 1);
+    for (i = 0; i <= b_len; i++) {
+        v[0][i] = i * amatch->deletion;
+        v[1][i] = i * amatch->deletion;
+    }
-    pattern = rb_iv_get(self, "@pattern");
-    Check_Type(pattern, T_STRING);
-    pattern_ptr = RSTRING(pattern)->ptr;
-    pattern_len = RSTRING(pattern)->len;
+    COMPUTE_SELLERS_DISTANCE
-    sw = weight2int(rb_iv_get(self, "@subw"), "subw");
-    dw = weight2int(rb_iv_get(self, "@delw"), "delw");
-    iw = weight2int(rb_iv_get(self, "@insw"), "insw");
-    v[0] = vector_new(string_len);
-    switch (mode) {
-        case MATCH:
-        case MATCHR:
-        case COMPARE:
-        case COMPARER:
-            for (i = 0; i <= v[0]->len; i++) v[0]->ptr[i] = i * iw;
-            break;
-        case SEARCH:
-        case SEARCHR:
-            for (i = 0; i <= v[0]->len; i++) v[0]->ptr[i] = 0;
-            break;
-        default:
-            rb_raise(rb_eFatal, "unknown mode in calculate_distance");
+    if (b_len > a_len) {
+        result = rb_float_new(1.0 - v[p][b_len] / (b_len * max_weight));
+    } else {
+        result = rb_float_new(1.0 - v[p][b_len] / (a_len * max_weight));
     }
+    free(v[0]);
+    free(v[1]);
+    return result;
+}
+/*
+ * Searches string for the pattern using the weighted Sellers distance and
+ * returns, as a Float, the smallest value in the final row of the distance
+ * table. Both rows start out zeroed, so a match may begin at any position
+ * of string. Raises TypeError unless string is a String.
+ */
+static VALUE Sellers_search(Sellers *amatch, VALUE string)
+{
+    VALUE result;
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    double *v[2], weight, min;
+    int  i, j, c, p;
+    Check_Type(string, T_STRING);
+    DONT_OPTIMIZE
+    v[0] = ALLOC_N(double, b_len + 1);
+    v[1] = ALLOC_N(double, b_len + 1);
+    MEMZERO(v[0], double, b_len + 1);
+    MEMZERO(v[1], double, b_len + 1);
+    COMPUTE_SELLERS_DISTANCE
+    /*
+     * Seed the minimum with the first cell of the row rather than with
+     * a_len: with weights above 1.0 every cell can exceed a_len, and the
+     * result must still be a value that actually occurs in the row.
+     */
+    for (i = 1, min = v[p][0]; i <= b_len; i++) {
+        if (v[p][i] < min) min = v[p][i];
+    }
+    result = rb_float_new(min);
+    free(v[0]);
+    free(v[1]);
+    return result;
+}
+/*
+ * Pair distances are computed here:
+ */
-    v[1] = vector_new(string_len);
-    for (i = 1; i <= pattern_len; i++) {
-        c = i % 2;                /* current row */
-        p = (i - 1) % 2;          /* previous row */
-        v[c]->ptr[0] = i * dw;    /* first column */
-        for (j = 1; j <= string_len; j++) {
-            /* Bellman's principle of optimality: */
-            weight = v[p]->ptr[j - 1] +
-                (pattern_ptr[i - 1] == string_ptr[j - 1] ? 0 : sw);
-            if (weight > v[p]->ptr[j] + iw) weight = v[p]->ptr[j] + iw;
-            if (weight > v[c]->ptr[j - 1] + dw) weight = v[c]->ptr[j - 1] + dw;
-            v[c]->ptr[j] = weight;
+static VALUE PairDistance_match(
+        PairDistance *amatch, VALUE string, VALUE regexp, int use_regexp)
+{
+    double result;
+    VALUE tokens;
+    PairArray *pair_array;
+    Check_Type(string, T_STRING);
+    if (!NIL_P(regexp) || use_regexp) {
+        tokens = rb_funcall(
+            rb_str_new(amatch->pattern, amatch->pattern_len),
+            id_split, 1, regexp
+        );
+        if (!amatch->pattern_pair_array) {
+            amatch->pattern_pair_array = PairArray_new(tokens);
+        } else {
+            pair_array_reactivate(amatch->pattern_pair_array);
+        }
+        tokens = rb_funcall(string, id_split, 1, regexp);
+        pair_array = PairArray_new(tokens);
+    } else {
+        VALUE tmp = rb_str_new(amatch->pattern, amatch->pattern_len);
+        tokens = rb_ary_new4(1, &tmp);
+        if (!amatch->pattern_pair_array) {
+            amatch->pattern_pair_array = PairArray_new(tokens);
+        } else {
+            pair_array_reactivate(amatch->pattern_pair_array);
         }
+        tokens = rb_ary_new4(1, &string);
+        pair_array = PairArray_new(tokens);
     }
-    switch (mode) {
-        case MATCH:
-            result = INT2FIX(vector_last(v[c]));
-            break;
-        case MATCHR:
-            result = rb_float_new((double) vector_last(v[c]) / pattern_len);
-            break;
-        case SEARCH:
-            tmpi = vector_minimum(v[c]);
-            result = tmpi < 0 ? INT2FIX(pattern_len) : INT2FIX(tmpi);
-            break;
-        case SEARCHR:
-            tmpi = vector_minimum(v[c]);
-            result = rb_float_new( tmpi < 0 ? 1.0 : (double) tmpi / pattern_len);
-            break;
-        case COMPARE:
-            result = INT2FIX((string_len < pattern_len ? -1 : 1) *
-                vector_last(v[c]));
-            break;
-        case COMPARER:
-            result = rb_float_new((double)
-                (string_len < pattern_len ? -1 : 1)     *
-                vector_last(v[c]) / pattern_len);
-            break;
-        default:
-            rb_raise(rb_eFatal, "unknown mode in calculate_distance");
+    result = pair_array_match(amatch->pattern_pair_array, pair_array);
+    pair_array_destroy(pair_array);
+    return rb_float_new(result);
+}
+/*
+ * Hamming distances are computed here:
+ */
+/*
+ * Expands inside a function that declares i, result, a_ptr, a_len, b_ptr and
+ * b_len. result starts at b_len - a_len and is incremented for every index
+ * below a_len at which the two buffers differ. If i reaches b_len first,
+ * a_len - b_len is added (cancelling the negative start value) and the loop
+ * stops.
+ * NOTE(review): presumably callers arrange a_len <= b_len via OPTIMIZE_TIME,
+ * which would make the i >= b_len branch unreachable -- confirm.
+ */
+#define COMPUTE_HAMMING_DISTANCE                            \
+    for (i = 0, result = b_len - a_len; i < a_len; i++) {   \
+        if (i >= b_len) {                                   \
+            result +=  a_len - b_len;                       \
+            break;                                          \
+        }                                                   \
+        if (b_ptr[i] != a_ptr[i]) result++;                 \
     }
-    vector_destroy(v[0]);
-    vector_destroy(v[1]);
-    return result;
+/*
+ * Returns the value computed by COMPUTE_HAMMING_DISTANCE as a Fixnum.
+ * string must be a String; Check_Type raises a TypeError otherwise.
+ */
+static VALUE Hamming_match(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int i, result;
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    COMPUTE_HAMMING_DISTANCE
+    return INT2FIX(result);
+}
+/*
+ * Returns a Float similarity derived from the Hamming distance:
+ * 1.0 when both buffers are empty, 0.0 when exactly one of them is empty,
+ * otherwise 1.0 - distance / b_len.
+ * string must be a String; Check_Type raises a TypeError otherwise.
+ */
+static VALUE Hamming_similar(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int i, result;
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    /* The empty cases are handled first so the division by b_len below
+     * never sees a zero length. */
+    if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
+    if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
+    COMPUTE_HAMMING_DISTANCE
+    return rb_float_new(1.0 - ((double) result) / b_len);
+}
+/*
+ * Longest Common Subsequence computation
+ */
+/*
+ * Computes the length of the longest common subsequence of a_ptr[0..a_len)
+ * and b_ptr[0..b_len) into result, using two rolling rows of b_len + 1 ints:
+ * l[c] is the row being filled for index i, l[p] the row filled for i + 1.
+ * Expects result, c, p, i, j and int *l[2] to be declared by the caller.
+ * The rows are obtained with ALLOC_N (Ruby's xmalloc), so they are released
+ * with the matching xfree instead of the C library's free.
+ */
+#define COMPUTE_LONGEST_SUBSEQUENCE                         \
+    l[0] = ALLOC_N(int, b_len + 1);                         \
+    l[1] = ALLOC_N(int, b_len + 1);                         \
+    for (i = a_len, c = 0, p = 1; i >= 0; i--) {            \
+        for (j = b_len; j >= 0; j--) {                      \
+            if (i == a_len || j == b_len) {                 \
+                l[c][j] = 0;                                \
+            } else if (a_ptr[i] == b_ptr[j]) {              \
+                l[c][j] = 1 + l[p][j + 1];                  \
+            } else {                                        \
+                int x = l[p][j], y = l[c][j + 1];           \
+                if (x > y) l[c][j] = x; else l[c][j] = y;   \
+            }                                               \
+        }                                                   \
+        p = c;                                              \
+        c = (c + 1) % 2;                                    \
+    }                                                       \
+    result = l[p][0];                                       \
+    xfree(l[0]);                                            \
+    xfree(l[1]);
+/*
+ * Returns the longest common subsequence length computed by
+ * COMPUTE_LONGEST_SUBSEQUENCE as a Fixnum, or 0 right away when either
+ * buffer is empty. string must be a String; Check_Type raises a TypeError
+ * otherwise.
+ */
+static VALUE LongestSubsequence_match(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int result, c, p, i, j, *l[2];
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    if (a_len == 0 || b_len == 0) return INT2FIX(0);
+    COMPUTE_LONGEST_SUBSEQUENCE
+    return INT2FIX(result);
+}
+/*
+ * Returns a Float similarity based on the longest common subsequence:
+ * 1.0 when both buffers are empty, 0.0 when exactly one of them is empty,
+ * otherwise the subsequence length divided by b_len.
+ * string must be a String; Check_Type raises a TypeError otherwise.
+ */
+static VALUE LongestSubsequence_similar(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int result, c, p, i, j, *l[2];
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    /* Handle the empty cases first so b_len is non-zero in the division. */
+    if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
+    if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
+    COMPUTE_LONGEST_SUBSEQUENCE
+    return rb_float_new(((double) result) / b_len);
+}
+/*
+ * Longest Common Substring computation
+ */
+/*
+ * Computes the length of the longest common (contiguous) substring of
+ * a_ptr[0..a_len) and b_ptr[0..b_len) into result, using two rolling rows of
+ * b_len ints: l[c][j] is the length of the common run ending at a_ptr[i] and
+ * b_ptr[j], l[p] holds the same values for i - 1.
+ * Expects result, c, p, i, j and int *l[2] to be declared by the caller.
+ * The rows are obtained with ALLOC_N (Ruby's xmalloc), so they are released
+ * with the matching xfree instead of the C library's free.
+ */
+#define COMPUTE_LONGEST_SUBSTRING                           \
+    l[0] = ALLOC_N(int, b_len);                             \
+    MEMZERO(l[0], int, b_len);                              \
+    l[1] = ALLOC_N(int, b_len);                             \
+    MEMZERO(l[1], int, b_len);                              \
+    result = 0;                                             \
+    for (i = 0, c = 0, p = 1; i < a_len; i++) {             \
+        for (j = 0; j < b_len; j++) {                       \
+            if (a_ptr[i] == b_ptr[j]) {                     \
+                l[c][j] = j == 0 ? 1 : 1 + l[p][j - 1];     \
+                if (l[c][j] > result) result = l[c][j];     \
+            } else {                                        \
+                l[c][j] = 0;                                \
+            }                                               \
+        }                                                   \
+        p = c;                                              \
+        c = (c + 1) % 2;                                    \
+    }                                                       \
+    xfree(l[0]);                                            \
+    xfree(l[1]);
+/*
+ * Returns the longest common substring length computed by
+ * COMPUTE_LONGEST_SUBSTRING as a Fixnum, or 0 right away when either buffer
+ * is empty. string must be a String; Check_Type raises a TypeError
+ * otherwise.
+ */
+static VALUE LongestSubstring_match(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int result, c, p, i, j, *l[2];
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    if (a_len == 0 || b_len == 0) return INT2FIX(0);
+    COMPUTE_LONGEST_SUBSTRING
+    return INT2FIX(result);
+}
+/*
+ * Returns a Float similarity based on the longest common substring:
+ * 1.0 when both buffers are empty, 0.0 when exactly one of them is empty,
+ * otherwise the substring length divided by b_len.
+ * string must be a String; Check_Type raises a TypeError otherwise.
+ */
+static VALUE LongestSubstring_similar(General *amatch, VALUE string)
+{
+    char *a_ptr, *b_ptr;
+    int a_len, b_len;
+    int result, c, p, i, j, *l[2];
+    Check_Type(string, T_STRING);
+    /* OPTIMIZE_TIME is defined outside this view; presumably it fills
+     * a_ptr/a_len and b_ptr/b_len from the pattern and string -- confirm. */
+    OPTIMIZE_TIME
+    /* Handle the empty cases first so b_len is non-zero in the division. */
+    if (a_len == 0 && b_len == 0) return rb_float_new(1.0);
+    if (a_len == 0 || b_len == 0) return rb_float_new(0.0);
+    COMPUTE_LONGEST_SUBSTRING
+    return rb_float_new(((double) result) / b_len);
+}
+/*
+ * Ruby API
+ */
+ /*
+  * Document-class: Amatch::Levenshtein
+  *
+  * The Levenshtein edit distance is defined as the minimal costs involved to
+  * transform one string into another by using three elementary operations:
+  * deletion, insertion and substitution of a character. To transform "water"
+  * into "wine", for instance, you have to substitute "a" -> "i": "witer", "t"
+  * -> "n": "winer" and delete "r": "wine". The edit distance between "water"
+  * and "wine" is 3, because you have to apply three operations. The edit
+  * distance between "wine" and "wine" is 0 of course: no operation is
+  * necessary for the transformation -- they're already the same string. It's
+  * easy to see that more similar strings have smaller edit distances than
+  * strings that differ a lot.
+  */
+DEF_RB_FREE(Levenshtein, General)
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::Levenshtein instance from <code>pattern</code>.
+ */
+static VALUE rb_Levenshtein_initialize(VALUE self, VALUE pattern)
+{
+    GET_STRUCT(General)
+    General_pattern_set(amatch, pattern);
+    return self;
+}
+DEF_CONSTRUCTOR(Levenshtein, General)
+/*
+ * call-seq: match(strings) -> results
+ *
+ * Uses this Amatch::Levenshtein instance to match Amatch::Levenshtein#pattern
+ * against <code>strings</code>. It returns the number of operations, the
+ * Levenshtein distance. <code>strings</code> has to be either a String or an
+ * Array of Strings. The returned <code>results</code> are either a Fixnum or
+ * an Array of Fixnums respectively.
+ */
+static VALUE rb_Levenshtein_match(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, Levenshtein_match);
+}
+/*
+ * call-seq: similar(strings) -> results
+ *
+ * Uses this Amatch::Levenshtein instance to match  Amatch::Levenshtein#pattern
+ * against <code>strings</code>, and compute a Levenshtein distance metric
+ * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
+ * <code>strings</code> has to be either a String or an Array of Strings. The
+ * returned <code>results</code> are either a Float or an Array of Floats
+ * respectively.
+ */
+static VALUE rb_Levenshtein_similar(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, Levenshtein_similar);
+}
+/*
+ * call-seq: levenshtein_similar(strings) -> results
+ *
+ * If called on a String, this string is used as a Amatch::Levenshtein#pattern
+ * to match against <code>strings</code>. It returns a Levenshtein distance
+ * metric number between 0.0 for very unsimilar strings and 1.0 for an exact
+ * match. <code>strings</code> has to be either a String or an Array of
+ * Strings. The returned <code>results</code> are either a Float or an Array of
+ * Floats respectively.
+ */
+static VALUE rb_str_levenshtein_similar(VALUE self, VALUE strings)
+{
+    /* The temporary matcher is created with the class that belongs to the
+     * rb_Levenshtein_new constructor, so it is an Amatch::Levenshtein. */
+    VALUE amatch = rb_Levenshtein_new(rb_cLevenshtein, self);
+    return rb_Levenshtein_similar(amatch, strings);
+}
+/*
+ * call-seq: search(strings) -> results
+ *
+ * Searches Amatch::Levenshtein#pattern in <code>strings</code> and returns the
+ * edit distance (the sum of character operations) as a Fixnum value, by
+ * greedily trimming prefixes or postfixes of the match. <code>strings</code>
+ * has to be either a String or an Array of Strings. The returned
+ * <code>results</code> are either a Fixnum or an Array of Fixnums respectively.
+ */
+static VALUE rb_Levenshtein_search(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, Levenshtein_search);
+}
+/*
+ * Document-class: Amatch::Sellers
+ *
+ * The Sellers edit distance is very similar to the Levenshtein edit distance.
+ * The difference is that you can also specify different weights for every
+ * operation to prefer special operations over others. This extension of the
+ * Levenshtein edit distance is also known as the Needleman-Wunsch distance.
+ */
+DEF_RB_FREE(Sellers, Sellers)
+/*
+ * Document-method: substitution
+ *
+ * call-seq: substitution -> weight
+ *
+ * Returns the weight of the substitution operation, that is used to compute
+ * the Sellers distance.
+ */
+DEF_RB_READER(Sellers, rb_Sellers_substitution, substitution,
+    rb_float_new)
+/*
+ * Document-method: deletion
+ *
+ * call-seq: deletion -> weight
+ *
+ * Returns the weight of the deletion operation, that is used to compute
+ * the Sellers distance.
+ */
+DEF_RB_READER(Sellers, rb_Sellers_deletion, deletion,
+    rb_float_new)
+/*
+ * Document-method: insertion
+ *
+ * call-seq: insertion -> weight
+ *
+ * Returns the weight of the insertion operation, that is used to compute
+ * the Sellers distance.
+ */
+DEF_RB_READER(Sellers, rb_Sellers_insertion, insertion,
+    rb_float_new)
+/*
+ * Document-method: substitution=
+ *
+ * call-seq: substitution=(weight)
+ *
+ * Sets the weight of the substitution operation, that is used to compute
+ * the Sellers distance, to <code>weight</code>. The <code>weight</code>
+ * should be a Float value >= 0.0.
+ */
+DEF_RB_WRITER(Sellers, rb_Sellers_substitution_set, substitution,
+    double, CAST2FLOAT, FLOAT2C, >= 0)
+/*
+ * Document-method: deletion=
+ *
+ * call-seq: deletion=(weight)
+ *
+ * Sets the weight of the deletion operation, that is used to compute
+ * the Sellers distance, to <code>weight</code>. The <code>weight</code>
+ * should be a Float value >= 0.0.
+ */
+DEF_RB_WRITER(Sellers, rb_Sellers_deletion_set, deletion,
+    double, CAST2FLOAT, FLOAT2C, >= 0)
+/*
+ * Document-method: insertion=
+ *
+ * call-seq: insertion=(weight)
+ *
+ * Sets the weight of the insertion operation, that is used to compute
+ * the Sellers distance, to <code>weight</code>. The <code>weight</code>
+ * should be a Float value >= 0.0.
+ */
+DEF_RB_WRITER(Sellers, rb_Sellers_insertion_set, insertion,
+    double, CAST2FLOAT, FLOAT2C, >= 0)
+/*
+ * Resets all weights (substitution, deletion, and insertion) to 1.0.
+ */
+static VALUE rb_Sellers_reset_weights(VALUE self)
+{
+    GET_STRUCT(Sellers)
+    Sellers_reset_weights(amatch);
+    return self;
 }
-static VALUE
-handle_strings(self, strings, mode)
-    VALUE self;
-    VALUE strings;
-    char mode;
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::Sellers instance from <code>pattern</code>,
+ * with all weights initially set to 1.0.
+ */
+static VALUE rb_Sellers_initialize(VALUE self, VALUE pattern)
 {
-    if (TYPE(strings) == T_ARRAY) {
+    GET_STRUCT(Sellers)
+    Sellers_pattern_set(amatch, pattern);
+    Sellers_reset_weights(amatch);
+    return self;
+}
+DEF_CONSTRUCTOR(Sellers, Sellers)
+/*
+ * Document-method: pattern
+ *
+ * call-seq: pattern -> pattern string
+ *
+ * Returns the current pattern string of this Amatch::Sellers instance.
+ */
+/*
+ * Document-method: pattern=
+ *
+ * call-seq: pattern=(pattern)
+ *
+ * Sets the current pattern string of this Amatch::Sellers instance to
+ * <code>pattern</code>.
+ */
+/*
+ * call-seq: match(strings) -> results
+ *
+ * Uses this Amatch::Sellers instance to match Sellers#pattern against
+ * <code>strings</code>, while taking into account the given weights. It
+ * returns the number of weighted character operations, the Sellers distance.
+ * <code>strings</code> has to be either a String or an Array of Strings. The
+ * returned <code>results</code> are either a Float or an Array of Floats
+ * respectively.
+ */
+static VALUE rb_Sellers_match(VALUE self, VALUE strings)
+{
+    GET_STRUCT(Sellers)
+    return Sellers_iterate_strings(amatch, strings, Sellers_match);
+}
+/*
+ * call-seq: similar(strings) -> results
+ *
+ * Uses this Amatch::Sellers instance to match Amatch::Sellers#pattern
+ * against <code>strings</code> (taking into account the given weights), and
+ * compute a Sellers distance metric number between 0.0 for very unsimilar
+ * strings and 1.0 for an exact match. <code>strings</code> has to be either a
+ * String or an Array of Strings. The returned <code>results</code> are either
+ * a Float or an Array of Floats respectively.
+ */
+static VALUE rb_Sellers_similar(VALUE self, VALUE strings)
+{
+    GET_STRUCT(Sellers)
+    return Sellers_iterate_strings(amatch, strings, Sellers_similar);
+}
+/*
+ * call-seq: search(strings) -> results
+ *
+ * searches Sellers#pattern in <code>strings</code> and returns the edit
+ * distance (the sum of weighted character operations) as a Float value, by
+ * greedy trimming prefixes or postfixes of the match. <code>strings</code> has
+ * to be either a String or an Array of Strings. The returned
+ * <code>results</code> are either a Float or an Array of Floats respectively.
+ */
+static VALUE rb_Sellers_search(VALUE self, VALUE strings)
+{
+    GET_STRUCT(Sellers)
+    return Sellers_iterate_strings(amatch, strings, Sellers_search);
+}
+/*
+ * Document-class: Amatch::PairDistance
+ *
+ * The pair distance between two strings is based on the number of adjacent
+ * character pairs that are contained in both strings. The similarity
+ * metric of two strings s1 and s2 is
+ *   2 * |intersection(pairs(s1), pairs(s2))| / (|pairs(s1)| + |pairs(s2)|)
+ * If it is 1.0 the two strings are an exact match, if less than 1.0 they
+ * are more dissimilar. The advantage of considering adjacent characters, is to
+ * take account not only of the characters, but also of the character ordering
+ * in the original strings.
+ *
+ * This metric is very capable to find similarities in natural languages.
+ * It is explained in more detail in Simon White's article "How to Strike a
+ * Match", located at this url:
+ * http://www.catalysoft.com/articles/StrikeAMatch.html
+ * It is also very similar (a special case) to the method described under
+ * http://citeseer.lcs.mit.edu/gravano01using.html in "Using q-grams in a DBMS
+ * for Approximate String Processing."
+ */
+DEF_RB_FREE(PairDistance, PairDistance)
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::PairDistance instance from <code>pattern</code>.
+ */
+static VALUE rb_PairDistance_initialize(VALUE self, VALUE pattern)
+{
+    GET_STRUCT(PairDistance)
+    PairDistance_pattern_set(amatch, pattern);
+    return self;
+}
+DEF_CONSTRUCTOR(PairDistance, PairDistance)
+/*
+ * call-seq: match(strings, regexp = /\s+/) -> results
+ *
+ * Uses this Amatch::PairDistance instance to match PairDistance#pattern against
+ * <code>strings</code>. It returns the pair distance measure: a returned
+ * value of 1.0 is an exact match, partial matches are lower values, while
+ * 0.0 means no match at all.
+ *
+ * <code>strings</code> has to be either a String or an
+ * Array of Strings. The argument <code>regexp</code> is used to split the
+ * pattern and strings into tokens first. It defaults to /\s+/. If the
+ * splitting should be omitted, call the method with nil as <code>regexp</code>
+ * explicitly.
+ *
+ * The returned <code>results</code> are either a Float or an
+ * Array of Floats respectively.
+ */
+static VALUE rb_PairDistance_match(int argc, VALUE *argv, VALUE self)
+{
+    VALUE result, strings, regexp = Qnil;
+    int use_regexp;
+    GET_STRUCT(PairDistance)
+    rb_scan_args(argc, argv, "11", &strings, &regexp);
+    use_regexp = NIL_P(regexp) && argc != 2;
+    if (TYPE(strings) == T_STRING) {
+        result = PairDistance_match(amatch, strings, regexp, use_regexp);
+    } else {
+        Check_Type(strings, T_ARRAY);
         int i;
-        VALUE result = rb_ary_new2(RARRAY(strings)->len);
+        result = rb_ary_new2(RARRAY(strings)->len);
         for (i = 0; i < RARRAY(strings)->len; i++) {
             VALUE string = rb_ary_entry(strings, i);
             if (TYPE(string) != T_STRING) {
                 rb_raise(rb_eTypeError,
                     "array has to contain only strings (%s given)",
-                    NIL_P(string) ? "NilClass" :
-                                    rb_class2name(CLASS_OF(string)));
+                    NIL_P(string) ?
+                        "NilClass" :
+                        rb_class2name(CLASS_OF(string)));
             }
-            rb_ary_push(result, calculate_distance(self, string, mode));
+            rb_ary_push(result,
+                PairDistance_match(amatch, string, regexp, use_regexp));
         }
-        return result;
-    } else if (TYPE(strings) == T_STRING) {
-        return calculate_distance(self, strings, mode);
-    } else {
-        rb_raise(rb_eTypeError,
-            "value of strings needs to be string or array (%s given)",
-            NIL_P(strings) ? "NilClass" : rb_class2name(CLASS_OF(strings)));
     }
+    pair_array_destroy(amatch->pattern_pair_array);
+    amatch->pattern_pair_array = NULL;
+    return result;
 }
 /*
- * Ruby API
+ * call-seq: pair_distance_similar(strings) -> results
+ *
+ * If called on a String, this string is used as a Amatch::PairDistance#pattern
+ * to match against <code>strings</code> using /\s+/ as the tokenizing regular
+ * expression. It returns a pair distance metric number between 0.0 for very
+ * unsimilar strings and 1.0 for an exact match. <code>strings</code> has to be
+ * either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Float or an Array of Floats respectively.
  */
-static VALUE
-rb_amatch_resetw(self)
-    VALUE self;
+static VALUE rb_str_pair_distance_similar(VALUE self, VALUE strings)
 {
-    rb_iv_set(self, "@subw", INT2FIX(1));
-    rb_iv_set(self, "@delw", INT2FIX(1));
-    rb_iv_set(self, "@insw", INT2FIX(1));
-    return Qtrue;
+    /* The temporary matcher is created with the class that belongs to the
+     * rb_PairDistance_new constructor, so it is an Amatch::PairDistance. */
+    VALUE amatch = rb_PairDistance_new(rb_cPairDistance, self);
+    /* argc == 1: no regexp argument is passed on to rb_PairDistance_match. */
+    return rb_PairDistance_match(1, &strings, amatch);
 }
-static VALUE
-rb_amatch_initialize(self, pattern)
-    VALUE self;
-    VALUE pattern;
-{
+/*
+ * Document-class: Amatch::Hamming
+ *
+ *  This class computes the Hamming distance between two strings.
+ *
+ *  The Hamming distance between two strings is the number of characters, that
+ *  are different. Thus a hamming distance of 0 means an exact
+ *  match, a hamming distance of 1 means one character is different, and so on.
+ *  If one string is longer than the other string, the missing characters are
+ *  counted as different characters.
+ */
+DEF_RB_FREE(Hamming, General)
-    Check_Type(pattern, T_STRING);
-    rb_iv_set(self, "@pattern", pattern);
-    rb_amatch_resetw(self);
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::Hamming instance from <code>pattern</code>.
+ */
+static VALUE rb_Hamming_initialize(VALUE self, VALUE pattern)
+{
+    GET_STRUCT(General)
+    General_pattern_set(amatch, pattern);
     return self;
 }
-static VALUE
-rb_amatch_pattern_is(self, pattern)
-    VALUE self;
-    VALUE pattern;
-{
-    Check_Type(pattern, T_STRING);
-    rb_iv_set(self, "@pattern", pattern);
+DEF_CONSTRUCTOR(Hamming, General)
-    return pattern;
+/*
+ * call-seq: match(strings) -> results
+ *
+ * Uses this Amatch::Hamming instance to match Amatch::Hamming#pattern against
+ * <code>strings</code>, that is compute the hamming distance between
+ * <code>pattern</code> and <code>strings</code>. <code>strings</code> has to
+ * be either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Fixnum or an Array of Fixnums respectively.
+ */
+static VALUE rb_Hamming_match(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, Hamming_match);
 }
+/*
+ * call-seq: similar(strings) -> results
+ *
+ * Uses this Amatch::Hamming instance to match  Amatch::Hamming#pattern against
+ * <code>strings</code>, and compute a Hamming distance metric number between
+ * 0.0 for very unsimilar strings and 1.0 for an exact match.
+ * <code>strings</code> has to be either a String or an Array of Strings. The
+ * returned <code>results</code> are either a Float or an Array of Floats
+ * respectively.
+ */
+static VALUE rb_Hamming_similar(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, Hamming_similar);
+}
-static VALUE
-rb_amatch_match(self, strings)
-    VALUE self;
-    VALUE strings;
+/*
+ * call-seq: hamming_similar(strings) -> results
+ *
+ * If called on a String, this string is used as a Amatch::Hamming#pattern to
+ * match against <code>strings</code>. It returns a Hamming distance metric
+ * number between 0.0 for very unsimilar strings and 1.0 for an exact match.
+ * <code>strings</code>
+ * has to be either a String or an Array of Strings. The returned
+ * <code>results</code> are either a Float or an Array of Floats respectively.
+ */
+static VALUE rb_str_hamming_similar(VALUE self, VALUE strings)
 {
-    return handle_strings(self, strings, MATCH);
+    VALUE amatch = rb_Hamming_new(rb_cHamming, self);
+    return rb_Hamming_similar(amatch, strings);
 }
-static VALUE
-rb_amatch_matchr(self, strings)
-    VALUE self;
-    VALUE strings;
+/*
+ * Document-class: Amatch::LongestSubsequence
+ *
+ *  This class computes the length of the longest subsequence common to two
+ *  strings. A subsequence doesn't have to be contiguous. The longer the common
+ *  subsequence is, the more similar the two strings will be.
+ *
+ *  The longest common subsequence between "test" and "test" is of length 4,
+ *  because "test" itself is this subsequence. The longest common subsequence
+ *  between "test" and "east" is "e", "s", "t" and the length of the
+ *  sequence is 3.
+ */
+DEF_RB_FREE(LongestSubsequence, General)
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::LongestSubsequence instance from <code>pattern</code>.
+ */
+static VALUE rb_LongestSubsequence_initialize(VALUE self, VALUE pattern)
 {
-    return handle_strings(self, strings, MATCHR);
+    GET_STRUCT(General)
+    General_pattern_set(amatch, pattern);
+    return self;
 }
-static VALUE
-rb_amatch_compare(self, strings)
-    VALUE self;
-    VALUE strings;
-{
-    return handle_strings(self, strings, COMPARE);
+DEF_CONSTRUCTOR(LongestSubsequence, General)
+/*
+ * call-seq: match(strings) -> results
+ *
+ * Uses this Amatch::LongestSubsequence instance to match
+ * LongestSubsequence#pattern against <code>strings</code>, that is compute the
+ * length of the longest common subsequence. <code>strings</code> has to be
+ * either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Fixnum or an Array of Fixnums respectively.
+ */
+static VALUE rb_LongestSubsequence_match(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, LongestSubsequence_match);
 }
-static VALUE
-rb_amatch_comparer(self, strings)
-    VALUE self;
-    VALUE strings;
+/*
+ * call-seq: similar(strings) -> results
+ *
+ * Uses this Amatch::LongestSubsequence instance to match
+ * Amatch::LongestSubsequence#pattern against <code>strings</code>, and compute
+ * a longest subsequence distance metric number between 0.0 for very unsimilar
+ * strings and 1.0 for an exact match. <code>strings</code> has to be either a
+ * String or an Array of Strings. The returned <code>results</code> are either
+ * a Float or an Array of Floats respectively.
+ */
+static VALUE rb_LongestSubsequence_similar(VALUE self, VALUE strings)
+{
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, LongestSubsequence_similar);
+}
+/*
+ * call-seq: longest_subsequence_similar(strings) -> results
+ *
+ * If called on a String, this string is used as a
+ * Amatch::LongestSubsequence#pattern to match against <code>strings</code>. It
+ * returns a longest subsequence distance metric number between 0.0 for very
+ * unsimilar strings and 1.0 for an exact match. <code>strings</code> has to be
+ * either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Float or an Array of Floats respectively.
+ */
+static VALUE rb_str_longest_subsequence_similar(VALUE self, VALUE strings)
+{
+    /* The temporary matcher is created with the class that belongs to the
+     * rb_LongestSubsequence_new constructor, so it is an
+     * Amatch::LongestSubsequence. */
+    VALUE amatch = rb_LongestSubsequence_new(rb_cLongestSubsequence, self);
+    return rb_LongestSubsequence_similar(amatch, strings);
+}
+/*
+ * Document-class: Amatch::LongestSubstring
+ *
+ * The longest common substring is the longest substring that is part of
+ * both strings. A substring is contiguous, while a subsequence need not
+ * be. The longer the common substring is, the more similar the two strings
+ * will be.
+ *
+ * The longest common substring between 'string' and 'string' is 'string'
+ * again, thus the longest common substring length is 6. The longest common
+ * substring between 'string' and 'storing' is 'ring', thus the longest common
+ * substring length is 4.
+ */
+DEF_RB_FREE(LongestSubstring, General)
+/*
+ * call-seq: new(pattern)
+ *
+ * Creates a new Amatch::LongestSubstring instance from <code>pattern</code>.
+ */
+static VALUE rb_LongestSubstring_initialize(VALUE self, VALUE pattern)
 {
-    return handle_strings(self, strings, COMPARER);
+    GET_STRUCT(General)
+    General_pattern_set(amatch, pattern);
+    return self;
 }
+DEF_CONSTRUCTOR(LongestSubstring, General)
-static VALUE
-rb_amatch_search(self, strings)
-    VALUE self;
-    VALUE strings;
+/*
+ * call-seq: match(strings) -> results
+ *
+ * Uses this Amatch::LongestSubstring instance to match
+ * LongestSubstring#pattern against <code>strings</code>, that is compute the
+ * length of the longest common substring. <code>strings</code> has to be
+ * either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Fixnum or an Array of Fixnums respectively.
+ */
+static VALUE rb_LongestSubstring_match(VALUE self, VALUE strings)
 {
-    return handle_strings(self, strings, SEARCH);
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, LongestSubstring_match);
 }
-static VALUE
-rb_amatch_searchr(self, strings)
-    VALUE self;
-    VALUE strings;
+/*
+ * call-seq: similar(strings) -> results
+ *
+ * Uses this Amatch::LongestSubstring instance to match
+ * Amatch::LongestSubstring#pattern against <code>strings</code>, and compute a
+ * longest substring distance metric number between 0.0 for very unsimilar
+ * strings and 1.0 for an exact match. <code>strings</code> has to be either a
+ * String or an Array of Strings. The returned <code>results</code> are either
+ * a Float or an Array of Floats respectively.
+ */
+static VALUE rb_LongestSubstring_similar(VALUE self, VALUE strings)
 {
-    return handle_strings(self, strings, SEARCHR);
+    GET_STRUCT(General)
+    return General_iterate_strings(amatch, strings, LongestSubstring_similar);
 }
-void
-Init_amatch()
+/*
+ * call-seq: longest_substring_similar(strings) -> results
+ *
+ * If called on a String, this string is used as a
+ * Amatch::LongestSubstring#pattern to match against <code>strings</code>. It
+ * returns a longest substring distance metric number between 0.0 for very
+ * unsimilar strings and 1.0 for an exact match. <code>strings</code> has to be
+ * either a String or an Array of Strings. The returned <code>results</code>
+ * are either a Float or an Array of Floats respectively.
+ */
+static VALUE rb_str_longest_substring_similar(VALUE self, VALUE strings)
+{
+    /* Use the LongestSubstring constructor and class, so the temporary
+     * matcher is an Amatch::LongestSubstring like the method it is handed
+     * to. */
+    VALUE amatch = rb_LongestSubstring_new(rb_cLongestSubstring, self);
+    return rb_LongestSubstring_similar(amatch, strings);
+}
+/*
+ * = amatch - Approximate Matching Extension for Ruby
+ *
+ * == Description
+ *
+ * This is a collection of classes that can be used for Approximate
+ * matching, searching, and comparing of Strings. They implement algorithms
+ * that compute the Levenshtein edit distance, Sellers edit distance, the
+ * Hamming distance, the longest common subsequence length, the longest common
+ * substring length, and the pair distance metric.
+ *
+ * == Author
+ *
+ * Florian Frank mailto:flori@ping.de
+ *
+ * == License
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License Version 2 as published by
+ * the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
+ *
+ * == Download
+ *
+ * The latest version of <b>amatch</b> can be found at
+ *
+ * * http://rubyforge.org/frs/?group_id=390
+ *
+ * Online Documentation should be located at
+ *
+ * * http://amatch.rubyforge.org
+ *
+ * == Examples
+ *  require 'amatch'
+ *  # => true
+ *  include Amatch
+ *  # => Object
+ *
+ *  m = Sellers.new("pattern")
+ *  # => #<Amatch::Sellers:0x40366324>
+ *  m.match("pattren")
+ *  # => 2.0
+ *  m.substitution = m.insertion = 3
+ *  # => 3
+ *  m.match("pattren")
+ *  # => 4.0
+ *  m.reset_weights
+ *  # => #<Amatch::Sellers:0x40366324>
+ *  m.match(["pattren","parent"])
+ *  # => [2.0, 4.0]
+ *  m.search("abcpattrendef")
+ *  # => 2.0
+ *
+ *  m = Levenshtein.new("pattern")
+ *  # => #<Amatch::Levenshtein:0x4035919c>
+ *  m.match("pattren")
+ *  # => 2
+ *  m.search("abcpattrendef")
+ *  # => 2
+ *  "pattern language".levenshtein_similar("language of patterns")
+ *  # => 0.2
+ *
+ *  m = Hamming.new("pattern")
+ *  # => #<Amatch::Hamming:0x40350858>
+ *  m.match("pattren")
+ *  # => 2
+ *  "pattern language".hamming_similar("language of patterns")
+ *  # => 0.1
+ *
+ *  m = PairDistance.new("pattern")
+ *  # => #<Amatch::PairDistance:0x40349be8>
+ *  m.match("pattr en")
+ *  # => 0.545454545454545
+ *  m.match("pattr en", nil)
+ *  # => 0.461538461538462
+ *  m.match("pattr en", /t+/)
+ *  # => 0.285714285714286
+ *  "pattern language".pair_distance_similar("language of patterns")
+ *  # => 0.928571428571429
+ *
+ *  m = LongestSubsequence.new("pattern")
+ *  # => #<Amatch::LongestSubsequence:0x4033e900>
+ *  m.match("pattren")
+ *  # => 6
+ *  "pattern language".longest_subsequence_similar("language of patterns")
+ *  # => 0.4
+ *
+ *  m = LongestSubstring.new("pattern")
+ *  # => #<Amatch::LongestSubstring:0x403378d0>
+ *  m.match("pattren")
+ *  # => 4
+ *  "pattern language".longest_substring_similar("language of patterns")
+ *  # => 0.4
+ *
+ */
+void Init_amatch() /* Extension init function: registers the Amatch module, its matcher classes and the String helper methods. */
 {
-    cAmatch = rb_define_class("Amatch", rb_cObject);
-    rb_define_method(cAmatch, "initialize", rb_amatch_initialize, 1);
-    rb_define_attr(cAmatch, "debug", 1, 1);
-    rb_define_attr(cAmatch, "subw", 1, 1);
-    rb_define_attr(cAmatch, "delw", 1, 1);
-    rb_define_attr(cAmatch, "insw", 1, 1);
-    rb_define_method(cAmatch, "resetw", rb_amatch_resetw, 0);
-    rb_define_method(cAmatch, "pattern=", rb_amatch_pattern_is, 1);
-    rb_define_attr(cAmatch, "pattern", 1, 0);
-    rb_define_method(cAmatch, "match", rb_amatch_match, 1);
-    rb_define_method(cAmatch, "matchr", rb_amatch_matchr, 1);
-    rb_define_method(cAmatch, "compare", rb_amatch_compare, 1);
-    rb_define_method(cAmatch, "comparer", rb_amatch_comparer, 1);
-    rb_define_method(cAmatch, "search", rb_amatch_search, 1);
-    rb_define_method(cAmatch, "searchr", rb_amatch_searchr, 1);
+    rb_mAmatch = rb_define_module("Amatch"); /* Amatch is now a module; every class below is defined under it */
+    /* Levenshtein: Amatch::Levenshtein with the shared rb_General_pattern
+     * accessors and the one-argument methods match, search and similar.
+     * Also adds String#levenshtein_similar. */
+    rb_cLevenshtein = rb_define_class_under(rb_mAmatch, "Levenshtein", rb_cObject);
+    rb_define_alloc_func(rb_cLevenshtein, rb_Levenshtein_s_allocate);
+    rb_define_method(rb_cLevenshtein, "initialize", rb_Levenshtein_initialize, 1);
+    rb_define_method(rb_cLevenshtein, "pattern", rb_General_pattern, 0);
+    rb_define_method(rb_cLevenshtein, "pattern=", rb_General_pattern_set, 1);
+    rb_define_method(rb_cLevenshtein, "match", rb_Levenshtein_match, 1);
+    rb_define_method(rb_cLevenshtein, "search", rb_Levenshtein_search, 1);
+    rb_define_method(rb_cLevenshtein, "similar", rb_Levenshtein_similar, 1);
+    rb_define_method(rb_cString, "levenshtein_similar", rb_str_levenshtein_similar, 1);
+    /* Sellers: Amatch::Sellers with readers/writers for the substitution,
+     * deletion and insertion weights, reset_weights, and match, search and
+     * similar. It registers its own rb_Sellers_pattern* accessors instead of
+     * the rb_General_pattern* ones, and no String method is added for it
+     * in this function. */
+    rb_cSellers = rb_define_class_under(rb_mAmatch, "Sellers", rb_cObject);
+    rb_define_alloc_func(rb_cSellers, rb_Sellers_s_allocate);
+    rb_define_method(rb_cSellers, "initialize", rb_Sellers_initialize, 1);
+    rb_define_method(rb_cSellers, "pattern", rb_Sellers_pattern, 0);
+    rb_define_method(rb_cSellers, "pattern=", rb_Sellers_pattern_set, 1);
+    rb_define_method(rb_cSellers, "substitution", rb_Sellers_substitution, 0);
+    rb_define_method(rb_cSellers, "substitution=", rb_Sellers_substitution_set, 1);
+    rb_define_method(rb_cSellers, "deletion", rb_Sellers_deletion, 0);
+    rb_define_method(rb_cSellers, "deletion=", rb_Sellers_deletion_set, 1);
+    rb_define_method(rb_cSellers, "insertion", rb_Sellers_insertion, 0);
+    rb_define_method(rb_cSellers, "insertion=", rb_Sellers_insertion_set, 1);
+    rb_define_method(rb_cSellers, "reset_weights", rb_Sellers_reset_weights, 0);
+    rb_define_method(rb_cSellers, "match", rb_Sellers_match, 1);
+    rb_define_method(rb_cSellers, "search", rb_Sellers_search, 1);
+    rb_define_method(rb_cSellers, "similar", rb_Sellers_similar, 1);
+    /* Hamming: Amatch::Hamming with the shared pattern accessors, match and
+     * similar (no search method is registered). Also adds
+     * String#hamming_similar. */
+    rb_cHamming = rb_define_class_under(rb_mAmatch, "Hamming", rb_cObject);
+    rb_define_alloc_func(rb_cHamming, rb_Hamming_s_allocate);
+    rb_define_method(rb_cHamming, "initialize", rb_Hamming_initialize, 1);
+    rb_define_method(rb_cHamming, "pattern", rb_General_pattern, 0);
+    rb_define_method(rb_cHamming, "pattern=", rb_General_pattern_set, 1);
+    rb_define_method(rb_cHamming, "match", rb_Hamming_match, 1);
+    rb_define_method(rb_cHamming, "similar", rb_Hamming_similar, 1);
+    rb_define_method(rb_cString, "hamming_similar", rb_str_hamming_similar, 1);
+    /* Pair Distance Metric: Amatch::PairDistance with its own pattern
+     * accessors. match is registered with argc -1, i.e. the C function
+     * receives a variable argument list (the usage examples in the comment
+     * above call it with an optional second argument). Also adds
+     * String#pair_distance_similar. */
+    rb_cPairDistance = rb_define_class_under(rb_mAmatch, "PairDistance", rb_cObject);
+    rb_define_alloc_func(rb_cPairDistance, rb_PairDistance_s_allocate);
+    rb_define_method(rb_cPairDistance, "initialize", rb_PairDistance_initialize, 1);
+    rb_define_method(rb_cPairDistance, "pattern", rb_PairDistance_pattern, 0);
+    rb_define_method(rb_cPairDistance, "pattern=", rb_PairDistance_pattern_set, 1);
+    rb_define_method(rb_cPairDistance, "match", rb_PairDistance_match, -1);
+    rb_define_alias(rb_cPairDistance, "similar", "match"); /* similar is an alias of match for this class */
+    rb_define_method(rb_cString, "pair_distance_similar", rb_str_pair_distance_similar, 1);
+    /* Longest Common Subsequence: Amatch::LongestSubsequence with the shared
+     * pattern accessors, match and similar. Also adds
+     * String#longest_subsequence_similar. */
+    rb_cLongestSubsequence = rb_define_class_under(rb_mAmatch, "LongestSubsequence", rb_cObject);
+    rb_define_alloc_func(rb_cLongestSubsequence, rb_LongestSubsequence_s_allocate);
+    rb_define_method(rb_cLongestSubsequence, "initialize", rb_LongestSubsequence_initialize, 1);
+    rb_define_method(rb_cLongestSubsequence, "pattern", rb_General_pattern, 0);
+    rb_define_method(rb_cLongestSubsequence, "pattern=", rb_General_pattern_set, 1);
+    rb_define_method(rb_cLongestSubsequence, "match", rb_LongestSubsequence_match, 1);
+    rb_define_method(rb_cLongestSubsequence, "similar", rb_LongestSubsequence_similar, 1);
+    rb_define_method(rb_cString, "longest_subsequence_similar", rb_str_longest_subsequence_similar, 1);
+    /* Longest Common Substring: Amatch::LongestSubstring with the shared
+     * pattern accessors, match and similar. Also adds
+     * String#longest_substring_similar. */
+    rb_cLongestSubstring = rb_define_class_under(rb_mAmatch, "LongestSubstring", rb_cObject);
+    rb_define_alloc_func(rb_cLongestSubstring, rb_LongestSubstring_s_allocate);
+    rb_define_method(rb_cLongestSubstring, "initialize", rb_LongestSubstring_initialize, 1);
+    rb_define_method(rb_cLongestSubstring, "pattern", rb_General_pattern, 0);
+    rb_define_method(rb_cLongestSubstring, "pattern=", rb_General_pattern_set, 1);
+    rb_define_method(rb_cLongestSubstring, "match", rb_LongestSubstring_match, 1);
+    rb_define_method(rb_cLongestSubstring, "similar", rb_LongestSubstring_similar, 1);
+    rb_define_method(rb_cString, "longest_substring_similar", rb_str_longest_substring_similar, 1);
+    id_split = rb_intern("split"); /* intern the method name once; presumably used for calls elsewhere in this file -- confirm */
+    id_to_f = rb_intern("to_f"); /* same for to_f -- usage is outside this view */
 }
     /* vim: set et cin sw=4 ts=4: */