RubyGems - levenshtein - Versions diffs - 0.2.1 → 0.2.2 - Mend

levenshtein 0.2.1 → 0.2.2

Files changed (13) hide show

data/CHANGELOG +5 -1
data/README +10 -7
data/VERSION +1 -1
data/ext/levenshtein/levenshtein_fast.c +113 -9
data/lib/levenshtein.rb +80 -49
data/lib/levenshtein/version.rb +3 -1
data/test/test.rb +56 -39
metadata +22 -44
data/ext/levenshtein/levenshtein_array.c +0 -130
data/ext/levenshtein/levenshtein_array_of_strings.c +0 -130
data/ext/levenshtein/levenshtein_generic.c +0 -133
data/ext/levenshtein/levenshtein_string.c +0 -138
data/lib/levenshtein/exception.rb +0 -4

data/CHANGELOG CHANGED

@@ -1,4 +1,8 @@
-0.2.1 (11-02-2012)
+0.2.2 (16-03-2012)
+* Simplified code.
+0.2.1 (11-03-2012)
 * Better memory handling.

data/README CHANGED

@@ -1,12 +1,15 @@
-The Levenshtein distance is a metric for measuring the amount of difference
-between two sequences (i.e., the so called edit distance). The Levenshtein
-distance between two sequences is given by the minimum number of operations
-needed to transform one sequence into the other, where an operation is an
+The Levenshtein distance is a metric for measuring the amount
+of difference between two sequences (i.e., the so called edit
+distance). The Levenshtein distance between two sequences is
+given by the minimum number of operations needed to transform
+one sequence into the other, where an operation is an
 insertion, deletion, or substitution of a single element.
-The two sequences can be two strings, two arrays, or two other objects.
-Strings, arrays and arrays of strings are handled with optimized (very fast) C
-code. All other sequences are handled with generic (fast) C code.
+The two sequences can be two strings, two arrays, or two other
+objects responding to :each. All sequences are handled by
+generic (fast) C code.
+All objects in the sequences should respond to :hash and :eql?.
 More information about the Levenshtein distance algorithm:
 http://en.wikipedia.org/wiki/Levenshtein_distance .

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.2.1
1	+ 0.2.2

data/ext/levenshtein/levenshtein_fast.c CHANGED

@@ -2,17 +2,121 @@
 #include "levenshtein.h"
 VALUE levenshtein_distance_fast(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
-  if ((TYPE(rb_o1) == T_STRING) && (TYPE(rb_o2)) == T_STRING) {
-    return levenshtein_distance_string(self, rb_o1, rb_o2, rb_threshold);
-  } else if ((TYPE(rb_o1) == T_ARRAY) && (TYPE(rb_o2)) == T_ARRAY) {
-    if ((TYPE(rb_ary_entry(rb_o1, 0)) == T_STRING) && (TYPE(rb_ary_entry(rb_o2, 0))) == T_STRING) {
-      return levenshtein_distance_array_of_strings(self, rb_o1, rb_o2, rb_threshold);
-    } else {
-      return levenshtein_distance_array(self, rb_o1, rb_o2, rb_threshold);
-    }
+  VALUE	*p1, *p2;
+  long	l1, l2;
+  long	col, row;
+  int	threshold;
+  int	*prev_row, *curr_row, *temp_row;
+  int	curr_row_min, result;
+  int	value1, value2;
+  /* Be sure that all equivalent objects in rb_o1 and rb_o2 (a.eql?(b) == true) are taken from a pool (a.equal?(b) == true). */
+  /* This is done in levenshtein.rb by means of Util.pool. */
+  /* Get the sizes of both arrays. */
+  l1	= RARRAY_LEN(rb_o1);
+  l2	= RARRAY_LEN(rb_o2);
+  /* Get the pointers of both arrays. */
+  p1	= RARRAY_PTR(rb_o1);
+  p2	= RARRAY_PTR(rb_o2);
+  /* Convert Ruby's threshold to C's threshold. */
+  if (!NIL_P(rb_threshold)) {
+    threshold	= FIX2INT(rb_threshold);
   } else {
-    return levenshtein_distance_generic(self, rb_o1, rb_o2, rb_threshold);
+    threshold	= -1;
+  }
+  /* The Levenshtein algorithm itself. */
+  /*       s1=              */
+  /*       ERIK             */
+  /*                        */
+  /*      01234             */
+  /* s2=V 11234             */
+  /*    E 21234             */
+  /*    E 32234             */
+  /*    N 43334 <- prev_row */
+  /*    S 54444 <- curr_row */
+  /*    T 65555             */
+  /*    R 76566             */
+  /*    A 87667             */
+  /* Allocate memory for both rows */
+  prev_row	= (int*) ALLOC_N(int, (l1+1));
+  curr_row	= (int*) ALLOC_N(int, (l1+1));
+  /* Initialize the current row. */
+  for (col=0; col<=l1; col++) {
+    curr_row[col]	= col;
   }
+  for (row=1; row<=l2; row++) {
+    /* Copy the current row to the previous row. */
+    temp_row	= prev_row;
+    prev_row	= curr_row;
+    curr_row	= temp_row;
+    /* Calculate the values of the current row. */
+    curr_row[0]		= row;
+    curr_row_min	= row;
+    for (col=1; col<=l1; col++) {
+      /* Equal (cost=0) or substitution (cost=1). */
+      value1	= prev_row[col-1] + ((p1[col-1] == p2[row-1]) ? 0 : 1);
+      /* Insertion if it's cheaper than substitution. */
+      value2	= prev_row[col]+1;
+      if (value2 < value1) {
+        value1	= value2;
+      }
+      /* Deletion if it's cheaper than substitution. */
+      value2	= curr_row[col-1]+1;
+      if (value2 < value1) {
+        value1	= value2;
+      }
+      /* Keep track of the minimum value on this row. */
+      if (value1 < curr_row_min) {
+        curr_row_min	= value1;
+      }
+      curr_row[col]	= value1;
+    }
+    /* Return nil as soon as we exceed the threshold. */
+    if (threshold > -1 && curr_row_min >= threshold) {
+      free(prev_row);
+      free(curr_row);
+      return Qnil;
+    }
+  }
+  /* The result is the last value on the last row. */
+  result	= curr_row[l1];
+  free(prev_row);
+  free(curr_row);
+  /* Return the Ruby version of the result. */
+  return INT2FIX(result);
 }
 void Init_levenshtein_fast() {

data/lib/levenshtein.rb CHANGED

@@ -1,25 +1,30 @@
-require "levenshtein/exception"
+# encoding: UTF-8
 require "levenshtein/version"
 module Levenshtein
   # Returns the Levenshtein distance as a number between 0.0 and
   # 1.0. It's basically the Levenshtein distance divided by the
-  # length of the longest sequence.
+  # size of the longest sequence.
-  def self.normalized_distance(a1, a2, threshold=nil)
-    a1, a2	= a2, a1	if a1.length > a2.length	# a1 is the short one; a2 is the long one.
+  def self.normalized_distance(a1, a2, threshold=nil, options={})
+    size	= [a1.size, a2.size].max
-    if a2.length == 0
-      0.0	# Since a1.length < a2.length, a1 must be empty as well.
+    if a1.size == 0 and a2.size == 0
+      0.0
+    elsif a1.size == 0
+      a2.size.to_f/size
+    elsif a2.size == 0
+      a1.size.to_f/size
     else
       if threshold
-        if d = self.distance(a1, a2, (threshold*a2.length+1).to_i)
-          d.to_f/a2.length
+        if d = self.distance(a1, a2, (threshold*size).to_i+1)
+          d.to_f/size
         else
           nil
         end
       else
-        self.distance(a1, a2).to_f/a2.length
+        self.distance(a1, a2).to_f/size
       end
     end
   end
@@ -27,71 +32,79 @@ module Levenshtein
   # Returns the Levenshtein distance between two sequences.
   #
   # The two sequences can be two strings, two arrays, or two other
-  # objects. Strings, arrays and arrays of strings are handled with
-  # optimized (very fast) C code. All other sequences are handled
-  # with generic (fast) C code.
+  # objects responding to :each. All sequences are handled by
+  # generic (fast) C code.
   #
-  # The sequences should respond to :length and :[] and all objects
-  # in the sequences (as returned by []) should response to :==.
+  # All objects in the sequences should respond to :hash and :eql?.
-  def self.distance(a1, a2, threshold=nil)
-    a1, a2	= a2, a1	if a1.length > a2.length	# a1 is the short one; a2 is the long one.
+  def self.distance(a1, a2, threshold=nil, options={})
+    a1, a2	= a1.scan(/./), a2.scan(/./)	if String === a1 and String === a2
+    a1, a2	= Util.pool(a1, a2)
     # Handle some basic circumstances.
     return 0		if a1 == a2
-    return a2.length	if a1.length == 0
+    return a2.size	if a1.empty?
+    return a1.size	if a2.empty?
     if threshold
-      return nil	if (a2.length-a1.length) >= threshold
-      a3, a4	= nil, nil
-      a3, a4	= a1, a2			if a1.respond_to?(:-) and a2.respond_to?(:-)
-      a3, a4	= a1.scan(/./), a2.scan(/./)	if a1.respond_to?(:scan) and a2.respond_to?(:scan)
-      if a3 and a4
-        return nil	if (a3-a4).length >= threshold
-        return nil	if (a4-a3).length >= threshold
-      end
+      return nil	if (a1.size-a2.size) >= threshold
+      return nil	if (a2.size-a1.size) >= threshold
+      return nil	if (a1-a2).size >= threshold
+      return nil	if (a2-a1).size >= threshold
     end
-    distance_fast_or_slow(a1, a2, threshold)
-  end
-  def self.distance_fast_or_slow(a1, a2, threshold)	# :nodoc:
-    if respond_to?(:distance_fast)
-      distance_fast(a1, a2, threshold)	# Implemented in C.
-    else
-      distance_slow(a1, a2, threshold)	# Implemented in Ruby.
-    end
-  end
+    # Remove the common prefix and the common postfix.
-  def self.distance_slow(a1, a2, threshold)	# :nodoc:
-    l1	= a1.length
-    l2	= a2.length
+    l1	= a1.size
+    l2	= a2.size
-    offset	= 0
+    offset			= 0
+    no_more_optimizations	= true
-    while offset < l1 and offset < l2 and a1[offset] == a2[offset]
+    while offset < l1 and offset < l2 and a1[offset].equal?(a2[offset])
       offset += 1
+      no_more_optimizations	= false
     end
-    while offset < l1 and offset < l2 and a1[l1-1] == a2[l2-1]
+    while offset < l1 and offset < l2 and a1[l1-1].equal?(a2[l2-1])
       l1 -= 1
       l2 -= 1
+      no_more_optimizations	= false
     end
-    l1 -= offset
-    l2 -= offset
+    if no_more_optimizations
+      distance_fast_or_slow(a1, a2, threshold, options)
+    else
+      l1 -= offset
+      l2 -= offset
+      a1	= a1[offset, l1]
+      a2	= a2[offset, l2]
-    crow	= (0..l1).to_a
+      distance(a1, a2, threshold, options)
+    end
+  end
-    1.upto(l2) do |y|
+  def self.distance_fast_or_slow(a1, a2, threshold, options)	# :nodoc:
+    if respond_to?(:distance_fast) and options[:force_slow]
+      distance_fast(a1, a2, threshold)	# Implemented in C.
+    else
+      distance_slow(a1, a2, threshold)	# Implemented in Ruby.
+    end
+  end
+  def self.distance_slow(a1, a2, threshold)	# :nodoc:
+    crow	= (0..a1.size).to_a
+    1.upto(a2.size) do |y|
       prow	= crow
       crow	= [y]
-      1.upto(l1) do |x|
-        crow[x]	= [prow[x]+1, crow[x-1]+1, prow[x-1]+(a1[offset+x-1]==a2[offset+y-1] ? 0 : 1)].min
+      1.upto(a1.size) do |x|
+        crow[x]	= [prow[x]+1, crow[x-1]+1, prow[x-1]+(a1[x-1].equal?(a2[y-1]) ? 0 : 1)].min
       end
       # Stop analysing this sequence as soon as the best possible
@@ -104,6 +117,24 @@ module Levenshtein
     crow[-1]
   end
+  module Util	# :nodoc:
+    def self.pool(*args)
+      # So we can compare pointers instead of objects (equal?() instead of ==()).
+      pool	= {}
+      args.collect do |arg|
+        a	= []
+        arg.each do |o|
+          a << pool[o] ||= o
+        end
+        a
+      end
+    end
+  end
 end
 begin

data/lib/levenshtein/version.rb CHANGED

@@ -1,3 +1,5 @@
+# encoding: UTF-8
 module Levenshtein
-  VERSION	= "0.2.1"
+  VERSION	= "0.2.2"
 end

data/test/test.rb CHANGED

@@ -1,3 +1,6 @@
+#!/usr/bin/env ruby
+# encoding: UTF-8
 require "test/unit"
 require "levenshtein"
@@ -7,14 +10,10 @@ module Levenshtein
       @sequence	= o
     end
-    def length
-      @sequence.length
-    end
-    def [](pos)
-      raise "type not allowed [#{pos.inspect}]"	unless pos.kind_of?(Fixnum)
-      @sequence[pos]
+    def each
+      @sequence.length.times do |pos|
+        yield(@sequence[pos])
+      end
     end
   end
@@ -24,14 +23,18 @@ module Levenshtein
     def initialize(o)
       @object	= o
     end
-    def ==(other)
-      @object == other.object
+    def hash
+      @object.hash
+    end
+    def eql?(other)
+      @object.eql?(other.object)
     end
   end
 end
-class TestLevenshteinString < Test::Unit::TestCase
+class TestLevenshtein < Test::Unit::TestCase
   def test_erik_veenstra
     assert_equal(7, Levenshtein.distance("erik", "veenstra"))
     assert_equal(7, Levenshtein.distance("veenstra", "erik"))
@@ -79,59 +82,73 @@ class TestLevenshteinString < Test::Unit::TestCase
     assert_in_delta(0.6, Levenshtein.normalized_distance("123cd", "xyzcd"), 0.01)
     assert_in_delta(0.625, Levenshtein.normalized_distance("123cd123", "123"), 0.01)
   end
-end
-class TestLevenshteinArray < Test::Unit::TestCase
-  def test_erik_veenstra
-    x	= lambda{|s| s.scan(/./).collect{|e| Levenshtein::TestElement.new(e)}}
+  def test_interface
+    seq1	= Levenshtein::TestSequence.new("erik".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
+    seq2	= Levenshtein::TestSequence.new("veenstra".scan(/./).collect{|e| Levenshtein::TestElement.new(e)})
-    assert_equal(7, Levenshtein.distance(x["erik"], x["veenstra"]))
+    assert_equal(7, Levenshtein.distance(seq1, seq2))
   end
 end
-class TestLevenshteinArrayOfStrings < Test::Unit::TestCase
+class TestLevenshteinFast < Test::Unit::TestCase
   def test_erik_veenstra
-    x	= lambda{|s| s.scan(/./)}
+    assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>false))
+    assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>false))
+  end
-    assert_equal(7, Levenshtein.distance(x["erik"], x["veenstra"]))
+  def test_empty_string
+    assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
+    assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>false))
+    assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>false))
   end
-end
-class TestLevenshteinGeneric < Test::Unit::TestCase
-  def test_erik_veenstra
-    x	= lambda{|s| Levenshtein::TestSequence.new(s.scan(/./).collect{|e| Levenshtein::TestElement.new(e)})}
+  def test_same_string
+    assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>false))
+    assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>false))
+  end
-    assert_equal(7, Levenshtein.distance(x["erik"], x["veenstra"]))
+  def test_threshold
+    assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>false))
+    assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>false))
+    assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>false))
+  end
+  def test_same_head_and_or_tail
+    assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>false))
+    assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>false))
+    assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>false))
+    assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>false))
   end
 end
 class TestLevenshteinSlow < Test::Unit::TestCase
   def test_erik_veenstra
-    assert_equal(7, Levenshtein.distance_slow("erik", "veenstra", nil))
-    assert_equal(7, Levenshtein.distance_slow("veenstra", "erik", nil))
+    assert_equal(7, Levenshtein.distance("erik", "veenstra", nil, :force_slow=>true))
+    assert_equal(7, Levenshtein.distance("veenstra", "erik", nil, :force_slow=>true))
   end
   def test_empty_string
-    assert_equal(0, Levenshtein.distance_slow("", "", nil))
-    assert_equal(3, Levenshtein.distance_slow("", "foo", nil))
-    assert_equal(3, Levenshtein.distance_slow("foo", "", nil))
+    assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
+    assert_equal(3, Levenshtein.distance("", "foo", nil, :force_slow=>true))
+    assert_equal(3, Levenshtein.distance("foo", "", nil, :force_slow=>true))
   end
   def test_same_string
-    assert_equal(0, Levenshtein.distance_slow("", "", nil))
-    assert_equal(0, Levenshtein.distance_slow("foo", "foo", nil))
+    assert_equal(0, Levenshtein.distance("", "", nil, :force_slow=>true))
+    assert_equal(0, Levenshtein.distance("foo", "foo", nil, :force_slow=>true))
   end
   def test_threshold
-    assert_equal(3, Levenshtein.distance_slow("foo", "foobar", nil))
-    assert_equal(3, Levenshtein.distance_slow("foo", "foobar", 4))
-    assert_equal(nil, Levenshtein.distance_slow("foo", "foobar", 2))
+    assert_equal(3, Levenshtein.distance("foo", "foobar", nil, :force_slow=>true))
+    assert_equal(3, Levenshtein.distance("foo", "foobar", 4, :force_slow=>true))
+    assert_equal(nil, Levenshtein.distance("foo", "foobar", 2, :force_slow=>true))
   end
   def test_same_head_and_or_tail
-    assert_equal(3, Levenshtein.distance_slow("ab123cd", "abxyzcd", nil))
-    assert_equal(3, Levenshtein.distance_slow("ab123", "abxyz", nil))
-    assert_equal(3, Levenshtein.distance_slow("123cd", "xyzcd", nil))
-    assert_equal(5, Levenshtein.distance_slow("123cd123", "123", nil))
+    assert_equal(3, Levenshtein.distance("ab123cd", "abxyzcd", nil, :force_slow=>true))
+    assert_equal(3, Levenshtein.distance("ab123", "abxyz", nil, :force_slow=>true))
+    assert_equal(3, Levenshtein.distance("123cd", "xyzcd", nil, :force_slow=>true))
+    assert_equal(5, Levenshtein.distance("123cd123", "123", nil, :force_slow=>true))
   end
 end

metadata CHANGED

@@ -1,41 +1,27 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: levenshtein
-version: !ruby/object:Gem::Version
-  hash: 21
+version: !ruby/object:Gem::Version
+  version: 0.2.2
   prerelease:
-  segments:
-  - 0
-  - 2
-  - 1
-  version: 0.2.1
 platform: ruby
-authors:
+authors:
 - Erik Veenstra
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-02-11 00:00:00 Z
+date: 2012-03-16 00:00:00.000000000 Z
 dependencies: []
 description: Calculates the Levenshtein distance between two byte strings.
 email: levenshtein@erikveen.dds.nl
 executables: []
-extensions:
+extensions:
 - ext/levenshtein/extconf.rb
 extra_rdoc_files: []
-files:
-- lib/levenshtein/exception.rb
+files:
 - lib/levenshtein/version.rb
 - lib/levenshtein.rb
-- ext/levenshtein/levenshtein_string.c
-- ext/levenshtein/levenshtein_generic.c
 - ext/levenshtein/levenshtein.h
 - ext/levenshtein/levenshtein_fast.c
-- ext/levenshtein/levenshtein_array_of_strings.c
-- ext/levenshtein/levenshtein_array.c
 - ext/levenshtein/extconf.rb
 - README
 - LICENSE
@@ -44,43 +30,35 @@ files:
 - test/test.rb
 homepage: http://www.erikveen.dds.nl/levenshtein/index.html
 licenses: []
 post_install_message:
-rdoc_options:
+rdoc_options:
 - README
 - LICENSE
 - VERSION
 - CHANGELOG
 - --title
-- levenshtein (0.2.1)
+- levenshtein (0.2.2)
 - --main
 - README
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
+required_ruby_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project: levenshtein
-rubygems_version: 1.8.12
+rubygems_version: 1.8.18
 signing_key:
 specification_version: 3
 summary: Calculates the Levenshtein distance between two byte strings.
-test_files:
+test_files:
 - test/test.rb

data/ext/levenshtein/levenshtein_array.c DELETED

@@ -1,130 +0,0 @@
-#include "ruby.h"
-#include "levenshtein.h"
-VALUE levenshtein_distance_array(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
-  int	threshold;
-  int	l1, l2;
-  int	*prev_row, *curr_row, *temp_row;
-  int	col, row;
-  int	curr_row_min, result;
-  int	offset;
-  int	value1, value2;
-  /* Get the sizes of both arrays. */
-  l1	= RARRAY_LEN(rb_o1);
-  l2	= RARRAY_LEN(rb_o2);
-  /* Convert Ruby's threshold to C's threshold. */
-  if (!NIL_P(rb_threshold)) {
-    threshold	= FIX2INT(rb_threshold);
-  } else {
-    threshold	= -1;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common prefix. */
-  offset	= 0;
-  while ((offset < l1) && (offset < l2) && RTEST(rb_equal(rb_ary_entry(rb_o1, offset), rb_ary_entry(rb_o2, offset)))) {
-    offset++;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common postfix. */
-  while ((offset < l1) && (offset < l2) && RTEST(rb_equal(rb_ary_entry(rb_o1, l1-1), rb_ary_entry(rb_o2, l2-1)))) {
-    l1--;
-    l2--;
-  }
-  l1 -= offset;
-  l2 -= offset;
-  /* The Levenshtein algorithm itself. */
-  /*       s1=              */
-  /*       ERIK             */
-  /*                        */
-  /*      01234             */
-  /* s2=V 11234             */
-  /*    E 21234             */
-  /*    E 32234             */
-  /*    N 43334 <- prev_row */
-  /*    S 54444 <- curr_row */
-  /*    T 65555             */
-  /*    R 76566             */
-  /*    A 87667             */
-  /* Allocate memory for both rows */
-  prev_row	= (int*) ALLOC_N(int, (l1+1));
-  curr_row	= (int*) ALLOC_N(int, (l1+1));
-  /* Initialize the current row. */
-  for (col=0; col<=l1; col++) {
-    curr_row[col]	= col;
-  }
-  for (row=1; row<=l2; row++) {
-    /* Copy the current row to the previous row. */
-    temp_row	= prev_row;
-    prev_row	= curr_row;
-    curr_row	= temp_row;
-    /* Calculate the values of the current row. */
-    curr_row[0]		= row;
-    curr_row_min	= row;
-    for (col=1; col<=l1; col++) {
-      /* Equal (cost=0) or substitution (cost=1). */
-      value1	= prev_row[col-1] + (RTEST(rb_equal(rb_ary_entry(rb_o1, offset+col-1), rb_ary_entry(rb_o2, offset+row-1))) ? 0 : 1);
-      /* Insertion if it's cheaper than substitution. */
-      value2	= prev_row[col]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Deletion if it's cheaper than substitution. */
-      value2	= curr_row[col-1]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Keep track of the minimum value on this row. */
-      if (value1 < curr_row_min) {
-        curr_row_min	= value1;
-      }
-      curr_row[col]	= value1;
-    }
-    /* Return nil as soon as we exceed the threshold. */
-    if (threshold > -1 && curr_row_min >= threshold) {
-      free(prev_row);
-      free(curr_row);
-      return Qnil;
-    }
-  }
-  /* The result is the last value on the last row. */
-  result	= curr_row[l1];
-  free(prev_row);
-  free(curr_row);
-  /* Return the Ruby version of the result. */
-  return INT2FIX(result);
-}

data/ext/levenshtein/levenshtein_array_of_strings.c DELETED

@@ -1,130 +0,0 @@
-#include "ruby.h"
-#include "levenshtein.h"
-VALUE levenshtein_distance_array_of_strings(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
-  int	threshold;
-  int	l1, l2;
-  int	*prev_row, *curr_row, *temp_row;
-  int	col, row;
-  int	curr_row_min, result;
-  int	offset;
-  int	value1, value2;
-  /* Get the sizes of both arrays. */
-  l1	= RARRAY_LEN(rb_o1);
-  l2	= RARRAY_LEN(rb_o2);
-  /* Convert Ruby's threshold to C's threshold. */
-  if (!NIL_P(rb_threshold)) {
-    threshold	= FIX2INT(rb_threshold);
-  } else {
-    threshold	= -1;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common prefix. */
-  offset	= 0;
-  while ((offset < l1) && (offset < l2) && (rb_str_cmp(rb_ary_entry(rb_o1, offset), rb_ary_entry(rb_o2, offset)) == 0)) {
-    offset++;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common postfix. */
-  while ((offset < l1) && (offset < l2) && (rb_str_cmp(rb_ary_entry(rb_o1, l1-1), rb_ary_entry(rb_o2, l2-1)) == 0 )) {
-    l1--;
-    l2--;
-  }
-  l1 -= offset;
-  l2 -= offset;
-  /* The Levenshtein algorithm itself. */
-  /*       s1=              */
-  /*       ERIK             */
-  /*                        */
-  /*      01234             */
-  /* s2=V 11234             */
-  /*    E 21234             */
-  /*    E 32234             */
-  /*    N 43334 <- prev_row */
-  /*    S 54444 <- curr_row */
-  /*    T 65555             */
-  /*    R 76566             */
-  /*    A 87667             */
-  /* Allocate memory for both rows */
-  prev_row	= (int*) ALLOC_N(int, (l1+1));
-  curr_row	= (int*) ALLOC_N(int, (l1+1));
-  /* Initialize the current row. */
-  for (col=0; col<=l1; col++) {
-    curr_row[col]	= col;
-  }
-  for (row=1; row<=l2; row++) {
-    /* Copy the current row to the previous row. */
-    temp_row	= prev_row;
-    prev_row	= curr_row;
-    curr_row	= temp_row;
-    /* Calculate the values of the current row. */
-    curr_row[0]		= row;
-    curr_row_min	= row;
-    for (col=1; col<=l1; col++) {
-      /* Equal (cost=0) or substitution (cost=1). */
-      value1	= prev_row[col-1] + ((rb_str_cmp(rb_ary_entry(rb_o1, offset+col-1), rb_ary_entry(rb_o2, offset+row-1)) == 0) ? 0 : 1);
-      /* Insertion if it's cheaper than substitution. */
-      value2	= prev_row[col]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Deletion if it's cheaper than substitution. */
-      value2	= curr_row[col-1]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Keep track of the minimum value on this row. */
-      if (value1 < curr_row_min) {
-        curr_row_min	= value1;
-      }
-      curr_row[col]	= value1;
-    }
-    /* Return nil as soon as we exceed the threshold. */
-    if (threshold > -1 && curr_row_min >= threshold) {
-      free(prev_row);
-      free(curr_row);
-      return Qnil;
-    }
-  }
-  /* The result is the last value on the last row. */
-  result	= curr_row[l1];
-  free(prev_row);
-  free(curr_row);
-  /* Return the Ruby version of the result. */
-  return INT2FIX(result);
-}

data/ext/levenshtein/levenshtein_generic.c DELETED

@@ -1,133 +0,0 @@
-#include "ruby.h"
-#include "levenshtein.h"
-VALUE levenshtein_distance_generic(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
-  int	threshold;
-  int	l1, l2;
-  int	*prev_row, *curr_row, *temp_row;
-  int	col, row;
-  int	curr_row_min, result;
-  int	offset;
-  int	value1, value2;
-  ID	id_length	= rb_intern("length");
-  ID	id_get		= rb_intern("[]");
-  /* Get the sizes of both sequences. */
-  l1	= FIX2INT(rb_funcall(rb_o1, id_length, 0));
-  l2	= FIX2INT(rb_funcall(rb_o2, id_length, 0));
-  /* Convert Ruby's threshold to C's threshold. */
-  if (!NIL_P(rb_threshold)) {
-    threshold	= FIX2INT(rb_threshold);
-  } else {
-    threshold	= -1;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common prefix. */
-  offset	= 0;
-  while ((offset < l1) && (offset < l2) && RTEST(rb_equal(rb_funcall(rb_o1, id_get, 1, INT2FIX(offset)), rb_funcall(rb_o2, id_get, 1, INT2FIX(offset))))) {
-    offset++;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common postfix. */
-  while ((offset < l1) && (offset < l2) && RTEST(rb_equal(rb_funcall(rb_o1, id_get, 1, INT2FIX(l1-1)), rb_funcall(rb_o2, id_get, 1, INT2FIX(l2-1))))) {
-    l1--;
-    l2--;
-  }
-  l1 -= offset;
-  l2 -= offset;
-  /* The Levenshtein algorithm itself. */
-  /*       s1=              */
-  /*       ERIK             */
-  /*                        */
-  /*      01234             */
-  /* s2=V 11234             */
-  /*    E 21234             */
-  /*    E 32234             */
-  /*    N 43334 <- prev_row */
-  /*    S 54444 <- curr_row */
-  /*    T 65555             */
-  /*    R 76566             */
-  /*    A 87667             */
-  /* Allocate memory for both rows */
-  prev_row	= (int*) ALLOC_N(int, (l1+1));
-  curr_row	= (int*) ALLOC_N(int, (l1+1));
-  /* Initialize the current row. */
-  for (col=0; col<=l1; col++) {
-    curr_row[col]	= col;
-  }
-  for (row=1; row<=l2; row++) {
-    /* Copy the current row to the previous row. */
-    temp_row	= prev_row;
-    prev_row	= curr_row;
-    curr_row	= temp_row;
-    /* Calculate the values of the current row. */
-    curr_row[0]		= row;
-    curr_row_min	= row;
-    for (col=1; col<=l1; col++) {
-      /* Equal (cost=0) or substitution (cost=1). */
-      value1	= prev_row[col-1] + (RTEST(rb_equal(rb_funcall(rb_o1, id_get, 1, INT2FIX(offset+col-1)), rb_funcall(rb_o2, id_get, 1, INT2FIX(offset+row-1)))) ? 0 : 1);
-      /* Insertion if it's cheaper than substitution. */
-      value2	= prev_row[col]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Deletion if it's cheaper than substitution. */
-      value2	= curr_row[col-1]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Keep track of the minimum value on this row. */
-      if (value1 < curr_row_min) {
-        curr_row_min	= value1;
-      }
-      curr_row[col]	= value1;
-    }
-    /* Return nil as soon as we exceed the threshold. */
-    if (threshold > -1 && curr_row_min >= threshold) {
-      free(prev_row);
-      free(curr_row);
-      return Qnil;
-    }
-  }
-  /* The result is the last value on the last row. */
-  result	= curr_row[l1];
-  free(prev_row);
-  free(curr_row);
-  /* Return the Ruby version of the result. */
-  return INT2FIX(result);
-}

data/ext/levenshtein/levenshtein_string.c DELETED

@@ -1,138 +0,0 @@
-#include "ruby.h"
-#include "levenshtein.h"
-VALUE levenshtein_distance_string(VALUE self, VALUE rb_o1, VALUE rb_o2, VALUE rb_threshold) {
-  int	threshold;
-  int	l1, l2;
-  int	*prev_row, *curr_row, *temp_row;
-  int	col, row;
-  int	curr_row_min, result;
-  int	offset;
-  int	value1, value2;
-  char	*s1, *s2;
-  /* Convert Ruby's s1 to C's s1. */
-  rb_o1	= StringValue(rb_o1);
-  s1	= RSTRING_PTR(rb_o1);
-  l1	= RSTRING_LEN(rb_o1);
-  /* Convert Ruby's s2 to C's s2. */
-  rb_o2	= StringValue(rb_o2);
-  s2	= RSTRING_PTR(rb_o2);
-  l2	= RSTRING_LEN(rb_o2);
-  /* Convert Ruby's threshold to C's threshold. */
-  if (!NIL_P(rb_threshold)) {
-    threshold	= FIX2INT(rb_threshold);
-  } else {
-    threshold	= -1;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common prefix. */
-  offset	= 0;
-  while ((offset < l1) && (offset < l2) && (s1[offset] == s2[offset])) {
-    offset++;
-  }
-  /* Do the expensive calculation on a subset of the sequences, if possible, by removing the common postfix. */
-  while ((offset < l1) && (offset < l2) && (s1[l1-1] == s2[l2-1])) {
-    l1--;
-    l2--;
-  }
-  l1 -= offset;
-  l2 -= offset;
-  /* The Levenshtein algorithm itself. */
-  /*       s1=              */
-  /*       ERIK             */
-  /*                        */
-  /*      01234             */
-  /* s2=V 11234             */
-  /*    E 21234             */
-  /*    E 32234             */
-  /*    N 43334 <- prev_row */
-  /*    S 54444 <- curr_row */
-  /*    T 65555             */
-  /*    R 76566             */
-  /*    A 87667             */
-  /* Allocate memory for both rows */
-  prev_row	= (int*) ALLOC_N(int, (l1+1));
-  curr_row	= (int*) ALLOC_N(int, (l1+1));
-  /* Initialize the current row. */
-  for (col=0; col<=l1; col++) {
-    curr_row[col]	= col;
-  }
-  for (row=1; row<=l2; row++) {
-    /* Copy the current row to the previous row. */
-    temp_row	= prev_row;
-    prev_row	= curr_row;
-    curr_row	= temp_row;
-    /* Calculate the values of the current row. */
-    curr_row[0]		= row;
-    curr_row_min	= row;
-    for (col=1; col<=l1; col++) {
-      /* Equal (cost=0) or substitution (cost=1). */
-      value1	= prev_row[col-1] + ((s1[offset+col-1] == s2[offset+row-1]) ? 0 : 1);
-      /* Insertion if it's cheaper than substitution. */
-      value2	= prev_row[col]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Deletion if it's cheaper than substitution. */
-      value2	= curr_row[col-1]+1;
-      if (value2 < value1) {
-        value1	= value2;
-      }
-      /* Keep track of the minimum value on this row. */
-      if (value1 < curr_row_min) {
-        curr_row_min	= value1;
-      }
-      curr_row[col]	= value1;
-    }
-    /* Return nil as soon as we exceed the threshold. */
-    if (threshold > -1 && curr_row_min >= threshold) {
-      free(prev_row);
-      free(curr_row);
-      return Qnil;
-    }
-  }
-  /* The result is the last value on the last row. */
-  result	= curr_row[l1];
-  free(prev_row);
-  free(curr_row);
-  /* Return the Ruby version of the result. */
-  return INT2FIX(result);
-}

data/lib/levenshtein/exception.rb DELETED

@@ -1,4 +0,0 @@
-module Levenshtein
-  class LevenshteinException < RuntimeError
-  end
-end