damerau-levenshtein 0.5.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +1 -0
- data/Gemfile +16 -17
- data/Gemfile.lock +62 -55
- data/README.md +138 -0
- data/Rakefile +17 -13
- data/VERSION +1 -1
- data/damerau-levenshtein.gemspec +65 -0
- data/lib/damerau-levenshtein.rb +5 -1
- data/spec/damerau-levenshtein_spec.rb +10 -2
- metadata +48 -87
- data/README.rdoc +0 -76
data/CHANGELOG
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0 -- stable version
|
data/Gemfile
CHANGED
@@ -1,20 +1,19 @@
|
|
1
|
-
source
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'bundler', '~> 1'
|
4
|
+
gem 'jeweler', '~> 1'
|
5
|
+
gem 'rake', '~> 10'
|
6
|
+
gem 'rake-compiler', '~> 0.8'
|
5
7
|
|
6
|
-
# Add dependencies to develop your gem here.
|
7
|
-
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development do
|
9
|
-
gem
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
gem
|
14
|
-
gem
|
15
|
-
gem
|
16
|
-
gem
|
17
|
-
gem
|
18
|
-
gem
|
19
|
-
gem "mocha"
|
9
|
+
gem 'debugger', '~> 1'
|
10
|
+
end
|
11
|
+
|
12
|
+
group :test do
|
13
|
+
gem 'rspec', '~> 2'
|
14
|
+
gem 'cucumber', '~> 1'
|
15
|
+
gem 'simplecov', '~> 0.7'
|
16
|
+
gem 'ruby-prof', '~> 0.13'
|
17
|
+
gem 'shoulda', '~> 3'
|
18
|
+
gem 'mocha', '~> 0.10'
|
20
19
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,71 +1,78 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
-
|
5
|
-
|
4
|
+
activesupport (3.2.12)
|
5
|
+
i18n (~> 0.6)
|
6
|
+
multi_json (~> 1.0)
|
7
|
+
bourne (1.1.2)
|
8
|
+
mocha (= 0.10.5)
|
9
|
+
builder (3.2.0)
|
6
10
|
columnize (0.3.6)
|
7
|
-
cucumber (1.
|
11
|
+
cucumber (1.2.3)
|
8
12
|
builder (>= 2.1.2)
|
9
|
-
diff-lcs (>= 1.1.
|
10
|
-
gherkin (~> 2.
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
diff-lcs (>= 1.1.3)
|
14
|
+
gherkin (~> 2.11.6)
|
15
|
+
multi_json (~> 1.3)
|
16
|
+
debugger (1.4.0)
|
17
|
+
columnize (>= 0.3.1)
|
18
|
+
debugger-linecache (~> 1.1.1)
|
19
|
+
debugger-ruby_core_source (~> 1.2.0)
|
20
|
+
debugger-linecache (1.1.2)
|
21
|
+
debugger-ruby_core_source (>= 1.1.1)
|
22
|
+
debugger-ruby_core_source (1.2.0)
|
23
|
+
diff-lcs (1.2.1)
|
24
|
+
gherkin (2.11.6)
|
25
|
+
json (>= 1.7.6)
|
16
26
|
git (1.2.5)
|
17
|
-
|
27
|
+
i18n (0.6.4)
|
28
|
+
jeweler (1.8.4)
|
18
29
|
bundler (~> 1.0)
|
19
30
|
git (>= 1.2.5)
|
20
31
|
rake
|
21
|
-
|
22
|
-
|
23
|
-
ruby_core_source (>= 0.1.4)
|
32
|
+
rdoc
|
33
|
+
json (1.7.7)
|
24
34
|
metaclass (0.0.1)
|
25
|
-
mocha (0.10.
|
35
|
+
mocha (0.10.5)
|
26
36
|
metaclass (~> 0.0.1)
|
27
|
-
multi_json (1.
|
28
|
-
rake (0.
|
29
|
-
rake-compiler (0.8.
|
37
|
+
multi_json (1.6.1)
|
38
|
+
rake (10.0.3)
|
39
|
+
rake-compiler (0.8.3)
|
30
40
|
rake
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
rspec-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
rspec-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
simplecov-html (~> 0.5.3)
|
54
|
-
simplecov-html (0.5.3)
|
55
|
-
term-ansicolor (1.0.7)
|
41
|
+
rdoc (4.0.0)
|
42
|
+
json (~> 1.4)
|
43
|
+
rspec (2.13.0)
|
44
|
+
rspec-core (~> 2.13.0)
|
45
|
+
rspec-expectations (~> 2.13.0)
|
46
|
+
rspec-mocks (~> 2.13.0)
|
47
|
+
rspec-core (2.13.1)
|
48
|
+
rspec-expectations (2.13.0)
|
49
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
50
|
+
rspec-mocks (2.13.0)
|
51
|
+
ruby-prof (0.13.0)
|
52
|
+
shoulda (3.3.2)
|
53
|
+
shoulda-context (~> 1.0.1)
|
54
|
+
shoulda-matchers (~> 1.4.1)
|
55
|
+
shoulda-context (1.0.2)
|
56
|
+
shoulda-matchers (1.4.2)
|
57
|
+
activesupport (>= 3.0.0)
|
58
|
+
bourne (~> 1.1.2)
|
59
|
+
simplecov (0.7.1)
|
60
|
+
multi_json (~> 1.0)
|
61
|
+
simplecov-html (~> 0.7.1)
|
62
|
+
simplecov-html (0.7.1)
|
56
63
|
|
57
64
|
PLATFORMS
|
58
65
|
ruby
|
59
66
|
|
60
67
|
DEPENDENCIES
|
61
|
-
bundler (~> 1
|
62
|
-
cucumber
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
rake
|
67
|
-
|
68
|
-
|
69
|
-
ruby-prof
|
70
|
-
shoulda
|
71
|
-
simplecov
|
68
|
+
bundler (~> 1)
|
69
|
+
cucumber (~> 1)
|
70
|
+
debugger (~> 1)
|
71
|
+
jeweler (~> 1)
|
72
|
+
mocha (~> 0.10)
|
73
|
+
rake (~> 10)
|
74
|
+
rake-compiler (~> 0.8)
|
75
|
+
rspec (~> 2)
|
76
|
+
ruby-prof (~> 0.13)
|
77
|
+
shoulda (~> 3)
|
78
|
+
simplecov (~> 0.7)
|
data/README.md
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
damerau-levenshtein
|
2
|
+
===================
|
3
|
+
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
7
|
+
|
8
|
+
The damerau-levenshtein gem finds the edit distance between two UTF-8
|
9
|
+
or ASCII encoded strings with O(N**2) efficiency.
|
10
|
+
|
11
|
+
This gem implements pure Levenshtein algorithm, Damerau modification of it
|
12
|
+
(where 2 character transposition counts as 1 edit distance). It also includes
|
13
|
+
Boehmer & Rees 2008 modification of Damerau algorithm, where transposition
|
14
|
+
of blocks bigger than 1 character is taken into account as well
|
15
|
+
(Boehmer & Rees 2008).
|
16
|
+
|
17
|
+
require 'damerau-levenshtein'
|
18
|
+
DamerauLevenshtein.distance('Something', 'Smoething') #returns 1
|
19
|
+
|
20
|
+
Gem damerau-levenshtein is compatible with ruby versions 1.8.7
|
21
|
+
and 1.9.2 and higher
|
22
|
+
|
23
|
+
Installation
|
24
|
+
------------
|
25
|
+
|
26
|
+
gem install damerau-levenshtein
|
27
|
+
|
28
|
+
Examples
|
29
|
+
--------
|
30
|
+
|
31
|
+
require 'rubygems' #not needed for ruby >= 1.9.0
|
32
|
+
require 'damerau-levenshtein'
|
33
|
+
dl = DamerauLevenshtein
|
34
|
+
|
35
|
+
* compare using Damerau Levenshtein algorithm
|
36
|
+
|
37
|
+
`dl.distance("Something", "Smoething") #returns 1`
|
38
|
+
|
39
|
+
* compare using Levenshtein algorithm
|
40
|
+
|
41
|
+
`dl.distance("Something", "Smoething", 0) #returns 2`
|
42
|
+
|
43
|
+
* compare using Boehmer & Rees modification
|
44
|
+
|
45
|
+
`dl.distance("Something", "meSothing", 2) #returns 2 instead of 4`
|
46
|
+
|
47
|
+
* comparison of words with utf-8 characters should work fine:
|
48
|
+
|
49
|
+
`dl.distance("Sjöstedt", "Sjostedt") #returns 1`
|
50
|
+
|
51
|
+
API Description
|
52
|
+
-----------
|
53
|
+
|
54
|
+
Gem defines two methods
|
55
|
+
|
56
|
+
DamerauLevenshtein.version
|
57
|
+
#returns version number of the gem
|
58
|
+
|
59
|
+
DamerauLevenshtein.distance(string1, string2, block_size, max_distance)
|
60
|
+
#returns [edit distance][7] between 2 strings
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
DamerauLevenshtein.distance takes 4 arguments:
|
65
|
+
|
66
|
+
* string1
|
67
|
+
* string2
|
68
|
+
* block_size (default is 1)
|
69
|
+
* max_distance (default is 10)
|
70
|
+
|
71
|
+
`block_size` determines maximum number of characters in a transposition block:
|
72
|
+
|
73
|
+
block_size = 0
|
74
|
+
(transposition does not count -- it is a pure Levenshtein algorithm)
|
75
|
+
|
76
|
+
block_size = 1
|
77
|
+
(transposition between 2 adjacent characters --
|
78
|
+
it is pure Damerau-Levenshtein algorithm)
|
79
|
+
|
80
|
+
block_size = 2
|
81
|
+
(transposition between blocks as big as 2 characters -- so abcd and cdab
|
82
|
+
counts as edit distance 2, not 4)
|
83
|
+
|
84
|
+
block_size = 3
|
85
|
+
(transposition between blocks as big as 3 characters --
|
86
|
+
so abcdef and defabc counts as edit distance 3, not 6)
|
87
|
+
|
88
|
+
etc.
|
89
|
+
|
90
|
+
`max_distance` -- is a threshold after which algorithm gives up and
|
91
|
+
returns max_distance instead of real edit distance.
|
92
|
+
|
93
|
+
Levenshtein algorithm is expensive, so it makes sense to give up when edit
|
94
|
+
distance is becoming too big. The argument max_distance does just that.
|
95
|
+
|
96
|
+
DamerauLevenshtein.distance('abcdefg', '1234567', 0, 3)
|
97
|
+
#give up when edit distance exceeds 3
|
98
|
+
|
99
|
+
Contributing to damerau-levenshtein
|
100
|
+
-----------------------------------
|
101
|
+
|
102
|
+
* Check out the latest master to make sure the feature hasn't been
|
103
|
+
implemented or the bug hasn't been fixed yet
|
104
|
+
* Check out the issue tracker to make sure someone hasn't already requested
|
105
|
+
it and/or contributed it
|
106
|
+
* Fork the project
|
107
|
+
* Start a feature/bugfix branch
|
108
|
+
* Commit and push until you are happy with your contribution
|
109
|
+
* Make sure to add tests for it. This is important so I don't break it
|
110
|
+
in a future version unintentionally.
|
111
|
+
* Please try not to mess with the Rakefile, version, or history. If you want
|
112
|
+
to have your own version, or is otherwise necessary, that is fine, but please
|
113
|
+
isolate to its own commit so I can cherry-pick around it.
|
114
|
+
|
115
|
+
Versioning
|
116
|
+
----------
|
117
|
+
|
118
|
+
This gem is following practices of [Semantic Versioning][8]
|
119
|
+
|
120
|
+
Authors
|
121
|
+
-------
|
122
|
+
|
123
|
+
Dmitry Mozzherin
|
124
|
+
|
125
|
+
Copyright
|
126
|
+
---------
|
127
|
+
|
128
|
+
Copyright (c) 2011-2013 Marine Biological Laboratory. See LICENSE.txt for
|
129
|
+
further details.
|
130
|
+
|
131
|
+
[1]: https://badge.fury.io/rb/damerau-levenshtein.png
|
132
|
+
[2]: http://badge.fury.io/rb/damerau-levenshtein
|
133
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/damerau-levenshtein.png
|
134
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/damerau-levenshtein
|
135
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/damerau-levenshtein.png
|
136
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/damerau-levenshtein
|
137
|
+
[7]: http://en.wikipedia.org/wiki/Edit_distance
|
138
|
+
[8]: http://semver.org/
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
Bundler.setup(:default, :development)
|
7
7
|
rescue Bundler::BundlerError => e
|
8
8
|
$stderr.puts e.message
|
9
|
-
$stderr.puts
|
9
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
10
10
|
exit e.status_code
|
11
11
|
end
|
12
12
|
require 'rake/dsl_definition'
|
@@ -15,22 +15,25 @@ require 'rake/extensiontask'
|
|
15
15
|
|
16
16
|
require 'jeweler'
|
17
17
|
Jeweler::Tasks.new do |gem|
|
18
|
-
# gem is a Gem::Specification...
|
19
|
-
|
20
|
-
gem.
|
21
|
-
gem.
|
22
|
-
gem.
|
23
|
-
gem.
|
24
|
-
|
25
|
-
gem.
|
26
|
-
|
18
|
+
# gem is a Gem::Specification...
|
19
|
+
# see http://docs.rubygems.org/read/chapter/20 for more options
|
20
|
+
gem.name = 'damerau-levenshtein'
|
21
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/damerau-levenshtein'
|
22
|
+
gem.license = 'MIT'
|
23
|
+
gem.summary = %Q{Calculation of editing distance for 2 strings \
|
24
|
+
using Levenshtein or Damerau-Levenshtein algorithms}
|
25
|
+
gem.description = %Q{Calculation of editing distance for 2 strings using \
|
26
|
+
Levenshtein or Damerau-Levenshtein algorithms}
|
27
|
+
gem.email = 'dmozzherin@gmail.com'
|
28
|
+
gem.authors = ['Dmitry Mozzherin']
|
29
|
+
gem.files = FileList['[A-Z]*', '*.gemspec', '{bin,generators,lib,spec}/**/*']
|
27
30
|
gem.files -= FileList['lib/**/*.bundle', 'lib/**/*.dll', 'lib/**/*.so']
|
28
31
|
gem.files += FileList['ext/**/*.c']
|
29
32
|
gem.extensions = FileList['ext/**/extconf.rb']
|
30
33
|
end
|
31
34
|
Jeweler::RubygemsDotOrgTasks.new
|
32
35
|
|
33
|
-
require 'rspec
|
36
|
+
require 'rspec'
|
34
37
|
require 'rspec/core/rake_task'
|
35
38
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
36
39
|
spec.pattern = FileList['spec/**/*_spec.rb']
|
@@ -44,9 +47,10 @@ end
|
|
44
47
|
require 'cucumber/rake/task'
|
45
48
|
Cucumber::Rake::Task.new(:features)
|
46
49
|
|
47
|
-
Rake::ExtensionTask.new(
|
48
|
-
extension.lib_dir =
|
50
|
+
Rake::ExtensionTask.new('damerau_levenshtein_binding') do |extension|
|
51
|
+
extension.lib_dir = 'lib'
|
49
52
|
end
|
50
53
|
|
51
54
|
Rake::Task[:spec].prerequisites << :compile
|
55
|
+
Rake::Task[:features].prerequisites << :compile
|
52
56
|
task :default => :spec
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "damerau-levenshtein"
|
8
|
+
s.version = "1.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Dmitry Mozzherin"]
|
12
|
+
s.date = "2013-03-13"
|
13
|
+
s.description = "Calculation of editing distance for 2 strings using Levenshtein or Damerau-Levenshtein algorithms"
|
14
|
+
s.email = "dmozzherin@gmail.com"
|
15
|
+
s.extensions = ["ext/damerau_levenshtein_binding/extconf.rb"]
|
16
|
+
s.extra_rdoc_files = [
|
17
|
+
"LICENSE.txt",
|
18
|
+
"README.md"
|
19
|
+
]
|
20
|
+
s.files = [
|
21
|
+
"CHANGELOG",
|
22
|
+
"Gemfile",
|
23
|
+
"Gemfile.lock",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.md",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"damerau-levenshtein.gemspec",
|
29
|
+
"ext/damerau_levenshtein_binding/damerau_levenshtein_binding.c",
|
30
|
+
"lib/damerau-levenshtein.rb",
|
31
|
+
"spec/damerau-levenshtein_spec.rb",
|
32
|
+
"spec/damerau_levenshtein_test.txt",
|
33
|
+
"spec/spec_helper.rb"
|
34
|
+
]
|
35
|
+
s.homepage = "http://github.com/GlobalNamesArchitecture/damerau-levenshtein"
|
36
|
+
s.licenses = ["MIT"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = "1.8.25"
|
39
|
+
s.summary = "Calculation of editing distance for 2 strings using Levenshtein or Damerau-Levenshtein algorithms"
|
40
|
+
|
41
|
+
if s.respond_to? :specification_version then
|
42
|
+
s.specification_version = 3
|
43
|
+
|
44
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_runtime_dependency(%q<bundler>, ["~> 1"])
|
46
|
+
s.add_runtime_dependency(%q<jeweler>, ["~> 1"])
|
47
|
+
s.add_runtime_dependency(%q<rake>, ["~> 10"])
|
48
|
+
s.add_runtime_dependency(%q<rake-compiler>, ["~> 0.8"])
|
49
|
+
s.add_development_dependency(%q<debugger>, ["~> 1"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<bundler>, ["~> 1"])
|
52
|
+
s.add_dependency(%q<jeweler>, ["~> 1"])
|
53
|
+
s.add_dependency(%q<rake>, ["~> 10"])
|
54
|
+
s.add_dependency(%q<rake-compiler>, ["~> 0.8"])
|
55
|
+
s.add_dependency(%q<debugger>, ["~> 1"])
|
56
|
+
end
|
57
|
+
else
|
58
|
+
s.add_dependency(%q<bundler>, ["~> 1"])
|
59
|
+
s.add_dependency(%q<jeweler>, ["~> 1"])
|
60
|
+
s.add_dependency(%q<rake>, ["~> 10"])
|
61
|
+
s.add_dependency(%q<rake-compiler>, ["~> 0.8"])
|
62
|
+
s.add_dependency(%q<debugger>, ["~> 1"])
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
data/lib/damerau-levenshtein.rb
CHANGED
@@ -5,7 +5,11 @@ require 'damerau_levenshtein_binding'
|
|
5
5
|
module DamerauLevenshtein
|
6
6
|
extend DamerauLevenshteinBinding
|
7
7
|
|
8
|
+
def self.version
|
9
|
+
open(File.join(File.dirname(__FILE__), '..', 'VERSION')).read.strip
|
10
|
+
end
|
11
|
+
|
8
12
|
def self.distance(str1, str2, block_size = 1, max_distance = 10)
|
9
|
-
distance_utf(str1.unpack(
|
13
|
+
distance_utf(str1.unpack('U*'), str2.unpack('U*'), block_size, max_distance)
|
10
14
|
end
|
11
15
|
end
|
@@ -1,8 +1,16 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe DamerauLevenshtein do
|
4
|
+
|
5
|
+
it 'should return version' do
|
6
|
+
DamerauLevenshtein.version.should =~ /^\d+\.\d+\.\d+$/
|
7
|
+
end
|
8
|
+
|
4
9
|
it 'should get tests' do
|
5
|
-
|
10
|
+
tests = File.expand_path(File.dirname(__FILE__)) +
|
11
|
+
'/damerau_levenshtein_test.txt'
|
12
|
+
|
13
|
+
read_test_file(tests, 5) do |y|
|
6
14
|
dl = DamerauLevenshtein
|
7
15
|
if y
|
8
16
|
res = dl.distance(y[0], y[1], y[3].to_i, y[2].to_i)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: damerau-levenshtein
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,152 +9,113 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
|
-
- -
|
19
|
+
- - ~>
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: '
|
22
|
-
type: :
|
21
|
+
version: '1'
|
22
|
+
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements:
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: rake-compiler
|
27
|
-
requirement: &70145779820100 !ruby/object:Gem::Requirement
|
28
|
-
none: false
|
29
|
-
requirements:
|
30
|
-
- - ! '>='
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '0'
|
33
|
-
type: :development
|
34
|
-
prerelease: false
|
35
|
-
version_requirements: *70145779820100
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: rspec
|
38
|
-
requirement: &70145779819600 !ruby/object:Gem::Requirement
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
25
|
none: false
|
40
26
|
requirements:
|
41
27
|
- - ~>
|
42
28
|
- !ruby/object:Gem::Version
|
43
|
-
version:
|
44
|
-
type: :development
|
45
|
-
prerelease: false
|
46
|
-
version_requirements: *70145779819600
|
29
|
+
version: '1'
|
47
30
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
49
|
-
requirement:
|
31
|
+
name: jeweler
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
50
33
|
none: false
|
51
34
|
requirements:
|
52
|
-
- -
|
35
|
+
- - ~>
|
53
36
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
55
|
-
type: :
|
37
|
+
version: '1'
|
38
|
+
type: :runtime
|
56
39
|
prerelease: false
|
57
|
-
version_requirements:
|
58
|
-
- !ruby/object:Gem::Dependency
|
59
|
-
name: bundler
|
60
|
-
requirement: &70145779818600 !ruby/object:Gem::Requirement
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
61
41
|
none: false
|
62
42
|
requirements:
|
63
43
|
- - ~>
|
64
44
|
- !ruby/object:Gem::Version
|
65
|
-
version: 1
|
66
|
-
type: :development
|
67
|
-
prerelease: false
|
68
|
-
version_requirements: *70145779818600
|
45
|
+
version: '1'
|
69
46
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
71
|
-
requirement:
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
72
49
|
none: false
|
73
50
|
requirements:
|
74
51
|
- - ~>
|
75
52
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
77
|
-
type: :
|
53
|
+
version: '10'
|
54
|
+
type: :runtime
|
78
55
|
prerelease: false
|
79
|
-
version_requirements:
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: simplecov
|
82
|
-
requirement: &70145779817600 !ruby/object:Gem::Requirement
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
83
57
|
none: false
|
84
58
|
requirements:
|
85
|
-
- -
|
59
|
+
- - ~>
|
86
60
|
- !ruby/object:Gem::Version
|
87
|
-
version: '
|
88
|
-
type: :development
|
89
|
-
prerelease: false
|
90
|
-
version_requirements: *70145779817600
|
61
|
+
version: '10'
|
91
62
|
- !ruby/object:Gem::Dependency
|
92
|
-
name:
|
93
|
-
requirement:
|
63
|
+
name: rake-compiler
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
94
65
|
none: false
|
95
66
|
requirements:
|
96
|
-
- -
|
67
|
+
- - ~>
|
97
68
|
- !ruby/object:Gem::Version
|
98
|
-
version: '0'
|
99
|
-
type: :
|
69
|
+
version: '0.8'
|
70
|
+
type: :runtime
|
100
71
|
prerelease: false
|
101
|
-
version_requirements:
|
102
|
-
- !ruby/object:Gem::Dependency
|
103
|
-
name: ruby-prof
|
104
|
-
requirement: &70145779816600 !ruby/object:Gem::Requirement
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
73
|
none: false
|
106
74
|
requirements:
|
107
|
-
- -
|
75
|
+
- - ~>
|
108
76
|
- !ruby/object:Gem::Version
|
109
|
-
version: '0'
|
110
|
-
type: :development
|
111
|
-
prerelease: false
|
112
|
-
version_requirements: *70145779816600
|
77
|
+
version: '0.8'
|
113
78
|
- !ruby/object:Gem::Dependency
|
114
|
-
name:
|
115
|
-
requirement:
|
79
|
+
name: debugger
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
116
81
|
none: false
|
117
82
|
requirements:
|
118
|
-
- -
|
83
|
+
- - ~>
|
119
84
|
- !ruby/object:Gem::Version
|
120
|
-
version: '
|
85
|
+
version: '1'
|
121
86
|
type: :development
|
122
87
|
prerelease: false
|
123
|
-
version_requirements:
|
124
|
-
- !ruby/object:Gem::Dependency
|
125
|
-
name: mocha
|
126
|
-
requirement: &70145779815620 !ruby/object:Gem::Requirement
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
89
|
none: false
|
128
90
|
requirements:
|
129
|
-
- -
|
91
|
+
- - ~>
|
130
92
|
- !ruby/object:Gem::Version
|
131
|
-
version: '
|
132
|
-
|
133
|
-
|
134
|
-
version_requirements: *70145779815620
|
135
|
-
description: Calculation of editing distance for 2 strings using Levenshtein or Damerau-Levenshtein
|
136
|
-
algorithms
|
93
|
+
version: '1'
|
94
|
+
description: Calculation of editing distance for 2 strings using Levenshtein or
|
95
|
+
Damerau-Levenshtein algorithms
|
137
96
|
email: dmozzherin@gmail.com
|
138
97
|
executables: []
|
139
98
|
extensions:
|
140
99
|
- ext/damerau_levenshtein_binding/extconf.rb
|
141
100
|
extra_rdoc_files:
|
142
101
|
- LICENSE.txt
|
143
|
-
- README.
|
102
|
+
- README.md
|
144
103
|
files:
|
104
|
+
- CHANGELOG
|
145
105
|
- Gemfile
|
146
106
|
- Gemfile.lock
|
147
107
|
- LICENSE.txt
|
148
|
-
- README.
|
108
|
+
- README.md
|
149
109
|
- Rakefile
|
150
110
|
- VERSION
|
111
|
+
- damerau-levenshtein.gemspec
|
151
112
|
- ext/damerau_levenshtein_binding/damerau_levenshtein_binding.c
|
152
113
|
- lib/damerau-levenshtein.rb
|
153
114
|
- spec/damerau-levenshtein_spec.rb
|
154
115
|
- spec/damerau_levenshtein_test.txt
|
155
116
|
- spec/spec_helper.rb
|
156
117
|
- ext/damerau_levenshtein_binding/extconf.rb
|
157
|
-
homepage: http://github.com/
|
118
|
+
homepage: http://github.com/GlobalNamesArchitecture/damerau-levenshtein
|
158
119
|
licenses:
|
159
120
|
- MIT
|
160
121
|
post_install_message:
|
@@ -169,7 +130,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
169
130
|
version: '0'
|
170
131
|
segments:
|
171
132
|
- 0
|
172
|
-
hash:
|
133
|
+
hash: 1941174905285694980
|
173
134
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
174
135
|
none: false
|
175
136
|
requirements:
|
@@ -178,9 +139,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
139
|
version: '0'
|
179
140
|
requirements: []
|
180
141
|
rubyforge_project:
|
181
|
-
rubygems_version: 1.8.
|
142
|
+
rubygems_version: 1.8.25
|
182
143
|
signing_key:
|
183
144
|
specification_version: 3
|
184
|
-
summary: Calculation of editing distance for 2 strings
|
145
|
+
summary: Calculation of editing distance for 2 strings using Levenshtein or Damerau-Levenshtein
|
185
146
|
algorithms
|
186
147
|
test_files: []
|
data/README.rdoc
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
= damerau-levenshtein
|
2
|
-
|
3
|
-
The damerau-levenshtein gem allows to find edit distance between two UTF-8 or ASCII encoded strings with O(N**2) efficiency.
|
4
|
-
|
5
|
-
This gem implements pure Levenshtein algorithm, Damerau modification of it (where 2 character transposition counts as 1 edit distance).
|
6
|
-
It also includes Boehmer & Rees 2008 modification of Damerau algorithm, where transposition of bigger than 1 character blocks is taken in account as well (Boehmer & Rees 2008).
|
7
|
-
|
8
|
-
require 'damerau-levenshtein'
|
9
|
-
DamerauLevenshtein.distance('Something', 'Smoething') #returns 1
|
10
|
-
|
11
|
-
Gem damerau-levenshtein is compatible with ruby versions 1.8.7 and 1.9.2 and higher
|
12
|
-
|
13
|
-
== Installation
|
14
|
-
|
15
|
-
gem install damerau-levenshtein
|
16
|
-
|
17
|
-
== Examples
|
18
|
-
|
19
|
-
require 'rubygems' #not needed for ruby >= 1.9.0
|
20
|
-
require 'damerau-levenshtein'
|
21
|
-
dl = DamerauLevenshtein
|
22
|
-
|
23
|
-
* compare using Damerau Levenshtein algorithm
|
24
|
-
|
25
|
-
dl.distance("Something", "Smoething") #returns 1
|
26
|
-
|
27
|
-
* compare using Levensthein algorithm
|
28
|
-
|
29
|
-
dl.distance("Something", "Smoething", 0) #returns 2
|
30
|
-
|
31
|
-
* compare using Boehmer & Rees modification
|
32
|
-
|
33
|
-
dl.distance("Something", "meSothing", 2) #returns 2 instead of 4
|
34
|
-
|
35
|
-
* comparison of words with utf-8 characters should work fine:
|
36
|
-
|
37
|
-
dl.distance("Sjöstedt", "Sjostedt") #returns 1
|
38
|
-
|
39
|
-
== Description
|
40
|
-
|
41
|
-
DamerauLevenshtein.distance takes 4 arguments:
|
42
|
-
|
43
|
-
* string1
|
44
|
-
* string2
|
45
|
-
* block_size (default is 1)
|
46
|
-
* max_distance (default is 10)
|
47
|
-
|
48
|
-
block_size determines maximum number of characters in a transposition block:
|
49
|
-
|
50
|
-
block_size = 0 (transposition does not count -- it is a pure Levenshtein algorithm)
|
51
|
-
block_size = 1 (transposition between 2 adjustent characters -- it is pure Damerau-Levenshtein algorithm)
|
52
|
-
block_size = 2 (transposition between blocks as big as 2 characters -- so abcd and cdab counts as edit distance 2, not 4)
|
53
|
-
block_size = 3 (transposition between blocks as big as 3 characters -- so abcdef and defabc counts as edit distance 3, not 6)
|
54
|
-
etc.
|
55
|
-
|
56
|
-
max_distance -- is a threshold after which algorithm gives up and returns max_distance instead of real edit distance.
|
57
|
-
|
58
|
-
Levenshtein algorithm is expensive, so it makes sense to give up when edit distance is becoming too big. The argument max_distance does just that.
|
59
|
-
|
60
|
-
DamerauLevenshtein.distance('abcdefg', '1234567', 0, 3) #give up when edit distance exceeds 3)
|
61
|
-
|
62
|
-
== Contributing to damerau-levenshtein
|
63
|
-
|
64
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
65
|
-
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
66
|
-
* Fork the project
|
67
|
-
* Start a feature/bugfix branch
|
68
|
-
* Commit and push until you are happy with your contribution
|
69
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
70
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
71
|
-
|
72
|
-
== Copyright
|
73
|
-
|
74
|
-
Copyright (c) 2011 Marine Biological Laboratory. See LICENSE.txt for
|
75
|
-
further details.
|
76
|
-
|