rubyfish 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -9
- data/lib/rubyfish/damerau_levenshtein.rb +33 -15
- data/lib/rubyfish/hamming.rb +7 -1
- data/lib/rubyfish/jaro.rb +2 -2
- data/lib/rubyfish/jaro_winkler.rb +7 -1
- data/lib/rubyfish/levenshtein.rb +5 -28
- data/lib/rubyfish/longest_subsequence.rb +8 -1
- data/lib/rubyfish/longest_substring.rb +18 -4
- data/lib/rubyfish/version.rb +1 -1
- metadata +21 -40
data/README.md
CHANGED
@@ -1,21 +1,20 @@
|
|
1
|
-
|
2
1
|
RubyFish
|
3
2
|
=========
|
4
3
|
|
5
|
-
RubyFish is a ruby port of python library
|
4
|
+
RubyFish is a ruby port of python library <a href = "http://github.com/sunlightlabs/jellyfish">jellyfish</a> for doing approximate and phonetic matching of strings.
|
5
|
+
|
6
|
+
-------------
|
6
7
|
|
7
8
|
RubyFish is a project of AnjLab (c) 2010.
|
8
9
|
All code is released under a BSD-style license, see LICENSE for details.
|
9
10
|
|
10
|
-
Originally written by
|
11
|
-
Written by Michael Stephens <mstephens@sunlightfoundation.com> and James Turk
|
12
|
-
<jturk@sunlightfoundation.com>.
|
11
|
+
Originally written by <a href="mailto:mstephens@sunlightfoundation.com">Michael Stephens</a> and <a href="mailto:jturk@sunlightfoundation.com">James Turk</a>.
|
13
12
|
|
14
|
-
Ported by
|
15
|
-
Source is available
|
13
|
+
Ported by <a href="mailto:yury.korolev@gmail.com">Yury Korolev</a>
|
14
|
+
Source is available on <a href="http://github.com/anjlab/rubyfish">GitHub</a>
|
16
15
|
|
17
16
|
Included Algorithms
|
18
|
-
|
17
|
+
-------------------
|
19
18
|
|
20
19
|
String comparison:
|
21
20
|
|
@@ -32,7 +31,7 @@ Phonetic encoding:
|
|
32
31
|
* Double Metaphone
|
33
32
|
|
34
33
|
Example Usage
|
35
|
-
|
34
|
+
-------------
|
36
35
|
|
37
36
|
ruby-1.9.2-p0 > require 'rubyfish'
|
38
37
|
ruby-1.9.2-p0 > RubyFish::Levenshtein.distance("jellyfish", "rubyfish")
|
@@ -1,41 +1,59 @@
|
|
1
|
-
require 'matrix'
|
2
|
-
|
3
1
|
module RubyFish::DamerauLevenshtein
|
4
2
|
|
5
|
-
|
3
|
+
# http://en.wikipedia.org/wiki/Levenshtein_distance
|
4
|
+
# http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
5
|
+
|
6
|
+
def _distance a, b, opts = {}
|
7
|
+
allow_swaps = opts[:allow_swaps]
|
8
|
+
ignore_case = opts[:ignore_case]
|
9
|
+
|
6
10
|
as = a.to_s
|
7
11
|
bs = b.to_s
|
12
|
+
|
13
|
+
if ignore_case
|
14
|
+
as.downcase!
|
15
|
+
bs.downcase!
|
16
|
+
end
|
8
17
|
|
9
18
|
rows = as.size + 1
|
10
19
|
cols = bs.size + 1
|
11
20
|
|
12
|
-
dist =
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
21
|
+
dist = [
|
22
|
+
Array.new(cols) {|k| k},
|
23
|
+
Array.new(cols) {0},
|
24
|
+
Array.new(cols) {0}
|
25
|
+
]
|
26
|
+
|
17
27
|
(1...rows).each do |i|
|
28
|
+
k = i % 3
|
29
|
+
dist[k][0] = i
|
30
|
+
|
18
31
|
(1...cols).each do |j|
|
19
32
|
cost = as[i - 1] == bs[j - 1] ? 0 : 1
|
20
33
|
|
21
34
|
#minimum of deletion, insertion, substitution
|
22
|
-
d1 = dist[
|
23
|
-
d2 = dist[
|
24
|
-
d3 = dist[
|
35
|
+
d1 = dist[k - 1][j] + 1
|
36
|
+
d2 = dist[k][j - 1] + 1
|
37
|
+
d3 = dist[k - 1][j - 1] + cost
|
25
38
|
|
26
39
|
d_now = [d1, d2, d3].min
|
27
40
|
|
28
|
-
if i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
|
29
|
-
d1 = dist[
|
41
|
+
if allow_swaps && i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
|
42
|
+
d1 = dist[k - 2][j - 2] + cost
|
30
43
|
d_now = [d_now, d1].min;
|
31
44
|
end
|
32
45
|
|
33
|
-
dist[
|
46
|
+
dist[k][j] = d_now;
|
34
47
|
end
|
35
48
|
end
|
36
49
|
|
37
|
-
dist[
|
50
|
+
dist[(rows - 1) % 3][-1]
|
51
|
+
end
|
52
|
+
|
53
|
+
def distance a, b, opts = {}
|
54
|
+
_distance(a, b, :allow_swaps => true, :ignore_case => opts[:ignore_case])
|
38
55
|
end
|
39
56
|
|
40
57
|
module_function :distance
|
58
|
+
module_function :_distance
|
41
59
|
end
|
data/lib/rubyfish/hamming.rb
CHANGED
data/lib/rubyfish/jaro.rb
CHANGED
@@ -2,8 +2,8 @@ module RubyFish::Jaro
|
|
2
2
|
include ::RubyFish::JaroWinkler
|
3
3
|
extend ::RubyFish::JaroWinkler
|
4
4
|
|
5
|
-
def distance a, b
|
6
|
-
_distance(a, b, :winklerize => false)
|
5
|
+
def distance a, b, opts={}
|
6
|
+
_distance(a, b, :winklerize => false, :ignore_case => opts[:ignore_case])
|
7
7
|
end
|
8
8
|
|
9
9
|
module_function :distance
|
@@ -3,9 +3,15 @@ module RubyFish::JaroWinkler
|
|
3
3
|
def _distance a, b, opts = {}
|
4
4
|
long_tolerance = opts[:long_tolerance]
|
5
5
|
winklerize = opts[:winklerize]
|
6
|
+
ignore_case = opts[:ignore_case]
|
6
7
|
|
7
8
|
as = a.to_s
|
8
9
|
bs = b.to_s
|
10
|
+
|
11
|
+
if ignore_case
|
12
|
+
as.downcase!
|
13
|
+
bs.downcase!
|
14
|
+
end
|
9
15
|
|
10
16
|
as_length = as.size
|
11
17
|
bs_length = bs.size
|
@@ -98,7 +104,7 @@ module RubyFish::JaroWinkler
|
|
98
104
|
end
|
99
105
|
|
100
106
|
def distance a, b, opts = {}
|
101
|
-
_distance(a, b, :winklerize => true)
|
107
|
+
_distance(a, b, :winklerize => true, :ignore_case => opts[:ignore_case])
|
102
108
|
end
|
103
109
|
|
104
110
|
module_function :distance
|
data/lib/rubyfish/levenshtein.rb
CHANGED
@@ -1,36 +1,13 @@
|
|
1
1
|
module RubyFish::Levenshtein
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
as, bs = a.to_s, b.to_s
|
3
|
+
include ::RubyFish::DamerauLevenshtein
|
4
|
+
extend ::RubyFish::DamerauLevenshtein
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
rows = as_length + 1
|
11
|
-
cols = bs_length + 1
|
12
|
-
|
13
|
-
dist = ::RubyFish::MMatrix.new rows, cols
|
14
|
-
(0...rows).each {|i| dist[i, 0] = i}
|
15
|
-
(0...cols).each {|j| dist[0, j] = j}
|
16
|
-
|
17
|
-
(1...cols).each do |j|
|
18
|
-
(1...rows).each do |i|
|
19
|
-
if as[i - 1] == bs[j - 1]
|
20
|
-
dist[i, j] = dist[i - 1, j - 1]
|
21
|
-
else
|
22
|
-
d1 = dist[i - 1, j] + 1
|
23
|
-
d2 = dist[i, j - 1] + 1
|
24
|
-
d3 = dist[i - 1, j - 1] + 1
|
25
|
-
|
26
|
-
dist[i, j] = [d1, d2, d3].min;
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
dist[as_length, bs_length];
|
6
|
+
def distance a, b, opts={}
|
7
|
+
_distance(a, b, :allowswaps => false, :ignore_case => opts[:ignore_case])
|
32
8
|
end
|
33
9
|
|
34
10
|
module_function :distance
|
35
11
|
|
12
|
+
|
36
13
|
end
|
@@ -1,10 +1,17 @@
|
|
1
1
|
module RubyFish::LongestSubsequence
|
2
2
|
|
3
3
|
# http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_subsequence#Ruby
|
4
|
-
def distance a, b
|
4
|
+
def distance a, b, opts={}
|
5
|
+
ignore_case = opts[:ignore_case]
|
6
|
+
|
5
7
|
as = a.to_s
|
6
8
|
bs = b.to_s
|
7
9
|
|
10
|
+
if ignore_case
|
11
|
+
as.downcase!
|
12
|
+
bs.downcase!
|
13
|
+
end
|
14
|
+
|
8
15
|
rows = as.size
|
9
16
|
cols = bs.size
|
10
17
|
|
@@ -1,9 +1,16 @@
|
|
1
1
|
module RubyFish::LongestSubstring
|
2
2
|
|
3
3
|
# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring#Ruby
|
4
|
-
def distance a, b
|
4
|
+
def distance a, b, opts={}
|
5
|
+
ignore_case = opts[:ignore_case]
|
6
|
+
|
5
7
|
as = a.to_s
|
6
8
|
bs = b.to_s
|
9
|
+
|
10
|
+
if ignore_case
|
11
|
+
as.downcase!
|
12
|
+
bs.downcase!
|
13
|
+
end
|
7
14
|
|
8
15
|
rows = as.size
|
9
16
|
cols = bs.size
|
@@ -29,9 +36,16 @@ module RubyFish::LongestSubstring
|
|
29
36
|
ans
|
30
37
|
end
|
31
38
|
|
32
|
-
def longest_substring a, b
|
39
|
+
def longest_substring a, b, opts={}
|
40
|
+
ignore_case = opts[:ignore_case]
|
41
|
+
|
33
42
|
as = a.to_s
|
34
43
|
bs = b.to_s
|
44
|
+
|
45
|
+
if ignore_case
|
46
|
+
as.downcase!
|
47
|
+
bs.downcase!
|
48
|
+
end
|
35
49
|
|
36
50
|
rows = as.size
|
37
51
|
cols = bs.size
|
@@ -70,8 +84,8 @@ module RubyFish::LongestSubstring
|
|
70
84
|
res
|
71
85
|
end
|
72
86
|
|
73
|
-
def longest_substring_index(a, b)
|
74
|
-
a.index(longest_substring(a, b))
|
87
|
+
def longest_substring_index(a, b, opts={})
|
88
|
+
a.index(longest_substring(a, b, :ignore_case => opts[:ignore_case]))
|
75
89
|
end
|
76
90
|
|
77
91
|
module_function :distance
|
data/lib/rubyfish/version.rb
CHANGED
metadata
CHANGED
@@ -1,33 +1,23 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubyfish
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Yury Korolev
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-09-20 00:00:00 +04:00
|
18
|
-
default_executable:
|
12
|
+
date: 2012-08-16 00:00:00.000000000 Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
14
|
description: Port of http://github.com/sunlightlabs/jellyfish
|
22
|
-
email:
|
15
|
+
email:
|
23
16
|
- yury.korolev@gmail.com
|
24
17
|
executables: []
|
25
|
-
|
26
18
|
extensions: []
|
27
|
-
|
28
19
|
extra_rdoc_files: []
|
29
|
-
|
30
|
-
files:
|
20
|
+
files:
|
31
21
|
- lib/rubyfish/damerau_levenshtein.rb
|
32
22
|
- lib/rubyfish/double_metaphone.rb
|
33
23
|
- lib/rubyfish/hamming.rb
|
@@ -43,40 +33,31 @@ files:
|
|
43
33
|
- CHANGELOG.md
|
44
34
|
- README.md
|
45
35
|
- ROADMAP.md
|
46
|
-
has_rdoc: true
|
47
36
|
homepage: http://github.com/anjlab/rubyfish
|
48
37
|
licenses: []
|
49
|
-
|
50
38
|
post_install_message:
|
51
39
|
rdoc_options: []
|
52
|
-
|
53
|
-
require_paths:
|
40
|
+
require_paths:
|
54
41
|
- lib
|
55
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
43
|
none: false
|
57
|
-
requirements:
|
58
|
-
- -
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
|
61
|
-
segments:
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
segments:
|
62
49
|
- 0
|
63
|
-
|
64
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
hash: -749425856707131905
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
52
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 1
|
71
|
-
- 3
|
72
|
-
- 6
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
73
56
|
version: 1.3.6
|
74
57
|
requirements: []
|
75
|
-
|
76
58
|
rubyforge_project: rubyfish
|
77
|
-
rubygems_version: 1.
|
59
|
+
rubygems_version: 1.8.24
|
78
60
|
signing_key:
|
79
61
|
specification_version: 3
|
80
62
|
summary: Library for doing approximate and phonetic matching of string
|
81
63
|
test_files: []
|
82
|
-
|