rubyfish 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -9
- data/lib/rubyfish/damerau_levenshtein.rb +33 -15
- data/lib/rubyfish/hamming.rb +7 -1
- data/lib/rubyfish/jaro.rb +2 -2
- data/lib/rubyfish/jaro_winkler.rb +7 -1
- data/lib/rubyfish/levenshtein.rb +5 -28
- data/lib/rubyfish/longest_subsequence.rb +8 -1
- data/lib/rubyfish/longest_substring.rb +18 -4
- data/lib/rubyfish/version.rb +1 -1
- metadata +21 -40
data/README.md
CHANGED
@@ -1,21 +1,20 @@
|
|
1
|
-
|
2
1
|
RubyFish
|
3
2
|
=========
|
4
3
|
|
5
|
-
RubyFish is a ruby port of python library
|
4
|
+
RubyFish is a ruby port of python library <a href = "http://github.com/sunlightlabs/jellyfish">jellyfish</a> for doing approximate and phonetic matching of strings.
|
5
|
+
|
6
|
+
-------------
|
6
7
|
|
7
8
|
RubyFish is a project of AnjLab (c) 2010.
|
8
9
|
All code is released under a BSD-style license, see LICENSE for details.
|
9
10
|
|
10
|
-
Originally written by
|
11
|
-
Written by Michael Stephens <mstephens@sunlightfoundation.com> and James Turk
|
12
|
-
<jturk@sunlightfoundation.com>.
|
11
|
+
Originally written by <a href="mailto:mstephens@sunlightfoundation.com">Michael Stephens</a> and <a href="mailto:jturk@sunlightfoundation.com">James Turk</a>.
|
13
12
|
|
14
|
-
Ported by
|
15
|
-
Source is available
|
13
|
+
Ported by <a href="mailto:yury.korolev@gmail.com">Yury Korolev</a>
|
14
|
+
Source is available on <a href="http://github.com/anjlab/rubyfish">GitHub</a>
|
16
15
|
|
17
16
|
Included Algorithms
|
18
|
-
|
17
|
+
-------------------
|
19
18
|
|
20
19
|
String comparison:
|
21
20
|
|
@@ -32,7 +31,7 @@ Phonetic encoding:
|
|
32
31
|
* Double Metaphone
|
33
32
|
|
34
33
|
Example Usage
|
35
|
-
|
34
|
+
-------------
|
36
35
|
|
37
36
|
ruby-1.9.2-p0 > require 'rubyfish'
|
38
37
|
ruby-1.9.2-p0 > RubyFish::Levenshtein.distance("jellyfish", "rubyfish")
|
@@ -1,41 +1,59 @@
|
|
1
|
-
require 'matrix'
|
2
|
-
|
3
1
|
module RubyFish::DamerauLevenshtein
|
4
2
|
|
5
|
-
|
3
|
+
# http://en.wikipedia.org/wiki/Levenshtein_distance
|
4
|
+
# http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
5
|
+
|
6
|
+
def _distance a, b, opts = {}
|
7
|
+
allow_swaps = opts[:allow_swaps]
|
8
|
+
ignore_case = opts[:ignore_case]
|
9
|
+
|
6
10
|
as = a.to_s
|
7
11
|
bs = b.to_s
|
12
|
+
|
13
|
+
if ignore_case
|
14
|
+
as.downcase!
|
15
|
+
bs.downcase!
|
16
|
+
end
|
8
17
|
|
9
18
|
rows = as.size + 1
|
10
19
|
cols = bs.size + 1
|
11
20
|
|
12
|
-
dist =
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
21
|
+
dist = [
|
22
|
+
Array.new(cols) {|k| k},
|
23
|
+
Array.new(cols) {0},
|
24
|
+
Array.new(cols) {0}
|
25
|
+
]
|
26
|
+
|
17
27
|
(1...rows).each do |i|
|
28
|
+
k = i % 3
|
29
|
+
dist[k][0] = i
|
30
|
+
|
18
31
|
(1...cols).each do |j|
|
19
32
|
cost = as[i - 1] == bs[j - 1] ? 0 : 1
|
20
33
|
|
21
34
|
#minimum of deletion, insertion, substitution
|
22
|
-
d1 = dist[
|
23
|
-
d2 = dist[
|
24
|
-
d3 = dist[
|
35
|
+
d1 = dist[k - 1][j] + 1
|
36
|
+
d2 = dist[k][j - 1] + 1
|
37
|
+
d3 = dist[k - 1][j - 1] + cost
|
25
38
|
|
26
39
|
d_now = [d1, d2, d3].min
|
27
40
|
|
28
|
-
if i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
|
29
|
-
d1 = dist[
|
41
|
+
if allow_swaps && i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
|
42
|
+
d1 = dist[k - 2][j - 2] + cost
|
30
43
|
d_now = [d_now, d1].min;
|
31
44
|
end
|
32
45
|
|
33
|
-
dist[
|
46
|
+
dist[k][j] = d_now;
|
34
47
|
end
|
35
48
|
end
|
36
49
|
|
37
|
-
dist[
|
50
|
+
dist[(rows - 1) % 3][-1]
|
51
|
+
end
|
52
|
+
|
53
|
+
def distance a, b, opts = {}
|
54
|
+
_distance(a, b, :allow_swaps => true, :ignore_case => opts[:ignore_case])
|
38
55
|
end
|
39
56
|
|
40
57
|
module_function :distance
|
58
|
+
module_function :_distance
|
41
59
|
end
|
data/lib/rubyfish/hamming.rb
CHANGED
data/lib/rubyfish/jaro.rb
CHANGED
@@ -2,8 +2,8 @@ module RubyFish::Jaro
|
|
2
2
|
include ::RubyFish::JaroWinkler
|
3
3
|
extend ::RubyFish::JaroWinkler
|
4
4
|
|
5
|
-
def distance a, b
|
6
|
-
_distance(a, b, :winklerize => false)
|
5
|
+
def distance a, b, opts={}
|
6
|
+
_distance(a, b, :winklerize => false, :ignore_case => opts[:ignore_case])
|
7
7
|
end
|
8
8
|
|
9
9
|
module_function :distance
|
@@ -3,9 +3,15 @@ module RubyFish::JaroWinkler
|
|
3
3
|
def _distance a, b, opts = {}
|
4
4
|
long_tolerance = opts[:long_tolerance]
|
5
5
|
winklerize = opts[:winklerize]
|
6
|
+
ignore_case = opts[:ignore_case]
|
6
7
|
|
7
8
|
as = a.to_s
|
8
9
|
bs = b.to_s
|
10
|
+
|
11
|
+
if ignore_case
|
12
|
+
as.downcase!
|
13
|
+
bs.downcase!
|
14
|
+
end
|
9
15
|
|
10
16
|
as_length = as.size
|
11
17
|
bs_length = bs.size
|
@@ -98,7 +104,7 @@ module RubyFish::JaroWinkler
|
|
98
104
|
end
|
99
105
|
|
100
106
|
def distance a, b, opts = {}
|
101
|
-
_distance(a, b, :winklerize => true)
|
107
|
+
_distance(a, b, :winklerize => true, :ignore_case => opts[:ignore_case])
|
102
108
|
end
|
103
109
|
|
104
110
|
module_function :distance
|
data/lib/rubyfish/levenshtein.rb
CHANGED
@@ -1,36 +1,13 @@
|
|
1
1
|
module RubyFish::Levenshtein
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
as, bs = a.to_s, b.to_s
|
3
|
+
include ::RubyFish::DamerauLevenshtein
|
4
|
+
extend ::RubyFish::DamerauLevenshtein
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
rows = as_length + 1
|
11
|
-
cols = bs_length + 1
|
12
|
-
|
13
|
-
dist = ::RubyFish::MMatrix.new rows, cols
|
14
|
-
(0...rows).each {|i| dist[i, 0] = i}
|
15
|
-
(0...cols).each {|j| dist[0, j] = j}
|
16
|
-
|
17
|
-
(1...cols).each do |j|
|
18
|
-
(1...rows).each do |i|
|
19
|
-
if as[i - 1] == bs[j - 1]
|
20
|
-
dist[i, j] = dist[i - 1, j - 1]
|
21
|
-
else
|
22
|
-
d1 = dist[i - 1, j] + 1
|
23
|
-
d2 = dist[i, j - 1] + 1
|
24
|
-
d3 = dist[i - 1, j - 1] + 1
|
25
|
-
|
26
|
-
dist[i, j] = [d1, d2, d3].min;
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
dist[as_length, bs_length];
|
6
|
+
def distance a, b, opts={}
|
7
|
+
_distance(a, b, :allowswaps => false, :ignore_case => opts[:ignore_case])
|
32
8
|
end
|
33
9
|
|
34
10
|
module_function :distance
|
35
11
|
|
12
|
+
|
36
13
|
end
|
@@ -1,10 +1,17 @@
|
|
1
1
|
module RubyFish::LongestSubsequence
|
2
2
|
|
3
3
|
# http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_subsequence#Ruby
|
4
|
-
def distance a, b
|
4
|
+
def distance a, b, opts={}
|
5
|
+
ignore_case = opts[:ignore_case]
|
6
|
+
|
5
7
|
as = a.to_s
|
6
8
|
bs = b.to_s
|
7
9
|
|
10
|
+
if ignore_case
|
11
|
+
as.downcase!
|
12
|
+
bs.downcase!
|
13
|
+
end
|
14
|
+
|
8
15
|
rows = as.size
|
9
16
|
cols = bs.size
|
10
17
|
|
@@ -1,9 +1,16 @@
|
|
1
1
|
module RubyFish::LongestSubstring
|
2
2
|
|
3
3
|
# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring#Ruby
|
4
|
-
def distance a, b
|
4
|
+
def distance a, b, opts={}
|
5
|
+
ignore_case = opts[:ignore_case]
|
6
|
+
|
5
7
|
as = a.to_s
|
6
8
|
bs = b.to_s
|
9
|
+
|
10
|
+
if ignore_case
|
11
|
+
as.downcase!
|
12
|
+
bs.downcase!
|
13
|
+
end
|
7
14
|
|
8
15
|
rows = as.size
|
9
16
|
cols = bs.size
|
@@ -29,9 +36,16 @@ module RubyFish::LongestSubstring
|
|
29
36
|
ans
|
30
37
|
end
|
31
38
|
|
32
|
-
def longest_substring a, b
|
39
|
+
def longest_substring a, b, opts={}
|
40
|
+
ignore_case = opts[:ignore_case]
|
41
|
+
|
33
42
|
as = a.to_s
|
34
43
|
bs = b.to_s
|
44
|
+
|
45
|
+
if ignore_case
|
46
|
+
as.downcase!
|
47
|
+
bs.downcase!
|
48
|
+
end
|
35
49
|
|
36
50
|
rows = as.size
|
37
51
|
cols = bs.size
|
@@ -70,8 +84,8 @@ module RubyFish::LongestSubstring
|
|
70
84
|
res
|
71
85
|
end
|
72
86
|
|
73
|
-
def longest_substring_index(a, b)
|
74
|
-
a.index(longest_substring(a, b))
|
87
|
+
def longest_substring_index(a, b, opts={})
|
88
|
+
a.index(longest_substring(a, b, :ignore_case => opts[:ignore_case]))
|
75
89
|
end
|
76
90
|
|
77
91
|
module_function :distance
|
data/lib/rubyfish/version.rb
CHANGED
metadata
CHANGED
@@ -1,33 +1,23 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubyfish
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 0
|
8
|
-
- 4
|
9
|
-
version: 0.0.4
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Yury Korolev
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-09-20 00:00:00 +04:00
|
18
|
-
default_executable:
|
12
|
+
date: 2012-08-16 00:00:00.000000000 Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
14
|
description: Port of http://github.com/sunlightlabs/jellyfish
|
22
|
-
email:
|
15
|
+
email:
|
23
16
|
- yury.korolev@gmail.com
|
24
17
|
executables: []
|
25
|
-
|
26
18
|
extensions: []
|
27
|
-
|
28
19
|
extra_rdoc_files: []
|
29
|
-
|
30
|
-
files:
|
20
|
+
files:
|
31
21
|
- lib/rubyfish/damerau_levenshtein.rb
|
32
22
|
- lib/rubyfish/double_metaphone.rb
|
33
23
|
- lib/rubyfish/hamming.rb
|
@@ -43,40 +33,31 @@ files:
|
|
43
33
|
- CHANGELOG.md
|
44
34
|
- README.md
|
45
35
|
- ROADMAP.md
|
46
|
-
has_rdoc: true
|
47
36
|
homepage: http://github.com/anjlab/rubyfish
|
48
37
|
licenses: []
|
49
|
-
|
50
38
|
post_install_message:
|
51
39
|
rdoc_options: []
|
52
|
-
|
53
|
-
require_paths:
|
40
|
+
require_paths:
|
54
41
|
- lib
|
55
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
43
|
none: false
|
57
|
-
requirements:
|
58
|
-
- -
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
|
61
|
-
segments:
|
44
|
+
requirements:
|
45
|
+
- - ! '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
segments:
|
62
49
|
- 0
|
63
|
-
|
64
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
hash: -749425856707131905
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
52
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 1
|
71
|
-
- 3
|
72
|
-
- 6
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
73
56
|
version: 1.3.6
|
74
57
|
requirements: []
|
75
|
-
|
76
58
|
rubyforge_project: rubyfish
|
77
|
-
rubygems_version: 1.
|
59
|
+
rubygems_version: 1.8.24
|
78
60
|
signing_key:
|
79
61
|
specification_version: 3
|
80
62
|
summary: Library for doing approximate and phonetic matching of string
|
81
63
|
test_files: []
|
82
|
-
|