rubyfish 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,21 +1,20 @@
1
-
2
1
  RubyFish
3
2
  =========
4
3
 
5
- RubyFish is a ruby port of python library jellyfish (http://github.com/sunlightlabs/jellyfish) for doing approximate and phonetic matching of strings.
4
+ RubyFish is a ruby port of python library <a href = "http://github.com/sunlightlabs/jellyfish">jellyfish</a> for doing approximate and phonetic matching of strings.
5
+
6
+ -------------
6
7
 
7
8
  RubyFish is a project of AnjLab (c) 2010.
8
9
  All code is released under a BSD-style license, see LICENSE for details.
9
10
 
10
- Originally written by
11
- Written by Michael Stephens <mstephens@sunlightfoundation.com> and James Turk
12
- <jturk@sunlightfoundation.com>.
11
+ Originally written by <a href="mailto:mstephens@sunlightfoundation.com">Michael Stephens</a> and <a href="mailto:jturk@sunlightfoundation.com">James Turk</a>.
13
12
 
14
- Ported by Yury Korolev <yury.korolev@gmail.com>
15
- Source is available at http://github.com/anjlab/rubyfish
13
+ Ported by <a href="mailto:yury.korolev@gmail.com">Yury Korolev</a>
14
+ Source is available on <a href="http://github.com/anjlab/rubyfish">GitHub</a>
16
15
 
17
16
  Included Algorithms
18
- ===================
17
+ -------------------
19
18
 
20
19
  String comparison:
21
20
 
@@ -32,7 +31,7 @@ Phonetic encoding:
32
31
  * Double Metaphone
33
32
 
34
33
  Example Usage
35
- =============
34
+ -------------
36
35
 
37
36
  ruby-1.9.2-p0 > require 'rubyfish'
38
37
  ruby-1.9.2-p0 > RubyFish::Levenshtein.distance("jellyfish", "rubyfish")
@@ -1,41 +1,59 @@
1
- require 'matrix'
2
-
3
1
  module RubyFish::DamerauLevenshtein
4
2
 
5
- def distance a, b
3
+ # http://en.wikipedia.org/wiki/Levenshtein_distance
4
+ # http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
5
+
6
+ def _distance a, b, opts = {}
7
+ allow_swaps = opts[:allow_swaps]
8
+ ignore_case = opts[:ignore_case]
9
+
6
10
  as = a.to_s
7
11
  bs = b.to_s
12
+
13
+ if ignore_case
14
+ as.downcase!
15
+ bs.downcase!
16
+ end
8
17
 
9
18
  rows = as.size + 1
10
19
  cols = bs.size + 1
11
20
 
12
- dist = ::RubyFish::MMatrix.new(rows, cols)
13
-
14
- (0...rows).each {|i| dist[i, 0] = i}
15
- (0...cols).each {|j| dist[0, j] = j}
16
-
21
+ dist = [
22
+ Array.new(cols) {|k| k},
23
+ Array.new(cols) {0},
24
+ Array.new(cols) {0}
25
+ ]
26
+
17
27
  (1...rows).each do |i|
28
+ k = i % 3
29
+ dist[k][0] = i
30
+
18
31
  (1...cols).each do |j|
19
32
  cost = as[i - 1] == bs[j - 1] ? 0 : 1
20
33
 
21
34
  #minimum of deletion, insertion, substitution
22
- d1 = dist[i - 1, j] + 1
23
- d2 = dist[i, j - 1] + 1
24
- d3 = dist[i - 1, j - 1] + cost
35
+ d1 = dist[k - 1][j] + 1
36
+ d2 = dist[k][j - 1] + 1
37
+ d3 = dist[k - 1][j - 1] + cost
25
38
 
26
39
  d_now = [d1, d2, d3].min
27
40
 
28
- if i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
29
- d1 = dist[i - 2, j - 2] + cost
41
+ if allow_swaps && i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
42
+ d1 = dist[k - 2][j - 2] + cost
30
43
  d_now = [d_now, d1].min;
31
44
  end
32
45
 
33
- dist[i, j] = d_now;
46
+ dist[k][j] = d_now;
34
47
  end
35
48
  end
36
49
 
37
- dist[as.size, bs.size]
50
+ dist[(rows - 1) % 3][-1]
51
+ end
52
+
53
+ def distance a, b, opts = {}
54
+ _distance(a, b, :allow_swaps => true, :ignore_case => opts[:ignore_case])
38
55
  end
39
56
 
40
57
  module_function :distance
58
+ module_function :_distance
41
59
  end
@@ -1,7 +1,13 @@
1
1
  module RubyFish::Hamming
2
- def distance a, b
2
+ def distance a, b, opts={}
3
+ ignore_case = opts[:ignore_case]
3
4
  distance = 0
4
5
  as, bs = a.to_s, b.to_s
6
+
7
+ if ignore_case
8
+ as.downcase!
9
+ bs.downcase!
10
+ end
5
11
 
6
12
  short, long = [as, bs].sort
7
13
 
data/lib/rubyfish/jaro.rb CHANGED
@@ -2,8 +2,8 @@ module RubyFish::Jaro
2
2
  include ::RubyFish::JaroWinkler
3
3
  extend ::RubyFish::JaroWinkler
4
4
 
5
- def distance a, b
6
- _distance(a, b, :winklerize => false)
5
+ def distance a, b, opts={}
6
+ _distance(a, b, :winklerize => false, :ignore_case => opts[:ignore_case])
7
7
  end
8
8
 
9
9
  module_function :distance
@@ -3,9 +3,15 @@ module RubyFish::JaroWinkler
3
3
  def _distance a, b, opts = {}
4
4
  long_tolerance = opts[:long_tolerance]
5
5
  winklerize = opts[:winklerize]
6
+ ignore_case = opts[:ignore_case]
6
7
 
7
8
  as = a.to_s
8
9
  bs = b.to_s
10
+
11
+ if ignore_case
12
+ as.downcase!
13
+ bs.downcase!
14
+ end
9
15
 
10
16
  as_length = as.size
11
17
  bs_length = bs.size
@@ -98,7 +104,7 @@ module RubyFish::JaroWinkler
98
104
  end
99
105
 
100
106
  def distance a, b, opts = {}
101
- _distance(a, b, :winklerize => true)
107
+ _distance(a, b, :winklerize => true, :ignore_case => opts[:ignore_case])
102
108
  end
103
109
 
104
110
  module_function :distance
@@ -1,36 +1,13 @@
1
1
  module RubyFish::Levenshtein
2
2
 
3
- # http://en.wikipedia.org/wiki/Levenshtein_distance
4
- def distance a, b
5
- as, bs = a.to_s, b.to_s
3
+ include ::RubyFish::DamerauLevenshtein
4
+ extend ::RubyFish::DamerauLevenshtein
6
5
 
7
- as_length = as.size
8
- bs_length = bs.size
9
-
10
- rows = as_length + 1
11
- cols = bs_length + 1
12
-
13
- dist = ::RubyFish::MMatrix.new rows, cols
14
- (0...rows).each {|i| dist[i, 0] = i}
15
- (0...cols).each {|j| dist[0, j] = j}
16
-
17
- (1...cols).each do |j|
18
- (1...rows).each do |i|
19
- if as[i - 1] == bs[j - 1]
20
- dist[i, j] = dist[i - 1, j - 1]
21
- else
22
- d1 = dist[i - 1, j] + 1
23
- d2 = dist[i, j - 1] + 1
24
- d3 = dist[i - 1, j - 1] + 1
25
-
26
- dist[i, j] = [d1, d2, d3].min;
27
- end
28
- end
29
- end
30
-
31
- dist[as_length, bs_length];
6
+ def distance a, b, opts={}
7
+ _distance(a, b, :allowswaps => false, :ignore_case => opts[:ignore_case])
32
8
  end
33
9
 
34
10
  module_function :distance
35
11
 
12
+
36
13
  end
@@ -1,10 +1,17 @@
1
1
  module RubyFish::LongestSubsequence
2
2
 
3
3
  # http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_subsequence#Ruby
4
- def distance a, b
4
+ def distance a, b, opts={}
5
+ ignore_case = opts[:ignore_case]
6
+
5
7
  as = a.to_s
6
8
  bs = b.to_s
7
9
 
10
+ if ignore_case
11
+ as.downcase!
12
+ bs.downcase!
13
+ end
14
+
8
15
  rows = as.size
9
16
  cols = bs.size
10
17
 
@@ -1,9 +1,16 @@
1
1
  module RubyFish::LongestSubstring
2
2
 
3
3
  # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring#Ruby
4
- def distance a, b
4
+ def distance a, b, opts={}
5
+ ignore_case = opts[:ignore_case]
6
+
5
7
  as = a.to_s
6
8
  bs = b.to_s
9
+
10
+ if ignore_case
11
+ as.downcase!
12
+ bs.downcase!
13
+ end
7
14
 
8
15
  rows = as.size
9
16
  cols = bs.size
@@ -29,9 +36,16 @@ module RubyFish::LongestSubstring
29
36
  ans
30
37
  end
31
38
 
32
- def longest_substring a, b
39
+ def longest_substring a, b, opts={}
40
+ ignore_case = opts[:ignore_case]
41
+
33
42
  as = a.to_s
34
43
  bs = b.to_s
44
+
45
+ if ignore_case
46
+ as.downcase!
47
+ bs.downcase!
48
+ end
35
49
 
36
50
  rows = as.size
37
51
  cols = bs.size
@@ -70,8 +84,8 @@ module RubyFish::LongestSubstring
70
84
  res
71
85
  end
72
86
 
73
- def longest_substring_index(a, b)
74
- a.index(longest_substring(a, b))
87
+ def longest_substring_index(a, b, opts={})
88
+ a.index(longest_substring(a, b, :ignore_case => opts[:ignore_case]))
75
89
  end
76
90
 
77
91
  module_function :distance
@@ -1,3 +1,3 @@
1
1
  module RubyFish
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,33 +1,23 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rubyfish
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 4
9
- version: 0.0.4
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Yury Korolev
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-09-20 00:00:00 +04:00
18
- default_executable:
12
+ date: 2012-08-16 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: Port of http://github.com/sunlightlabs/jellyfish
22
- email:
15
+ email:
23
16
  - yury.korolev@gmail.com
24
17
  executables: []
25
-
26
18
  extensions: []
27
-
28
19
  extra_rdoc_files: []
29
-
30
- files:
20
+ files:
31
21
  - lib/rubyfish/damerau_levenshtein.rb
32
22
  - lib/rubyfish/double_metaphone.rb
33
23
  - lib/rubyfish/hamming.rb
@@ -43,40 +33,31 @@ files:
43
33
  - CHANGELOG.md
44
34
  - README.md
45
35
  - ROADMAP.md
46
- has_rdoc: true
47
36
  homepage: http://github.com/anjlab/rubyfish
48
37
  licenses: []
49
-
50
38
  post_install_message:
51
39
  rdoc_options: []
52
-
53
- require_paths:
40
+ require_paths:
54
41
  - lib
55
- required_ruby_version: !ruby/object:Gem::Requirement
42
+ required_ruby_version: !ruby/object:Gem::Requirement
56
43
  none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- hash: 3404657759252333384
61
- segments:
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ segments:
62
49
  - 0
63
- version: "0"
64
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ hash: -749425856707131905
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
52
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- segments:
70
- - 1
71
- - 3
72
- - 6
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
73
56
  version: 1.3.6
74
57
  requirements: []
75
-
76
58
  rubyforge_project: rubyfish
77
- rubygems_version: 1.3.7
59
+ rubygems_version: 1.8.24
78
60
  signing_key:
79
61
  specification_version: 3
80
62
  summary: Library for doing approximate and phonetic matching of string
81
63
  test_files: []
82
-