rubyfish 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,21 +1,20 @@
1
-
2
1
  RubyFish
3
2
  =========
4
3
 
5
- RubyFish is a ruby port of python library jellyfish (http://github.com/sunlightlabs/jellyfish) for doing approximate and phonetic matching of strings.
4
+ RubyFish is a Ruby port of the Python library <a href="http://github.com/sunlightlabs/jellyfish">jellyfish</a> for doing approximate and phonetic matching of strings.
5
+
6
+ -------------
6
7
 
7
8
  RubyFish is a project of AnjLab (c) 2010.
8
9
  All code is released under a BSD-style license, see LICENSE for details.
9
10
 
10
- Originally written by
11
- Written by Michael Stephens <mstephens@sunlightfoundation.com> and James Turk
12
- <jturk@sunlightfoundation.com>.
11
+ Originally written by <a href="mailto:mstephens@sunlightfoundation.com">Michael Stephens</a> and <a href="mailto:jturk@sunlightfoundation.com">James Turk</a>.
13
12
 
14
- Ported by Yury Korolev <yury.korolev@gmail.com>
15
- Source is available at http://github.com/anjlab/rubyfish
13
+ Ported by <a href="mailto:yury.korolev@gmail.com">Yury Korolev</a>
14
+ Source is available on <a href="http://github.com/anjlab/rubyfish">GitHub</a>
16
15
 
17
16
  Included Algorithms
18
- ===================
17
+ -------------------
19
18
 
20
19
  String comparison:
21
20
 
@@ -32,7 +31,7 @@ Phonetic encoding:
32
31
  * Double Metaphone
33
32
 
34
33
  Example Usage
35
- =============
34
+ -------------
36
35
 
37
36
  ruby-1.9.2-p0 > require 'rubyfish'
38
37
  ruby-1.9.2-p0 > RubyFish::Levenshtein.distance("jellyfish", "rubyfish")
@@ -1,41 +1,59 @@
1
- require 'matrix'
2
-
3
1
  module RubyFish::DamerauLevenshtein
4
2
 
5
- def distance a, b
3
+ # http://en.wikipedia.org/wiki/Levenshtein_distance
4
+ # http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
5
+
6
+ def _distance a, b, opts = {}
7
+ allow_swaps = opts[:allow_swaps]
8
+ ignore_case = opts[:ignore_case]
9
+
6
10
  as = a.to_s
7
11
  bs = b.to_s
12
+
13
+ if ignore_case
14
+ as.downcase!
15
+ bs.downcase!
16
+ end
8
17
 
9
18
  rows = as.size + 1
10
19
  cols = bs.size + 1
11
20
 
12
- dist = ::RubyFish::MMatrix.new(rows, cols)
13
-
14
- (0...rows).each {|i| dist[i, 0] = i}
15
- (0...cols).each {|j| dist[0, j] = j}
16
-
21
+ dist = [
22
+ Array.new(cols) {|k| k},
23
+ Array.new(cols) {0},
24
+ Array.new(cols) {0}
25
+ ]
26
+
17
27
  (1...rows).each do |i|
28
+ k = i % 3
29
+ dist[k][0] = i
30
+
18
31
  (1...cols).each do |j|
19
32
  cost = as[i - 1] == bs[j - 1] ? 0 : 1
20
33
 
21
34
  #minimum of deletion, insertion, substitution
22
- d1 = dist[i - 1, j] + 1
23
- d2 = dist[i, j - 1] + 1
24
- d3 = dist[i - 1, j - 1] + cost
35
+ d1 = dist[k - 1][j] + 1
36
+ d2 = dist[k][j - 1] + 1
37
+ d3 = dist[k - 1][j - 1] + cost
25
38
 
26
39
  d_now = [d1, d2, d3].min
27
40
 
28
- if i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
29
- d1 = dist[i - 2, j - 2] + cost
41
+ if allow_swaps && i > 2 && j > 2 && as[i - 1] == bs[j - 2] && as[i - 2] == bs[j - 1]
42
+ d1 = dist[k - 2][j - 2] + cost
30
43
  d_now = [d_now, d1].min;
31
44
  end
32
45
 
33
- dist[i, j] = d_now;
46
+ dist[k][j] = d_now;
34
47
  end
35
48
  end
36
49
 
37
- dist[as.size, bs.size]
50
+ dist[(rows - 1) % 3][-1]
51
+ end
52
+
53
+ def distance a, b, opts = {}
54
+ _distance(a, b, :allow_swaps => true, :ignore_case => opts[:ignore_case])
38
55
  end
39
56
 
40
57
  module_function :distance
58
+ module_function :_distance
41
59
  end
@@ -1,7 +1,13 @@
1
1
  module RubyFish::Hamming
2
- def distance a, b
2
+ def distance a, b, opts={}
3
+ ignore_case = opts[:ignore_case]
3
4
  distance = 0
4
5
  as, bs = a.to_s, b.to_s
6
+
7
+ if ignore_case
8
+ as.downcase!
9
+ bs.downcase!
10
+ end
5
11
 
6
12
  short, long = [as, bs].sort
7
13
 
data/lib/rubyfish/jaro.rb CHANGED
@@ -2,8 +2,8 @@ module RubyFish::Jaro
2
2
  include ::RubyFish::JaroWinkler
3
3
  extend ::RubyFish::JaroWinkler
4
4
 
5
- def distance a, b
6
- _distance(a, b, :winklerize => false)
5
+ def distance a, b, opts={}
6
+ _distance(a, b, :winklerize => false, :ignore_case => opts[:ignore_case])
7
7
  end
8
8
 
9
9
  module_function :distance
@@ -3,9 +3,15 @@ module RubyFish::JaroWinkler
3
3
  def _distance a, b, opts = {}
4
4
  long_tolerance = opts[:long_tolerance]
5
5
  winklerize = opts[:winklerize]
6
+ ignore_case = opts[:ignore_case]
6
7
 
7
8
  as = a.to_s
8
9
  bs = b.to_s
10
+
11
+ if ignore_case
12
+ as.downcase!
13
+ bs.downcase!
14
+ end
9
15
 
10
16
  as_length = as.size
11
17
  bs_length = bs.size
@@ -98,7 +104,7 @@ module RubyFish::JaroWinkler
98
104
  end
99
105
 
100
106
  def distance a, b, opts = {}
101
- _distance(a, b, :winklerize => true)
107
+ _distance(a, b, :winklerize => true, :ignore_case => opts[:ignore_case])
102
108
  end
103
109
 
104
110
  module_function :distance
@@ -1,36 +1,13 @@
1
1
  module RubyFish::Levenshtein
2
2
 
3
- # http://en.wikipedia.org/wiki/Levenshtein_distance
4
- def distance a, b
5
- as, bs = a.to_s, b.to_s
3
+ include ::RubyFish::DamerauLevenshtein
4
+ extend ::RubyFish::DamerauLevenshtein
6
5
 
7
- as_length = as.size
8
- bs_length = bs.size
9
-
10
- rows = as_length + 1
11
- cols = bs_length + 1
12
-
13
- dist = ::RubyFish::MMatrix.new rows, cols
14
- (0...rows).each {|i| dist[i, 0] = i}
15
- (0...cols).each {|j| dist[0, j] = j}
16
-
17
- (1...cols).each do |j|
18
- (1...rows).each do |i|
19
- if as[i - 1] == bs[j - 1]
20
- dist[i, j] = dist[i - 1, j - 1]
21
- else
22
- d1 = dist[i - 1, j] + 1
23
- d2 = dist[i, j - 1] + 1
24
- d3 = dist[i - 1, j - 1] + 1
25
-
26
- dist[i, j] = [d1, d2, d3].min;
27
- end
28
- end
29
- end
30
-
31
- dist[as_length, bs_length];
6
+ def distance a, b, opts={}
7
+ _distance(a, b, :allowswaps => false, :ignore_case => opts[:ignore_case])
32
8
  end
33
9
 
34
10
  module_function :distance
35
11
 
12
+
36
13
  end
@@ -1,10 +1,17 @@
1
1
  module RubyFish::LongestSubsequence
2
2
 
3
3
  # http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Longest_common_subsequence#Ruby
4
- def distance a, b
4
+ def distance a, b, opts={}
5
+ ignore_case = opts[:ignore_case]
6
+
5
7
  as = a.to_s
6
8
  bs = b.to_s
7
9
 
10
+ if ignore_case
11
+ as.downcase!
12
+ bs.downcase!
13
+ end
14
+
8
15
  rows = as.size
9
16
  cols = bs.size
10
17
 
@@ -1,9 +1,16 @@
1
1
  module RubyFish::LongestSubstring
2
2
 
3
3
  # http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_substring#Ruby
4
- def distance a, b
4
+ def distance a, b, opts={}
5
+ ignore_case = opts[:ignore_case]
6
+
5
7
  as = a.to_s
6
8
  bs = b.to_s
9
+
10
+ if ignore_case
11
+ as.downcase!
12
+ bs.downcase!
13
+ end
7
14
 
8
15
  rows = as.size
9
16
  cols = bs.size
@@ -29,9 +36,16 @@ module RubyFish::LongestSubstring
29
36
  ans
30
37
  end
31
38
 
32
- def longest_substring a, b
39
+ def longest_substring a, b, opts={}
40
+ ignore_case = opts[:ignore_case]
41
+
33
42
  as = a.to_s
34
43
  bs = b.to_s
44
+
45
+ if ignore_case
46
+ as.downcase!
47
+ bs.downcase!
48
+ end
35
49
 
36
50
  rows = as.size
37
51
  cols = bs.size
@@ -70,8 +84,8 @@ module RubyFish::LongestSubstring
70
84
  res
71
85
  end
72
86
 
73
- def longest_substring_index(a, b)
74
- a.index(longest_substring(a, b))
87
+ def longest_substring_index(a, b, opts={})
88
+ a.index(longest_substring(a, b, :ignore_case => opts[:ignore_case]))
75
89
  end
76
90
 
77
91
  module_function :distance
@@ -1,3 +1,3 @@
1
1
  module RubyFish
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,33 +1,23 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rubyfish
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 0
8
- - 4
9
- version: 0.0.4
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Yury Korolev
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-09-20 00:00:00 +04:00
18
- default_executable:
12
+ date: 2012-08-16 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: Port of http://github.com/sunlightlabs/jellyfish
22
- email:
15
+ email:
23
16
  - yury.korolev@gmail.com
24
17
  executables: []
25
-
26
18
  extensions: []
27
-
28
19
  extra_rdoc_files: []
29
-
30
- files:
20
+ files:
31
21
  - lib/rubyfish/damerau_levenshtein.rb
32
22
  - lib/rubyfish/double_metaphone.rb
33
23
  - lib/rubyfish/hamming.rb
@@ -43,40 +33,31 @@ files:
43
33
  - CHANGELOG.md
44
34
  - README.md
45
35
  - ROADMAP.md
46
- has_rdoc: true
47
36
  homepage: http://github.com/anjlab/rubyfish
48
37
  licenses: []
49
-
50
38
  post_install_message:
51
39
  rdoc_options: []
52
-
53
- require_paths:
40
+ require_paths:
54
41
  - lib
55
- required_ruby_version: !ruby/object:Gem::Requirement
42
+ required_ruby_version: !ruby/object:Gem::Requirement
56
43
  none: false
57
- requirements:
58
- - - ">="
59
- - !ruby/object:Gem::Version
60
- hash: 3404657759252333384
61
- segments:
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ segments:
62
49
  - 0
63
- version: "0"
64
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ hash: -749425856707131905
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
52
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- segments:
70
- - 1
71
- - 3
72
- - 6
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
73
56
  version: 1.3.6
74
57
  requirements: []
75
-
76
58
  rubyforge_project: rubyfish
77
- rubygems_version: 1.3.7
59
+ rubygems_version: 1.8.24
78
60
  signing_key:
79
61
  specification_version: 3
80
62
  summary: Library for doing approximate and phonetic matching of strings
81
63
  test_files: []
82
-