string_metric 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 229ed953c5aadc5c929cb2b47ad7898cdc19d742
4
- data.tar.gz: b41a66496dd7c0cb2627af22b9764d2bf8d06c37
3
+ metadata.gz: 08ed461c4edc7dfd5e32fb34e93b13b4442c5167
4
+ data.tar.gz: 69d6cd7273238442f184d6a63fec949ffcc6f6db
5
5
  SHA512:
6
- metadata.gz: c0e932fbe3e03ec7b8268a2cc85f276b7b0041cdaf8fea5fb98d7e8a966ded52280fbe51b01a89da4e26430020a4995cc3b043110f2a95731e70c07ad3a30748
7
- data.tar.gz: 19f270701a5e4383b84ab089ea534e7930fb980ff7dba494c51201436c69408cf54b0cca7d64098aefdd24214cc6e250168a1dc975abbf49e3672ba262b7eead
6
+ metadata.gz: f5d7e8664514c5f31075755c3b09b2a7c4a0cd963601aefd26f1b928426dea2051a12b8abd039dddb0d8986fe25c6ce860d8c4a867757a4bf5b5a1883c6ace1e
7
+ data.tar.gz: d9b631aad62da262d7a9afec9c7c1295e8b238cbfe4ef432354e0a2c42d9bf3b777758cf00abe2b14b702069eeb335df505ba9db37faa165335d748f4d748f2e
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class Experiment
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  m = from.length
12
8
  n = to.length
13
9
 
@@ -25,4 +21,4 @@ module StringMetric
25
21
  end
26
22
  end
27
23
  end
28
- end
24
+ end
@@ -4,24 +4,32 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithFullMatrix
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
14
10
  substitution_cost = options.fetch(:substitution_cost, 1)
15
11
 
16
- d = (0..to.size).map do |i|
17
- [0] * (from.size + 1)
12
+ m = from.length
13
+ n = to.length
14
+
15
+ if max_distance && (m - n).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
23
+ d = (0..n).map do |i|
24
+ [0] * (m + 1)
18
25
  end
19
26
 
20
- (1..from.size).each { |j| d[0][j] = j }
21
- (1..to.size).each { |i| d[i][0] = i }
27
+ (1..m).each { |j| d[0][j] = j }
28
+ (1..n).each { |i| d[i][0] = i }
22
29
 
23
- (1..from.size).each do |j|
24
- (1..to.size).each do |i|
30
+ to_column = 0
31
+ (1..m).each do |j|
32
+ (1..n).each do |i|
25
33
  if from[j-1] == to[i-1]
26
34
  d[i][j] = d[i -1][j-1]
27
35
  else
@@ -30,12 +38,13 @@ module StringMetric
30
38
  d[i-1][j-1] + substitution_cost # substitution
31
39
  ].min
32
40
  end
41
+ to_column = i
33
42
  end
34
43
 
35
- break if max_distance and d[j][j] > max_distance
44
+ break if max_distance and d[to_column].min > max_distance
36
45
  end
37
46
 
38
- x = d[to.size][from.size]
47
+ x = d[n][m]
39
48
  if max_distance && x > max_distance
40
49
  max_distance
41
50
  else
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithTwoMatrixRows
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
@@ -16,13 +12,19 @@ module StringMetric
16
12
  m = from.length
17
13
  n = to.length
18
14
 
15
+ if max_distance && (n - m).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
19
23
  v0 = (0..m).to_a
20
- v1 = []
21
24
  x = 0
22
25
 
23
26
  n.times do |i|
24
- x = v1[0] = i + 1
25
-
27
+ current = x = i + 1
26
28
  sub_cell = v0[0]
27
29
 
28
30
  m.times do |j|
@@ -30,20 +32,18 @@ module StringMetric
30
32
 
31
33
  ins_cell = v0[j+1]
32
34
 
33
- x = [x + deletion_cost, # deletion
35
+ x = [current + deletion_cost, # deletion
34
36
  ins_cell + insertion_cost, # insertion
35
37
  sub_cell + cost # substitution
36
38
  ].min
37
39
 
38
-
39
- v1[j + 1] = x
40
-
40
+ v0[j] = current
41
+ current = x
41
42
  sub_cell = ins_cell
42
43
  end
43
44
 
44
- break if max_distance && v0[i] > max_distance
45
-
46
- v0 = v1.dup
45
+ v0[m] = x
46
+ break if max_distance && v0.min > max_distance
47
47
  end
48
48
 
49
49
  if max_distance && x > max_distance
@@ -54,4 +54,4 @@ module StringMetric
54
54
  end
55
55
  end
56
56
  end
57
- end
57
+ end
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithTwoMatrixRowsOptimized
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options[:insertion_cost] || 1
13
9
  deletion_cost = options[:deletion_cost] || 1
@@ -16,36 +12,40 @@ module StringMetric
16
12
  m = from.length
17
13
  n = to.length
18
14
 
15
+ if max_distance && (n - m).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
19
23
  from = from.codepoints.to_a
20
24
  to = to.codepoints.to_a
21
25
 
22
26
  v0 = (0..m).to_a
23
- v1 = []
24
27
  x = 0
25
28
 
26
29
  n.times do |i|
27
- x = v1[0] = i + 1
28
-
30
+ current = x = i + 1
29
31
  sub_cell = v0[0]
30
32
 
31
33
  m.times do |j|
32
34
  cost = (from[j] == to[i]) ? 0 : substitution_cost
33
-
34
35
  ins_cell = v0[j + 1]
35
36
 
36
- x = [x + deletion_cost, # deletion
37
+ x = [current + deletion_cost, # deletion
37
38
  ins_cell + insertion_cost, # insertion
38
39
  sub_cell + cost # substitution
39
40
  ].sort![0]
40
41
 
41
- v1[j + 1] = x
42
-
42
+ v0[j] = current
43
+ current = x
43
44
  sub_cell = ins_cell
44
45
  end
45
46
 
46
- break if max_distance && v0[i] > max_distance
47
-
48
- v0 = v1.dup
47
+ v0[m] = x
48
+ break if max_distance && v0.sort[0] > max_distance
49
49
  end
50
50
 
51
51
  if max_distance && x > max_distance
@@ -4,15 +4,19 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class Recursive
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
14
10
  substitution_cost = options.fetch(:substitution_cost, 1)
15
11
 
12
+ if max_distance && (from.size - to.size).abs >= max_distance
13
+ return max_distance
14
+ end
15
+
16
+ return 0 if from == to
17
+ return to.size if from.size.zero?
18
+ return from.size if to.size.zero?
19
+
16
20
  if from.chars.to_a.last == to.chars.to_a.last
17
21
  cost = 0
18
22
  else
@@ -35,4 +39,4 @@ module StringMetric
35
39
  end
36
40
  end
37
41
  end
38
- end
42
+ end
@@ -1,3 +1,3 @@
1
1
  module StringMetric
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -6,6 +6,9 @@ hello, hellol, 1
6
6
  hello, heloll, 2
7
7
  hello, cheese, 4
8
8
  hello, saint, 5
9
+ hellol, hello, 1
9
10
  hello,, 5
11
+ , hello, 5
12
+ , , 0
10
13
  sitting, kitten, 3
11
- αλφα, βητα, 3
14
+ αλφα, βητα, 3
@@ -24,11 +24,48 @@ shared_examples "Levenshtein Distance" do |options|
24
24
 
25
25
  context "when max_distance is passed as option" do
26
26
  context "and normal distance is greater than max_distance" do
27
- let(:max_distance) { 2 }
28
-
29
27
  it "is trimmed to max_distance" do
30
- expect(described_class.distance("kitten", "sitting",
31
- max_distance: max_distance)).to eq max_distance
28
+ expect(described_class.distance("gumbo", "gambol", max_distance: 1)).to eq 1
29
+ expect(described_class.distance("test", "tasf", max_distance: 1)).to eq 1
30
+ expect(described_class.distance("kitten", "sitting", max_distance: 2)).to eq 2
31
+ expect(described_class.distance("kitten", "kittenss", max_distance: 1)).to eq 1
32
+ expect(described_class.distance("kittenss", "kitten", max_distance: 1)).to eq 1
33
+ expect(described_class.distance("sitting", "kitten", max_distance: 2)).to eq 2
34
+ expect(described_class.distance("gambol", "gumbo", max_distance: 1)).to eq 1
35
+ expect(described_class.distance("kitten", "", max_distance: 2)).to eq 2
36
+ expect(described_class.distance("", "kitten", max_distance: 3)).to eq 3
37
+ end
38
+ end
39
+ context "and normal distance is less than max_distance" do
40
+ it "is calculated distance" do
41
+ expect(described_class.distance("", "t", max_distance: 2)).to eq 1
42
+ expect(described_class.distance("t", "", max_distance: 3)).to eq 1
43
+ expect(described_class.distance("test", "test", max_distance: 1)).to eq 0
44
+ expect(described_class.distance("test", "tent", max_distance: 2)).to eq 1
45
+ expect(described_class.distance("gumbo", "gambol", max_distance: 3)).to eq 2
46
+ expect(described_class.distance("kitten", "sitting", max_distance: 4)).to eq 3
47
+ expect(described_class.distance("kitten", "kittenss", max_distance: 4)).to eq 2
48
+ expect(described_class.distance("kittenss", "kitten", max_distance: 4)).to eq 2
49
+ expect(described_class.distance("sitting", "kitten", max_distance: 4)).to eq 3
50
+ expect(described_class.distance("gambol", "gumbo", max_distance: 3)).to eq 2
51
+ expect(described_class.distance("", "cat", max_distance: 4)).to eq 3
52
+ expect(described_class.distance("cat", "", max_distance: 5)).to eq 3
53
+ expect(described_class.distance("", "", max_distance: 2)).to eq 0
54
+ end
55
+ end
56
+ context "and normal distance is same as max_distance" do
57
+ it "is calculated distance" do
58
+ expect(described_class.distance("test", "test", max_distance: 0)).to eq 0
59
+ expect(described_class.distance("test", "tent", max_distance: 1)).to eq 1
60
+ expect(described_class.distance("gumbo", "gambol", max_distance: 2)).to eq 2
61
+ expect(described_class.distance("kitten", "sitting", max_distance: 3)).to eq 3
62
+ expect(described_class.distance("kitten", "kittenss", max_distance: 2)).to eq 2
63
+ expect(described_class.distance("kittenss", "kitten", max_distance: 2)).to eq 2
64
+ expect(described_class.distance("sitting", "kitten", max_distance: 3)).to eq 3
65
+ expect(described_class.distance("gambol", "gumbo", max_distance: 2)).to eq 2
66
+ expect(described_class.distance("", "cat", max_distance: 3)).to eq 3
67
+ expect(described_class.distance("cat", "", max_distance: 3)).to eq 3
68
+ expect(described_class.distance("", "", max_distance: 0)).to eq 0
32
69
  end
33
70
  end
34
71
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_metric
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Giorgos Tsiftsis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-03 00:00:00.000000000 Z
11
+ date: 2015-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -133,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
133
  version: '0'
134
134
  requirements: []
135
135
  rubyforge_project:
136
- rubygems_version: 2.2.0
136
+ rubygems_version: 2.4.6
137
137
  signing_key:
138
138
  specification_version: 4
139
139
  summary: A simple library with String Metric algorithms
@@ -147,3 +147,4 @@ test_files:
147
147
  - spec/lib/levenshtein_spec.rb
148
148
  - spec/spec_helper.rb
149
149
  - spec/support/levenshtein.rb
150
+ has_rdoc: