string_metric 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 229ed953c5aadc5c929cb2b47ad7898cdc19d742
4
- data.tar.gz: b41a66496dd7c0cb2627af22b9764d2bf8d06c37
3
+ metadata.gz: 08ed461c4edc7dfd5e32fb34e93b13b4442c5167
4
+ data.tar.gz: 69d6cd7273238442f184d6a63fec949ffcc6f6db
5
5
  SHA512:
6
- metadata.gz: c0e932fbe3e03ec7b8268a2cc85f276b7b0041cdaf8fea5fb98d7e8a966ded52280fbe51b01a89da4e26430020a4995cc3b043110f2a95731e70c07ad3a30748
7
- data.tar.gz: 19f270701a5e4383b84ab089ea534e7930fb980ff7dba494c51201436c69408cf54b0cca7d64098aefdd24214cc6e250168a1dc975abbf49e3672ba262b7eead
6
+ metadata.gz: f5d7e8664514c5f31075755c3b09b2a7c4a0cd963601aefd26f1b928426dea2051a12b8abd039dddb0d8986fe25c6ce860d8c4a867757a4bf5b5a1883c6ace1e
7
+ data.tar.gz: d9b631aad62da262d7a9afec9c7c1295e8b238cbfe4ef432354e0a2c42d9bf3b777758cf00abe2b14b702069eeb335df505ba9db37faa165335d748f4d748f2e
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class Experiment
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  m = from.length
12
8
  n = to.length
13
9
 
@@ -25,4 +21,4 @@ module StringMetric
25
21
  end
26
22
  end
27
23
  end
28
- end
24
+ end
@@ -4,24 +4,32 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithFullMatrix
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
14
10
  substitution_cost = options.fetch(:substitution_cost, 1)
15
11
 
16
- d = (0..to.size).map do |i|
17
- [0] * (from.size + 1)
12
+ m = from.length
13
+ n = to.length
14
+
15
+ if max_distance && (m - n).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
23
+ d = (0..n).map do |i|
24
+ [0] * (m + 1)
18
25
  end
19
26
 
20
- (1..from.size).each { |j| d[0][j] = j }
21
- (1..to.size).each { |i| d[i][0] = i }
27
+ (1..m).each { |j| d[0][j] = j }
28
+ (1..n).each { |i| d[i][0] = i }
22
29
 
23
- (1..from.size).each do |j|
24
- (1..to.size).each do |i|
30
+ to_column = 0
31
+ (1..m).each do |j|
32
+ (1..n).each do |i|
25
33
  if from[j-1] == to[i-1]
26
34
  d[i][j] = d[i -1][j-1]
27
35
  else
@@ -30,12 +38,13 @@ module StringMetric
30
38
  d[i-1][j-1] + substitution_cost # substitution
31
39
  ].min
32
40
  end
41
+ to_column = i
33
42
  end
34
43
 
35
- break if max_distance and d[j][j] > max_distance
44
+ break if max_distance and d[to_column].min > max_distance
36
45
  end
37
46
 
38
- x = d[to.size][from.size]
47
+ x = d[n][m]
39
48
  if max_distance && x > max_distance
40
49
  max_distance
41
50
  else
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithTwoMatrixRows
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
@@ -16,13 +12,19 @@ module StringMetric
16
12
  m = from.length
17
13
  n = to.length
18
14
 
15
+ if max_distance && (n - m).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
19
23
  v0 = (0..m).to_a
20
- v1 = []
21
24
  x = 0
22
25
 
23
26
  n.times do |i|
24
- x = v1[0] = i + 1
25
-
27
+ current = x = i + 1
26
28
  sub_cell = v0[0]
27
29
 
28
30
  m.times do |j|
@@ -30,20 +32,18 @@ module StringMetric
30
32
 
31
33
  ins_cell = v0[j+1]
32
34
 
33
- x = [x + deletion_cost, # deletion
35
+ x = [current + deletion_cost, # deletion
34
36
  ins_cell + insertion_cost, # insertion
35
37
  sub_cell + cost # substitution
36
38
  ].min
37
39
 
38
-
39
- v1[j + 1] = x
40
-
40
+ v0[j] = current
41
+ current = x
41
42
  sub_cell = ins_cell
42
43
  end
43
44
 
44
- break if max_distance && v0[i] > max_distance
45
-
46
- v0 = v1.dup
45
+ v0[m] = x
46
+ break if max_distance && v0.min > max_distance
47
47
  end
48
48
 
49
49
  if max_distance && x > max_distance
@@ -54,4 +54,4 @@ module StringMetric
54
54
  end
55
55
  end
56
56
  end
57
- end
57
+ end
@@ -4,10 +4,6 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class IterativeWithTwoMatrixRowsOptimized
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options[:insertion_cost] || 1
13
9
  deletion_cost = options[:deletion_cost] || 1
@@ -16,36 +12,40 @@ module StringMetric
16
12
  m = from.length
17
13
  n = to.length
18
14
 
15
+ if max_distance && (n - m).abs >= max_distance
16
+ return max_distance
17
+ end
18
+
19
+ return 0 if from == to
20
+ return n if m.zero?
21
+ return m if n.zero?
22
+
19
23
  from = from.codepoints.to_a
20
24
  to = to.codepoints.to_a
21
25
 
22
26
  v0 = (0..m).to_a
23
- v1 = []
24
27
  x = 0
25
28
 
26
29
  n.times do |i|
27
- x = v1[0] = i + 1
28
-
30
+ current = x = i + 1
29
31
  sub_cell = v0[0]
30
32
 
31
33
  m.times do |j|
32
34
  cost = (from[j] == to[i]) ? 0 : substitution_cost
33
-
34
35
  ins_cell = v0[j + 1]
35
36
 
36
- x = [x + deletion_cost, # deletion
37
+ x = [current + deletion_cost, # deletion
37
38
  ins_cell + insertion_cost, # insertion
38
39
  sub_cell + cost # substitution
39
40
  ].sort![0]
40
41
 
41
- v1[j + 1] = x
42
-
42
+ v0[j] = current
43
+ current = x
43
44
  sub_cell = ins_cell
44
45
  end
45
46
 
46
- break if max_distance && v0[i] > max_distance
47
-
48
- v0 = v1.dup
47
+ v0[m] = x
48
+ break if max_distance && v0.sort[0] > max_distance
49
49
  end
50
50
 
51
51
  if max_distance && x > max_distance
@@ -4,15 +4,19 @@ module StringMetric
4
4
  module Levenshtein
5
5
  class Recursive
6
6
  def self.distance(from, to, options = {})
7
- return 0 if from == to
8
- return to.size if from.size.zero?
9
- return from.size if to.size.zero?
10
-
11
7
  max_distance = options[:max_distance]
12
8
  insertion_cost = options.fetch(:insertion_cost, 1)
13
9
  deletion_cost = options.fetch(:deletion_cost, 1)
14
10
  substitution_cost = options.fetch(:substitution_cost, 1)
15
11
 
12
+ if max_distance && (from.size - to.size).abs >= max_distance
13
+ return max_distance
14
+ end
15
+
16
+ return 0 if from == to
17
+ return to.size if from.size.zero?
18
+ return from.size if to.size.zero?
19
+
16
20
  if from.chars.to_a.last == to.chars.to_a.last
17
21
  cost = 0
18
22
  else
@@ -35,4 +39,4 @@ module StringMetric
35
39
  end
36
40
  end
37
41
  end
38
- end
42
+ end
@@ -1,3 +1,3 @@
1
1
  module StringMetric
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -6,6 +6,9 @@ hello, hellol, 1
6
6
  hello, heloll, 2
7
7
  hello, cheese, 4
8
8
  hello, saint, 5
9
+ hellol, hello, 1
9
10
  hello,, 5
11
+ , hello, 5
12
+ , , 0
10
13
  sitting, kitten, 3
11
- αλφα, βητα, 3
14
+ αλφα, βητα, 3
@@ -24,11 +24,48 @@ shared_examples "Levenshtein Distance" do |options|
24
24
 
25
25
  context "when max_distance is passed as option" do
26
26
  context "and normal distance is greater than max_distance" do
27
- let(:max_distance) { 2 }
28
-
29
27
  it "is trimmed to max_distance" do
30
- expect(described_class.distance("kitten", "sitting",
31
- max_distance: max_distance)).to eq max_distance
28
+ expect(described_class.distance("gumbo", "gambol", max_distance: 1)).to eq 1
29
+ expect(described_class.distance("test", "tasf", max_distance: 1)).to eq 1
30
+ expect(described_class.distance("kitten", "sitting", max_distance: 2)).to eq 2
31
+ expect(described_class.distance("kitten", "kittenss", max_distance: 1)).to eq 1
32
+ expect(described_class.distance("kittenss", "kitten", max_distance: 1)).to eq 1
33
+ expect(described_class.distance("sitting", "kitten", max_distance: 2)).to eq 2
34
+ expect(described_class.distance("gambol", "gumbo", max_distance: 1)).to eq 1
35
+ expect(described_class.distance("kitten", "", max_distance: 2)).to eq 2
36
+ expect(described_class.distance("", "kitten", max_distance: 3)).to eq 3
37
+ end
38
+ end
39
+ context "and normal distance is less than max_distance" do
40
+ it "is calculated distance" do
41
+ expect(described_class.distance("", "t", max_distance: 2)).to eq 1
42
+ expect(described_class.distance("t", "", max_distance: 3)).to eq 1
43
+ expect(described_class.distance("test", "test", max_distance: 1)).to eq 0
44
+ expect(described_class.distance("test", "tent", max_distance: 2)).to eq 1
45
+ expect(described_class.distance("gumbo", "gambol", max_distance: 3)).to eq 2
46
+ expect(described_class.distance("kitten", "sitting", max_distance: 4)).to eq 3
47
+ expect(described_class.distance("kitten", "kittenss", max_distance: 4)).to eq 2
48
+ expect(described_class.distance("kittenss", "kitten", max_distance: 4)).to eq 2
49
+ expect(described_class.distance("sitting", "kitten", max_distance: 4)).to eq 3
50
+ expect(described_class.distance("gambol", "gumbo", max_distance: 3)).to eq 2
51
+ expect(described_class.distance("", "cat", max_distance: 4)).to eq 3
52
+ expect(described_class.distance("cat", "", max_distance: 5)).to eq 3
53
+ expect(described_class.distance("", "", max_distance: 2)).to eq 0
54
+ end
55
+ end
56
+ context "and normal distance is same as max_distance" do
57
+ it "is calculated distance" do
58
+ expect(described_class.distance("test", "test", max_distance: 0)).to eq 0
59
+ expect(described_class.distance("test", "tent", max_distance: 1)).to eq 1
60
+ expect(described_class.distance("gumbo", "gambol", max_distance: 2)).to eq 2
61
+ expect(described_class.distance("kitten", "sitting", max_distance: 3)).to eq 3
62
+ expect(described_class.distance("kitten", "kittenss", max_distance: 2)).to eq 2
63
+ expect(described_class.distance("kittenss", "kitten", max_distance: 2)).to eq 2
64
+ expect(described_class.distance("sitting", "kitten", max_distance: 3)).to eq 3
65
+ expect(described_class.distance("gambol", "gumbo", max_distance: 2)).to eq 2
66
+ expect(described_class.distance("", "cat", max_distance: 3)).to eq 3
67
+ expect(described_class.distance("cat", "", max_distance: 3)).to eq 3
68
+ expect(described_class.distance("", "", max_distance: 0)).to eq 0
32
69
  end
33
70
  end
34
71
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_metric
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Giorgos Tsiftsis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-03 00:00:00.000000000 Z
11
+ date: 2015-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -133,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
133
  version: '0'
134
134
  requirements: []
135
135
  rubyforge_project:
136
- rubygems_version: 2.2.0
136
+ rubygems_version: 2.4.6
137
137
  signing_key:
138
138
  specification_version: 4
139
139
  summary: A simple library with String Metric algorithms
@@ -147,3 +147,4 @@ test_files:
147
147
  - spec/lib/levenshtein_spec.rb
148
148
  - spec/spec_helper.rb
149
149
  - spec/support/levenshtein.rb
150
+ has_rdoc: