string_metric 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 229ed953c5aadc5c929cb2b47ad7898cdc19d742
|
4
|
+
data.tar.gz: b41a66496dd7c0cb2627af22b9764d2bf8d06c37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0e932fbe3e03ec7b8268a2cc85f276b7b0041cdaf8fea5fb98d7e8a966ded52280fbe51b01a89da4e26430020a4995cc3b043110f2a95731e70c07ad3a30748
|
7
|
+
data.tar.gz: 19f270701a5e4383b84ab089ea534e7930fb980ff7dba494c51201436c69408cf54b0cca7d64098aefdd24214cc6e250168a1dc975abbf49e3672ba262b7eead
|
data/README.md
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
[](https://travis-ci.org/skroutz/string_metric)
|
4
4
|
[](https://codeclimate.com/github/skroutz/string_metric)
|
5
5
|
[](https://coveralls.io/r/skroutz/string_metric?branch=master)
|
6
|
+
[](http://badge.fury.io/rb/string_metric)
|
7
|
+
[](https://gemnasium.com/skroutz/string_metric)
|
6
8
|
|
7
9
|
A simple library with String Metric algorithms. If you want to read more about
|
8
10
|
String Metric algorithms please read [here](https://en.wikipedia.org/wiki/String_metric).
|
9
11
|
|
10
12
|
This library wants to support __MRI__ (1.9.3, 2.0.0, 2.1.0), __JRuby__ and
|
11
|
-
|
13
|
+
__Rubinius__.
|
12
14
|
|
13
15
|
## Installation
|
14
16
|
|
@@ -42,13 +44,14 @@ __Options__
|
|
42
44
|
* `:deletion_cost`: It overrides the default (equals to 1) deletion penalty.
|
43
45
|
Can be `Fixnum` or `Float`.
|
44
46
|
|
45
|
-
* `:
|
47
|
+
* `:substitution_cost`: It overrides the default (equals to 1) substitution
|
46
48
|
penalty. Can be `Fixnum` or `Float`.
|
47
49
|
|
48
50
|
* `:strategy`: The desired strategy for Levenshtein distance. Supported
|
49
|
-
strategies are `:recursive`, `:two_matrix_rows`, `:
|
50
|
-
`:experiment`. The default strategy is
|
51
|
-
|
51
|
+
strategies are `:recursive`, `:two_matrix_rows`, `:two_matrix_rows_v2`,
|
52
|
+
`:full_matrix` and `:experiment`. The default strategy is
|
53
|
+
`:two_matrix_rows_v2` for MRI and `:two_matrix_rows` for other platforms.
|
54
|
+
One should not depend on `:experiment` strategy.
|
52
55
|
|
53
56
|
__Examples__
|
54
57
|
|
@@ -96,15 +99,15 @@ or you can choose to benchmark a specific algorithm like:
|
|
96
99
|
|
97
100
|
__Levenshtein__
|
98
101
|
|
102
|
+
Implementation | User | Real
|
103
|
+
-------------------------------------------------|-----------|-----------
|
104
|
+
Levenshtein::IterativeWithFullMatrix | 2.260000 | 2.265873
|
105
|
+
Levenshtein::IterativeWithTwoMatrixRows | 1.970000 | 1.971205
|
106
|
+
Levenshtein::Experiment | 1.680000 | 1.684419
|
107
|
+
Levenshtein::IterativeWithTwoMatrixRowsOptimized | 1.270000 | 1.269643
|
108
|
+
Text::Levenshtein (from gem text) | 2.180000 | 2.186258
|
99
109
|
|
100
|
-
|
101
|
-
--------------------------------------------|-----------|-----------
|
102
|
-
Levenshtein::IterativeWithFullMatrix | 0.480000 | 0.475662
|
103
|
-
Levenshtein::IterativeWithTwoMatrixRows | 0.350000 | 0.352388
|
104
|
-
Levenshtein::Experiment | 0.420000 | 0.420000
|
105
|
-
Text::Levenshtein (from gem text) | 0.400000 | 0.400346
|
106
|
-
|
107
|
-
_Currently the set of fixtures is very small_
|
110
|
+
_Currently the set of fixtures is very small - ruby 2.1.0 is used_
|
108
111
|
|
109
112
|
## Other implementations
|
110
113
|
|
@@ -131,4 +134,4 @@ __Various__
|
|
131
134
|
|
132
135
|
## Licence
|
133
136
|
|
134
|
-
string_metric is licensed under MIT. See [License](LICENSE.txt)
|
137
|
+
string_metric is licensed under MIT. See [License](LICENSE.txt)
|
@@ -9,13 +9,16 @@ module StringMetric
|
|
9
9
|
return from.size if to.size.zero?
|
10
10
|
|
11
11
|
max_distance = options[:max_distance]
|
12
|
-
insertion_cost = options
|
13
|
-
deletion_cost = options
|
14
|
-
substitution_cost = options
|
12
|
+
insertion_cost = options[:insertion_cost] || 1
|
13
|
+
deletion_cost = options[:deletion_cost] || 1
|
14
|
+
substitution_cost = options[:substitution_cost] || 1
|
15
15
|
|
16
16
|
m = from.length
|
17
17
|
n = to.length
|
18
18
|
|
19
|
+
from = from.codepoints.to_a
|
20
|
+
to = to.codepoints.to_a
|
21
|
+
|
19
22
|
v0 = (0..m).to_a
|
20
23
|
v1 = []
|
21
24
|
x = 0
|
@@ -28,14 +31,12 @@ module StringMetric
|
|
28
31
|
m.times do |j|
|
29
32
|
cost = (from[j] == to[i]) ? 0 : substitution_cost
|
30
33
|
|
31
|
-
ins_cell = v0[j+1]
|
34
|
+
ins_cell = v0[j + 1]
|
32
35
|
|
33
36
|
x = [x + deletion_cost, # deletion
|
34
37
|
ins_cell + insertion_cost, # insertion
|
35
38
|
sub_cell + cost # substitution
|
36
|
-
].sort!
|
37
|
-
|
38
|
-
x = x[0]
|
39
|
+
].sort![0]
|
39
40
|
|
40
41
|
v1[j + 1] = x
|
41
42
|
|
@@ -35,7 +35,8 @@ module StringMetric
|
|
35
35
|
# new substitution cost is taken into account (by default is 1)
|
36
36
|
# @option options [Symbol] :strategy The desired strategy for Levenshtein
|
37
37
|
# distance. Supported strategies are :recursive, :two_matrix_rows,
|
38
|
-
# :full_matrix and :experiment. The default strategy
|
38
|
+
# :full_matrix, :two_matrix_rows_v2 and :experiment. The default strategy
|
39
|
+
# is :two_matrix_rows_v2 for MRI and :two_matrix_rows for other platforms.
|
39
40
|
# One should not depend on :experiment strategy.
|
40
41
|
# @return [Fixnum, Float] the Levenshtein Distance
|
41
42
|
def distance(from, to, options = {})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_metric
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giorgos Tsiftsis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|