string_metric 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 229ed953c5aadc5c929cb2b47ad7898cdc19d742
|
4
|
+
data.tar.gz: b41a66496dd7c0cb2627af22b9764d2bf8d06c37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0e932fbe3e03ec7b8268a2cc85f276b7b0041cdaf8fea5fb98d7e8a966ded52280fbe51b01a89da4e26430020a4995cc3b043110f2a95731e70c07ad3a30748
|
7
|
+
data.tar.gz: 19f270701a5e4383b84ab089ea534e7930fb980ff7dba494c51201436c69408cf54b0cca7d64098aefdd24214cc6e250168a1dc975abbf49e3672ba262b7eead
|
data/README.md
CHANGED
@@ -3,12 +3,14 @@
|
|
3
3
|
[![Build Status](https://travis-ci.org/skroutz/string_metric.png?branch=master)](https://travis-ci.org/skroutz/string_metric)
|
4
4
|
[![Code Climate](https://codeclimate.com/github/skroutz/string_metric.png)](https://codeclimate.com/github/skroutz/string_metric)
|
5
5
|
[![Coverage Status](https://coveralls.io/repos/skroutz/string_metric/badge.png?branch=master)](https://coveralls.io/r/skroutz/string_metric?branch=master)
|
6
|
+
[![Gem Version](https://badge.fury.io/rb/string_metric.png)](http://badge.fury.io/rb/string_metric)
|
7
|
+
[![Dependency Status](https://gemnasium.com/skroutz/string_metric.png)](https://gemnasium.com/skroutz/string_metric)
|
6
8
|
|
7
9
|
A simple library with String Metric algorithms. If you want to read more about
|
8
10
|
String Metric algorithms please read [here](https://en.wikipedia.org/wiki/String_metric).
|
9
11
|
|
10
12
|
This library wants to support __MRI__ (1.9.3, 2.0.0, 2.1.0), __JRuby__ and
|
11
|
-
|
13
|
+
__Rubinius__.
|
12
14
|
|
13
15
|
## Installation
|
14
16
|
|
@@ -42,13 +44,14 @@ __Options__
|
|
42
44
|
* `:deletion_cost`: It overrides the default (equals to 1) deletion penalty.
|
43
45
|
Can be `Fixnum` or `Float`.
|
44
46
|
|
45
|
-
* `:
|
47
|
+
* `:substitution_cost`: It overrides the default (equals to 1) substitution
|
46
48
|
penalty. Can be `Fixnum` or `Float`.
|
47
49
|
|
48
50
|
* `:strategy`: The desired strategy for Levenshtein distance. Supported
|
49
|
-
strategies are `:recursive`, `:two_matrix_rows`, `:
|
50
|
-
`:experiment`. The default strategy is
|
51
|
-
|
51
|
+
strategies are `:recursive`, `:two_matrix_rows`, `:two_matrix_rows_v2`,
|
52
|
+
`:full_matrix` and `:experiment`. The default strategy is
|
53
|
+
`:two_matrix_rows_v2` for MRI and `:two_matrix_rows` for other platforms.
|
54
|
+
One should not depend on `:experiment` strategy.
|
52
55
|
|
53
56
|
__Examples__
|
54
57
|
|
@@ -96,15 +99,15 @@ or you can choose to benchmark a specific algorithm like:
|
|
96
99
|
|
97
100
|
__Levenshtein__
|
98
101
|
|
102
|
+
Implementation | User | Real
|
103
|
+
-------------------------------------------------|-----------|-----------
|
104
|
+
Levenshtein::IterativeWithFullMatrix | 2.260000 | 2.265873
|
105
|
+
Levenshtein::IterativeWithTwoMatrixRows | 1.970000 | 1.971205
|
106
|
+
Levenshtein::Experiment | 1.680000 | 1.684419
|
107
|
+
Levenshtein::IterativeWithTwoMatrixRowsOptimized | 1.270000 | 1.269643
|
108
|
+
Text::Levenshtein (from gem text) | 2.180000 | 2.186258
|
99
109
|
|
100
|
-
|
101
|
-
--------------------------------------------|-----------|-----------
|
102
|
-
Levenshtein::IterativeWithFullMatrix | 0.480000 | 0.475662
|
103
|
-
Levenshtein::IterativeWithTwoMatrixRows | 0.350000 | 0.352388
|
104
|
-
Levenshtein::Experiment | 0.420000 | 0.420000
|
105
|
-
Text::Levenshtein (from gem text) | 0.400000 | 0.400346
|
106
|
-
|
107
|
-
_Currently the set of fixtures is very small_
|
110
|
+
_Currently the set of fixtures is very small - ruby 2.1.0 is used_
|
108
111
|
|
109
112
|
## Other implementations
|
110
113
|
|
@@ -131,4 +134,4 @@ __Various__
|
|
131
134
|
|
132
135
|
## Licence
|
133
136
|
|
134
|
-
string_metric is licensed under MIT. See [License](LICENSE.txt)
|
137
|
+
string_metric is licensed under MIT. See [License](LICENSE.txt)
|
@@ -9,13 +9,16 @@ module StringMetric
|
|
9
9
|
return from.size if to.size.zero?
|
10
10
|
|
11
11
|
max_distance = options[:max_distance]
|
12
|
-
insertion_cost = options
|
13
|
-
deletion_cost = options
|
14
|
-
substitution_cost = options
|
12
|
+
insertion_cost = options[:insertion_cost] || 1
|
13
|
+
deletion_cost = options[:deletion_cost] || 1
|
14
|
+
substitution_cost = options[:substitution_cost] || 1
|
15
15
|
|
16
16
|
m = from.length
|
17
17
|
n = to.length
|
18
18
|
|
19
|
+
from = from.codepoints.to_a
|
20
|
+
to = to.codepoints.to_a
|
21
|
+
|
19
22
|
v0 = (0..m).to_a
|
20
23
|
v1 = []
|
21
24
|
x = 0
|
@@ -28,14 +31,12 @@ module StringMetric
|
|
28
31
|
m.times do |j|
|
29
32
|
cost = (from[j] == to[i]) ? 0 : substitution_cost
|
30
33
|
|
31
|
-
ins_cell = v0[j+1]
|
34
|
+
ins_cell = v0[j + 1]
|
32
35
|
|
33
36
|
x = [x + deletion_cost, # deletion
|
34
37
|
ins_cell + insertion_cost, # insertion
|
35
38
|
sub_cell + cost # substitution
|
36
|
-
].sort!
|
37
|
-
|
38
|
-
x = x[0]
|
39
|
+
].sort![0]
|
39
40
|
|
40
41
|
v1[j + 1] = x
|
41
42
|
|
@@ -35,7 +35,8 @@ module StringMetric
|
|
35
35
|
# new substitution cost is taken into account (by default it is 1)
|
36
36
|
# @option options [Symbol] :strategy The desired strategy for Levenshtein
|
37
37
|
# distance. Supported strategies are :recursive, :two_matrix_rows,
|
38
|
-
# :full_matrix and :experiment. The default strategy
|
38
|
+
# :full_matrix, :two_matrix_rows_v2 and :experiment. The default strategy
|
39
|
+
# is :two_matrix_rows_v2 for MRI and :two_matrix_rows for other platforms.
|
39
40
|
# One should not depend on :experiment strategy.
|
40
41
|
# @return [Fixnum, Float] the Levenshtein Distance
|
41
42
|
def distance(from, to, options = {})
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_metric
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giorgos Tsiftsis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|