string_metric 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 32870ce170c62b4036f6b8eaf3f92e43280a0246
4
+ data.tar.gz: a7f38c88be3211f6ef1d82c71d701a0655081570
5
+ SHA512:
6
+ metadata.gz: 8668f405e8568c4254ef7856660a87bd4488d8e4ddbcea95f185575562787ad072696868a831431b821d95f868a8a1f67e1f2b057272375dbda3f0fbe73a6ccb
7
+ data.tar.gz: 9e9b25912b3f695161262fb331cc8f043fe8b269b29f1880ee9d8bce8dbf367c339de18b9c3756d15d9ee0e5ba677e1ff5b2837909869c0b1984797b75638f5b
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .ruby-version
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format doc
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.3"
4
+ - "2.0.0"
5
+ - "2.1.0"
6
+ - "jruby-19mode"
7
+
8
+ script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem "coveralls", require: false
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Skroutz S.A.
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,134 @@
1
+ # StringMetric
2
+
3
+ [![Build Status](https://travis-ci.org/skroutz/string_metric.png?branch=master)](https://travis-ci.org/skroutz/string_metric)
4
+ [![Code Climate](https://codeclimate.com/github/skroutz/string_metric.png)](https://codeclimate.com/github/skroutz/string_metric)
5
+ [![Coverage Status](https://coveralls.io/repos/skroutz/string_metric/badge.png?branch=master)](https://coveralls.io/r/skroutz/string_metric?branch=master)
6
+
7
+ A simple library with String Metric algorithms. If you want to read more about
8
+ String Metric algorithms please read [here](https://en.wikipedia.org/wiki/String_metric).
9
+
10
+ This library aims to support __MRI__ (1.9.3, 2.0.0, 2.1.0), __JRuby__ and
11
+ __Rubinius__.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'string_metric'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install string_metric
26
+
27
+ ## Usage
28
+
29
+ ### Levenshtein Distance
30
+
31
+ The public api for Levenshtein Distance is the method
32
+ `StringMetric::Levenshtein.distance`.
33
+
34
+ __Options__
35
+
36
+ * `:max_distance`: It sets an upper limit for the calculated distance. Can be
37
+ `Fixnum` or `Float`.
38
+
39
+ * `:insertion_cost`: It overrides the default (equal to 1) insertion penalty.
40
+ Can be `Fixnum` or `Float`.
41
+
42
+ * `:deletion_cost`: It overrides the default (equal to 1) deletion penalty.
43
+ Can be `Fixnum` or `Float`.
44
+
45
+ * `:substitution_cost`: It overrides the default (equal to 1) substitution
46
+ penalty. Can be `Fixnum` or `Float`.
47
+
48
+ * `:strategy`: The desired strategy for Levenshtein distance. Supported
49
+ strategies are `:recursive`, `:two_matrix_rows`, `:two_matrix_rows_v2`, `:full_matrix` and
50
+ `:experiment`. The default strategy is `:two_matrix_rows_v2` on MRI and `:two_matrix_rows` on other engines. One should not
51
+ depend on `:experiment` strategy.
52
+
53
+ __Examples__
54
+
55
+ ```ruby
56
+
57
+ require 'string_metric'
58
+
59
+ StringMetric::Levenshtein.distance("kitten", "sitting")
60
+ # Generates: 3
61
+
62
+ # Trim distance to :max_distance
63
+ StringMetric::Levenshtein.distance("kitten", "sitting",
64
+ max_distance: 2)
65
+ # Generates: 2
66
+
67
+ # Pass different costs for insert, delete or substitute actions
68
+ StringMetric::Levenshtein.distance("kitten", "sitting",
69
+ insertion_cost: 2,
70
+ deletion_cost: 2,
71
+ substitution_cost: 2)
72
+ # Generates: 6
73
+
74
+ ```
75
+
76
+ ## References
77
+
78
+ * [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
79
+ * [String Metric](https://en.wikipedia.org/wiki/String_metric)
80
+
81
+ ## Benchmarks
82
+
83
+ You can run benchmarks with
84
+
85
+ ```
86
+ $ bundle exec ruby benchmarks/*
87
+ ```
88
+
89
+ or you can choose to benchmark a specific algorithm like:
90
+
91
+ ```
92
+ $ bundle exec ruby benchmarks/levenshtein.rb
93
+ ```
94
+
95
+ ## Current Benchmarks status
96
+
97
+ __Levenshtein__
98
+
99
+
100
+ Implementation | User | Real
101
+ --------------------------------------------|-----------|-----------
102
+ Levenshtein::IterativeWithFullMatrix | 0.480000 | 0.475662
103
+ Levenshtein::IterativeWithTwoMatrixRows | 0.350000 | 0.352388
104
+ Levenshtein::Experiment | 0.420000 | 0.420000
105
+ Text::Levenshtein (from gem text) | 0.400000 | 0.400346
106
+
107
+ _Currently the set of fixtures is very small_
108
+
109
+ ## Other implementations
110
+
111
+ __Levenshtein__
112
+
113
+ * this beautiful gem, [text](https://github.com/threedaymonk/text)
114
+ * ffi implementations, like [this](https://github.com/dbalatero/levenshtein-ffi) or check [The Ruby Toolbox](https://www.ruby-toolbox.com/projects/levenshtein-ffi)
115
+
116
+ __Various__
117
+ * Approximate String matching [library](https://github.com/flori/amatch)
118
+
119
+ ## Tools
120
+
121
+ * Try to use [SemVer](http://semver.org/)
122
+
123
+
124
+ ## Contributing
125
+
126
+ 1. Fork it ( http://github.com/<my-github-username>/string_metric/fork )
127
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
128
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
129
+ 4. Push to the branch (`git push origin my-new-feature`)
130
+ 5. Create new Pull Request
131
+
132
+ ## Licence
133
+
134
+ string_metric is licensed under MIT. See [License](LICENSE.txt)
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,42 @@
1
+ require "benchmark"
2
+ require "string_metric"
3
+ require "text"
4
+ require "csv"
5
+
6
+ if RUBY_ENGINE == "ruby"
7
+ require "pry"
8
+ end
9
+
10
+ Benchmark.bmbm(7) do |x|
11
+
12
+ iterations = 10_000
13
+ options = { insertion_cost: 2 }
14
+
15
+ fixtures = []
16
+ CSV.foreach("spec/fixtures/levenshtein.csv") do |row|
17
+ from, to, _ = row
18
+
19
+ fixtures.push [from.to_s.strip, to.to_s.strip]
20
+ end
21
+
22
+ StringMetric::Levenshtein::STRATEGIES.each do |strategy, implementation|
23
+ next if strategy == :recursive
24
+
25
+ x.report("#{implementation.to_s} implementation") do
26
+ iterations.times do |i|
27
+
28
+ fixtures.each do |from, to|
29
+ implementation.distance(from, to, options)
30
+ end
31
+ end
32
+ end
33
+ end
34
+
35
+ x.report("Text::Levenshtein implementation") do
36
+ iterations.times do |i|
37
+ fixtures.each do |from, to|
38
+ Text::Levenshtein.distance(from, to)
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,5 @@
1
+ require "string_metric/version"
2
+ require "string_metric/levenshtein"
3
+
4
+ module StringMetric
5
+ end
@@ -0,0 +1,65 @@
1
+ # coding: utf-8
2
+
3
+ require_relative "levenshtein/experiment"
4
+ require_relative "levenshtein/iterative_with_two_matrix_rows"
5
+ require_relative "levenshtein/iterative_with_two_matrix_rows_optimized"
6
+ require_relative "levenshtein/iterative_with_full_matrix"
7
+ require_relative "levenshtein/recursive"
8
+
9
+ module StringMetric
10
+ # Levenshtein Distance implementation
11
+ #
12
+ # @see https://en.wikipedia.org/wiki/Levenshtein_distance
13
+ module Levenshtein
14
+
15
+ STRATEGIES = {
16
+ experiment: Experiment,
17
+ full_matrix: IterativeWithFullMatrix,
18
+ recursive: Recursive,
19
+ two_matrix_rows: IterativeWithTwoMatrixRows,
20
+ two_matrix_rows_v2: IterativeWithTwoMatrixRowsOptimized
21
+ }
22
+
23
+ # Levenshtein Distance of two strings
24
+ #
25
+ # @param from [String] the first string
26
+ # @param to [String] the second string
27
+ # @param options [Hash] options
28
+ # @option options [Fixnum, Float] :max_distance If this option is passed then
29
+ # Levenshtein distance is trimmed to this value (if greater)
30
+ # @option options [Fixnum, Float] :insertion_cost If this option is passed then
31
+ # new insertion cost is taken into account (by default is 1)
32
+ # @option options [Fixnum, Float] :deletion_cost If this option is passed then
33
+ # new deletion cost is taken into account (by default is 1)
34
+ # @option options [Fixnum, Float] :substitution_cost If this option is passed then
35
+ # new substitution cost is taken into account (by default is 1)
36
+ # @option options [Symbol] :strategy The desired strategy for Levenshtein
37
+ # distance. Supported strategies are :recursive, :two_matrix_rows,
38
+ # :two_matrix_rows_v2, :full_matrix and :experiment. The default strategy is :two_matrix_rows_v2 on MRI and :two_matrix_rows on other engines.
39
+ # One should not depend on :experiment strategy.
40
+ # @return [Fixnum, Float] the Levenshtein Distance
41
+ def distance(from, to, options = {})
42
+ strategy = pick_strategy(options[:strategy]) || Levenshtein.default_strategy
43
+ args = [from, to, options]
44
+
45
+ strategy.distance(*args)
46
+ end
47
+ module_function :distance
48
+
49
+ # The default strategy is IterativeWithTwoMatrixRowsOptimized on MRI (RUBY_ENGINE == "ruby") and IterativeWithTwoMatrixRows on other engines
50
+ def default_strategy
51
+ if RUBY_ENGINE == "ruby"
52
+ pick_strategy(:two_matrix_rows_v2)
53
+ else
54
+ pick_strategy(:two_matrix_rows)
55
+ end
56
+ end
57
+ module_function :default_strategy
58
+
59
+ def pick_strategy(symbol)
60
+ STRATEGIES[symbol]
61
+ end
62
+ module_function :pick_strategy
63
+ private_class_method :pick_strategy
64
+ end
65
+ end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+
3
+ module StringMetric
4
+ module Levenshtein
5
+ class Experiment
6
+ def self.distance(from, to, options = {})
7
+ return 0 if from == to
8
+ return to.size if from.size.zero?
9
+ return from.size if to.size.zero?
10
+
11
+ m = from.length
12
+ n = to.length
13
+
14
+ [m, n].min.times do |i|
15
+ if from[i] == to[i]
16
+ from.slice!(i)
17
+ to.slice!(i)
18
+ end
19
+ end
20
+
21
+ options.delete(:strategy)
22
+
23
+ # Call default distance implementation
24
+ Levenshtein.distance(from, to, options)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,47 @@
1
+ # coding: utf-8
2
+
3
+ module StringMetric
4
+ module Levenshtein
5
+ class IterativeWithFullMatrix
6
+ def self.distance(from, to, options = {})
7
+ return 0 if from == to
8
+ return to.size if from.size.zero?
9
+ return from.size if to.size.zero?
10
+
11
+ max_distance = options[:max_distance]
12
+ insertion_cost = options.fetch(:insertion_cost, 1)
13
+ deletion_cost = options.fetch(:deletion_cost, 1)
14
+ substitution_cost = options.fetch(:substitution_cost, 1)
15
+
16
+ d = (0..to.size).map do |i|
17
+ [0] * (from.size + 1)
18
+ end
19
+
20
+ (1..from.size).each { |j| d[0][j] = j }
21
+ (1..to.size).each { |i| d[i][0] = i }
22
+
23
+ (1..from.size).each do |j|
24
+ (1..to.size).each do |i|
25
+ if from[j-1] == to[i-1]
26
+ d[i][j] = d[i -1][j-1]
27
+ else
28
+ d[i][j] = [d[i-1][j] + insertion_cost, # insertion
29
+ d[i][j-1] + deletion_cost, # deletion
30
+ d[i-1][j-1] + substitution_cost # substitution
31
+ ].min
32
+ end
33
+ end
34
+
35
+ break if max_distance and d[j][j] > max_distance
36
+ end
37
+
38
+ x = d[to.size][from.size]
39
+ if max_distance && x > max_distance
40
+ max_distance
41
+ else
42
+ x
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,57 @@
1
+ # coding: utf-8
2
+
3
+ module StringMetric
4
+ module Levenshtein
5
+ class IterativeWithTwoMatrixRows
6
+ def self.distance(from, to, options = {})
7
+ return 0 if from == to
8
+ return to.size if from.size.zero?
9
+ return from.size if to.size.zero?
10
+
11
+ max_distance = options[:max_distance]
12
+ insertion_cost = options.fetch(:insertion_cost, 1)
13
+ deletion_cost = options.fetch(:deletion_cost, 1)
14
+ substitution_cost = options.fetch(:substitution_cost, 1)
15
+
16
+ m = from.length
17
+ n = to.length
18
+
19
+ v0 = (0..m).to_a
20
+ v1 = []
21
+ x = 0
22
+
23
+ n.times do |i|
24
+ x = v1[0] = i + 1
25
+
26
+ sub_cell = v0[0]
27
+
28
+ m.times do |j|
29
+ cost = (from[j] == to[i]) ? 0 : substitution_cost
30
+
31
+ ins_cell = v0[j+1]
32
+
33
+ x = [x + deletion_cost, # deletion
34
+ ins_cell + insertion_cost, # insertion
35
+ sub_cell + cost # substitution
36
+ ].min
37
+
38
+
39
+ v1[j + 1] = x
40
+
41
+ sub_cell = ins_cell
42
+ end
43
+
44
+ break if max_distance && v0[i] > max_distance
45
+
46
+ v0 = v1.dup
47
+ end
48
+
49
+ if max_distance && x > max_distance
50
+ max_distance
51
+ else
52
+ x
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,58 @@
1
+ # coding: utf-8
2
+
3
+ module StringMetric
4
+ module Levenshtein
5
+ class IterativeWithTwoMatrixRowsOptimized
6
+ def self.distance(from, to, options = {})
7
+ return 0 if from == to
8
+ return to.size if from.size.zero?
9
+ return from.size if to.size.zero?
10
+
11
+ max_distance = options[:max_distance]
12
+ insertion_cost = options.fetch(:insertion_cost, 1)
13
+ deletion_cost = options.fetch(:deletion_cost, 1)
14
+ substitution_cost = options.fetch(:substitution_cost, 1)
15
+
16
+ m = from.length
17
+ n = to.length
18
+
19
+ v0 = (0..m).to_a
20
+ v1 = []
21
+ x = 0
22
+
23
+ n.times do |i|
24
+ x = v1[0] = i + 1
25
+
26
+ sub_cell = v0[0]
27
+
28
+ m.times do |j|
29
+ cost = (from[j] == to[i]) ? 0 : substitution_cost
30
+
31
+ ins_cell = v0[j+1]
32
+
33
+ x = [x + deletion_cost, # deletion
34
+ ins_cell + insertion_cost, # insertion
35
+ sub_cell + cost # substitution
36
+ ].sort!
37
+
38
+ x = x[0]
39
+
40
+ v1[j + 1] = x
41
+
42
+ sub_cell = ins_cell
43
+ end
44
+
45
+ break if max_distance && v0[i] > max_distance
46
+
47
+ v0 = v1.dup
48
+ end
49
+
50
+ if max_distance && x > max_distance
51
+ max_distance
52
+ else
53
+ x
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+
3
+ module StringMetric
4
+ module Levenshtein
5
+ class Recursive
6
+ def self.distance(from, to, options = {})
7
+ return 0 if from == to
8
+ return to.size if from.size.zero?
9
+ return from.size if to.size.zero?
10
+
11
+ max_distance = options[:max_distance]
12
+ insertion_cost = options.fetch(:insertion_cost, 1)
13
+ deletion_cost = options.fetch(:deletion_cost, 1)
14
+ substitution_cost = options.fetch(:substitution_cost, 1)
15
+
16
+ if from.chars.to_a.last == to.chars.to_a.last
17
+ cost = 0
18
+ else
19
+ cost = substitution_cost
20
+ end
21
+
22
+ if max_distance
23
+ return [distance(from.chop, to, options) + deletion_cost,
24
+ distance(from, to.chop, options) + insertion_cost,
25
+ distance(from.chop, to.chop, options) + cost,
26
+ max_distance
27
+ ].min
28
+ else
29
+ return [distance(from.chop, to, options) + deletion_cost,
30
+ distance(from, to.chop, options) + insertion_cost,
31
+ distance(from.chop, to.chop, options) + cost
32
+ ].min
33
+
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,3 @@
1
+ module StringMetric
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,11 @@
1
+ hello, hello, 0
2
+ hello, helo, 1
3
+ hello, jello, 1
4
+ hello, helol, 2
5
+ hello, hellol, 1
6
+ hello, heloll, 2
7
+ hello, cheese, 4
8
+ hello, saint, 5
9
+ hello,, 5
10
+ sitting, kitten, 3
11
+ αλφα, βητα, 3
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe StringMetric::Levenshtein::Experiment do
5
+ it_behaves_like "Levenshtein Distance"
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe StringMetric::Levenshtein::IterativeWithFullMatrix do
5
+ it_behaves_like "Levenshtein Distance"
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe StringMetric::Levenshtein::IterativeWithTwoMatrixRowsOptimized do
5
+ it_behaves_like "Levenshtein Distance", { strategy: :two_matrix_rows_v2 }
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe StringMetric::Levenshtein::IterativeWithTwoMatrixRows do
5
+ it_behaves_like "Levenshtein Distance", { strategy: :two_matrix_rows }
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe StringMetric::Levenshtein::Recursive do
5
+ it_behaves_like "Levenshtein Distance"
6
+ end
@@ -0,0 +1,13 @@
1
+ # coding: utf-8
2
+
3
+ require "spec_helper"
4
+
5
+ describe StringMetric::Levenshtein do
6
+ it_behaves_like "Levenshtein Distance"
7
+
8
+ describe '#default_strategy' do
9
+ it "has a default strategy" do
10
+ expect(described_class.default_strategy).to be
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,29 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+
8
+ require "string_metric"
9
+
10
+ if RUBY_ENGINE == "ruby"
11
+ require "pry"
12
+ end
13
+
14
+ require 'coveralls'
15
+ Coveralls.wear!
16
+
17
+ Dir["./spec/support/**/*.rb"].sort.each { |f| require f }
18
+
19
+ RSpec.configure do |config|
20
+ config.treat_symbols_as_metadata_keys_with_true_values = true
21
+ config.run_all_when_everything_filtered = true
22
+ config.filter_run :focus
23
+
24
+ # Run specs in random order to surface order dependencies. If you find an
25
+ # order dependency and want to debug it, you can fix the order by providing
26
+ # the seed, which is printed after each run.
27
+ # --seed 1234
28
+ config.order = 'random'
29
+ end
@@ -0,0 +1,53 @@
1
+ require "csv"
2
+
3
+ shared_examples "Levenshtein Distance" do |options|
4
+ options ||= {}
5
+
6
+ describe ".distance" do
7
+ context "when the two strings are equal" do
8
+ it "is 0" do
9
+ expect(described_class.distance("kitten", "kitten", options)).to eq 0
10
+ end
11
+ end
12
+
13
+ context "when the first string is empty" do
14
+ it "is the size of the second string" do
15
+ expect(described_class.distance("","kitten", options)).to eq("kitten".size)
16
+ end
17
+ end
18
+
19
+ context "when the second string is empty" do
20
+ it "is the size of the first string" do
21
+ expect(described_class.distance("kitten","", options)).to eq("kitten".size)
22
+ end
23
+ end
24
+
25
+ context "when max_distance is passed as option" do
26
+ context "and normal distance is greater than max_distance" do
27
+ let(:max_distance) { 2 }
28
+
29
+ it "is trimmed to max_distance" do
30
+ expect(described_class.distance("kitten", "sitting",
31
+ max_distance: max_distance)).to eq max_distance
32
+ end
33
+ end
34
+ end
35
+
36
+ CSV.foreach("spec/fixtures/levenshtein.csv") do |row|
37
+ from, to, distance = row
38
+ from = from.to_s.strip
39
+ to = to.to_s.strip
40
+
41
+ it "calculates the distance from '#{from}' to '#{to}' correctly" do
42
+ expect(described_class.distance(from, to, options)).to eq distance.to_i
43
+ end
44
+ end
45
+
46
+ context "when insertion_cost is passed" do
47
+ it "takes this cost into account" do
48
+ expect(described_class.distance("kitten", "sitting", insertion_cost: 1)).not_to eq(
49
+ described_class.distance("kitten", "sitting", insertion_cost: 2))
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'string_metric/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "string_metric"
8
+ spec.version = StringMetric::VERSION
9
+ spec.authors = ["Giorgos Tsiftsis"]
10
+ spec.email = ["giorgos.tsiftsis@skroutz.gr"]
11
+ spec.summary = %q{A simple library with String Metric algorithms}
12
+ spec.description = %q{A simple library with String Metric algorithms}
13
+ spec.homepage = "https://github.com/chief/string_metric"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.5"
22
+ spec.add_development_dependency "rake", "~> 10.1.1"
23
+ spec.add_development_dependency "rspec", "~> 2.14.1"
24
+ spec.add_development_dependency "text", "~> 1.2.3"
25
+
26
+ if RUBY_ENGINE == "ruby"
27
+ if RUBY_VERSION > "1.9.3"
28
+ spec.add_development_dependency "pry-byebug", "~> 1.2.1"
29
+ else
30
+ spec.add_development_dependency "pry", "~> 0.9.12.4"
31
+ end
32
+ end
33
+ end
metadata ADDED
@@ -0,0 +1,149 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: string_metric
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Giorgos Tsiftsis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 10.1.1
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 10.1.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.14.1
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.14.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: text
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 1.2.3
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 1.2.3
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry-byebug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 1.2.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 1.2.1
83
+ description: A simple library with String Metric algorithms
84
+ email:
85
+ - giorgos.tsiftsis@skroutz.gr
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - benchmarks/levenshtein.rb
98
+ - lib/string_metric.rb
99
+ - lib/string_metric/levenshtein.rb
100
+ - lib/string_metric/levenshtein/experiment.rb
101
+ - lib/string_metric/levenshtein/iterative_with_full_matrix.rb
102
+ - lib/string_metric/levenshtein/iterative_with_two_matrix_rows.rb
103
+ - lib/string_metric/levenshtein/iterative_with_two_matrix_rows_optimized.rb
104
+ - lib/string_metric/levenshtein/recursive.rb
105
+ - lib/string_metric/version.rb
106
+ - spec/fixtures/levenshtein.csv
107
+ - spec/lib/levenshtein/experiment_spec.rb
108
+ - spec/lib/levenshtein/iterative_with_full_matric_spec.rb
109
+ - spec/lib/levenshtein/iterative_with_two_matrix_rows_optimized_spec.rb
110
+ - spec/lib/levenshtein/iterative_with_two_matrix_rows_spec.rb
111
+ - spec/lib/levenshtein/recursive_spec.rb
112
+ - spec/lib/levenshtein_spec.rb
113
+ - spec/spec_helper.rb
114
+ - spec/support/levenshtein.rb
115
+ - string_metric.gemspec
116
+ homepage: https://github.com/chief/string_metric
117
+ licenses:
118
+ - MIT
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.2.0
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: A simple library with String Metric algorithms
140
+ test_files:
141
+ - spec/fixtures/levenshtein.csv
142
+ - spec/lib/levenshtein/experiment_spec.rb
143
+ - spec/lib/levenshtein/iterative_with_full_matric_spec.rb
144
+ - spec/lib/levenshtein/iterative_with_two_matrix_rows_optimized_spec.rb
145
+ - spec/lib/levenshtein/iterative_with_two_matrix_rows_spec.rb
146
+ - spec/lib/levenshtein/recursive_spec.rb
147
+ - spec/lib/levenshtein_spec.rb
148
+ - spec/spec_helper.rb
149
+ - spec/support/levenshtein.rb