damerau-levenshtein 1.1.3 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 6f1db3c601f8dd29e5470deedecee3e99ee84f71
4
- data.tar.gz: dc320d3f50452e226beba347b0711f78c85e1595
2
+ SHA256:
3
+ metadata.gz: 1df0e124b37dd40f7d57bd0d96abcb1e0bdfef4d7dc38439ecdcddb8d292aa9b
4
+ data.tar.gz: '082576cec439a97f6c3bf262c52ec0e6f9dd26805971d9bc3791cdf9f5e2f1dc'
5
5
  SHA512:
6
- metadata.gz: 5231e78a77dbc0f91fd790dab046099d5952649f895d2d418ae83fc8f4d610ffe55aa025f493bba494719c5886e76b67a954b7d22beb6a11bd9aa9c004526a53
7
- data.tar.gz: 7e885987bd0b063145fbb04e31bc6a8474e2957d65abf7d877a08f2335e4527b396696f24e2a6d495a51d0d24ceac841971ba91381f2ceb11460f2ece41e660b
6
+ metadata.gz: c705b20404d0f2a344cd55693fb207666b16505da67390a82f31ebb1df37fa8c260f121da6fe70447179ae14df34281b9cbfa2332ddcced34afb42acfc861817
7
+ data.tar.gz: 846aaab0f6c38fdc4bd4ab626de6aec11ddf213b845b75867cad3ee4bd728bf121efb12b0886ead512cfad24798f052bb2c81dd2b36ba43b8f1633d569768feb
data/.gitignore CHANGED
@@ -5,6 +5,10 @@ tmp
5
5
  *.o
6
6
  *.bundle
7
7
  *.gem
8
+ .nvimlog
9
+ .vim.custom
10
+ .byebug_history
11
+
8
12
  # rcov generated
9
13
  coverage
10
14
 
@@ -1,4 +1,5 @@
1
1
  AllCops:
2
+ NewCops: disable
2
3
  Exclude:
3
4
  - features/**/*
4
5
  - db/**/*
@@ -9,7 +10,7 @@ Metrics/ModuleLength:
9
10
  Max: 1000
10
11
  Style/StringLiterals:
11
12
  EnforcedStyle: double_quotes
12
- Style/DotPosition:
13
+ Layout/DotPosition:
13
14
  EnforcedStyle: trailing
14
- Style/FileName:
15
+ Naming/FileName:
15
16
  Enabled: false
@@ -1 +1 @@
1
- 2.2.4
1
+ 2.5.8
@@ -1,8 +1,7 @@
1
1
  rvm:
2
- - 2.0
3
- - 2.1
4
- - 2.2
5
- - 2.3.1
2
+ - 2.5
3
+ - 2.6
4
+ - 2.7
6
5
  before_install: "gem update bundler"
7
6
  script:
8
7
  - "bundle exec rake"
@@ -1,7 +1,13 @@
1
1
  damerau-levenshtein CHANGELOG
2
2
  =============================
3
3
 
4
- 1.2.3 -- add ruby 2.3.1 to travis tests by request from @greysteil
4
+ 1.3.1 -- gems update
5
+
6
+ 1.3.0 -- (issue #10) shows difference between two strings
7
+
8
+ 1.2.0 -- add edit distance for array of integers (by @azhi)
9
+
10
+ 1.1.3 -- add ruby 2.3.1 to travis tests by request from @greysteil
5
11
 
6
12
  1.1.2 -- remove unnecessary production dependencies (by @ixti)
7
13
 
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
- source 'https://rubygems.org'
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
2
4
 
3
5
  gemspec
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2011-2016 Dmitry Mozzherin
3
+ Copyright (c) 2011-2019 Dmitry Mozzherin
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining
6
6
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -1,40 +1,44 @@
1
- damerau-levenshtein
2
- ===================
1
+ # damerau-levenshtein #
3
2
 
4
3
  [![Gem Version][gem_svg]][gem]
5
4
  [![Continuous Integration Status][ci_svg]][ci]
6
5
  [![Dependency Status][dep_svg]][dep]
7
6
  [![Coverage Status][cov_svg]][cov]
8
7
 
9
- The damerau-levenshtein gem allows to find edit distance between two UTF-8
10
- or ASCII encoded strings with O(N\*M) efficiency.
8
+ The damerau-levenshtein gem allows to find [edit distance][ed] between two
9
+ UTF-8 or ASCII encoded strings with O(N\*M) efficiency.
11
10
 
12
11
  This gem implements pure Levenshtein algorithm, Damerau modification of it
13
12
  (where 2 character transposition counts as 1 edit distance). It also includes
14
13
  Boehmer & Rees 2008 modification of Damerau algorithm, where transposition
15
14
  of bigger than 1 character blocks is taken in account as well
16
- (Boehmer & Rees 2008).
15
+ [(Rees 2014)][rees2014].
17
16
 
18
17
  ```ruby
19
18
  require "damerau-levenshtein"
20
19
  DamerauLevenshtein.distance("Something", "Smoething") #returns 1
21
20
  ```
22
21
 
23
- Gem damerau-levenshtein is compatible with ruby versions 1.8.7
24
- and 1.9.2 and higher, as well as 2.0.0 and higher
22
+ It also returns a diff between two strings according to the Levenshtein algorithm.
23
+ The diff is expressed by tags `<ins>`, `<del>`, and `<subst>`. Such tags make
24
+ it possible to highlight the difference between strings in a flexible way.
25
25
 
26
- Dependencies
27
- -------------
26
+ ```ruby
27
+ require "damerau-levenshtein"
28
+ differ = DamerauLevenshtein::Differ.new
29
+ differ.run("corn", "cron")
30
+ # output: ["c<subst>or</subst>n", "c<subst>ro</subst>n"]
31
+ ```
32
+
33
+ ## Dependencies ##
28
34
 
29
35
  sudo apt-get install build-essential libgmp3-dev
30
36
 
31
- Installation
32
- ------------
37
+ ## Installation ##
33
38
 
34
39
  gem install damerau-levenshtein
35
40
 
36
- Examples
37
- --------
41
+ ## Examples ##
38
42
 
39
43
  ```ruby
40
44
  require "damerau-levenshtein"
@@ -59,33 +63,63 @@ dl.distance("Something", "Smoething", 0) #returns 2
59
63
  dl.distance("Something", "meSothing", 2) #returns 2 instead of 4
60
64
  ```
61
65
 
62
- * comparison of words with utf-8 characters should work fine:
66
+ * comparison of words with UTF-8 characters should work fine:
63
67
 
64
68
  ```ruby
65
69
  dl.distance("Sjöstedt", "Sjostedt") #returns 1
66
70
  ```
67
71
 
68
- API Description
69
- -----------
72
+ * compare two arrays
73
+
74
+ ```ruby
75
+ dl.array_distance([1,2,3,5], [1,2,3,4]) #returns 1
76
+ ```
77
+
78
+ * return diff between two strings
79
+
80
+ ```ruby
81
+ differ = DamerauLevenshtein::Differ.new
82
+ differ.run("Something", "smthg")
83
+ ```
84
+
85
+ * return diff between two strings in raw format
86
+
87
+ ```ruby
88
+ differ = DamerauLevenshtein::Differ.new
89
+ differ.format = :raw
90
+ differ.run("Something", "smthg")
91
+ ```
92
+
93
+ ## API Description ##
94
+
95
+ ### Methods ###
70
96
 
71
- Gem defines two methods
97
+ #### DamerauLevenshtein.version
72
98
 
73
99
  ```ruby
74
100
  DamerauLevenshtein.version
75
101
  #returns version number of the gem
102
+ ```
76
103
 
104
+ #### DamerauLevenshtein.distance
105
+
106
+ ```ruby
77
107
  DamerauLevenshtein.distance(string1, string2, block_size, max_distance)
78
- #returns [edit distance][ed] between 2 strings
79
- ```
108
+ #returns edit distance between 2 strings
80
109
 
110
+ DamerauLevenshtein.string_distance(string1, string2, block_size, max_distance)
111
+ # an alias for .distance
81
112
 
113
+ DamerauLevenshtein.array_distance(array1, array2, block_size, max_distance)
114
+ # returns edit distance between 2 arrays of integers
115
+ ```
82
116
 
83
- DamerauLevenshtein.distance takes 4 arguments:
117
+ `DamerauLevenshtein.distance` and `.array_distance` take 4 arguments:
84
118
 
85
- * string1
86
- * string2
87
- * block_size (default is 1)
88
- * max_distance (default is 10)
119
+ * `string1` (`array1` for `.array_distance`)
120
+ * `string2` (`array2` for `.array_distance`)
121
+ * `block_size` (default is 1)
122
+ * `max_distance` (default is 10)
89
123
 
90
124
  `block_size` determines maximum number of characters in a transposition block:
91
125
 
@@ -113,45 +147,93 @@ Levenshtein algorithm is expensive, so it makes sense to give up when edit
113
147
  distance is becoming too big. The argument max_distance does just that.
114
148
 
115
149
  ```ruby
150
+
116
151
  DamerauLevenshtein.distance("abcdefg", "1234567", 0, 3)
117
152
  # output: 4 -- it gave up when edit distance exceeded 3
153
+
154
+ ```
155
+
156
+ #### DamerauLevenshtein::Differ
157
+
158
+ `differ = DamerauLevenshtein::Differ.new` creates a new instance of the differ class, which returns the difference between two strings
159
+
160
+ `differ.format` shows current format for diff. Default is `:tag` format
161
+
162
+ `differ.format = :raw` changes current format for diffs. Possible values are `:tag` and `:raw`
163
+
164
+ `differ.run("String1", "String2")` returns difference between two strings.
165
+
166
+ For example:
167
+
168
+ ```ruby
169
+ differ = DamerauLevenshtein::Differ.new
170
+ differ.run("Something", "smthng")
171
+ # output: ["<ins>S</ins><subst>o</subst>m<ins>e</ins>th<ins>i</ins>ng",
172
+ # "<del>S</del><subst>s</subst>m<del>e</del>th<del>i</del>ng"]
173
+
174
+ ```
175
+
176
+ Or with parsing:
177
+
178
+ ```ruby
179
+ require "damerau-levenshtein"
180
+ require "nokogiri"
181
+
182
+ differ = DamerauLevenshtein::Differ.new
183
+ res = differ.run("Something", "Smothing!")
184
+ nodes = Nokogiri::XML("<root>#{res.first}</root>")
185
+
186
+ markup = nodes.root.children.map do |n|
187
+ case n.name
188
+ when "text"
189
+ n.text
190
+ when "del"
191
+ "~~#{n.children.first.text}~~"
192
+ when "ins"
193
+ "*#{n.children.first.text}*"
194
+ when "subst"
195
+ "**#{n.children.first.text}**"
196
+ end
197
+ end.join("")
198
+
199
+ puts markup
118
200
  ```
119
201
 
120
- Contributing to damerau-levenshtein
121
- -----------------------------------
202
+ ## Contributing to damerau-levenshtein ##
122
203
 
123
204
  * Check out the latest master to make sure the feature hasn't been
124
- implemented or the bug hasn't been fixed yet
205
+ implemented or the bug hasn't been fixed yet
125
206
  * Check out the issue tracker to make sure someone already hasn't requested
126
- it and/or contributed it
207
+ it and/or contributed it
127
208
  * Fork the project
128
209
  * Start a feature/bugfix branch
129
210
  * Commit and push until you are happy with your contribution
130
211
  * Make sure to add tests for it. This is important so I don't break it
131
- in a future version unintentionally.
212
+ in a future version unintentionally.
132
213
  * Please try not to mess with the Rakefile, version, or history. If you want
133
- to have your own version, or is otherwise necessary, that is fine, but please
134
- isolate to its own commit so I can cherry-pick around it.
214
+ to have your own version, or is otherwise necessary, that is fine, but please
215
+ isolate to its own commit so I can cherry-pick around it.
135
216
 
136
- Versioning
137
- ----------
217
+ ## Versioning ##
138
218
 
139
219
  This gem is following practices of [Semantic Versioning][semver]
140
220
 
141
- Authors
142
- -------
221
+ ## Authors ##
143
222
 
144
223
  [Dmitry Mozzherin][dimus]
145
224
 
146
- Contributors
147
- ------------
225
+ ## Contributors ##
148
226
 
149
- [lazylester][lazylester], [Ran Xie][skarlit], [Alexey Zapparov][ixti]
227
+ [Alexey Zapparov][ixti],
228
+ [azhi][azhi],
229
+ [Fabian Winkler][wynksaiddestroy],
230
+ [Josephine Wright][jozr],
231
+ [lazylester][lazylester],
232
+ [Ran Xie][skarlit]
150
233
 
151
- Copyright
152
- ---------
234
+ ## Copyright ##
153
235
 
154
- Copyright (c) 2011-2016 Dmitry Mozzherin. See LICENSE.txt for
236
+ Copyright (c) 2011-2019 Dmitry Mozzherin. See LICENSE.txt for
155
237
  further details.
156
238
 
157
239
  [gem_svg]: https://badge.fury.io/rb/damerau-levenshtein.svg
@@ -168,3 +250,7 @@ further details.
168
250
  [lazylester]: https://github.com/lazylester
169
251
  [skarlit]: https://github.com/Skarlit
170
252
  [ixti]: https://github.com/ixti
253
+ [azhi]: https://github.com/azhi
254
+ [jozr]: https://github.com/jozr
255
+ [rees2014]: https://dx.doi.org/10.1371/journal.pone.0107510
256
+ [wynksaiddestroy]: https://github.com/wynksaiddestroy
data/Rakefile CHANGED
@@ -1,25 +1,32 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "bundler/gem_tasks"
2
4
  require "rspec/core/rake_task"
3
- require 'cucumber/rake/task'
5
+ require "cucumber/rake/task"
4
6
  require "rubocop/rake_task"
5
- require 'rake/dsl_definition'
6
- require 'rake'
7
- require 'rake/extensiontask'
8
- require 'rspec'
7
+ require "rake/dsl_definition"
8
+ require "rake"
9
+ require "rake/extensiontask"
10
+ require "rspec"
9
11
 
10
12
  RSpec::Core::RakeTask.new(:spec) do |rspec|
11
- rspec.pattern = FileList['spec/**/*_spec.rb']
13
+ rspec.pattern = FileList["spec/**/*_spec.rb"]
12
14
  end
13
15
 
14
16
  Cucumber::Rake::Task.new(:features)
15
17
 
16
- Rake::ExtensionTask.new('damerau_levenshtein') do |extension|
17
- extension.ext_dir = 'ext/damerau_levenshtein'
18
- extension.lib_dir = 'lib/damerau-levenshtein'
18
+ Rake::ExtensionTask.new("damerau_levenshtein") do |extension|
19
+ extension.ext_dir = "ext/damerau_levenshtein"
20
+ extension.lib_dir = "lib/damerau-levenshtein"
19
21
  end
20
22
 
21
23
  Rake::Task[:spec].prerequisites << :compile
22
24
  Rake::Task[:features].prerequisites << :compile
23
25
 
24
26
  RuboCop::RakeTask.new
25
- task :default => [:rubocop, :spec]
27
+ task default: %i[rubocop spec]
28
+
29
+ desc "open an irb session preloaded with this gem"
30
+ task :console do
31
+ sh "irb -r pp -r ./lib/damerau-levenshtein.rb"
32
+ end
@@ -1,8 +1,11 @@
1
- $LOAD_PATH.push File.expand_path("../lib", __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.push File.expand_path("lib", __dir__)
2
4
 
3
5
  require "damerau-levenshtein/version"
4
6
 
5
7
  Gem::Specification.new do |s|
8
+ s.required_ruby_version = ">= 2.5"
6
9
  s.name = "damerau-levenshtein"
7
10
  s.version = DamerauLevenshtein::VERSION
8
11
  s.homepage = "https://github.com/GlobalNamesArchitecture/damerau-levenshtein"
@@ -15,21 +18,23 @@ Gem::Specification.new do |s|
15
18
  "Damerau modification (where 2 character " \
16
19
  "transposition counts as 1 edit distance). It also " \
17
20
  "includes Boehmer & Rees 2008 modification, " \
18
- "to handle transposition in bloks with more than " \
21
+ "to handle transposition in blocks with more than " \
19
22
  "2 characters (Boehmer & Rees 2008)."
20
23
  s.files = `git ls-files -z`.split("\x0").
21
24
  reject { |f| f.match(%r{^(test|spec|features)/}) }
22
25
  s.extensions = ["ext/damerau_levenshtein/extconf.rb"]
23
26
  s.require_paths = ["lib", "lib/damerau-levenshtein"]
24
27
 
25
- s.add_development_dependency "rspec", "~> 3.4"
26
- s.add_development_dependency "activesupport", "~> 4.2"
27
- s.add_development_dependency "cucumber", "~> 2.3"
28
- s.add_development_dependency "ruby-prof", "~> 0.15"
29
- s.add_development_dependency "shoulda", "~> 3.5"
30
- s.add_development_dependency "rubocop", "~> 0.38"
28
+ s.add_development_dependency "activesupport", "~> 6.0"
29
+ s.add_development_dependency "bundler", "~> 2.1"
30
+ s.add_development_dependency "byebug", "~> 11.0"
31
31
  s.add_development_dependency "coveralls", "~> 0.8"
32
- s.add_development_dependency "bundler", "~> 1.11"
33
- s.add_development_dependency "rake", "~> 11.1"
34
- s.add_development_dependency "rake-compiler", "~> 0.9"
32
+ s.add_development_dependency "cucumber", "~> 4.1"
33
+ s.add_development_dependency "rake", "~> 13.0"
34
+ s.add_development_dependency "rake-compiler", "~> 1.1"
35
+ s.add_development_dependency "rspec", "~> 3.9"
36
+ s.add_development_dependency "rubocop", "~> 0.88"
37
+ s.add_development_dependency "ruby-prof", "~> 1.4"
38
+ s.add_development_dependency "shoulda", "~> 4.0"
39
+ s.add_development_dependency "solargraph", "~> 0.39"
35
40
  end
@@ -4,14 +4,14 @@ VALUE DamerauLevenshteinBinding = Qnil;
4
4
 
5
5
  void Init_damerau_levenshtein();
6
6
 
7
- VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance);
7
+ VALUE method_internal_distance(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance);
8
8
 
9
9
  void Init_damerau_levenshtein() {
10
10
  DamerauLevenshteinBinding = rb_define_module("DamerauLevenshteinBinding");
11
- rb_define_method(DamerauLevenshteinBinding, "distance_utf", method_distance_utf, 4);
11
+ rb_define_method(DamerauLevenshteinBinding, "internal_distance", method_internal_distance, 4);
12
12
  }
13
13
 
14
- VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance){
14
+ VALUE method_internal_distance(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VALUE _max_distance){
15
15
  VALUE *sv = RARRAY_PTR(_s);
16
16
  VALUE *tv = RARRAY_PTR(_t);
17
17
  int i, i1, j, j1, k, half_tl, cost, *d, distance, del, ins, subs, transp, block;
@@ -24,8 +24,8 @@ VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VAL
24
24
  int max_distance = NUM2INT(_max_distance);
25
25
  int sl = (int) RARRAY_LEN(_s);
26
26
  int tl = (int) RARRAY_LEN(_t);
27
- int s[sl];
28
- int t[tl];
27
+ long long s[sl];
28
+ long long t[tl];
29
29
 
30
30
  if (block_size == 0) {
31
31
  pure_levenshtein = 1;
@@ -39,8 +39,8 @@ VALUE method_distance_utf(VALUE self, VALUE _s, VALUE _t, VALUE _block_size, VAL
39
39
  if (sl == 1 && tl == 1 && sv[0] != tv[0]) return INT2NUM(1);
40
40
 
41
41
 
42
- for (i=0; i < sl; i++) s[i] = NUM2INT(sv[i]);
43
- for (i=0; i < tl; i++) t[i] = NUM2INT(tv[i]);
42
+ for (i=0; i < sl; i++) s[i] = NUM2LL(sv[i]);
43
+ for (i=0; i < tl; i++) t[i] = NUM2LL(tv[i]);
44
44
 
45
45
  sl++;
46
46
  tl++;
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Loads mkmf which is used to make makefiles for Ruby extensions
2
4
  require "mkmf"
3
5
 
@@ -1,7 +1,9 @@
1
- # encoding: UTF-8
1
+ # frozen_string_literal: true
2
2
 
3
- require "damerau-levenshtein/version"
4
- require "damerau-levenshtein/damerau_levenshtein"
3
+ require_relative "damerau-levenshtein/version"
4
+ require_relative "damerau-levenshtein/damerau_levenshtein"
5
+ require_relative "damerau-levenshtein/formatter"
6
+ require_relative "damerau-levenshtein/differ"
5
7
 
6
8
  # Damerau-Levenshtein algorithm
7
9
  module DamerauLevenshtein
@@ -12,6 +14,23 @@ module DamerauLevenshtein
12
14
  end
13
15
 
14
16
  def self.distance(str1, str2, block_size = 1, max_distance = 10)
15
- distance_utf(str1.unpack("U*"), str2.unpack("U*"), block_size, max_distance)
17
+ internal_distance(
18
+ str1.unpack("U*"), str2.unpack("U*"),
19
+ block_size, max_distance
20
+ )
21
+ end
22
+
23
+ def self.string_distance(*args)
24
+ distance(*args)
25
+ end
26
+
27
+ def self.array_distance(array1, array2, block_size = 1, max_distance = 10)
28
+ internal_distance(array1, array2, block_size, max_distance)
29
+ end
30
+
31
+ # keep backward compatibility - internal_distance was called distance_utf
32
+ # before
33
+ def self.distance_utf(*args)
34
+ internal_distance(*args)
16
35
  end
17
36
  end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DamerauLevenshtein
4
+ # Shows the difference between two strings in character by character
5
+ # resolution
6
+ class Differ
7
+ FORMATS = %i[raw tag].freeze
8
+ attr_reader :format
9
+
10
+ def initialize
11
+ @format = :tag
12
+ @matrix = []
13
+ end
14
+
15
+ def format=(new_format)
16
+ new_format = new_format.to_sym
17
+ @format = new_format if FORMATS.include?(new_format)
18
+ end
19
+
20
+ def run(str1, str2)
21
+ @len1 = str1.size.freeze
22
+ @len2 = str2.size.freeze
23
+ prepare_matrix
24
+ edit_distance(str1, str2)
25
+ raw = trace_back
26
+ formatter_factory.show(raw, str1, str2)
27
+ end
28
+
29
+ private
30
+
31
+ def formatter_factory
32
+ formatter =
33
+ case @format
34
+ when :tag
35
+ DamerauLevenshtein::FormatterTag
36
+ when :raw
37
+ DamerauLevenshtein::FormatterRaw
38
+ end
39
+ Formatter.new(formatter)
40
+ end
41
+
42
+ def edit_distance(str1, str2)
43
+ (1..@len2).each do |i|
44
+ (1..@len1).each do |j|
45
+ no_change(i, j) && next if str2[i - 1] == str1[j - 1]
46
+ @matrix[i][j] = [del(i, j), ins(i, j), subst(i, j)].min + 1
47
+ end
48
+ end
49
+ end
50
+
51
+ def trace_back
52
+ res = []
53
+ cell = [@len2, @len1]
54
+ while cell != [0, 0]
55
+ cell, char = char_data(cell)
56
+ res.unshift char
57
+ end
58
+ res
59
+ end
60
+
61
+ def char_data(cell)
62
+ char = { distance: @matrix[cell[0]][cell[1]] }
63
+ val = find_previous(cell)
64
+ previous_value = val[0][0]
65
+ char[:type] = previous_value == char[:distance] ? :same : val[1]
66
+ cell = val.pop
67
+ [cell, char]
68
+ end
69
+
70
+ def find_previous(cell)
71
+ candidates = [[[ins(*cell), 1], :ins, [cell[0], cell[1] - 1]],
72
+ [[del(*cell), 2], :del, [cell[0] - 1, cell[1]]],
73
+ [[subst(*cell), 0], :subst, [cell[0] - 1, cell[1] - 1]]]
74
+ select_cell(candidates)
75
+ end
76
+
77
+ def select_cell(candidates)
78
+ candidates.select { |e| e[-1][0] >= 0 && e[-1][1] >= 0 }.
79
+ sort_by(&:first).first
80
+ end
81
+
82
+ def del(i, j)
83
+ @matrix[i - 1][j]
84
+ end
85
+
86
+ def ins(i, j)
87
+ @matrix[i][j - 1]
88
+ end
89
+
90
+ def subst(i, j)
91
+ @matrix[i - 1][j - 1]
92
+ end
93
+
94
+ def no_change(i, j)
95
+ @matrix[i][j] = @matrix[i - 1][j - 1]
96
+ end
97
+
98
+ def prepare_matrix
99
+ @matrix = []
100
+ @matrix << (0..@len1).to_a
101
+ @len2.times do |i|
102
+ ary = [i + 1] + (1..@len1).map { nil }
103
+ @matrix << ary
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DamerauLevenshtein
4
+ # Formats supplied strings according to their differences
5
+ class Formatter
6
+ def initialize(formatter)
7
+ @formatter = formatter
8
+ end
9
+
10
+ def show(raw_format, str1, str2)
11
+ @formatter.show(raw_format, str1, str2)
12
+ end
13
+ end
14
+
15
+ # Outputs raw format for two strings
16
+ module FormatterRaw
17
+ def self.show(raw_format, _, _)
18
+ raw_format
19
+ end
20
+ end
21
+
22
+ # Outputs strings marked with tags
23
+ module FormatterTag
24
+ class << self
25
+ def show(raw_format, str1, str2)
26
+ inverted_raw_format = raw_format.map do |e|
27
+ type = invert_type(e[:type])
28
+ { distance: e[:distance], type: type }
29
+ end
30
+ [show_string(raw_format, str1, str2),
31
+ show_string(inverted_raw_format, str2, str1)]
32
+ end
33
+
34
+ private
35
+
36
+ def invert_type(type)
37
+ case type
38
+ when :del
39
+ :ins
40
+ when :ins
41
+ :del
42
+ else
43
+ type
44
+ end
45
+ end
46
+
47
+ def show_string(raw, str1, str2)
48
+ data = { res: [], type: nil, deletes: 0, inserts: 0,
49
+ str1: str1, str2: str2 }
50
+ raw.each_with_index do |e, i|
51
+ process_entry(e, i, data)
52
+ end
53
+ data[:res] << format("</%<type>s>", data) if data[:type] != :same
54
+ data[:res].join("")
55
+ end
56
+
57
+ def process_entry(e, i, data)
58
+ if data[:type] && e[:type] != data[:type]
59
+ insert_tags(e, data)
60
+ elsif data[:type].nil?
61
+ data[:res] << format("<%<type>s>", e) if e[:type] != :same
62
+ end
63
+ insert_letter(e, i, data)
64
+ end
65
+
66
+ def insert_tags(entry, data)
67
+ data[:res] << format("</%<type>s>", data) if data[:type] != :same
68
+ data[:res] << format("<%<type>s>", entry) if entry[:type] != :same
69
+ end
70
+
71
+ def insert_letter(entry, index, data)
72
+ if entry[:type] == :del
73
+ insert_del(index, data)
74
+ else
75
+ insert_others(index, data)
76
+ end
77
+ data[:inserts] += 1 if entry[:type] == :ins
78
+ data[:type] = entry[:type]
79
+ end
80
+
81
+ def insert_del(i, data)
82
+ data[:res] << data[:str2][i - data[:inserts]]
83
+ data[:deletes] += 1
84
+ end
85
+
86
+ def insert_others(i, data)
87
+ data[:res] << data[:str1][i - data[:deletes]]
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Damerau Levenshtein algorithm
2
4
  module DamerauLevenshtein
3
- VERSION = "1.1.3".freeze
5
+ VERSION = "1.3.3"
4
6
  end
metadata CHANGED
@@ -1,158 +1,186 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: damerau-levenshtein
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-05 00:00:00.000000000 Z
11
+ date: 2020-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
14
+ name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '3.4'
19
+ version: '6.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '3.4'
26
+ version: '6.0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: activesupport
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '4.2'
33
+ version: '2.1'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '4.2'
40
+ version: '2.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '11.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '11.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: coveralls
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.8'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: cucumber
43
71
  requirement: !ruby/object:Gem::Requirement
44
72
  requirements:
45
73
  - - "~>"
46
74
  - !ruby/object:Gem::Version
47
- version: '2.3'
75
+ version: '4.1'
48
76
  type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
80
  - - "~>"
53
81
  - !ruby/object:Gem::Version
54
- version: '2.3'
82
+ version: '4.1'
55
83
  - !ruby/object:Gem::Dependency
56
- name: ruby-prof
84
+ name: rake
57
85
  requirement: !ruby/object:Gem::Requirement
58
86
  requirements:
59
87
  - - "~>"
60
88
  - !ruby/object:Gem::Version
61
- version: '0.15'
89
+ version: '13.0'
62
90
  type: :development
63
91
  prerelease: false
64
92
  version_requirements: !ruby/object:Gem::Requirement
65
93
  requirements:
66
94
  - - "~>"
67
95
  - !ruby/object:Gem::Version
68
- version: '0.15'
96
+ version: '13.0'
69
97
  - !ruby/object:Gem::Dependency
70
- name: shoulda
98
+ name: rake-compiler
71
99
  requirement: !ruby/object:Gem::Requirement
72
100
  requirements:
73
101
  - - "~>"
74
102
  - !ruby/object:Gem::Version
75
- version: '3.5'
103
+ version: '1.1'
76
104
  type: :development
77
105
  prerelease: false
78
106
  version_requirements: !ruby/object:Gem::Requirement
79
107
  requirements:
80
108
  - - "~>"
81
109
  - !ruby/object:Gem::Version
82
- version: '3.5'
110
+ version: '1.1'
83
111
  - !ruby/object:Gem::Dependency
84
- name: rubocop
112
+ name: rspec
85
113
  requirement: !ruby/object:Gem::Requirement
86
114
  requirements:
87
115
  - - "~>"
88
116
  - !ruby/object:Gem::Version
89
- version: '0.38'
117
+ version: '3.9'
90
118
  type: :development
91
119
  prerelease: false
92
120
  version_requirements: !ruby/object:Gem::Requirement
93
121
  requirements:
94
122
  - - "~>"
95
123
  - !ruby/object:Gem::Version
96
- version: '0.38'
124
+ version: '3.9'
97
125
  - !ruby/object:Gem::Dependency
98
- name: coveralls
126
+ name: rubocop
99
127
  requirement: !ruby/object:Gem::Requirement
100
128
  requirements:
101
129
  - - "~>"
102
130
  - !ruby/object:Gem::Version
103
- version: '0.8'
131
+ version: '0.88'
104
132
  type: :development
105
133
  prerelease: false
106
134
  version_requirements: !ruby/object:Gem::Requirement
107
135
  requirements:
108
136
  - - "~>"
109
137
  - !ruby/object:Gem::Version
110
- version: '0.8'
138
+ version: '0.88'
111
139
  - !ruby/object:Gem::Dependency
112
- name: bundler
140
+ name: ruby-prof
113
141
  requirement: !ruby/object:Gem::Requirement
114
142
  requirements:
115
143
  - - "~>"
116
144
  - !ruby/object:Gem::Version
117
- version: '1.11'
145
+ version: '1.4'
118
146
  type: :development
119
147
  prerelease: false
120
148
  version_requirements: !ruby/object:Gem::Requirement
121
149
  requirements:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
- version: '1.11'
152
+ version: '1.4'
125
153
  - !ruby/object:Gem::Dependency
126
- name: rake
154
+ name: shoulda
127
155
  requirement: !ruby/object:Gem::Requirement
128
156
  requirements:
129
157
  - - "~>"
130
158
  - !ruby/object:Gem::Version
131
- version: '11.1'
159
+ version: '4.0'
132
160
  type: :development
133
161
  prerelease: false
134
162
  version_requirements: !ruby/object:Gem::Requirement
135
163
  requirements:
136
164
  - - "~>"
137
165
  - !ruby/object:Gem::Version
138
- version: '11.1'
166
+ version: '4.0'
139
167
  - !ruby/object:Gem::Dependency
140
- name: rake-compiler
168
+ name: solargraph
141
169
  requirement: !ruby/object:Gem::Requirement
142
170
  requirements:
143
171
  - - "~>"
144
172
  - !ruby/object:Gem::Version
145
- version: '0.9'
173
+ version: '0.39'
146
174
  type: :development
147
175
  prerelease: false
148
176
  version_requirements: !ruby/object:Gem::Requirement
149
177
  requirements:
150
178
  - - "~>"
151
179
  - !ruby/object:Gem::Version
152
- version: '0.9'
180
+ version: '0.39'
153
181
  description: This gem implements pure Levenshtein algorithm, Damerau modification
154
182
  (where 2 character transposition counts as 1 edit distance). It also includes Boehmer
155
- & Rees 2008 modification, to handle transposition in bloks with more than 2 characters
183
+ & Rees 2008 modification, to handle transposition in blocks with more than 2 characters
156
184
  (Boehmer & Rees 2008).
157
185
  email: dmozzherin@gmail.com
158
186
  executables: []
@@ -176,12 +204,14 @@ files:
176
204
  - ext/damerau_levenshtein/extconf.rb
177
205
  - lib/damerau-levenshtein.rb
178
206
  - lib/damerau-levenshtein/damerau_levenshtein.so
207
+ - lib/damerau-levenshtein/differ.rb
208
+ - lib/damerau-levenshtein/formatter.rb
179
209
  - lib/damerau-levenshtein/version.rb
180
210
  homepage: https://github.com/GlobalNamesArchitecture/damerau-levenshtein
181
211
  licenses:
182
212
  - MIT
183
213
  metadata: {}
184
- post_install_message:
214
+ post_install_message:
185
215
  rdoc_options: []
186
216
  require_paths:
187
217
  - lib
@@ -190,16 +220,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
190
220
  requirements:
191
221
  - - ">="
192
222
  - !ruby/object:Gem::Version
193
- version: '0'
223
+ version: '2.5'
194
224
  required_rubygems_version: !ruby/object:Gem::Requirement
195
225
  requirements:
196
226
  - - ">="
197
227
  - !ruby/object:Gem::Version
198
228
  version: '0'
199
229
  requirements: []
200
- rubyforge_project:
201
- rubygems_version: 2.4.5.1
202
- signing_key:
230
+ rubyforge_project:
231
+ rubygems_version: 2.7.6.2
232
+ signing_key:
203
233
  specification_version: 4
204
234
  summary: Calculation of editing distance for 2 strings using Levenshtein or Damerau-Levenshtein
205
235
  algorithms