levenshtein_vvvvvv 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +38 -0
- data/LICENSE.txt +21 -0
- data/README.md +49 -0
- data/Rakefile +21 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/levenshtein_vvvvvv/extconf.rb +3 -0
- data/ext/levenshtein_vvvvvv/levenshtein.c +12428 -0
- data/ext/levenshtein_vvvvvv/levenshtein.v +58 -0
- data/ext/levenshtein_vvvvvv/levenshtein_vvvvvv.c +46 -0
- data/ext/levenshtein_vvvvvv/levenshtein_vvvvvv.h +6 -0
- data/levenshtein_vvvvvv.gemspec +27 -0
- data/lib/levenshtein_vvvvvv/version.rb +3 -0
- data/lib/levenshtein_vvvvvv.rb +7 -0
- metadata +102 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
module levenshtein
|
2
|
+
|
3
|
+
// min returns the smallest of the three integers `a`, `b` and `c`.
@[inline]
fn min(a int, b int, c int) int {
	// Settle the first two operands, then compare the winner against the third.
	smaller := if a < b { a } else { b }
	return if smaller < c { smaller } else { c }
}
|
11
|
+
|
12
|
+
// distance returns the Levenshtein edit distance between `s1` and `s2`.
// Both strings are compared rune by rune (via `runes()`), not byte by byte.
pub fn distance(s1 string, s2 string) int {
	src := s1.runes()
	dst := s2.runes()
	width := dst.len + 1

	// Only two rows of the dynamic-programming table are kept alive: the row
	// computed on the previous pass and the row being filled in right now.
	mut v0 := []int{len: width}
	mut v1 := []int{len: width}

	// The two buffers trade roles after every source rune; this flag records
	// whether `v0` is the one currently holding the previous row.
	mut prev_is_v0 := true

	// Row zero: turning the empty string into the first `col` runes of `s2`
	// costs exactly `col` insertions.
	for col in 0 .. width {
		v0[col] = col
	}

	for row, src_rune in src {
		prev := if prev_is_v0 { &v0 } else { &v1 }
		curr := if prev_is_v0 { &v1 } else { &v0 }
		prev_is_v0 = !prev_is_v0

		// Column zero: removing the first `row + 1` runes of `s1`.
		(*curr)[0] = row + 1

		for col, dst_rune in dst {
			mismatch := if src_rune == dst_rune { 0 } else { 1 }

			// Candidates, in order: deletion, insertion, substitution.
			(*curr)[col + 1] = min((*prev)[col + 1] + 1, (*curr)[col] + 1, (*prev)[col] + mismatch)
		}
	}

	// The flag was flipped right after the last row's buffers were chosen, so
	// the buffer it now marks as "previous" is the one holding the final row.
	last_row := if prev_is_v0 { &v0 } else { &v1 }

	return (*last_row).last()
}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#include "levenshtein_vvvvvv.h"
|
2
|
+
|
3
|
+
/*
 * By-value string descriptor handed to the V-generated code.
 * NOTE(review): the field order and types presumably have to mirror the
 * `string` struct emitted in levenshtein.c -- confirm whenever that file is
 * regenerated with a different V release.
 */
typedef struct vstring vstring;

struct vstring {
    uint8_t* str;  /* first byte of the character data */
    int len;       /* length of the data */
    int is_lit;    /* literal flag; NOTE(review): exact V semantics not visible here */
};

/* Implemented by the V module `levenshtein` (function `distance`). */
extern int levenshtein__distance(vstring s1, vstring s2);
|
13
|
+
|
14
|
+
/*
 * Wrap the bytes of the Ruby object `s` in a by-value vstring.
 *
 * The returned struct borrows the Ruby string's buffer; nothing is copied, so
 * it is only usable while that Ruby string is alive and unmodified.
 *
 * Raises TypeError (from StringValuePtr) when `s` cannot be converted to a
 * String, and RangeError (from RSTRING_LENINT) when the byte length does not
 * fit in the `int` length field.
 */
static inline vstring mk_vstring(VALUE s) {
    const char* p = StringValuePtr(s);

    vstring v = {
        (uint8_t*)p,
        /* RSTRING_LEN yields a long; RSTRING_LENINT range-checks the value
         * instead of letting it be silently truncated into the int field. */
        RSTRING_LENINT(s),
        1
    };

    return v;
}
|
25
|
+
|
26
|
+
/* Handle to the LevenshteinVvvvvv module; stays Qnil until Init_levenshtein_vvvvvv assigns it. */
VALUE rb_mLevenshteinVvvvvv = Qnil;
|
27
|
+
|
28
|
+
/*
 * LevenshteinVvvvvv.distance(s1, s2) -> Integer
 *
 * Passes both arguments to the V implementation and returns its result as a
 * Ruby Integer. Raises TypeError when an argument cannot be converted to a
 * String.
 */
VALUE distance(VALUE _self, VALUE s1, VALUE s2) {
    /* Convert in place first, so that any String produced by an implicit
     * to_str conversion is held in this frame's s1/s2 rather than only in a
     * temporary inside mk_vstring. */
    StringValue(s1);
    StringValue(s2);

    vstring v1 = mk_vstring(s1);
    vstring v2 = mk_vstring(s2);

    int result = levenshtein__distance(v1, v2);

    /* v1/v2 borrow the Ruby strings' buffers through raw pointers; keep both
     * objects visible to the GC until the native call has returned. */
    RB_GC_GUARD(s1);
    RB_GC_GUARD(s2);

    return INT2NUM(result);
}
|
36
|
+
|
37
|
+
void Init_levenshtein_vvvvvv(void) {
|
38
|
+
rb_mLevenshteinVvvvvv = rb_define_module("LevenshteinVvvvvv");
|
39
|
+
|
40
|
+
rb_define_module_function(
|
41
|
+
rb_mLevenshteinVvvvvv,
|
42
|
+
"distance",
|
43
|
+
distance,
|
44
|
+
2
|
45
|
+
);
|
46
|
+
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be
# read before the gem is installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "levenshtein_vvvvvv/version"

Gem::Specification.new do |spec|
  spec.name     = "levenshtein_vvvvvv"
  spec.version  = LevenshteinVvvvvv::VERSION
  spec.authors  = ["Erin Paget"]
  spec.email    = ["erin.paget@clio.com"]

  spec.summary  = "Levenshtein string distance written in V"
  spec.homepage = "https://github.com/undees/levenshtein_vvvvvv"
  spec.license  = "MIT"

  # Package every file tracked by git, leaving out test, spec and feature
  # directories.
  spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
    `git ls-files -z`.split("\0").grep_v(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # The native extension is configured through this extconf.rb at install time.
  spec.extensions = ["ext/levenshtein_vvvvvv/extconf.rb"]

  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: levenshtein_vvvvvv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Erin Paget
|
8
|
+
bindir: exe
|
9
|
+
cert_chain: []
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: rake
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - "~>"
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '13.0'
|
19
|
+
type: :development
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - "~>"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '13.0'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rake-compiler
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rspec
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.0'
|
47
|
+
type: :development
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - "~>"
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '3.0'
|
54
|
+
email:
|
55
|
+
- erin.paget@clio.com
|
56
|
+
executables: []
|
57
|
+
extensions:
|
58
|
+
- ext/levenshtein_vvvvvv/extconf.rb
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- ".gitignore"
|
62
|
+
- ".rspec"
|
63
|
+
- ".ruby-version"
|
64
|
+
- ".travis.yml"
|
65
|
+
- CODE_OF_CONDUCT.md
|
66
|
+
- Gemfile
|
67
|
+
- Gemfile.lock
|
68
|
+
- LICENSE.txt
|
69
|
+
- README.md
|
70
|
+
- Rakefile
|
71
|
+
- bin/console
|
72
|
+
- bin/setup
|
73
|
+
- ext/levenshtein_vvvvvv/extconf.rb
|
74
|
+
- ext/levenshtein_vvvvvv/levenshtein.c
|
75
|
+
- ext/levenshtein_vvvvvv/levenshtein.v
|
76
|
+
- ext/levenshtein_vvvvvv/levenshtein_vvvvvv.c
|
77
|
+
- ext/levenshtein_vvvvvv/levenshtein_vvvvvv.h
|
78
|
+
- levenshtein_vvvvvv.gemspec
|
79
|
+
- lib/levenshtein_vvvvvv.rb
|
80
|
+
- lib/levenshtein_vvvvvv/version.rb
|
81
|
+
homepage: https://github.com/undees/levenshtein_vvvvvv
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubygems_version: 3.6.7
|
100
|
+
specification_version: 4
|
101
|
+
summary: Levenshtein string distance written in V
|
102
|
+
test_files: []
|