levenshtein-c 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/levenshtein/extconf.rb +5 -0
- data/ext/levenshtein/levenshtein.c +54 -0
- metadata +63 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
|
2
|
+
#include <ruby.h>
|
3
|
+
#include <wchar.h>
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
|
6
|
+
/*
 * MIN(x, y): yields the smaller of the two arguments.
 * Each argument is copied into a local first, so it is evaluated exactly once
 * even if it has side effects. Relies on the GNU statement-expression and
 * __typeof__ extensions.
 */
#define MIN(x, y) ({                          \
    __typeof__ (x) min_lhs_ = (x);            \
    __typeof__ (y) min_rhs_ = (y);            \
    !(min_lhs_ < min_rhs_) ? min_rhs_ : min_lhs_; })
|
10
|
+
|
11
|
+
/* Handle to the Ruby `Levenshtein` module; Qnil until Init_levenshtein() assigns it. */
VALUE Levenshtein = Qnil;

/*
 * Forward declarations.
 * Init_levenshtein follows Ruby's Init_<name> naming convention for the
 * function run when the extension library is loaded. It is declared with an
 * explicit (void) so the compiler sees a real prototype rather than an
 * unchecked, old-style parameter list.
 */
void Init_levenshtein(void);
/* Backs Levenshtein.distance(word1, word2); returns the distance as a Ruby Integer. */
VALUE method_levenshtein_distance(VALUE self, VALUE word1, VALUE word2);
|
15
|
+
|
16
|
+
void Init_levenshtein() {
|
17
|
+
Levenshtein = rb_define_module("Levenshtein");
|
18
|
+
rb_define_singleton_method(Levenshtein, "distance", method_levenshtein_distance, 2);
|
19
|
+
}
|
20
|
+
|
21
|
+
/* Returns nonzero when the two 4-byte UTF-32 code units at a and b are identical. */
static int lev_same_unit(const char *a, const char *b) {
    return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
}

/*
 * Levenshtein.distance(word1, word2) -> Integer
 *
 * Computes an edit distance between two strings, compared code point by code
 * point after exporting both to UTF-32BE (fixed 4 bytes per code point).
 *
 * The recurrence is the usual single-row Levenshtein one (insert, delete,
 * substitute each cost 1), with one twist: a substitution is free when the
 * current pair of characters, together with the previous pair, forms a swap
 * of two adjacent characters. Under this rule "ab" -> "ba" scores 1.
 *
 * self   - receiver (unused).
 * word1  - first string; coerced with StringValue, so a non-String without
 *          #to_str raises TypeError.
 * word2  - second string; same coercion.
 *
 * Returns the distance as a Ruby Integer.
 */
VALUE method_levenshtein_distance(VALUE self, VALUE word1, VALUE word2) {
    enum { UNIT = 4 }; /* bytes per UTF-32 code unit */

    /* Reject or convert non-String arguments before touching string internals. */
    StringValue(word1);
    StringValue(word2);

    rb_encoding *enc = rb_enc_find("UTF-32BE");
    /* NOTE(review): rb_str_export_to_enc is believed to hand back the input
     * unchanged when transcoding fails; lengths below are taken from the byte
     * length, so reads stay in bounds either way. Confirm. */
    VALUE w1_enc = rb_str_export_to_enc(word1, enc);
    VALUE w2_enc = rb_str_export_to_enc(word2, enc);

    /*
     * Work on raw bytes with lengths taken from RSTRING_LEN. This does not
     * depend on sizeof(wchar_t), does not require a 4-byte NUL terminator
     * after the data, and does not stop early at an embedded U+0000.
     */
    const char *w1 = RSTRING_PTR(w1_enc);
    const char *w2 = RSTRING_PTR(w2_enc);
    long l1 = RSTRING_LEN(w1_enc) / UNIT;
    long l2 = RSTRING_LEN(w2_enc) / UNIT;
    VALUE result;

    /* A shared prefix contributes nothing to the distance; skip it. */
    while (l1 > 0 && l2 > 0 && lev_same_unit(w1, w2)) {
        w1 += UNIT;
        w2 += UNIT;
        l1--;
        l2--;
    }

    if (l1 == 0 || l2 == 0) {
        /* One side is exhausted: the distance is what remains of the other. */
        result = LONG2NUM(l1 == 0 ? l2 : l1);
    } else {
        long i, j, cost, cur, next = 0;
        /* Allocated only when needed, so the early-exit path cannot leak it.
         * ALLOC_N raises NoMemoryError on failure instead of returning NULL. */
        long *d = ALLOC_N(long, l2 + 1);

        for (j = 0; j <= l2; j++) {
            d[j] = j;
        }

        for (i = 0; i < l1; i++) {
            const char *c1 = w1 + i * UNIT;

            cur = i + 1;
            for (j = 0; j < l2; j++) {
                const char *c2 = w2 + j * UNIT;
                int match = lev_same_unit(c1, c2);
                /* Adjacent swap: w1[i-1] == w2[j] and w1[i] == w2[j-1]. */
                int swapped = i > 0 && j > 0
                           && lev_same_unit(c1 - UNIT, c2)
                           && lev_same_unit(c1, c2 - UNIT);

                cost = !(match || swapped);

                next = d[j + 1] + 1;          /* previous row, same column  */
                if (cur + 1 < next) {
                    next = cur + 1;           /* current row, column to left */
                }
                if (d[j] + cost < next) {
                    next = d[j] + cost;       /* diagonal: substitute/match  */
                }

                d[j] = cur;
                cur = next;
            }
            d[l2] = next;
        }

        xfree(d);
        result = LONG2NUM(next);
    }

    /* Keep the converted strings alive for as long as w1/w2 point into them. */
    RB_GC_GUARD(w1_enc);
    RB_GC_GUARD(w2_enc);
    return result;
}
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: levenshtein-c
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ali Abbas
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-10-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Levenshtein ruby lib with unicode support
|
31
|
+
email: ali@alouche.net
|
32
|
+
executables: []
|
33
|
+
extensions:
|
34
|
+
- ext/levenshtein/extconf.rb
|
35
|
+
extra_rdoc_files: []
|
36
|
+
files:
|
37
|
+
- ext/levenshtein/levenshtein.c
|
38
|
+
- ext/levenshtein/extconf.rb
|
39
|
+
homepage: https://github.com/alouche/levenshtein-c
|
40
|
+
licenses: []
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.8.23
|
60
|
+
signing_key:
|
61
|
+
specification_version: 3
|
62
|
+
summary: Simple Ruby C Levenshtein algorithm lib
|
63
|
+
test_files: []
|