levenshtein-c 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+
2
+ require 'mkmf'
3
# Build script for the native extension: generates the Makefile that
# compiles the C source sitting next to this file.
# The name passed to create_makefile is 'levenshtein'.
ext = 'levenshtein'

# Let the user supply --with-levenshtein-dir / -include / -lib options.
dir_config ext

# Write the Makefile for the extension.
create_makefile ext
@@ -0,0 +1,54 @@
1
+
2
+ #include <ruby.h>
3
+ #include <wchar.h>
4
+ #include <ruby/encoding.h>
5
+
6
/*
 * MIN(x, y) - yields the smaller of its two arguments.
 *
 * Each argument is copied into a temporary first, so it is evaluated exactly
 * once even when it has side effects. Implemented with a statement
 * expression and __typeof__, which are GNU C extensions (GCC/Clang).
 */
#define MIN(x, y) ({                    \
    __typeof__(x) min_lhs_ = (x);       \
    __typeof__(y) min_rhs_ = (y);       \
    min_lhs_ < min_rhs_ ? min_lhs_ : min_rhs_; })
10
+
11
+ VALUE Levenshtein = Qnil; /* handle of the Levenshtein module; Qnil until Init_levenshtein assigns it */
12
+
13
+ void Init_levenshtein(); /* forward declaration: defines the module and registers its "distance" method */
14
+ VALUE method_levenshtein_distance(VALUE self, VALUE word1, VALUE word2); /* forward declaration: C implementation registered as Levenshtein.distance */
15
+
16
+ void Init_levenshtein() {
17
+ Levenshtein = rb_define_module("Levenshtein");
18
+ rb_define_singleton_method(Levenshtein, "distance", method_levenshtein_distance, 2);
19
+ }
20
+
21
+ VALUE method_levenshtein_distance(VALUE self, VALUE word1, VALUE word2) {
22
+ rb_encoding *enc = rb_enc_find("UTF-32BE");
23
+ VALUE w1_enc = rb_str_export_to_enc(word1, enc), \
24
+ w2_enc = rb_str_export_to_enc(word2, enc);
25
+
26
+ wchar_t *w1 = RSTRING_PTR(w1_enc), *w2 = RSTRING_PTR(w2_enc);
27
+ size_t l1 = wcslen(w1), l2 = wcslen(w2);
28
+
29
+ unsigned int j, cost, cur, i = 0, next = 0;
30
+ unsigned int *d = malloc((l2 + 1) * 4);
31
+
32
+ while (l1 > 0 && l2 > 0 && w1[0] == w2[0])
33
+ w1++, w2++, l1--, l2--;
34
+
35
+ if (l1 == 0 || l2 == 0)
36
+ return l1 == 0 ? UINT2NUM(l2) : UINT2NUM(l1);
37
+
38
+ for (j = 0; j < l2+1; j++)
39
+ d[j] = j;
40
+
41
+ while(i < l1) {
42
+ cur = i + 1;
43
+ for (j = 0; j < l2; j++) {
44
+ cost = !((w1[i] == w2[j]) || (i && j && (w1[i-1] == w2[j]) && (w1[i] == w2[j-1])));
45
+ next = MIN(MIN(d[j+1] + 1, cur+ 1), d[j] + cost);
46
+ d[j] = cur;
47
+ cur = next;
48
+ }
49
+ d[l2] = next;
50
+ i++;
51
+ }
52
+ free(d);
53
+ return UINT2NUM(next);
54
+ }
metadata ADDED
@@ -0,0 +1,63 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: levenshtein-c
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ali Abbas
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake-compiler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Levenshtein ruby lib with unicode support
31
+ email: ali@alouche.net
32
+ executables: []
33
+ extensions:
34
+ - ext/levenshtein/extconf.rb
35
+ extra_rdoc_files: []
36
+ files:
37
+ - ext/levenshtein/levenshtein.c
38
+ - ext/levenshtein/extconf.rb
39
+ homepage: https://github.com/alouche/levenshtein-c
40
+ licenses: []
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.23
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: Simple Ruby C Levenshtein algorithm lib
63
+ test_files: []