levenshtein_ruby 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/ext/levenshtein_ruby/.env +2 -0
- data/ext/levenshtein_ruby/distance +0 -0
- data/ext/levenshtein_ruby/distance.c +80 -0
- data/ext/levenshtein_ruby/distance.h +1 -0
- data/ext/levenshtein_ruby/input.txt +2 -0
- data/ext/levenshtein_ruby/levenshtein_ruby.c +3 -43
- data/ext/levenshtein_ruby/test/distance_unit.c +23 -0
- data/levenshtein_ruby-0.1.3.gem +0 -0
- data/lib/levenshtein_ruby.rb +2 -11
- data/lib/levenshtein_ruby/version.rb +1 -1
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a8efe75bc5567183bb5e5da4d984cf6261357e0
|
4
|
+
data.tar.gz: d670a7c6b049833e2a39daabd687e0cb33a6c2f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a963b3fda4118b3311b6c1d06f3027927ebf941e6330cbf44f84a6f3cc2dbbb724045a1c4a1f2645f74f31e0f9f491bbd5499d208116699df8ff42a63c1f6630
|
7
|
+
data.tar.gz: 5e4850d3cbd97057b9f52642d8762d0a7b4804fac7c8fc263ca7ab17afe2d107ccf3ea6de08e3846f816f642f5ec68c5a92131d4569d9057afa2abddc160765d
|
data/.gitignore
CHANGED
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#include "distance.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
|
6
|
+
/*
 * Return the address of the cell at row x, column y of a row-pointer matrix.
 *
 * double_pointer: array of row pointers; row x must hold at least y + 1 ints.
 * No bounds checking is performed.
 */
int* get_point(int** double_pointer, int x, int y)
{
  int* row = double_pointer[x];

  return &row[y];
}
|
10
|
+
|
11
|
+
/*
 * Store value into the cell at row x, column y of a row-pointer matrix.
 *
 * double_pointer: array of row pointers; row x must hold at least y + 1 ints.
 * No bounds checking is performed.
 */
void set_point(int** double_pointer, int x, int y, int value)
{
  int* row = double_pointer[x];

  row[y] = value;
}
|
15
|
+
|
16
|
+
/*
 * Compute the Levenshtein edit distance between two NUL-terminated strings.
 *
 * The distance is the minimum number of single-byte insertions, deletions
 * and substitutions needed to turn word1 into word2. Transpositions are not
 * a primitive operation, so swapping two adjacent bytes costs 2.
 *
 * Only two rows of the dynamic-programming table are kept at a time, so the
 * working memory is proportional to strlen(word2) rather than to the product
 * of both lengths, and every allocation is released before returning.
 *
 * word1, word2: non-NULL C strings; their lengths are assumed to fit in int.
 *
 * Returns the edit distance, or -1 if the working buffers cannot be
 * allocated.
 */
int distance(const char* word1, const char* word2)
{
  size_t len1 = strlen(word1);
  size_t len2 = strlen(word2);
  size_t i;
  size_t j;
  int* prev;
  int* curr;
  int result;

  prev = calloc(len2 + 1, sizeof *prev);
  curr = calloc(len2 + 1, sizeof *curr);
  if (prev == NULL || curr == NULL) {
    free(prev);
    free(curr);
    return -1;
  }

  /* Row 0: turning the empty prefix of word1 into word2[0..j) takes j inserts. */
  for (j = 0; j <= len2; j++) {
    prev[j] = (int)j;
  }

  for (i = 1; i <= len1; i++) {
    char c1 = word1[i-1];
    int* swap;

    /* Column 0: turning word1[0..i) into the empty string takes i deletes. */
    curr[0] = (int)i;

    for (j = 1; j <= len2; j++) {
      if (c1 == word2[j-1]) {
        /* Matching bytes cost nothing: carry the diagonal value over. */
        curr[j] = prev[j-1];
      }
      else {
        int minimum = prev[j] + 1;         /* delete from word1 */
        int insert = curr[j-1] + 1;        /* insert into word1 */
        int substitute = prev[j-1] + 1;    /* replace one byte  */

        if (insert < minimum) {
          minimum = insert;
        }
        if (substitute < minimum) {
          minimum = substitute;
        }
        curr[j] = minimum;
      }
    }

    /* The row just filled becomes the "previous" row of the next pass. */
    swap = prev;
    prev = curr;
    curr = swap;
  }

  /* After the final swap (or with an empty word1) the answer is in prev. */
  result = prev[len2];
  free(prev);
  free(curr);
  return result;
}
|
72
|
+
|
73
|
+
/*
 * Command-line driver: read two whitespace-delimited words from standard
 * input and print their edit distance followed by a newline.
 *
 * Returns 0 on success, EXIT_FAILURE if two words could not be read.
 */
int main(void)
{
  char word1[256];
  char word2[256];

  /* %255s caps each read so the terminating NUL always fits in the buffer. */
  if (scanf("%255s", word1) != 1 || scanf("%255s", word2) != 1) {
    fprintf(stderr, "expected two words on standard input\n");
    return EXIT_FAILURE;
  }

  printf("%d\n", distance(word1, word2));
  return 0;
}
|
80
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
int distance(const char* word1, const char* word2);
|
@@ -1,56 +1,16 @@
|
|
1
1
|
#include <stdio.h>
|
2
2
|
#include <ruby.h>
|
3
|
+
#include "distance.h"
|
3
4
|
|
4
5
|
VALUE cLevenshtein;
|
5
6
|
|
6
7
|
|
7
8
|
VALUE distance_func(VALUE self, VALUE s_word1, VALUE s_word2)
|
8
9
|
{
|
10
|
+
|
9
11
|
char* word1 = StringValuePtr(s_word1);
|
10
12
|
char* word2 = StringValuePtr(s_word2);
|
11
|
-
|
12
|
-
int len2 = strlen(word2);
|
13
|
-
int matrix[len1 + 1][len2 + 1];
|
14
|
-
int i;
|
15
|
-
for (i = 0; i <= len1; i++) {
|
16
|
-
matrix[i][0] = i;
|
17
|
-
}
|
18
|
-
for (i = 0; i <= len2; i++) {
|
19
|
-
matrix[0][i] = i;
|
20
|
-
}
|
21
|
-
for (i = 1; i <= len1; i++) {
|
22
|
-
int j;
|
23
|
-
char c1;
|
24
|
-
|
25
|
-
c1 = word1[i-1];
|
26
|
-
for (j = 1; j <= len2; j++) {
|
27
|
-
char c2;
|
28
|
-
|
29
|
-
c2 = word2[j-1];
|
30
|
-
if (c1 == c2) {
|
31
|
-
matrix[i][j] = matrix[i-1][j-1];
|
32
|
-
}
|
33
|
-
else {
|
34
|
-
int delete;
|
35
|
-
int insert;
|
36
|
-
int substitute;
|
37
|
-
int minimum;
|
38
|
-
|
39
|
-
delete = matrix[i-1][j] + 1;
|
40
|
-
insert = matrix[i][j-1] + 1;
|
41
|
-
substitute = matrix[i-1][j-1] + 1;
|
42
|
-
minimum = delete;
|
43
|
-
if (insert < minimum) {
|
44
|
-
minimum = insert;
|
45
|
-
}
|
46
|
-
if (substitute < minimum) {
|
47
|
-
minimum = substitute;
|
48
|
-
}
|
49
|
-
matrix[i][j] = minimum;
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}
|
53
|
-
return INT2NUM(matrix[len1][len2]);
|
13
|
+
return INT2NUM(distance(word1, word2));
|
54
14
|
}
|
55
15
|
|
56
16
|
void Init_levenshtein_ruby()
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#include <CUnit/CUnit.h>
|
2
|
+
#include <CUnit/Console.h>
|
3
|
+
#include "../distance.h"
|
4
|
+
|
5
|
+
void test_dist_001(void);
|
6
|
+
|
7
|
+
int main() {
|
8
|
+
CU_pSuite dist_suite;
|
9
|
+
|
10
|
+
CU_initialize_registry();
|
11
|
+
dist_suite = CU_add_suite("Distance", NULL, NULL);
|
12
|
+
CU_add_test(dist_suite, "test_001", test_dist_001);
|
13
|
+
CU_console_run_tests();
|
14
|
+
CU_cleanup_registry();
|
15
|
+
|
16
|
+
return(0);
|
17
|
+
}
|
18
|
+
|
19
|
+
/*
 * Adjacent swap: distance() implements plain Levenshtein (insert, delete,
 * substitute only), so turning "ads" into "asd" needs two substitutions
 * and the expected distance is 2, not 1.
 */
void test_dist_001(void) {

  CU_ASSERT(distance("ads", "asd") == 2);
}
|
23
|
+
|
Binary file
|
data/lib/levenshtein_ruby.rb
CHANGED
@@ -2,9 +2,8 @@ require "levenshtein_ruby/version"
|
|
2
2
|
require "levenshtein_ruby/levenshtein_ruby"
|
3
3
|
|
4
4
|
module Levenshtein
|
5
|
-
def self.normalized_distance(a1, a2
|
5
|
+
def self.normalized_distance(a1, a2)
|
6
6
|
size = [a1.size, a2.size].max
|
7
|
-
|
8
7
|
if a1.size == 0 and a2.size == 0
|
9
8
|
0.0
|
10
9
|
elsif a1.size == 0
|
@@ -12,15 +11,7 @@ module Levenshtein
|
|
12
11
|
elsif a2.size == 0
|
13
12
|
a1.size.to_f/size
|
14
13
|
else
|
15
|
-
|
16
|
-
if d = self.distance(a1, a2, (threshold*size).to_i+1)
|
17
|
-
d.to_f/size
|
18
|
-
else
|
19
|
-
nil
|
20
|
-
end
|
21
|
-
else
|
22
|
-
self.distance(a1, a2).to_f/size
|
23
|
-
end
|
14
|
+
self.distance(a1, a2).to_f/size
|
24
15
|
end
|
25
16
|
end
|
26
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: levenshtein_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- meriy100
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,10 +71,18 @@ files:
|
|
71
71
|
- Rakefile
|
72
72
|
- bin/console
|
73
73
|
- bin/setup
|
74
|
+
- ext/levenshtein_ruby/.env
|
75
|
+
- ext/levenshtein_ruby/a.out.dSYM/Contents/Info.plist
|
76
|
+
- ext/levenshtein_ruby/distance
|
77
|
+
- ext/levenshtein_ruby/distance.c
|
78
|
+
- ext/levenshtein_ruby/distance.h
|
74
79
|
- ext/levenshtein_ruby/extconf.rb
|
80
|
+
- ext/levenshtein_ruby/input.txt
|
75
81
|
- ext/levenshtein_ruby/levenshtein_ruby.c
|
82
|
+
- ext/levenshtein_ruby/test/distance_unit.c
|
76
83
|
- levenshtein_ruby-0.1.0.gem
|
77
84
|
- levenshtein_ruby-0.1.1.gem
|
85
|
+
- levenshtein_ruby-0.1.3.gem
|
78
86
|
- levenshtein_ruby.gemspec
|
79
87
|
- lib/levenshtein_ruby.rb
|
80
88
|
- lib/levenshtein_ruby/version.rb
|