levenshtein_ruby 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/ext/levenshtein_ruby/.env +2 -0
- data/ext/levenshtein_ruby/distance +0 -0
- data/ext/levenshtein_ruby/distance.c +80 -0
- data/ext/levenshtein_ruby/distance.h +1 -0
- data/ext/levenshtein_ruby/input.txt +2 -0
- data/ext/levenshtein_ruby/levenshtein_ruby.c +3 -43
- data/ext/levenshtein_ruby/test/distance_unit.c +23 -0
- data/levenshtein_ruby-0.1.3.gem +0 -0
- data/lib/levenshtein_ruby.rb +2 -11
- data/lib/levenshtein_ruby/version.rb +1 -1
- metadata +10 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a8efe75bc5567183bb5e5da4d984cf6261357e0
|
4
|
+
data.tar.gz: d670a7c6b049833e2a39daabd687e0cb33a6c2f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a963b3fda4118b3311b6c1d06f3027927ebf941e6330cbf44f84a6f3cc2dbbb724045a1c4a1f2645f74f31e0f9f491bbd5499d208116699df8ff42a63c1f6630
|
7
|
+
data.tar.gz: 5e4850d3cbd97057b9f52642d8762d0a7b4804fac7c8fc263ca7ab17afe2d107ccf3ea6de08e3846f816f642f5ec68c5a92131d4569d9057afa2abddc160765d
|
data/.gitignore
CHANGED
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#include "distance.h"
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
|
6
|
+
/*
 * Returns a pointer to the cell at row x, column y of a matrix stored as an
 * array of row pointers. No bounds checking is performed.
 */
int* get_point(int** double_pointer, int x, int y)
{
  return &double_pointer[x][y];
}

/*
 * Stores value in the cell at row x, column y of a matrix stored as an array
 * of row pointers. No bounds checking is performed.
 */
void set_point(int** double_pointer, int x, int y, int value)
{
  double_pointer[x][y] = value;
}

/*
 * Frees the first `rows` row buffers of `matrix` and then the row-pointer
 * array itself. Used both for normal teardown and for unwinding a partially
 * built matrix after an allocation failure.
 */
static void free_matrix(int** matrix, int rows)
{
  int i;

  for (i = 0; i < rows; i++) {
    free(matrix[i]);
  }
  free(matrix);
}

/*
 * Computes the Levenshtein edit distance between two NUL-terminated strings,
 * comparing them char by char (insert, delete and substitute each cost 1).
 *
 * word1, word2: NUL-terminated strings; must not be NULL.
 *
 * Returns the edit distance, or -1 if the working matrix cannot be
 * allocated (-1 can never be a valid distance).
 */
int distance(const char* word1, const char* word2)
{
  int i;
  int len1 = (int)strlen(word1);
  int len2 = (int)strlen(word2);
  int result;
  int** matrix;

  /* (len1+1) x (len2+1) table: cell [i][j] holds the distance between the
     first i chars of word1 and the first j chars of word2. */
  matrix = malloc(((size_t)len1 + 1) * sizeof *matrix);
  if (matrix == NULL) {
    return -1;
  }

  for (i = 0; i <= len1; i++) {
    matrix[i] = malloc(((size_t)len2 + 1) * sizeof **matrix);
    if (matrix[i] == NULL) {
      /* Rows 0..i-1 were allocated successfully; release only those. */
      free_matrix(matrix, i);
      return -1;
    }
  }

  /* Turning a prefix into the empty string costs one deletion per char... */
  for (i = 0; i <= len1; i++) {
    set_point(matrix, i, 0, i);
  }

  /* ...and building a prefix from the empty string costs one insertion per char. */
  for (i = 0; i <= len2; i++) {
    set_point(matrix, 0, i, i);
  }

  for (i = 1; i <= len1; i++) {
    int j;
    char c1 = word1[i-1];

    for (j = 1; j <= len2; j++) {
      char c2 = word2[j-1];

      if (c1 == c2) {
        /* Matching chars add no cost on top of the two shorter prefixes. */
        set_point(matrix, i, j, *get_point(matrix, i-1, j-1));
      }
      else {
        int delete = *get_point(matrix, i-1, j) + 1;
        int insert = *get_point(matrix, i, j-1) + 1;
        int substitute = *get_point(matrix, i-1, j-1) + 1;
        int minimum = delete;

        if (insert < minimum) {
          minimum = insert;
        }
        if (substitute < minimum) {
          minimum = substitute;
        }
        set_point(matrix, i, j, minimum);
      }
    }
  }

  result = *get_point(matrix, len1, len2);

  /* Free every row as well as the row-pointer array; freeing only the
     outer array would leak all len1+1 row buffers on each call. */
  free_matrix(matrix, len1 + 1);
  return result;
}
|
72
|
+
|
73
|
+
/*
 * Command-line driver: reads two whitespace-delimited words from stdin and
 * prints their edit distance followed by a newline.
 *
 * Returns EXIT_SUCCESS on success, or EXIT_FAILURE when fewer than two
 * words could be read.
 *
 * NOTE(review): this file is also shipped in the gem's ext directory;
 * confirm that defining main() here does not clash with the extension build.
 */
int main(void){
  char word1[256];
  char word2[256];

  /* %255s caps each read at 255 chars plus the terminating NUL, so an
     over-long word can no longer overflow the 256-byte buffers. */
  if (scanf("%255s", word1) != 1 || scanf("%255s", word2) != 1) {
    fprintf(stderr, "expected two words on standard input\n");
    return EXIT_FAILURE;
  }

  printf("%d\n", distance(word1, word2));
  return EXIT_SUCCESS;
}
|
80
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
int distance(const char* word1, const char* word2);
|
@@ -1,56 +1,16 @@
|
|
1
1
|
#include <stdio.h>
|
2
2
|
#include <ruby.h>
|
3
|
+
#include "distance.h"
|
3
4
|
|
4
5
|
VALUE cLevenshtein;
|
5
6
|
|
6
7
|
|
7
8
|
VALUE distance_func(VALUE self, VALUE s_word1, VALUE s_word2)
|
8
9
|
{
|
10
|
+
|
9
11
|
char* word1 = StringValuePtr(s_word1);
|
10
12
|
char* word2 = StringValuePtr(s_word2);
|
11
|
-
|
12
|
-
int len2 = strlen(word2);
|
13
|
-
int matrix[len1 + 1][len2 + 1];
|
14
|
-
int i;
|
15
|
-
for (i = 0; i <= len1; i++) {
|
16
|
-
matrix[i][0] = i;
|
17
|
-
}
|
18
|
-
for (i = 0; i <= len2; i++) {
|
19
|
-
matrix[0][i] = i;
|
20
|
-
}
|
21
|
-
for (i = 1; i <= len1; i++) {
|
22
|
-
int j;
|
23
|
-
char c1;
|
24
|
-
|
25
|
-
c1 = word1[i-1];
|
26
|
-
for (j = 1; j <= len2; j++) {
|
27
|
-
char c2;
|
28
|
-
|
29
|
-
c2 = word2[j-1];
|
30
|
-
if (c1 == c2) {
|
31
|
-
matrix[i][j] = matrix[i-1][j-1];
|
32
|
-
}
|
33
|
-
else {
|
34
|
-
int delete;
|
35
|
-
int insert;
|
36
|
-
int substitute;
|
37
|
-
int minimum;
|
38
|
-
|
39
|
-
delete = matrix[i-1][j] + 1;
|
40
|
-
insert = matrix[i][j-1] + 1;
|
41
|
-
substitute = matrix[i-1][j-1] + 1;
|
42
|
-
minimum = delete;
|
43
|
-
if (insert < minimum) {
|
44
|
-
minimum = insert;
|
45
|
-
}
|
46
|
-
if (substitute < minimum) {
|
47
|
-
minimum = substitute;
|
48
|
-
}
|
49
|
-
matrix[i][j] = minimum;
|
50
|
-
}
|
51
|
-
}
|
52
|
-
}
|
53
|
-
return INT2NUM(matrix[len1][len2]);
|
13
|
+
return INT2NUM(distance(word1, word2));
|
54
14
|
}
|
55
15
|
|
56
16
|
void Init_levenshtein_ruby()
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#include <CUnit/CUnit.h>
|
2
|
+
#include <CUnit/Console.h>
|
3
|
+
#include "../distance.h"
|
4
|
+
|
5
|
+
void test_dist_001(void);
|
6
|
+
|
7
|
+
int main() {
|
8
|
+
CU_pSuite dist_suite;
|
9
|
+
|
10
|
+
CU_initialize_registry();
|
11
|
+
dist_suite = CU_add_suite("Distance", NULL, NULL);
|
12
|
+
CU_add_test(dist_suite, "test_001", test_dist_001);
|
13
|
+
CU_console_run_tests();
|
14
|
+
CU_cleanup_registry();
|
15
|
+
|
16
|
+
return(0);
|
17
|
+
}
|
18
|
+
|
19
|
+
/*
 * Checks distance() against a hand-computed edit distance.
 */
void test_dist_001(void) {
  /* distance() has no transposition step, so swapping the adjacent "ds"
     into "sd" costs two single-character edits, not one. */
  CU_ASSERT(distance("ads", "asd") == 2);
}
|
23
|
+
|
Binary file
|
data/lib/levenshtein_ruby.rb
CHANGED
@@ -2,9 +2,8 @@ require "levenshtein_ruby/version"
|
|
2
2
|
require "levenshtein_ruby/levenshtein_ruby"
|
3
3
|
|
4
4
|
module Levenshtein
|
5
|
-
def self.normalized_distance(a1, a2
|
5
|
+
def self.normalized_distance(a1, a2)
|
6
6
|
size = [a1.size, a2.size].max
|
7
|
-
|
8
7
|
if a1.size == 0 and a2.size == 0
|
9
8
|
0.0
|
10
9
|
elsif a1.size == 0
|
@@ -12,15 +11,7 @@ module Levenshtein
|
|
12
11
|
elsif a2.size == 0
|
13
12
|
a1.size.to_f/size
|
14
13
|
else
|
15
|
-
|
16
|
-
if d = self.distance(a1, a2, (threshold*size).to_i+1)
|
17
|
-
d.to_f/size
|
18
|
-
else
|
19
|
-
nil
|
20
|
-
end
|
21
|
-
else
|
22
|
-
self.distance(a1, a2).to_f/size
|
23
|
-
end
|
14
|
+
self.distance(a1, a2).to_f/size
|
24
15
|
end
|
25
16
|
end
|
26
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: levenshtein_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- meriy100
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,10 +71,18 @@ files:
|
|
71
71
|
- Rakefile
|
72
72
|
- bin/console
|
73
73
|
- bin/setup
|
74
|
+
- ext/levenshtein_ruby/.env
|
75
|
+
- ext/levenshtein_ruby/a.out.dSYM/Contents/Info.plist
|
76
|
+
- ext/levenshtein_ruby/distance
|
77
|
+
- ext/levenshtein_ruby/distance.c
|
78
|
+
- ext/levenshtein_ruby/distance.h
|
74
79
|
- ext/levenshtein_ruby/extconf.rb
|
80
|
+
- ext/levenshtein_ruby/input.txt
|
75
81
|
- ext/levenshtein_ruby/levenshtein_ruby.c
|
82
|
+
- ext/levenshtein_ruby/test/distance_unit.c
|
76
83
|
- levenshtein_ruby-0.1.0.gem
|
77
84
|
- levenshtein_ruby-0.1.1.gem
|
85
|
+
- levenshtein_ruby-0.1.3.gem
|
78
86
|
- levenshtein_ruby.gemspec
|
79
87
|
- lib/levenshtein_ruby.rb
|
80
88
|
- lib/levenshtein_ruby/version.rb
|