phonetics 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/VERSION +1 -1
- data/_site/orthographic_example.png +0 -0
- data/_site/phonetic_example.png +0 -0
- data/ext/c_levenshtein/levenshtein.c +16 -17
- data/lib/phonetics/levenshtein.rb +3 -2
- data/lib/phonetics/ruby_levenshtein.rb +2 -2
- data/phonetics.gemspec +2 -3
- metadata +3 -2
- data/lib/phonetics/c_levenshtein.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04d203a13c081a435c9d94accd6af2dc517d7992349cda641f415e35c5cc787d
|
4
|
+
data.tar.gz: b8c3376c2db3878a0d3c409814abf0bb70d6fc403ce3866a11d65e8bcb01c5c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dfc217801d09dd1d835debd6440886e56c790c8886291147302b4a7b16f4aafeda51943cc56657d743e9dfab62cd663fae3adb3ea48656e0a6b5ce0cd1130ab
|
7
|
+
data.tar.gz: 1e41e3f77efe3af5a8095a0be58767c3a6ceb3c51ef7add5bebc40db94e14c988e80de221943ef010b7c0818d34a63b84af2290c57c90c6c23a4e9a69f8f9eff
|
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.5.2
|
1
|
+
1.5.3
|
Binary file
|
Binary file
|
@@ -1,33 +1,31 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
+
#include <stdbool.h>
|
2
3
|
#include "./phonetic_cost.h"
|
3
4
|
|
4
|
-
#define
|
5
|
-
#ifdef NDEBUG
|
6
|
-
#define debug(M, ...)
|
7
|
-
#else
|
8
|
-
#define debug(M, ...) printf(M, ##__VA_ARGS__)
|
9
|
-
#endif
|
5
|
+
#define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
|
10
6
|
|
11
7
|
VALUE Binding = Qnil;
|
12
8
|
|
13
9
|
/* Function declarations */
|
14
10
|
|
15
11
|
void Init_c_levenshtein();
|
16
|
-
|
17
|
-
void
|
18
|
-
|
12
|
+
|
13
|
+
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
|
14
|
+
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
|
15
|
+
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
|
19
16
|
|
20
17
|
/* Function implemitations */
|
21
18
|
|
22
19
|
void Init_c_levenshtein() {
|
23
20
|
Binding = rb_define_module("PhoneticsLevenshteinCBinding");
|
24
|
-
rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 2);
|
21
|
+
rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 3);
|
25
22
|
}
|
26
23
|
|
27
|
-
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2){
|
24
|
+
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){
|
28
25
|
|
29
26
|
VALUE *string1_ruby = RARRAY_PTR(_string1);
|
30
27
|
VALUE *string2_ruby = RARRAY_PTR(_string2);
|
28
|
+
bool verbose = _verbose;
|
31
29
|
int string1_length = (int) RARRAY_LEN(_string1);
|
32
30
|
int string2_length = (int) RARRAY_LEN(_string2);
|
33
31
|
// We name them as 'strings' but in C-land we're representing our strings as
|
@@ -47,7 +45,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
47
45
|
|
48
46
|
// Guard clause for two empty strings
|
49
47
|
if (string1_length == 0 && string2_length == 0)
|
50
|
-
return DBL2NUM(0.
|
48
|
+
return DBL2NUM(0.0);
|
51
49
|
|
52
50
|
//
|
53
51
|
// Intial data setup
|
@@ -74,10 +72,10 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
74
72
|
|
75
73
|
// First, set the top row and left column of the matrix using the sequential
|
76
74
|
// phonetic edit distance of string1 and string2, respectively
|
77
|
-
set_initial(d, string1, string1_length, string2, string2_length);
|
75
|
+
set_initial(d, string1, string1_length, string2, string2_length, verbose);
|
78
76
|
|
79
77
|
debug("before:\n");
|
80
|
-
print_matrix(d, string1, string1_length, string2, string2_length);
|
78
|
+
print_matrix(d, string1, string1_length, string2, string2_length, verbose);
|
81
79
|
|
82
80
|
// Then walk through the matrix and fill in each cell with the lowest-cost
|
83
81
|
// phonetic edit distance for that matrix cell.
|
@@ -117,7 +115,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
117
115
|
|
118
116
|
d[(j * (string1_length+1)) + i] = min + cost;
|
119
117
|
debug("\n");
|
120
|
-
print_matrix(d, string1, string1_length, string2, string2_length);
|
118
|
+
print_matrix(d, string1, string1_length, string2, string2_length, verbose);
|
121
119
|
}
|
122
120
|
}
|
123
121
|
|
@@ -141,7 +139,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
141
139
|
// Subsequent values are the cumulative phonetic distance between each
|
142
140
|
// phoneme within the same string.
|
143
141
|
// "aek" -> [0.0, 1.0, 1.61, 2.61]
|
144
|
-
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length) {
|
142
|
+
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
145
143
|
|
146
144
|
double distance_between_first_phonemes;
|
147
145
|
int i, j;
|
@@ -154,6 +152,7 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
154
152
|
distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]);
|
155
153
|
}
|
156
154
|
|
155
|
+
d[0] = (double) 0.0;
|
157
156
|
// Set the first value of string1's sequential phonetic calculation (maps to
|
158
157
|
// cell x=1, y=0)
|
159
158
|
d[1] = distance_between_first_phonemes;
|
@@ -176,7 +175,7 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
176
175
|
}
|
177
176
|
|
178
177
|
// A handy visualization for developers
|
179
|
-
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length) {
|
178
|
+
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
180
179
|
int i, j;
|
181
180
|
debug(" ");
|
182
181
|
for (i=0; i < string1_length; i++) {
|
@@ -15,11 +15,12 @@ module Phonetics
|
|
15
15
|
module Levenshtein
|
16
16
|
extend ::PhoneticsLevenshteinCBinding
|
17
17
|
|
18
|
-
def self.distance(str1, str2)
|
18
|
+
def self.distance(str1, str2, verbose = false)
|
19
19
|
ensure_is_phonetic!(str1, str2)
|
20
20
|
internal_phonetic_distance(
|
21
21
|
Phonetics.as_utf_8_long(str1),
|
22
|
-
Phonetics.as_utf_8_long(str2)
|
22
|
+
Phonetics.as_utf_8_long(str2),
|
23
|
+
verbose
|
23
24
|
)
|
24
25
|
end
|
25
26
|
|
data/phonetics.gemspec
CHANGED
@@ -11,12 +11,11 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
-
|
15
|
-
spec.extensions = ["ext/c_levenshtein/extconf.rb"]
|
14
|
+
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
16
15
|
|
17
16
|
# Specify which files should be added to the gem when it is released.
|
18
17
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
19
|
-
spec.files
|
18
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
20
19
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
21
20
|
end
|
22
21
|
spec.require_paths = ['lib']
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.2
|
4
|
+
version: 1.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
@@ -113,12 +113,13 @@ files:
|
|
113
113
|
- README.md
|
114
114
|
- Rakefile
|
115
115
|
- VERSION
|
116
|
+
- _site/orthographic_example.png
|
117
|
+
- _site/phonetic_example.png
|
116
118
|
- ext/c_levenshtein/extconf.rb
|
117
119
|
- ext/c_levenshtein/levenshtein.c
|
118
120
|
- ext/c_levenshtein/phonetic_cost.c
|
119
121
|
- ext/c_levenshtein/phonetic_cost.h
|
120
122
|
- lib/phonetics.rb
|
121
|
-
- lib/phonetics/c_levenshtein.bundle
|
122
123
|
- lib/phonetics/levenshtein.rb
|
123
124
|
- lib/phonetics/ruby_levenshtein.rb
|
124
125
|
- lib/phonetics/version.rb
|
Binary file
|