phonetics 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/VERSION +1 -1
- data/_site/orthographic_example.png +0 -0
- data/_site/phonetic_example.png +0 -0
- data/ext/c_levenshtein/levenshtein.c +16 -17
- data/lib/phonetics/levenshtein.rb +3 -2
- data/lib/phonetics/ruby_levenshtein.rb +2 -2
- data/phonetics.gemspec +2 -3
- metadata +3 -2
- data/lib/phonetics/c_levenshtein.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04d203a13c081a435c9d94accd6af2dc517d7992349cda641f415e35c5cc787d
|
4
|
+
data.tar.gz: b8c3376c2db3878a0d3c409814abf0bb70d6fc403ce3866a11d65e8bcb01c5c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dfc217801d09dd1d835debd6440886e56c790c8886291147302b4a7b16f4aafeda51943cc56657d743e9dfab62cd663fae3adb3ea48656e0a6b5ce0cd1130ab
|
7
|
+
data.tar.gz: 1e41e3f77efe3af5a8095a0be58767c3a6ceb3c51ef7add5bebc40db94e14c988e80de221943ef010b7c0818d34a63b84af2290c57c90c6c23a4e9a69f8f9eff
|
data/.gitignore
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.5.2
|
1
|
+
1.5.3
|
Binary file
|
Binary file
|
@@ -1,33 +1,31 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
+
#include <stdbool.h>
|
2
3
|
#include "./phonetic_cost.h"
|
3
4
|
|
4
|
-
#define
|
5
|
-
#ifdef NDEBUG
|
6
|
-
#define debug(M, ...)
|
7
|
-
#else
|
8
|
-
#define debug(M, ...) printf(M, ##__VA_ARGS__)
|
9
|
-
#endif
|
5
|
+
#define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
|
10
6
|
|
11
7
|
VALUE Binding = Qnil;
|
12
8
|
|
13
9
|
/* Function declarations */
|
14
10
|
|
15
11
|
void Init_c_levenshtein();
|
16
|
-
|
17
|
-
void
|
18
|
-
|
12
|
+
|
13
|
+
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
|
14
|
+
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
|
15
|
+
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
|
19
16
|
|
20
17
|
/* Function implemitations */
|
21
18
|
|
22
19
|
void Init_c_levenshtein() {
|
23
20
|
Binding = rb_define_module("PhoneticsLevenshteinCBinding");
|
24
|
-
rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 2);
|
21
|
+
rb_define_method(Binding, "internal_phonetic_distance", method_internal_phonetic_distance, 3);
|
25
22
|
}
|
26
23
|
|
27
|
-
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2){
|
24
|
+
VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){
|
28
25
|
|
29
26
|
VALUE *string1_ruby = RARRAY_PTR(_string1);
|
30
27
|
VALUE *string2_ruby = RARRAY_PTR(_string2);
|
28
|
+
bool verbose = _verbose;
|
31
29
|
int string1_length = (int) RARRAY_LEN(_string1);
|
32
30
|
int string2_length = (int) RARRAY_LEN(_string2);
|
33
31
|
// We name them as 'strings' but in C-land we're representing our strings as
|
@@ -47,7 +45,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
47
45
|
|
48
46
|
// Guard clause for two empty strings
|
49
47
|
if (string1_length == 0 && string2_length == 0)
|
50
|
-
return DBL2NUM(0.
|
48
|
+
return DBL2NUM(0.0);
|
51
49
|
|
52
50
|
//
|
53
51
|
// Intial data setup
|
@@ -74,10 +72,10 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
74
72
|
|
75
73
|
// First, set the top row and left column of the matrix using the sequential
|
76
74
|
// phonetic edit distance of string1 and string2, respectively
|
77
|
-
set_initial(d, string1, string1_length, string2, string2_length);
|
75
|
+
set_initial(d, string1, string1_length, string2, string2_length, verbose);
|
78
76
|
|
79
77
|
debug("before:\n");
|
80
|
-
print_matrix(d, string1, string1_length, string2, string2_length);
|
78
|
+
print_matrix(d, string1, string1_length, string2, string2_length, verbose);
|
81
79
|
|
82
80
|
// Then walk through the matrix and fill in each cell with the lowest-cost
|
83
81
|
// phonetic edit distance for that matrix cell.
|
@@ -117,7 +115,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
117
115
|
|
118
116
|
d[(j * (string1_length+1)) + i] = min + cost;
|
119
117
|
debug("\n");
|
120
|
-
print_matrix(d, string1, string1_length, string2, string2_length);
|
118
|
+
print_matrix(d, string1, string1_length, string2, string2_length, verbose);
|
121
119
|
}
|
122
120
|
}
|
123
121
|
|
@@ -141,7 +139,7 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
|
|
141
139
|
// Subsequent values are the cumulative phonetic distance between each
|
142
140
|
// phoneme within the same string.
|
143
141
|
// "aek" -> [0.0, 1.0, 1.61, 2.61]
|
144
|
-
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length) {
|
142
|
+
void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
145
143
|
|
146
144
|
double distance_between_first_phonemes;
|
147
145
|
int i, j;
|
@@ -154,6 +152,7 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
154
152
|
distance_between_first_phonemes = phonetic_cost(string1[0], string2[0]);
|
155
153
|
}
|
156
154
|
|
155
|
+
d[0] = (double) 0.0;
|
157
156
|
// Set the first value of string1's sequential phonetic calculation (maps to
|
158
157
|
// cell x=1, y=0)
|
159
158
|
d[1] = distance_between_first_phonemes;
|
@@ -176,7 +175,7 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
|
|
176
175
|
}
|
177
176
|
|
178
177
|
// A handy visualization for developers
|
179
|
-
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length) {
|
178
|
+
void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
|
180
179
|
int i, j;
|
181
180
|
debug(" ");
|
182
181
|
for (i=0; i < string1_length; i++) {
|
@@ -15,11 +15,12 @@ module Phonetics
|
|
15
15
|
module Levenshtein
|
16
16
|
extend ::PhoneticsLevenshteinCBinding
|
17
17
|
|
18
|
-
def self.distance(str1, str2)
|
18
|
+
def self.distance(str1, str2, verbose = false)
|
19
19
|
ensure_is_phonetic!(str1, str2)
|
20
20
|
internal_phonetic_distance(
|
21
21
|
Phonetics.as_utf_8_long(str1),
|
22
|
-
Phonetics.as_utf_8_long(str2)
|
22
|
+
Phonetics.as_utf_8_long(str2),
|
23
|
+
verbose
|
23
24
|
)
|
24
25
|
end
|
25
26
|
|
data/phonetics.gemspec
CHANGED
@@ -11,12 +11,11 @@ Gem::Specification.new do |spec|
|
|
11
11
|
spec.homepage = 'https://github.com/JackDanger/phonetics'
|
12
12
|
spec.license = 'MIT'
|
13
13
|
|
14
|
-
|
15
|
-
spec.extensions = ["ext/c_levenshtein/extconf.rb"]
|
14
|
+
spec.extensions = ['ext/c_levenshtein/extconf.rb']
|
16
15
|
|
17
16
|
# Specify which files should be added to the gem when it is released.
|
18
17
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
19
|
-
spec.files
|
18
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
20
19
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
21
20
|
end
|
22
21
|
spec.require_paths = ['lib']
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: phonetics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.2
|
4
|
+
version: 1.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jack Danger
|
@@ -113,12 +113,13 @@ files:
|
|
113
113
|
- README.md
|
114
114
|
- Rakefile
|
115
115
|
- VERSION
|
116
|
+
- _site/orthographic_example.png
|
117
|
+
- _site/phonetic_example.png
|
116
118
|
- ext/c_levenshtein/extconf.rb
|
117
119
|
- ext/c_levenshtein/levenshtein.c
|
118
120
|
- ext/c_levenshtein/phonetic_cost.c
|
119
121
|
- ext/c_levenshtein/phonetic_cost.h
|
120
122
|
- lib/phonetics.rb
|
121
|
-
- lib/phonetics/c_levenshtein.bundle
|
122
123
|
- lib/phonetics/levenshtein.rb
|
123
124
|
- lib/phonetics/ruby_levenshtein.rb
|
124
125
|
- lib/phonetics/version.rb
|
Binary file
|