phonetics 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +17 -2
- data/Cargo.toml +27 -0
- data/Rakefile +58 -26
- data/VERSION +1 -1
- data/bin/phonetics +89 -0
- data/ext/phonetics_ruby/Cargo.toml +36 -0
- data/ext/phonetics_ruby/build.rs +24 -0
- data/ext/phonetics_ruby/extconf.rb +17 -0
- data/ext/phonetics_ruby/src/lib.rs +56 -0
- data/ext/phonetics_ruby/vendor/phonetics/Cargo.toml +30 -0
- data/ext/phonetics_ruby/vendor/phonetics/README.md +29 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/compounds.rs +40 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/confusion.rs +325 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/consonants.rs +363 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/cross_class.rs +56 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/diacritics.rs +113 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/distance.rs +183 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/levenshtein.rs +146 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/lib.rs +44 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/symbols.rs +21 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs +171 -0
- data/ext/phonetics_ruby/vendor/phonetics/src/vowels.rs +197 -0
- data/lib/phonetics.rb +77 -2
- data/phonetics.gemspec +33 -9
- metadata +45 -34
- data/.github/workflows/gempush.yml +0 -28
- data/.github/workflows/test.yml +0 -20
- data/Makefile +0 -9
- data/ext/c_levenshtein/extconf.rb +0 -10
- data/ext/c_levenshtein/levenshtein.c +0 -223
- data/ext/c_levenshtein/next_phoneme_length.c +0 -1365
- data/ext/c_levenshtein/next_phoneme_length.h +0 -1
- data/ext/c_levenshtein/phonemes.c +0 -53
- data/ext/c_levenshtein/phonemes.h +0 -3
- data/ext/c_levenshtein/phonetic_cost.c +0 -88593
- data/ext/c_levenshtein/phonetic_cost.h +0 -1
- data/lib/phonetics/code_generator.rb +0 -228
- data/lib/phonetics/distances.rb +0 -249
- data/lib/phonetics/levenshtein.rb +0 -27
- data/lib/phonetics/ruby_levenshtein.rb +0 -162
|
@@ -1 +0,0 @@
|
|
|
1
|
-
// Prototype only; the definition lives in another translation unit.
// NOTE(review): the visible caller passes (string, cursor, total length) and
// treats a zero return as "no phoneme starts at this cursor" — confirm against
// the definition.
int next_phoneme_length(int *string, int cursor, int length);
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
#include <stdio.h>
|
|
2
|
-
#include <stdlib.h>
|
|
3
|
-
#include <stdint.h>
|
|
4
|
-
#include "./next_phoneme_length.h"
|
|
5
|
-
|
|
6
|
-
/*
 * Scan `string` from left to right and append the length of every phoneme
 * reported by next_phoneme_length() to `lengths`.
 *
 *   string         input elements to scan
 *   string_length  number of elements in `string`
 *   count          in/out: index of the next free slot in `lengths`; it is
 *                  only ever incremented here, so the caller must initialise it
 *   lengths        output array; no bounds check is performed on it
 *
 * When next_phoneme_length() returns 0 the cursor advances by a single element
 * and nothing is recorded for that position.
 */
void find_phonemes(int *string, int string_length, int *count, int *lengths) {
  int cursor = 0;

  while (cursor < string_length) {
    int step = next_phoneme_length(string, cursor, string_length);

    if (step == 0) {
      /* Nothing recognised at this position: skip one element. */
      cursor += 1;
      continue;
    }

    lengths[*count] = step;
    *count += 1;
    cursor += step;
  }
}
|
|
21
|
-
|
|
22
|
-
// Collect between 1 and 8 bytes of a phoneme into a single 64-bit word so we
// can compare two phonemes using just one instruction.
// These 64-bit words are how we implement the lookup table in phonetic_cost.
//
//   phonemes  output array; receives `count` packed words
//   string    input elements, consumed consecutively from index 0
//   count     number of phonemes to pack
//   lengths   lengths[i] is the number of elements belonging to phoneme i
//
// Each element is packed big-endian style: the first element of a phoneme ends
// up in the most significant occupied byte. Only the low 8 bits of each element
// are used, so a negative int (for example a byte that was widened from a
// signed char) cannot sign-extend into the upper bytes of the word, and an
// out-of-range value cannot spill into the neighbouring byte.
//
// NOTE(review): elements are read back-to-back, so this assumes the phonemes
// described by `lengths` are contiguous in `string` — confirm against callers.
void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths) {
  int idx = 0;
  int i, j;

  for (i = 0; i < count; i++) {
    uint64_t packed = 0;

    for (j = 0; j < lengths[i]; j++) {
      uint64_t byte = (uint64_t) ((unsigned int) string[idx] & 0xFFu);
      packed = (packed << 8) | byte;
      idx++;
    }

    phonemes[i] = packed;
  }
}
|
|
36
|
-
|
|
37
|
-
/*
 * Write one phoneme to stdout followed by space padding.
 *
 *   string   element array holding the phoneme
 *   offset   index of the phoneme's first element in `string`
 *   length   number of elements to emit via putchar()
 *   padding  requested column width; the larger of `padding` and `length` is
 *            used as the upper bound for the padding loop
 */
void print_phoneme(int *string, int offset, int length, int padding) {
  int limit = (length > padding) ? length : padding;
  int written = 0;
  int column;

  while (written < length) {
    putchar(string[offset + written]);
    written++;
  }

  /*
   * The printable characters take up to four bytes, so the number of columns
   * already used is estimated as length / 4 + 1 and the rest is filled with
   * spaces.
   * NOTE(review): with integer division a 4-byte phoneme is counted as two
   * columns (4 / 4 + 1 == 2); confirm whether 1-4 bytes were meant to count
   * as one.
   */
  column = length / 4 + 1;
  while (column < limit) {
    printf(" ");
    column++;
  }
}
|