phonetics 3.0.9 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -2
  3. data/CHANGELOG +4 -0
  4. data/Cargo.toml +27 -0
  5. data/Rakefile +58 -26
  6. data/VERSION +1 -1
  7. data/bin/phonetics +89 -0
  8. data/ext/phonetics_ruby/Cargo.toml +36 -0
  9. data/ext/phonetics_ruby/build.rs +24 -0
  10. data/ext/phonetics_ruby/extconf.rb +17 -0
  11. data/ext/phonetics_ruby/src/lib.rs +56 -0
  12. data/ext/phonetics_ruby/vendor/phonetics/Cargo.toml +30 -0
  13. data/ext/phonetics_ruby/vendor/phonetics/README.md +29 -0
  14. data/ext/phonetics_ruby/vendor/phonetics/src/compounds.rs +40 -0
  15. data/ext/phonetics_ruby/vendor/phonetics/src/confusion.rs +325 -0
  16. data/ext/phonetics_ruby/vendor/phonetics/src/consonants.rs +363 -0
  17. data/ext/phonetics_ruby/vendor/phonetics/src/cross_class.rs +56 -0
  18. data/ext/phonetics_ruby/vendor/phonetics/src/diacritics.rs +113 -0
  19. data/ext/phonetics_ruby/vendor/phonetics/src/distance.rs +183 -0
  20. data/ext/phonetics_ruby/vendor/phonetics/src/levenshtein.rs +146 -0
  21. data/ext/phonetics_ruby/vendor/phonetics/src/lib.rs +44 -0
  22. data/ext/phonetics_ruby/vendor/phonetics/src/symbols.rs +21 -0
  23. data/ext/phonetics_ruby/vendor/phonetics/src/tokenizer.rs +171 -0
  24. data/ext/phonetics_ruby/vendor/phonetics/src/vowels.rs +197 -0
  25. data/lib/phonetics.rb +77 -2
  26. data/phonetics.gemspec +33 -9
  27. metadata +46 -34
  28. data/.github/workflows/gempush.yml +0 -28
  29. data/.github/workflows/test.yml +0 -20
  30. data/Makefile +0 -6
  31. data/ext/c_levenshtein/extconf.rb +0 -10
  32. data/ext/c_levenshtein/levenshtein.c +0 -223
  33. data/ext/c_levenshtein/next_phoneme_length.c +0 -1365
  34. data/ext/c_levenshtein/next_phoneme_length.h +0 -1
  35. data/ext/c_levenshtein/phonemes.c +0 -53
  36. data/ext/c_levenshtein/phonemes.h +0 -3
  37. data/ext/c_levenshtein/phonetic_cost.c +0 -88593
  38. data/ext/c_levenshtein/phonetic_cost.h +0 -1
  39. data/lib/phonetics/code_generator.rb +0 -228
  40. data/lib/phonetics/distances.rb +0 -245
  41. data/lib/phonetics/levenshtein.rb +0 -27
  42. data/lib/phonetics/ruby_levenshtein.rb +0 -162
@@ -1 +0,0 @@
1
- int next_phoneme_length(int *string, int cursor, int length);
@@ -1,53 +0,0 @@
1
- #include <stdio.h>
2
- #include <stdlib.h>
3
- #include <stdint.h>
4
- #include "./next_phoneme_length.h"
5
-
6
// Scans `string` from index 0 up to `string_length` and records the length of
// every phoneme that next_phoneme_length() reports.
//
// string:        input elements to scan.
// string_length: number of elements in `string`.
// count:         in/out counter. It is NOT reset here: each recorded phoneme is
//                written at lengths[*count] and then *count is incremented, so
//                the caller decides where recording starts.
// lengths:       output array receiving one entry per recorded phoneme; the
//                caller must provide enough room.
//
// A zero result from next_phoneme_length() is treated as "nothing recognised
// at this position": the scan advances by one element and records nothing.
void find_phonemes(int *string, int string_length, int *count, int *lengths) {
  int pos = 0;

  while (pos < string_length) {
    const int span = next_phoneme_length(string, pos, string_length);

    if (!span) {
      // Step over a single element so the scan always makes progress.
      pos++;
      continue;
    }

    lengths[*count] = span;
    *count += 1;
    pos += span;
  }
}
21
-
22
// Collects the bytes of each phoneme (between 1 and 8 of them) into a single
// 64-bit word so two phonemes can be compared with one integer comparison.
// These 64-bit words are how the lookup table in phonetic_cost is implemented.
//
// phonemes: output array; the caller must provide room for `count` entries.
// string:   input elements, one byte value per int.
// count:    number of phonemes to pack.
// lengths:  lengths[i] is the number of consecutive elements of `string` that
//           make up phoneme i.
//
// The first byte of a phoneme ends up in the most significant occupied
// position. Only the low 8 bits of each element are used, so a negative or
// out-of-range element cannot sign-extend over the bytes already packed.
// If lengths[i] exceeds 8, the earliest bytes are shifted out of the word.
//
// NOTE(review): elements are consumed back-to-back starting at string[0].
// find_phonemes() steps over unrecognised elements without recording them, so
// if any were skipped the offsets used here would drift - confirm with callers.
void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths) {
  int idx = 0;
  int i, j;
  for (i = 0; i < count; i++) {
    uint64_t packed = 0;
    for (j = 0; j < lengths[i]; j++) {
      // Mask after widening: int -> uint64_t conversion is modulo 2^64, so the
      // low byte survives and everything above it is discarded.
      packed = (packed << 8) | ((uint64_t) string[idx] & 0xFFu);
      idx++;
    }
    phonemes[i] = packed;
  }
}
36
-
37
// Writes one phoneme to stdout followed by space padding.
//
// string:  input elements; each one is passed to putchar().
// offset:  index in `string` of the phoneme's first element.
// length:  number of elements in the phoneme.
// padding: requested width; the larger of `padding` and `length` is used as
//          the upper bound of the padding loop.
//
// The padding loop starts at length / 4 + 1 rather than at `length`: the
// original author's reasoning is that a printable character takes up to four
// bytes, so roughly one column is assumed to be used per four bytes.
//
// NOTE(review): the original comment describes the groups as 1-4 bytes and
// 5-8 bytes, but integer division makes them 1-3 and 4-7 (length 4 already
// starts one column later). Behaviour is kept as-is; confirm which was meant.
void print_phoneme(int *string, int offset, int length, int padding) {
  const int width = (length > padding) ? length : padding;
  int emitted = 0;
  int column;

  while (emitted < length) {
    putchar(string[offset + emitted]);
    emitted++;
  }

  column = length / 4 + 1;
  while (column < width) {
    printf(" ");
    column++;
  }
}
@@ -1,3 +0,0 @@
1
- void find_phonemes(int *string, int string_length, int *count, int *lengths);
2
- void print_phoneme(int *string, int offset, int length, int padding);
3
- void set_phonemes(uint64_t* phonemes, int* string, int count, int* lengths);