phonetics 1.5.4 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8f26a3fc9c4a6729a0ddd04beee049efc4d1c0e8e2b887cfd14974de71d989a
4
- data.tar.gz: 4449ecff87444214065a0f964c2cd7a0ba2ce93f52b3cbeae14e4777afcba3df
3
+ metadata.gz: '05877cd48ad83d92a29dbcf60a5bd73373c7ecf974387487d37b719e4df0db4d'
4
+ data.tar.gz: d400d6249ac9204e920e8bc617b00b21009a4d15aeb719e589a100482114a927
5
5
  SHA512:
6
- metadata.gz: 4309942350149685324fa56a9c642f9456c7abe1993ab7f0801fbf56883afc4e31750e969e6d49e3da0ecd7b16a8d93b9212f5469ddc1dcd4c86699d0f48cd46
7
- data.tar.gz: 8007ae8ef77e2d9e5f055fc597c4c5ffbd9ee00007d9d31ed4d06ae96e9980bc2dfc92149c96bd94098faa7d0670c5d7cbd13a30f45acd8d8c6000853692f341
6
+ metadata.gz: 7bfd7f82c7f579e377f8d7669480c7cf4d73cf1c1b1259db1e19d4bbb9c8573480a7825ca51df6e4f600feba67ecfee10a2f9b66a49a9f33e888f94b04be9528
7
+ data.tar.gz: 3ddc3f72fbec2a0f833512b1d84c6d05d30c90db63e48ceb7f44c19cd7df43205517079b4cda113e9d80fe1a1c27586771eaf575c423beec2fa46a55a504cfd0
data/Rakefile CHANGED
@@ -5,23 +5,34 @@ require 'rake/extensiontask'
5
5
  require 'rspec/core/rake_task'
6
6
  require 'rubocop/rake_task'
7
7
 
8
+ EXT_PATH = 'ext/c_levenshtein'
9
+
8
10
  Rake::ExtensionTask.new('c_levenshtein') do |extension|
9
- extension.ext_dir = 'ext/c_levenshtein'
11
+ extension.ext_dir = EXT_PATH
10
12
  extension.lib_dir = 'lib/phonetics'
11
13
  end
12
14
 
13
- PHONETIC_COST_C_EXTENSION = File.expand_path('ext/c_levenshtein/phonetic_cost.c', __dir__)
15
+ PHONETIC_COST_C_EXTENSION = File.join(EXT_PATH, 'phonetic_cost.c')
16
+ NEXT_PHONEME_LENGTH_C_EXTENSION = File.join(EXT_PATH, 'next_phoneme_length.c')
17
+
18
+ require_relative './lib/phonetics/code_generator'
14
19
 
15
- namespace :compile do
16
- desc 'Write phonetic_cost.c using Phonetic values'
17
- task :phonetic_cost do
18
- require_relative './lib/phonetics'
19
- file = File.open(PHONETIC_COST_C_EXTENSION, 'w')
20
- Phonetics.generate_phonetic_cost_c_code(file)
21
- puts "Wrote #{PHONETIC_COST_C_EXTENSION}"
22
- end
20
+ desc 'Write phonetic_cost.c using Phonetic values'
21
+ task PHONETIC_COST_C_EXTENSION do
22
+ file = File.open(PHONETIC_COST_C_EXTENSION, 'w')
23
+ Phonetics::CodeGenerator.new(file).generate_phonetic_cost_c_code
24
+ puts "Wrote #{PHONETIC_COST_C_EXTENSION}"
23
25
  end
24
- task compile: 'compile:phonetic_cost'
26
+
27
+ desc 'Write phonemes.c using a lookup table of byte arrays'
28
+ task NEXT_PHONEME_LENGTH_C_EXTENSION do
29
+ file = File.open(NEXT_PHONEME_LENGTH_C_EXTENSION, 'w')
30
+ Phonetics::CodeGenerator.new(file).generate_next_phoneme_length_c_code
31
+ puts "Wrote #{NEXT_PHONEME_LENGTH_C_EXTENSION}"
32
+ end
33
+
34
+ task compile: PHONETIC_COST_C_EXTENSION
35
+ task compile: NEXT_PHONEME_LENGTH_C_EXTENSION
25
36
 
26
37
  RSpec::Core::RakeTask.new(:spec)
27
38
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.5.4
1
+ 1.8.0
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << '../lib'
5
+
6
+ require 'bundler/setup'
7
+ require 'pry-byebug'
8
+ require 'phonetics'
9
+ require 'phonetics/levenshtein'
10
+ require 'phonetics/ruby_levenshtein'
11
+ require 'phonetics/code_generator'
12
+
13
+ Phonetics.pry
@@ -1,5 +1,9 @@
1
- #include "ruby.h"
2
1
  #include <stdbool.h>
2
+ #include "ruby.h"
3
+ #include "ruby/encoding.h"
4
+ #include "ruby/re.h"
5
+ #include "./phonemes.h"
6
+ #include "./next_phoneme_length.h"
3
7
  #include "./phonetic_cost.h"
4
8
 
5
9
  #define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
@@ -10,8 +14,8 @@ VALUE Binding = Qnil;
10
14
 
11
15
  void Init_c_levenshtein();
12
16
 
13
- void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
14
- void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
17
+ void set_initial(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
18
+ void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
15
19
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
16
20
 
17
21
  /* Function implemitations */
@@ -22,15 +26,23 @@ void Init_c_levenshtein() {
22
26
  }
23
27
 
24
28
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){
29
+ if (!RB_TYPE_P(_string1, T_STRING)) {
30
+ rb_raise(rb_eArgError, "must pass string as first argument");
31
+ }
32
+ if (!RB_TYPE_P(_string2, T_STRING)) {
33
+ rb_raise(rb_eArgError, "must pass string as second argument");
34
+ }
25
35
 
26
- VALUE *string1_ruby = RARRAY_PTR(_string1);
27
- VALUE *string2_ruby = RARRAY_PTR(_string2);
28
36
  bool verbose = _verbose;
29
- int string1_length = (int) RARRAY_LEN(_string1);
30
- int string2_length = (int) RARRAY_LEN(_string2);
31
- // We name them as 'strings' but in C-land we're representing our strings as
32
- // arrays of `int`s, where each int represents a consistent (if unusual)
33
- // encoding of a grapheme cluster (a symbol for a phoneme).
37
+ int string1_length = (int) RSTRING_LEN(_string1);
38
+ int string2_length = (int) RSTRING_LEN(_string2);
39
+
40
+ // Given the input strings, we count the phonemes in each and store both the
41
+ // total and, in a phoneme_sizes array, the length of each.
42
+ int string1_phoneme_count = 0;
43
+ int string2_phoneme_count = 0;
44
+ int string1_phoneme_sizes[string1_length + 1];
45
+ int string2_phoneme_sizes[string2_length + 1];
34
46
  int string1[string1_length + 1];
35
47
  int string2[string2_length + 1];
36
48
 
@@ -42,86 +54,85 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
42
54
  insert, replace,
43
55
  cost;
44
56
  int i, j; // Frequently overwritten loop vars
45
-
46
- // Guard clause for two empty strings
47
- if (string1_length == 0 && string2_length == 0)
48
- return DBL2NUM(0.0);
49
-
50
- //
51
- // Intial data setup
52
- //
57
+ int string1_offset = 0;
58
+ int string2_offset = 0;
53
59
 
54
60
  for (i = 0; i < string1_length; i++) {
55
- string1[i] = NUM2INT(string1_ruby[i]);
56
- debug("string1[%d] = %d\n", i, string1[i]);
61
+ string1[i] = (RSTRING_PTR(_string1)[i] & 0xff);
57
62
  }
58
- for (j = 0; j < string2_length; j++) {
59
- string2[j] = NUM2INT(string2_ruby[j]);
60
- debug("string2[%d] = %d\n", i, string2[j]);
63
+ for (i = 0; i < string2_length; i++) {
64
+ string2[i] = RSTRING_PTR(_string2)[i] & 0xff;
61
65
  }
62
66
 
63
- // one-dimensional representation of 2 dimentional array len(string1)+1 *
64
- // len(string2)+1
65
- d = malloc((sizeof(double)) * (string1_length+1) * (string2_length+1));
67
+ find_phonemes(string1, string1_length, &string1_phoneme_count, string1_phoneme_sizes);
68
+ find_phonemes(string2, string2_length, &string2_phoneme_count, string2_phoneme_sizes);
66
69
 
67
- //
68
- // Fill in the (flattened) matrix using the Levenshtein algorithm so we can
69
- // pluck the lowest-cost edit distance (stored in the lower-right corner, in
70
- // this case the last spot in the array)
71
- //
70
+ // Guard clauses for empty strings
71
+ if (string1_phoneme_count == 0 && string2_phoneme_count == 0)
72
+ return DBL2NUM(0.0);
72
73
 
74
+ // one-dimensional representation of 2 dimensional array
75
+ d = malloc((sizeof(double)) * (string1_phoneme_count+1) * (string2_phoneme_count+1));
76
+
73
77
  // First, set the top row and left column of the matrix using the sequential
74
78
  // phonetic edit distance of string1 and string2, respectively
75
- set_initial(d, string1, string1_length, string2, string2_length, verbose);
79
+ set_initial(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
76
80
 
77
- debug("before:\n");
78
- print_matrix(d, string1, string1_length, string2, string2_length, verbose);
81
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
79
82
 
80
- // Then walk through the matrix and fill in each cell with the lowest-cost
81
- // phonetic edit distance for that matrix cell.
83
+ // Then Fill in the (flattened) matrix using the Levenshtein algorithm so we can
84
+ // pluck the lowest-cost edit distance (stored in the lower-right corner, in
85
+ // this case the last spot in the array).
86
+ // We'll use phonetic distance instead of '1' as the edit cost.
87
+ //
82
88
  // (Skipping i=0 and j=0 because set_initial filled in all cells where i
83
89
  // or j are zero-valued)
84
- for (j = 1; j <= string2_length; j++){
85
- for (i = 1; i <= string1_length; i++){
90
+ for (j = 1; j <= string2_phoneme_count; j++){
91
+
92
+ for (i = 1; i <= string1_phoneme_count; i++){
86
93
 
87
94
  // The cost of deletion or addition is the Levenshtein distance
88
95
  // calculation (the value in the cell to the left, upper-left, or above)
89
96
  // plus the phonetic distance between the sound we're moving from to the
90
97
  // new one.
91
98
 
92
- debug("------- %d/%d (%d) \n", i, j, j*(string1_length+1) + i);
99
+ debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
93
100
 
94
- cost = phonetic_cost(string1[i-1], string2[j-1]);
95
- debug("phonetic cost of %d to %d is %f\n", string1[i-1], string2[j-1], cost);
101
+ // TODO: increment i and j by the phoneme lengths
102
+ cost = phonetic_cost(string1, string1_offset, string1_phoneme_sizes[i-1], string2, string2_offset, string2_phoneme_sizes[j-1]);
96
103
 
97
- insert = d[j*(string1_length+1) + i-1];
104
+ insert = d[j*(string1_phoneme_count+1) + i-1];
98
105
  debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
99
106
  min = insert;
100
107
  debug("min (insert): %f\n", min);
101
108
 
102
- delete = d[(j-1)*(string1_length+1) + i];
109
+ delete = d[(j-1)*(string1_phoneme_count+1) + i];
103
110
  debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
104
111
  if (delete < min) {
105
112
  debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
106
113
  min = delete;
107
114
  }
108
115
 
109
- replace = d[(j-1)*(string1_length+1) + i-1];
116
+ replace = d[(j-1)*(string1_phoneme_count+1) + i-1];
110
117
  debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
111
118
  if (replace < min) {
112
119
  debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
113
120
  min = replace;
114
121
  }
115
122
 
116
- d[(j * (string1_length+1)) + i] = min + cost;
123
+ d[(j * (string1_phoneme_count+1)) + i] = min + cost;
117
124
  debug("\n");
118
- print_matrix(d, string1, string1_length, string2, string2_length, verbose);
125
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
126
+
127
+ string1_offset += string1_phoneme_sizes[i-1];
119
128
  }
129
+ string1_offset = 0;
130
+ string2_offset += string2_phoneme_sizes[j-1];
120
131
  }
121
132
 
122
133
  // The final element in the `d` array is the value of the shortest path from
123
134
  // the top-left to the bottom-right of the matrix.
124
- distance = d[(string1_length + 1) * (string2_length + 1) - 1];
135
+ distance = d[(string1_phoneme_count + 1) * (string2_phoneme_count + 1) - 1];
125
136
 
126
137
  free(d);
127
138
  debug("distance: %f\n", distance);
@@ -129,7 +140,6 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
129
140
  return DBL2NUM(distance);
130
141
  }
131
142
 
132
-
133
143
  // Set the minimum scores equal to the distance between each phoneme,
134
144
  // sequentially.
135
145
  //
@@ -139,12 +149,14 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
139
149
  // Subsequent values are the cumulative phonetic distance between each
140
150
  // phoneme within the same string.
141
151
  // "aek" -> [0.0, 1.0, 1.61, 2.61]
142
- void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
152
+ void set_initial(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
143
153
 
144
154
  double initial_distance;
155
+ int string1_offset = 0;
156
+ int string2_offset = 0;
145
157
  int i, j;
146
158
 
147
- if (string1_length == 0 || string2_length == 0) {
159
+ if (string1_phoneme_count == 0 || string2_phoneme_count == 0) {
148
160
  initial_distance = 0.0;
149
161
  } else {
150
162
  initial_distance = 1.0;
@@ -152,51 +164,67 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
152
164
 
153
165
  // The top-left is 0, the cell to the right and down are each 1 to start
154
166
  d[0] = (double) 0.0;
155
- if (string1_length > 0) {
167
+ if (string1_phoneme_count > 0) {
156
168
  d[1] = initial_distance;
157
169
  }
158
- if (string2_length > 0) {
159
- d[string1_length+1] = initial_distance;
170
+ if (string2_phoneme_count > 0) {
171
+ d[string1_phoneme_count+1] = initial_distance;
160
172
  }
161
173
 
162
- debug("string1 length: %d\n", string1_length);
163
- for (i=2; i <= string1_length; i++) {
174
+ debug("string1 phoneme count: %d\n", string1_phoneme_count);
175
+
176
+ for (i=2; i <= string1_phoneme_count; i++) {
164
177
  // The cost of adding the next phoneme is the cost so far plus the phonetic
165
178
  // distance between the previous one and the current one.
166
- d[i] = d[i-1] + phonetic_cost(string1[i-2], string1[i-1]);
179
+ d[i] = d[i-1] +
180
+ phonetic_cost(string1, string1_offset, string1_phoneme_sizes[i-2], string1, string1_offset + string1_phoneme_sizes[i-2], string1_phoneme_sizes[i-1]);
181
+ string1_offset += string1_phoneme_sizes[i-2];
167
182
  }
168
- debug("string2 length: %d\n", string2_length);
169
- for (j=2; j <= string2_length; j++) {
183
+
184
+ debug("string2 phoneme count: %d\n", string2_phoneme_count);
185
+
186
+ for (j=2; j <= string2_phoneme_count; j++) {
170
187
  // The same exact pattern down the left side of the matrix
171
- d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
188
+ d[j * (string1_phoneme_count+1)] = d[(j - 1) * (string1_phoneme_count+1)] +
189
+ phonetic_cost(string2, string2_offset, string2_phoneme_sizes[j-2], string2, string2_offset + string2_phoneme_sizes[j-2], string2_phoneme_sizes[j-1]);
190
+ string2_offset += string2_phoneme_sizes[j-1];
172
191
  }
173
192
 
174
- // And zero out the rest. If you're reading this please edit this to be
175
- // faster.
176
- for (j=1; j <= string2_length; j++) {
177
- for (i=1; i <= string1_length; i++) {
178
- d[j * (string1_length+1) + i] = (double) 0.0;
193
+ // And zero out the rest. If you're reading this please show me a way to do
194
+ // this faster.
195
+ for (j=1; j <= string2_phoneme_count; j++) {
196
+ for (i=1; i <= string1_phoneme_count; i++) {
197
+ d[j * (string1_phoneme_count+1) + i] = (double) 0.0;
179
198
  }
180
199
  }
181
200
  }
182
201
 
183
202
  // A handy visualization for developers
184
- void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
203
+ void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
204
+
185
205
  int i, j;
186
- debug(" ");
187
- for (i=0; i < string1_length; i++) {
188
- debug("%8.d ", string1[i]);
206
+ int string1_offset = 0;
207
+ int string2_offset = 0;
208
+
209
+ if (!verbose)
210
+ return;
211
+
212
+ printf(" ");
213
+ for (i=0; i < string1_phoneme_count; i++) {
214
+ print_phoneme(string1, string1_offset, string1_phoneme_sizes[i], 9);
215
+ string1_offset += string1_phoneme_sizes[i];
189
216
  }
190
- debug("\n");
191
- for (j=0; j <= string2_length; j++) {
217
+ printf("\n");
218
+ for (j=0; j <= string2_phoneme_count; j++) {
192
219
  if (j==0) {
193
- debug(" ");
220
+ printf(" ");
194
221
  } else {
195
- debug("%4.d ", string2[j-1]);
196
- }
197
- for (i=0; i <= string1_length; i++) {
198
- debug("%f ", d[j * (string1_length+1) + i]) ;
222
+ print_phoneme(string2, string2_offset, string2_phoneme_sizes[j-1], 2);
223
+ string2_offset += string2_phoneme_sizes[j-1];
224
+ }
225
+ for (i=0; i <= string1_phoneme_count; i++) {
226
+ printf("%f ", d[j * (string1_phoneme_count+1) + i]) ;
199
227
  }
200
- debug("\n");
228
+ printf("\n");
201
229
  }
202
230
  }
@@ -0,0 +1,1364 @@
1
+ // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:221
2
+ int next_phoneme_length(int *string, int cursor, int length) {
3
+
4
+ int max_length;
5
+ max_length = length - cursor;
6
+
7
+ switch(string[cursor + 0]) {
8
+
9
+ case 105:
10
+ // Phoneme: "i", bytes: [105]
11
+ // vowel features: {"F1":240,"F2":2400,"rounded":false}
12
+ return 1;
13
+ break;
14
+ case 121:
15
+ // Phoneme: "y", bytes: [121]
16
+ // vowel features: {"F1":235,"F2":2100,"rounded":false}
17
+ return 1;
18
+ break;
19
+ case 201:
20
+ if (max_length > 1) {
21
+ switch(string[cursor + 1]) {
22
+
23
+ case 170:
24
+ // Phoneme: "ɪ", bytes: [201, 170]
25
+ // vowel features: {"F1":300,"F2":2100,"rounded":false}
26
+ return 2;
27
+ break;
28
+ case 155:
29
+ // Phoneme: "ɛ", bytes: [201, 155]
30
+ // vowel features: {"F1":610,"F2":1900,"rounded":false}
31
+ return 2;
32
+ break;
33
+ case 182:
34
+ // Phoneme: "ɶ", bytes: [201, 182]
35
+ // vowel features: {"F1":820,"F2":1530,"rounded":true}
36
+ return 2;
37
+ break;
38
+ case 145:
39
+ // Phoneme: "ɑ", bytes: [201, 145]
40
+ // vowel features: {"F1":750,"F2":940,"rounded":false}
41
+ return 2;
42
+ break;
43
+ case 146:
44
+ // Phoneme: "ɒ", bytes: [201, 146]
45
+ // vowel features: {"F1":700,"F2":760,"rounded":true}
46
+ return 2;
47
+ break;
48
+ case 153:
49
+ // Phoneme: "ə", bytes: [201, 153]
50
+ // vowel features: {"F1":600,"F2":1170,"rounded":false}
51
+ return 2;
52
+ break;
53
+ case 157:
54
+ // Phoneme: "ɝ", bytes: [201, 157]
55
+ // vowel features: {"F1":600,"F2":1170,"rounded":false,"rhotic":true}
56
+ return 2;
57
+ break;
58
+ case 148:
59
+ // Phoneme: "ɔ", bytes: [201, 148]
60
+ // vowel features: {"F1":500,"F2":700,"rounded":true}
61
+ return 2;
62
+ break;
63
+ case 164:
64
+ // Phoneme: "ɤ", bytes: [201, 164]
65
+ // vowel features: {"F1":460,"F2":1310,"rounded":false}
66
+ return 2;
67
+ break;
68
+ case 175:
69
+ // Phoneme: "ɯ", bytes: [201, 175]
70
+ // vowel features: {"F1":300,"F2":1390,"rounded":false}
71
+ return 2;
72
+ break;
73
+ case 177:
74
+ // Phoneme: "ɱ", bytes: [201, 177]
75
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Nasal","voiced":true}
76
+ return 2;
77
+ break;
78
+ case 179:
79
+ // Phoneme: "ɳ", bytes: [201, 179]
80
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Nasal","voiced":true}
81
+ if (max_length > 2) {
82
+ switch(string[cursor + 2]) {
83
+
84
+ case 204:
85
+ if (max_length > 3) {
86
+ switch(string[cursor + 3]) {
87
+
88
+ case 138:
89
+ // Phoneme: "ɳ̊", bytes: [201, 179, 204, 138]
90
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Nasal","voiced":false}
91
+ return 4;
92
+ break;
93
+ }
94
+ } else {
95
+ return 3;
96
+ }
97
+ break;
98
+ default:
99
+ return 2;
100
+ }
101
+ } else {
102
+ return 2;
103
+ }
104
+ break;
105
+ case 178:
106
+ // Phoneme: "ɲ", bytes: [201, 178]
107
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Nasal","voiced":true}
108
+ if (max_length > 2) {
109
+ switch(string[cursor + 2]) {
110
+
111
+ case 204:
112
+ if (max_length > 3) {
113
+ switch(string[cursor + 3]) {
114
+
115
+ case 138:
116
+ // Phoneme: "ɲ̊", bytes: [201, 178, 204, 138]
117
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Nasal","voiced":false}
118
+ return 4;
119
+ break;
120
+ }
121
+ } else {
122
+ return 3;
123
+ }
124
+ break;
125
+ default:
126
+ return 2;
127
+ }
128
+ } else {
129
+ return 2;
130
+ }
131
+ break;
132
+ case 180:
133
+ // Phoneme: "ɴ", bytes: [201, 180]
134
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Nasal","voiced":true}
135
+ return 2;
136
+ break;
137
+ case 150:
138
+ // Phoneme: "ɖ", bytes: [201, 150]
139
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Stop","voiced":true}
140
+ return 2;
141
+ break;
142
+ case 159:
143
+ // Phoneme: "ɟ", bytes: [201, 159]
144
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Stop","voiced":true}
145
+ return 2;
146
+ break;
147
+ case 162:
148
+ // Phoneme: "ɢ", bytes: [201, 162]
149
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Stop","voiced":true}
150
+ if (max_length > 2) {
151
+ switch(string[cursor + 2]) {
152
+
153
+ case 204:
154
+ if (max_length > 3) {
155
+ switch(string[cursor + 3]) {
156
+
157
+ case 134:
158
+ // Phoneme: "ɢ̆", bytes: [201, 162, 204, 134]
159
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Tap/flap","voiced":true}
160
+ return 4;
161
+ break;
162
+ }
163
+ } else {
164
+ return 3;
165
+ }
166
+ break;
167
+ default:
168
+ return 2;
169
+ }
170
+ } else {
171
+ return 2;
172
+ }
173
+ break;
174
+ case 149:
175
+ // Phoneme: "ɕ", bytes: [201, 149]
176
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Sibilant fricative","voiced":false}
177
+ return 2;
178
+ break;
179
+ case 184:
180
+ // Phoneme: "ɸ", bytes: [201, 184]
181
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Non-sibilant fricative","voiced":false}
182
+ return 2;
183
+ break;
184
+ case 185:
185
+ // Phoneme: "ɹ", bytes: [201, 185]
186
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Approximant","voiced":true}
187
+ if (max_length > 2) {
188
+ switch(string[cursor + 2]) {
189
+
190
+ case 204:
191
+ if (max_length > 3) {
192
+ switch(string[cursor + 3]) {
193
+
194
+ case 160:
195
+ if (max_length > 4) {
196
+ switch(string[cursor + 4]) {
197
+
198
+ case 204:
199
+ if (max_length > 5) {
200
+ switch(string[cursor + 5]) {
201
+
202
+ case 138:
203
+ if (max_length > 6) {
204
+ switch(string[cursor + 6]) {
205
+
206
+ case 203:
207
+ if (max_length > 7) {
208
+ switch(string[cursor + 7]) {
209
+
210
+ case 148:
211
+ // Phoneme: "ɹ̠̊˔", bytes: [201, 185, 204, 160, 204, 138, 203, 148]
212
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Non-sibilant fricative","voiced":false}
213
+ return 8;
214
+ break;
215
+ }
216
+ } else {
217
+ return 7;
218
+ }
219
+ break;
220
+ }
221
+ } else {
222
+ return 6;
223
+ }
224
+ break;
225
+ }
226
+ } else {
227
+ return 5;
228
+ }
229
+ break;
230
+ case 203:
231
+ if (max_length > 5) {
232
+ switch(string[cursor + 5]) {
233
+
234
+ case 148:
235
+ // Phoneme: "ɹ̠˔", bytes: [201, 185, 204, 160, 203, 148]
236
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Non-sibilant fricative","voiced":true}
237
+ return 6;
238
+ break;
239
+ }
240
+ } else {
241
+ return 5;
242
+ }
243
+ break;
244
+ }
245
+ } else {
246
+ return 4;
247
+ }
248
+ break;
249
+ case 165:
250
+ // Phoneme: "ɹ̥", bytes: [201, 185, 204, 165]
251
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Approximant","voiced":false}
252
+ return 4;
253
+ break;
254
+ }
255
+ } else {
256
+ return 3;
257
+ }
258
+ break;
259
+ default:
260
+ return 2;
261
+ }
262
+ } else {
263
+ return 2;
264
+ }
265
+ break;
266
+ case 187:
267
+ // Phoneme: "ɻ", bytes: [201, 187]
268
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Approximant","voiced":true}
269
+ if (max_length > 2) {
270
+ switch(string[cursor + 2]) {
271
+
272
+ case 203:
273
+ if (max_length > 3) {
274
+ switch(string[cursor + 3]) {
275
+
276
+ case 148:
277
+ // Phoneme: "ɻ˔", bytes: [201, 187, 203, 148]
278
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Non-sibilant fricative","voiced":true}
279
+ return 4;
280
+ break;
281
+ }
282
+ } else {
283
+ return 3;
284
+ }
285
+ break;
286
+ case 204:
287
+ if (max_length > 3) {
288
+ switch(string[cursor + 3]) {
289
+
290
+ case 138:
291
+ // Phoneme: "ɻ̊", bytes: [201, 187, 204, 138]
292
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Approximant","voiced":false}
293
+ return 4;
294
+ break;
295
+ }
296
+ } else {
297
+ return 3;
298
+ }
299
+ break;
300
+ default:
301
+ return 2;
302
+ }
303
+ } else {
304
+ return 2;
305
+ }
306
+ break;
307
+ case 163:
308
+ // Phoneme: "ɣ", bytes: [201, 163]
309
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Non-sibilant fricative","voiced":true}
310
+ return 2;
311
+ break;
312
+ case 166:
313
+ // Phoneme: "ɦ", bytes: [201, 166]
314
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Non-sibilant fricative","voiced":true}
315
+ return 2;
316
+ break;
317
+ case 176:
318
+ // Phoneme: "ɰ", bytes: [201, 176]
319
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Approximant","voiced":true}
320
+ if (max_length > 2) {
321
+ switch(string[cursor + 2]) {
322
+
323
+ case 204:
324
+ if (max_length > 3) {
325
+ switch(string[cursor + 3]) {
326
+
327
+ case 138:
328
+ // Phoneme: "ɰ̊", bytes: [201, 176, 204, 138]
329
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Approximant","voiced":false}
330
+ return 4;
331
+ break;
332
+ }
333
+ } else {
334
+ return 3;
335
+ }
336
+ break;
337
+ default:
338
+ return 2;
339
+ }
340
+ } else {
341
+ return 2;
342
+ }
343
+ break;
344
+ case 190:
345
+ // Phoneme: "ɾ", bytes: [201, 190]
346
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Tap/flap","voiced":true}
347
+ if (max_length > 2) {
348
+ switch(string[cursor + 2]) {
349
+
350
+ case 204:
351
+ if (max_length > 3) {
352
+ switch(string[cursor + 3]) {
353
+
354
+ case 188:
355
+ // Phoneme: "ɾ̼", bytes: [201, 190, 204, 188]
356
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Tap/flap","voiced":true}
357
+ return 4;
358
+ break;
359
+ case 165:
360
+ // Phoneme: "ɾ̥", bytes: [201, 190, 204, 165]
361
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Tap/flap","voiced":false}
362
+ return 4;
363
+ break;
364
+ }
365
+ } else {
366
+ return 3;
367
+ }
368
+ break;
369
+ default:
370
+ return 2;
371
+ }
372
+ } else {
373
+ return 2;
374
+ }
375
+ break;
376
+ case 189:
377
+ // Phoneme: "ɽ", bytes: [201, 189]
378
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Tap/flap","voiced":true}
379
+ if (max_length > 2) {
380
+ switch(string[cursor + 2]) {
381
+
382
+ case 204:
383
+ if (max_length > 3) {
384
+ switch(string[cursor + 3]) {
385
+
386
+ case 138:
387
+ // Phoneme: "ɽ̊", bytes: [201, 189, 204, 138]
388
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Tap/flap","voiced":false}
389
+ return 4;
390
+ break;
391
+ }
392
+ } else {
393
+ return 3;
394
+ }
395
+ break;
396
+ default:
397
+ return 2;
398
+ }
399
+ } else {
400
+ return 2;
401
+ }
402
+ break;
403
+ case 172:
404
+ // Phoneme: "ɬ", bytes: [201, 172]
405
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral fricative","voiced":false}
406
+ return 2;
407
+ break;
408
+ case 174:
409
+ // Phoneme: "ɮ", bytes: [201, 174]
410
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral fricative","voiced":true}
411
+ return 2;
412
+ break;
413
+ case 173:
414
+ // Phoneme: "ɭ", bytes: [201, 173]
415
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral approximant","voiced":true}
416
+ if (max_length > 2) {
417
+ switch(string[cursor + 2]) {
418
+
419
+ case 204:
420
+ if (max_length > 3) {
421
+ switch(string[cursor + 3]) {
422
+
423
+ case 138:
424
+ // Phoneme: "ɭ̊", bytes: [201, 173, 204, 138]
425
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral approximant","voiced":false}
426
+ if (max_length > 4) {
427
+ switch(string[cursor + 4]) {
428
+
429
+ case 203:
430
+ if (max_length > 5) {
431
+ switch(string[cursor + 5]) {
432
+
433
+ case 148:
434
+ // Phoneme: "ɭ̊˔", bytes: [201, 173, 204, 138, 203, 148]
435
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral fricative","voiced":false}
436
+ return 6;
437
+ break;
438
+ }
439
+ } else {
440
+ return 5;
441
+ }
442
+ break;
443
+ default:
444
+ return 4;
445
+ }
446
+ } else {
447
+ return 4;
448
+ }
449
+ break;
450
+ case 134:
451
+ // Phoneme: "ɭ̆", bytes: [201, 173, 204, 134]
452
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral tap/flap","voiced":true}
453
+ return 4;
454
+ break;
455
+ }
456
+ } else {
457
+ return 3;
458
+ }
459
+ break;
460
+ case 203:
461
+ if (max_length > 3) {
462
+ switch(string[cursor + 3]) {
463
+
464
+ case 148:
465
+ // Phoneme: "ɭ˔", bytes: [201, 173, 203, 148]
466
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral fricative","voiced":true}
467
+ return 4;
468
+ break;
469
+ }
470
+ } else {
471
+ return 3;
472
+ }
473
+ break;
474
+ default:
475
+ return 2;
476
+ }
477
+ } else {
478
+ return 2;
479
+ }
480
+ break;
481
+ case 186:
482
+ // Phoneme: "ɺ", bytes: [201, 186]
483
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral tap/flap","voiced":true}
484
+ return 2;
485
+ break;
486
+ }
487
+ } else {
488
+ return 1;
489
+ }
490
+ break;
491
+ case 101:
492
+ // Phoneme: "e", bytes: [101]
493
+ // vowel features: {"F1":390,"F2":2300,"rounded":false}
494
+ return 1;
495
+ break;
496
+ case 195:
497
+ if (max_length > 1) {
498
+ switch(string[cursor + 1]) {
499
+
500
+ case 184:
501
+ // Phoneme: "ø", bytes: [195, 184]
502
+ // vowel features: {"F1":370,"F2":1900,"rounded":true}
503
+ return 2;
504
+ break;
505
+ case 166:
506
+ // Phoneme: "æ", bytes: [195, 166]
507
+ // vowel features: {"F1":800,"F2":1900,"rounded":false}
508
+ return 2;
509
+ break;
510
+ case 176:
511
+ // Phoneme: "ð", bytes: [195, 176]
512
+ // consonant features: {"position":"Dental","position_index":4,"manner":"Non-sibilant fricative","voiced":true}
513
+ if (max_length > 2) {
514
+ switch(string[cursor + 2]) {
515
+
516
+ case 204:
517
+ if (max_length > 3) {
518
+ switch(string[cursor + 3]) {
519
+
520
+ case 188:
521
+ // Phoneme: "ð̼", bytes: [195, 176, 204, 188]
522
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Non-sibilant fricative","voiced":true}
523
+ return 4;
524
+ break;
525
+ case 160:
526
+ // Phoneme: "ð̠", bytes: [195, 176, 204, 160]
527
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Non-sibilant fricative","voiced":true}
528
+ return 4;
529
+ break;
530
+ }
531
+ } else {
532
+ return 3;
533
+ }
534
+ break;
535
+ default:
536
+ return 2;
537
+ }
538
+ } else {
539
+ return 2;
540
+ }
541
+ break;
542
+ case 167:
543
+ // Phoneme: "ç", bytes: [195, 167]
544
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Non-sibilant fricative","voiced":false}
545
+ return 2;
546
+ break;
547
+ }
548
+ } else {
549
+ return 1;
550
+ }
551
+ break;
552
+ case 197:
553
+ if (max_length > 1) {
554
+ switch(string[cursor + 1]) {
555
+
556
+ case 147:
557
+ // Phoneme: "œ", bytes: [197, 147]
558
+ // vowel features: {"F1":585,"F2":1710,"rounded":true}
559
+ return 2;
560
+ break;
561
+ case 139:
562
+ // Phoneme: "ŋ", bytes: [197, 139]
563
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Nasal","voiced":true}
564
+ if (max_length > 2) {
565
+ switch(string[cursor + 2]) {
566
+
567
+ case 204:
568
+ if (max_length > 3) {
569
+ switch(string[cursor + 3]) {
570
+
571
+ case 138:
572
+ // Phoneme: "ŋ̊", bytes: [197, 139, 204, 138]
573
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Nasal","voiced":false}
574
+ return 4;
575
+ break;
576
+ }
577
+ } else {
578
+ return 3;
579
+ }
580
+ break;
581
+ default:
582
+ return 2;
583
+ }
584
+ } else {
585
+ return 2;
586
+ }
587
+ break;
588
+ }
589
+ } else {
590
+ return 1;
591
+ }
592
+ break;
593
+ case 97:
594
+ // Phoneme: "a", bytes: [97]
595
+ // vowel features: {"F1":850,"F2":1610,"rounded":false}
596
+ return 1;
597
+ break;
598
+ case 202:
599
+ if (max_length > 1) {
600
+ switch(string[cursor + 1]) {
601
+
602
+ case 140:
603
+ // Phoneme: "ʌ", bytes: [202, 140]
604
+ // vowel features: {"F1":600,"F2":1170,"rounded":false}
605
+ return 2;
606
+ break;
607
+ case 138:
608
+ // Phoneme: "ʊ", bytes: [202, 138]
609
+ // vowel features: {"F1":350,"F2":650,"rounded":true}
610
+ return 2;
611
+ break;
612
+ case 136:
613
+ // Phoneme: "ʈ", bytes: [202, 136]
614
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Stop","voiced":false}
615
+ return 2;
616
+ break;
617
+ case 161:
618
+ // Phoneme: "ʡ", bytes: [202, 161]
619
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Stop","voiced":false}
620
+ if (max_length > 2) {
621
+ switch(string[cursor + 2]) {
622
+
623
+ case 204:
624
+ if (max_length > 3) {
625
+ switch(string[cursor + 3]) {
626
+
627
+ case 134:
628
+ // Phoneme: "ʡ̆", bytes: [202, 161, 204, 134]
629
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Tap/flap","voiced":true}
630
+ return 4;
631
+ break;
632
+ }
633
+ } else {
634
+ return 3;
635
+ }
636
+ break;
637
+ default:
638
+ return 2;
639
+ }
640
+ } else {
641
+ return 2;
642
+ }
643
+ break;
644
+ case 148:
645
+ // Phoneme: "ʔ", bytes: [202, 148]
646
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Stop","voiced":false}
647
+ if (max_length > 2) {
648
+ switch(string[cursor + 2]) {
649
+
650
+ case 204:
651
+ if (max_length > 3) {
652
+ switch(string[cursor + 3]) {
653
+
654
+ case 158:
655
+ // Phoneme: "ʔ̞", bytes: [202, 148, 204, 158]
656
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Approximant","voiced":true}
657
+ return 4;
658
+ break;
659
+ }
660
+ } else {
661
+ return 3;
662
+ }
663
+ break;
664
+ default:
665
+ return 2;
666
+ }
667
+ } else {
668
+ return 2;
669
+ }
670
+ break;
671
+ case 131:
672
+ // Phoneme: "ʃ", bytes: [202, 131]
673
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Sibilant fricative","voiced":false}
674
+ return 2;
675
+ break;
676
+ case 146:
677
+ // Phoneme: "ʒ", bytes: [202, 146]
678
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Sibilant fricative","voiced":true}
679
+ return 2;
680
+ break;
681
+ case 130:
682
+ // Phoneme: "ʂ", bytes: [202, 130]
683
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Sibilant fricative","voiced":false}
684
+ return 2;
685
+ break;
686
+ case 144:
687
+ // Phoneme: "ʐ", bytes: [202, 144]
688
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Sibilant fricative","voiced":true}
689
+ return 2;
690
+ break;
691
+ case 145:
692
+ // Phoneme: "ʑ", bytes: [202, 145]
693
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Sibilant fricative","voiced":true}
694
+ return 2;
695
+ break;
696
+ case 157:
697
+ // Phoneme: "ʝ", bytes: [202, 157]
698
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Non-sibilant fricative","voiced":true}
699
+ return 2;
700
+ break;
701
+ case 129:
702
+ // Phoneme: "ʁ", bytes: [202, 129]
703
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Non-sibilant fricative","voiced":true}
704
+ return 2;
705
+ break;
706
+ case 149:
707
+ // Phoneme: "ʕ", bytes: [202, 149]
708
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Non-sibilant fricative","voiced":true}
709
+ return 2;
710
+ break;
711
+ case 139:
712
+ // Phoneme: "ʋ", bytes: [202, 139]
713
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Approximant","voiced":true}
714
+ if (max_length > 2) {
715
+ switch(string[cursor + 2]) {
716
+
717
+ case 204:
718
+ if (max_length > 3) {
719
+ switch(string[cursor + 3]) {
720
+
721
+ case 165:
722
+ // Phoneme: "ʋ̥", bytes: [202, 139, 204, 165]
723
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Approximant","voiced":false}
724
+ return 4;
725
+ break;
726
+ }
727
+ } else {
728
+ return 3;
729
+ }
730
+ break;
731
+ default:
732
+ return 2;
733
+ }
734
+ } else {
735
+ return 2;
736
+ }
737
+ break;
738
+ case 153:
739
+ // Phoneme: "ʙ", bytes: [202, 153]
740
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Trill","voiced":true}
741
+ if (max_length > 2) {
742
+ switch(string[cursor + 2]) {
743
+
744
+ case 204:
745
+ if (max_length > 3) {
746
+ switch(string[cursor + 3]) {
747
+
748
+ case 165:
749
+ // Phoneme: "ʙ̥", bytes: [202, 153, 204, 165]
750
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Trill","voiced":false}
751
+ return 4;
752
+ break;
753
+ }
754
+ } else {
755
+ return 3;
756
+ }
757
+ break;
758
+ default:
759
+ return 2;
760
+ }
761
+ } else {
762
+ return 2;
763
+ }
764
+ break;
765
+ case 128:
766
+ // Phoneme: "ʀ", bytes: [202, 128]
767
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Trill","voiced":true}
768
+ if (max_length > 2) {
769
+ switch(string[cursor + 2]) {
770
+
771
+ case 204:
772
+ if (max_length > 3) {
773
+ switch(string[cursor + 3]) {
774
+
775
+ case 165:
776
+ // Phoneme: "ʀ̥", bytes: [202, 128, 204, 165]
777
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Trill","voiced":false}
778
+ return 4;
779
+ break;
780
+ }
781
+ } else {
782
+ return 3;
783
+ }
784
+ break;
785
+ default:
786
+ return 2;
787
+ }
788
+ } else {
789
+ return 2;
790
+ }
791
+ break;
792
+ case 156:
793
+ // Phoneme: "ʜ", bytes: [202, 156]
794
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Trill","voiced":false}
795
+ return 2;
796
+ break;
797
+ case 162:
798
+ // Phoneme: "ʢ", bytes: [202, 162]
799
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Trill","voiced":true}
800
+ return 2;
801
+ break;
802
+ case 142:
803
+ // Phoneme: "ʎ", bytes: [202, 142]
804
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral approximant","voiced":true}
805
+ if (max_length > 2) {
806
+ switch(string[cursor + 2]) {
807
+
808
+ case 204:
809
+ if (max_length > 3) {
810
+ switch(string[cursor + 3]) {
811
+
812
+ case 157:
813
+ // Phoneme: "ʎ̝", bytes: [202, 142, 204, 157]
814
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral fricative","voiced":true}
815
+ if (max_length > 4) {
816
+ switch(string[cursor + 4]) {
817
+
818
+ case 204:
819
+ if (max_length > 5) {
820
+ switch(string[cursor + 5]) {
821
+
822
+ case 138:
823
+ // Phoneme: "ʎ̝̊", bytes: [202, 142, 204, 157, 204, 138]
824
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral fricative","voiced":false}
825
+ return 6;
826
+ break;
827
+ }
828
+ } else {
829
+ return 5;
830
+ }
831
+ break;
832
+ default:
833
+ return 4;
834
+ }
835
+ } else {
836
+ return 4;
837
+ }
838
+ break;
839
+ case 165:
840
+ // Phoneme: "ʎ̥", bytes: [202, 142, 204, 165]
841
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral approximant","voiced":false}
842
+ return 4;
843
+ break;
844
+ case 134:
845
+ // Phoneme: "ʎ̆", bytes: [202, 142, 204, 134]
846
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral tap/flap","voiced":true}
847
+ return 4;
848
+ break;
849
+ }
850
+ } else {
851
+ return 3;
852
+ }
853
+ break;
854
+ default:
855
+ return 2;
856
+ }
857
+ } else {
858
+ return 2;
859
+ }
860
+ break;
861
+ case 159:
862
+ // Phoneme: "ʟ", bytes: [202, 159]
863
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral approximant","voiced":true}
864
+ if (max_length > 2) {
865
+ switch(string[cursor + 2]) {
866
+
867
+ case 204:
868
+ if (max_length > 3) {
869
+ switch(string[cursor + 3]) {
870
+
871
+ case 157:
872
+ // Phoneme: "ʟ̝", bytes: [202, 159, 204, 157]
873
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral fricative","voiced":true}
874
+ if (max_length > 4) {
875
+ switch(string[cursor + 4]) {
876
+
877
+ case 204:
878
+ if (max_length > 5) {
879
+ switch(string[cursor + 5]) {
880
+
881
+ case 138:
882
+ // Phoneme: "ʟ̝̊", bytes: [202, 159, 204, 157, 204, 138]
883
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral fricative","voiced":false}
884
+ return 6;
885
+ break;
886
+ }
887
+ } else {
888
+ return 5;
889
+ }
890
+ break;
891
+ default:
892
+ return 4;
893
+ }
894
+ } else {
895
+ return 4;
896
+ }
897
+ break;
898
+ case 165:
899
+ // Phoneme: "ʟ̥", bytes: [202, 159, 204, 165]
900
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral approximant","voiced":false}
901
+ return 4;
902
+ break;
903
+ case 160:
904
+ // Phoneme: "ʟ̠", bytes: [202, 159, 204, 160]
905
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Lateral approximant","voiced":true}
906
+ return 4;
907
+ break;
908
+ case 134:
909
+ // Phoneme: "ʟ̆", bytes: [202, 159, 204, 134]
910
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral tap/flap","voiced":true}
911
+ return 4;
912
+ break;
913
+ }
914
+ } else {
915
+ return 3;
916
+ }
917
+ break;
918
+ default:
919
+ return 2;
920
+ }
921
+ } else {
922
+ return 2;
923
+ }
924
+ break;
925
+ }
926
+ } else {
927
+ return 1;
928
+ }
929
+ break;
930
+ case 111:
931
+ // Phoneme: "o", bytes: [111]
932
+ // vowel features: {"F1":360,"F2":640,"rounded":true}
933
+ return 1;
934
+ break;
935
+ case 117:
936
+ // Phoneme: "u", bytes: [117]
937
+ // vowel features: {"F1":350,"F2":650,"rounded":true}
938
+ return 1;
939
+ break;
940
+ case 109:
941
+ // Phoneme: "m", bytes: [109]
942
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Nasal","voiced":true}
943
+ if (max_length > 1) {
944
+ switch(string[cursor + 1]) {
945
+
946
+ case 204:
947
+ if (max_length > 2) {
948
+ switch(string[cursor + 2]) {
949
+
950
+ case 165:
951
+ // Phoneme: "m̥", bytes: [109, 204, 165]
952
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Nasal","voiced":false}
953
+ return 3;
954
+ break;
955
+ }
956
+ } else {
957
+ return 2;
958
+ }
959
+ break;
960
+ default:
961
+ return 1;
962
+ }
963
+ } else {
964
+ return 1;
965
+ }
966
+ break;
967
+ case 110:
968
+ // Phoneme: "n", bytes: [110]
969
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Nasal","voiced":true}
970
+ if (max_length > 1) {
971
+ switch(string[cursor + 1]) {
972
+
973
+ case 204:
974
+ if (max_length > 2) {
975
+ switch(string[cursor + 2]) {
976
+
977
+ case 188:
978
+ // Phoneme: "n̼", bytes: [110, 204, 188]
979
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Nasal","voiced":true}
980
+ return 3;
981
+ break;
982
+ case 165:
983
+ // Phoneme: "n̥", bytes: [110, 204, 165]
984
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Nasal","voiced":false}
985
+ return 3;
986
+ break;
987
+ }
988
+ } else {
989
+ return 2;
990
+ }
991
+ break;
992
+ default:
993
+ return 1;
994
+ }
995
+ } else {
996
+ return 1;
997
+ }
998
+ break;
999
+ case 112:
1000
+ // Phoneme: "p", bytes: [112]
1001
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Stop","voiced":false}
1002
+ if (max_length > 1) {
1003
+ switch(string[cursor + 1]) {
1004
+
1005
+ case 204:
1006
+ if (max_length > 2) {
1007
+ switch(string[cursor + 2]) {
1008
+
1009
+ case 170:
1010
+ // Phoneme: "p̪", bytes: [112, 204, 170]
1011
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Stop","voiced":false}
1012
+ return 3;
1013
+ break;
1014
+ }
1015
+ } else {
1016
+ return 2;
1017
+ }
1018
+ break;
1019
+ default:
1020
+ return 1;
1021
+ }
1022
+ } else {
1023
+ return 1;
1024
+ }
1025
+ break;
1026
+ case 98:
1027
+ // Phoneme: "b", bytes: [98]
1028
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Stop","voiced":true}
1029
+ if (max_length > 1) {
1030
+ switch(string[cursor + 1]) {
1031
+
1032
+ case 204:
1033
+ if (max_length > 2) {
1034
+ switch(string[cursor + 2]) {
1035
+
1036
+ case 170:
1037
+ // Phoneme: "b̪", bytes: [98, 204, 170]
1038
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Stop","voiced":true}
1039
+ return 3;
1040
+ break;
1041
+ }
1042
+ } else {
1043
+ return 2;
1044
+ }
1045
+ break;
1046
+ default:
1047
+ return 1;
1048
+ }
1049
+ } else {
1050
+ return 1;
1051
+ }
1052
+ break;
1053
+ case 116:
1054
+ // Phoneme: "t", bytes: [116]
1055
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Stop","voiced":false}
1056
+ if (max_length > 1) {
1057
+ switch(string[cursor + 1]) {
1058
+
1059
+ case 204:
1060
+ if (max_length > 2) {
1061
+ switch(string[cursor + 2]) {
1062
+
1063
+ case 188:
1064
+ // Phoneme: "t̼", bytes: [116, 204, 188]
1065
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Stop","voiced":false}
1066
+ return 3;
1067
+ break;
1068
+ }
1069
+ } else {
1070
+ return 2;
1071
+ }
1072
+ break;
1073
+ default:
1074
+ return 1;
1075
+ }
1076
+ } else {
1077
+ return 1;
1078
+ }
1079
+ break;
1080
+ case 100:
1081
+ // Phoneme: "d", bytes: [100]
1082
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Stop","voiced":true}
1083
+ if (max_length > 1) {
1084
+ switch(string[cursor + 1]) {
1085
+
1086
+ case 204:
1087
+ if (max_length > 2) {
1088
+ switch(string[cursor + 2]) {
1089
+
1090
+ case 188:
1091
+ // Phoneme: "d̼", bytes: [100, 204, 188]
1092
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Stop","voiced":true}
1093
+ return 3;
1094
+ break;
1095
+ }
1096
+ } else {
1097
+ return 2;
1098
+ }
1099
+ break;
1100
+ default:
1101
+ return 1;
1102
+ }
1103
+ } else {
1104
+ return 1;
1105
+ }
1106
+ break;
1107
+ case 99:
1108
+ // Phoneme: "c", bytes: [99]
1109
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Stop","voiced":false}
1110
+ return 1;
1111
+ break;
1112
+ case 107:
1113
+ // Phoneme: "k", bytes: [107]
1114
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Stop","voiced":false}
1115
+ return 1;
1116
+ break;
1117
+ case 103:
1118
+ // Phoneme: "g", bytes: [103]
1119
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Stop","voiced":true}
1120
+ return 1;
1121
+ break;
1122
+ case 113:
1123
+ // Phoneme: "q", bytes: [113]
1124
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Stop","voiced":false}
1125
+ return 1;
1126
+ break;
1127
+ case 115:
1128
+ // Phoneme: "s", bytes: [115]
1129
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Sibilant fricative","voiced":false}
1130
+ return 1;
1131
+ break;
1132
+ case 122:
1133
+ // Phoneme: "z", bytes: [122]
1134
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Sibilant fricative","voiced":true}
1135
+ return 1;
1136
+ break;
1137
+ case 206:
1138
+ if (max_length > 1) {
1139
+ switch(string[cursor + 1]) {
1140
+
1141
+ case 178:
1142
+ // Phoneme: "β", bytes: [206, 178]
1143
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Non-sibilant fricative","voiced":true}
1144
+ return 2;
1145
+ break;
1146
+ case 184:
1147
+ // Phoneme: "θ", bytes: [206, 184]
1148
+ // consonant features: {"position":"Dental","position_index":4,"manner":"Non-sibilant fricative","voiced":false}
1149
+ if (max_length > 2) {
1150
+ switch(string[cursor + 2]) {
1151
+
1152
+ case 204:
1153
+ if (max_length > 3) {
1154
+ switch(string[cursor + 3]) {
1155
+
1156
+ case 188:
1157
+ // Phoneme: "θ̼", bytes: [206, 184, 204, 188]
1158
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Non-sibilant fricative","voiced":false}
1159
+ return 4;
1160
+ break;
1161
+ case 160:
1162
+ // Phoneme: "θ̠", bytes: [206, 184, 204, 160]
1163
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Non-sibilant fricative","voiced":false}
1164
+ return 4;
1165
+ break;
1166
+ }
1167
+ } else {
1168
+ return 3;
1169
+ }
1170
+ break;
1171
+ default:
1172
+ return 2;
1173
+ }
1174
+ } else {
1175
+ return 2;
1176
+ }
1177
+ break;
1178
+ }
1179
+ } else {
1180
+ return 1;
1181
+ }
1182
+ break;
1183
+ case 102:
1184
+ // Phoneme: "f", bytes: [102]
1185
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Non-sibilant fricative","voiced":false}
1186
+ return 1;
1187
+ break;
1188
+ case 118:
1189
+ // Phoneme: "v", bytes: [118]
1190
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Non-sibilant fricative","voiced":true}
1191
+ return 1;
1192
+ break;
1193
+ case 120:
1194
+ // Phoneme: "x", bytes: [120]
1195
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Non-sibilant fricative","voiced":false}
1196
+ return 1;
1197
+ break;
1198
+ case 207:
1199
+ if (max_length > 1) {
1200
+ switch(string[cursor + 1]) {
1201
+
1202
+ case 135:
1203
+ // Phoneme: "χ", bytes: [207, 135]
1204
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Non-sibilant fricative","voiced":false}
1205
+ return 2;
1206
+ break;
1207
+ }
1208
+ } else {
1209
+ return 1;
1210
+ }
1211
+ break;
1212
+ case 196:
1213
+ if (max_length > 1) {
1214
+ switch(string[cursor + 1]) {
1215
+
1216
+ case 167:
1217
+ // Phoneme: "ħ", bytes: [196, 167]
1218
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Non-sibilant fricative","voiced":false}
1219
+ return 2;
1220
+ break;
1221
+ }
1222
+ } else {
1223
+ return 1;
1224
+ }
1225
+ break;
1226
+ case 104:
1227
+ // Phoneme: "h", bytes: [104]
1228
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Non-sibilant fricative","voiced":false}
1229
+ return 1;
1230
+ break;
1231
+ case 119:
1232
+ // Phoneme: "w", bytes: [119]
1233
+ // consonant features: {"position":"Labio-velar","position_index":0,"manner":"Approximant","voiced":true}
1234
+ return 1;
1235
+ break;
1236
+ case 106:
1237
+ // Phoneme: "j", bytes: [106]
1238
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Approximant","voiced":true}
1239
+ if (max_length > 1) {
1240
+ switch(string[cursor + 1]) {
1241
+
1242
+ case 204:
1243
+ if (max_length > 2) {
1244
+ switch(string[cursor + 2]) {
1245
+
1246
+ case 138:
1247
+ // Phoneme: "j̊", bytes: [106, 204, 138]
1248
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Approximant","voiced":false}
1249
+ return 3;
1250
+ break;
1251
+ }
1252
+ } else {
1253
+ return 2;
1254
+ }
1255
+ break;
1256
+ default:
1257
+ return 1;
1258
+ }
1259
+ } else {
1260
+ return 1;
1261
+ }
1262
+ break;
1263
+ case 226:
1264
+ if (max_length > 1) {
1265
+ switch(string[cursor + 1]) {
1266
+
1267
+ case 177:
1268
+ if (max_length > 2) {
1269
+ switch(string[cursor + 2]) {
1270
+
1271
+ case 177:
1272
+ // Phoneme: "ⱱ", bytes: [226, 177, 177]
1273
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Tap/flap","voiced":true}
1274
+ if (max_length > 3) {
1275
+ switch(string[cursor + 3]) {
1276
+
1277
+ case 204:
1278
+ if (max_length > 4) {
1279
+ switch(string[cursor + 4]) {
1280
+
1281
+ case 159:
1282
+ // Phoneme: "ⱱ̟", bytes: [226, 177, 177, 204, 159]
1283
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Tap/flap","voiced":true}
1284
+ return 5;
1285
+ break;
1286
+ }
1287
+ } else {
1288
+ return 4;
1289
+ }
1290
+ break;
1291
+ default:
1292
+ return 3;
1293
+ }
1294
+ } else {
1295
+ return 3;
1296
+ }
1297
+ break;
1298
+ }
1299
+ } else {
1300
+ return 2;
1301
+ }
1302
+ break;
1303
+ }
1304
+ } else {
1305
+ return 1;
1306
+ }
1307
+ break;
1308
+ case 114:
1309
+ // Phoneme: "r", bytes: [114]
1310
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Trill","voiced":true}
1311
+ if (max_length > 1) {
1312
+ switch(string[cursor + 1]) {
1313
+
1314
+ case 204:
1315
+ if (max_length > 2) {
1316
+ switch(string[cursor + 2]) {
1317
+
1318
+ case 165:
1319
+ // Phoneme: "r̥", bytes: [114, 204, 165]
1320
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Trill","voiced":false}
1321
+ return 3;
1322
+ break;
1323
+ }
1324
+ } else {
1325
+ return 2;
1326
+ }
1327
+ break;
1328
+ default:
1329
+ return 1;
1330
+ }
1331
+ } else {
1332
+ return 1;
1333
+ }
1334
+ break;
1335
+ case 108:
1336
+ // Phoneme: "l", bytes: [108]
1337
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral approximant","voiced":true}
1338
+ if (max_length > 1) {
1339
+ switch(string[cursor + 1]) {
1340
+
1341
+ case 204:
1342
+ if (max_length > 2) {
1343
+ switch(string[cursor + 2]) {
1344
+
1345
+ case 165:
1346
+ // Phoneme: "l̥", bytes: [108, 204, 165]
1347
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral approximant","voiced":false}
1348
+ return 3;
1349
+ break;
1350
+ }
1351
+ } else {
1352
+ return 2;
1353
+ }
1354
+ break;
1355
+ default:
1356
+ return 1;
1357
+ }
1358
+ } else {
1359
+ return 1;
1360
+ }
1361
+ break;
1362
+ }
1363
+ return 0;
1364
+ }