phonetics 1.5.4 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8f26a3fc9c4a6729a0ddd04beee049efc4d1c0e8e2b887cfd14974de71d989a
4
- data.tar.gz: 4449ecff87444214065a0f964c2cd7a0ba2ce93f52b3cbeae14e4777afcba3df
3
+ metadata.gz: '05877cd48ad83d92a29dbcf60a5bd73373c7ecf974387487d37b719e4df0db4d'
4
+ data.tar.gz: d400d6249ac9204e920e8bc617b00b21009a4d15aeb719e589a100482114a927
5
5
  SHA512:
6
- metadata.gz: 4309942350149685324fa56a9c642f9456c7abe1993ab7f0801fbf56883afc4e31750e969e6d49e3da0ecd7b16a8d93b9212f5469ddc1dcd4c86699d0f48cd46
7
- data.tar.gz: 8007ae8ef77e2d9e5f055fc597c4c5ffbd9ee00007d9d31ed4d06ae96e9980bc2dfc92149c96bd94098faa7d0670c5d7cbd13a30f45acd8d8c6000853692f341
6
+ metadata.gz: 7bfd7f82c7f579e377f8d7669480c7cf4d73cf1c1b1259db1e19d4bbb9c8573480a7825ca51df6e4f600feba67ecfee10a2f9b66a49a9f33e888f94b04be9528
7
+ data.tar.gz: 3ddc3f72fbec2a0f833512b1d84c6d05d30c90db63e48ceb7f44c19cd7df43205517079b4cda113e9d80fe1a1c27586771eaf575c423beec2fa46a55a504cfd0
data/Rakefile CHANGED
@@ -5,23 +5,34 @@ require 'rake/extensiontask'
5
5
  require 'rspec/core/rake_task'
6
6
  require 'rubocop/rake_task'
7
7
 
8
+ EXT_PATH = 'ext/c_levenshtein'
9
+
8
10
  Rake::ExtensionTask.new('c_levenshtein') do |extension|
9
- extension.ext_dir = 'ext/c_levenshtein'
11
+ extension.ext_dir = EXT_PATH
10
12
  extension.lib_dir = 'lib/phonetics'
11
13
  end
12
14
 
13
- PHONETIC_COST_C_EXTENSION = File.expand_path('ext/c_levenshtein/phonetic_cost.c', __dir__)
15
+ PHONETIC_COST_C_EXTENSION = File.join(EXT_PATH, 'phonetic_cost.c')
16
+ NEXT_PHONEME_LENGTH_C_EXTENSION = File.join(EXT_PATH, 'next_phoneme_length.c')
17
+
18
+ require_relative './lib/phonetics/code_generator'
14
19
 
15
- namespace :compile do
16
- desc 'Write phonetic_cost.c using Phonetic values'
17
- task :phonetic_cost do
18
- require_relative './lib/phonetics'
19
- file = File.open(PHONETIC_COST_C_EXTENSION, 'w')
20
- Phonetics.generate_phonetic_cost_c_code(file)
21
- puts "Wrote #{PHONETIC_COST_C_EXTENSION}"
22
- end
20
+ desc 'Write phonetic_cost.c using Phonetic values'
21
+ task PHONETIC_COST_C_EXTENSION do
22
+ file = File.open(PHONETIC_COST_C_EXTENSION, 'w')
23
+ Phonetics::CodeGenerator.new(file).generate_phonetic_cost_c_code
24
+ puts "Wrote #{PHONETIC_COST_C_EXTENSION}"
23
25
  end
24
- task compile: 'compile:phonetic_cost'
26
+
27
+ desc 'Write phonemes.c using a lookup table of byte arrays'
28
+ task NEXT_PHONEME_LENGTH_C_EXTENSION do
29
+ file = File.open(NEXT_PHONEME_LENGTH_C_EXTENSION, 'w')
30
+ Phonetics::CodeGenerator.new(file).generate_next_phoneme_length_c_code
31
+ puts "Wrote #{NEXT_PHONEME_LENGTH_C_EXTENSION}"
32
+ end
33
+
34
+ task compile: PHONETIC_COST_C_EXTENSION
35
+ task compile: NEXT_PHONEME_LENGTH_C_EXTENSION
25
36
 
26
37
  RSpec::Core::RakeTask.new(:spec)
27
38
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.5.4
1
+ 1.8.0
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << '../lib'
5
+
6
+ require 'bundler/setup'
7
+ require 'pry-byebug'
8
+ require 'phonetics'
9
+ require 'phonetics/levenshtein'
10
+ require 'phonetics/ruby_levenshtein'
11
+ require 'phonetics/code_generator'
12
+
13
+ Phonetics.pry
@@ -1,5 +1,9 @@
1
- #include "ruby.h"
2
1
  #include <stdbool.h>
2
+ #include "ruby.h"
3
+ #include "ruby/encoding.h"
4
+ #include "ruby/re.h"
5
+ #include "./phonemes.h"
6
+ #include "./next_phoneme_length.h"
3
7
  #include "./phonetic_cost.h"
4
8
 
5
9
  #define debug(M, ...) if (verbose) printf(M, ##__VA_ARGS__)
@@ -10,8 +14,8 @@ VALUE Binding = Qnil;
10
14
 
11
15
  void Init_c_levenshtein();
12
16
 
13
- void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
14
- void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose);
17
+ void set_initial(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
18
+ void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose);
15
19
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose);
16
20
 
17
21
  /* Function implementations */
@@ -22,15 +26,23 @@ void Init_c_levenshtein() {
22
26
  }
23
27
 
24
28
  VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _string2, VALUE _verbose){
29
+ if (!RB_TYPE_P(_string1, T_STRING)) {
30
+ rb_raise(rb_eArgError, "must pass string as first argument");
31
+ }
32
+ if (!RB_TYPE_P(_string2, T_STRING)) {
33
+ rb_raise(rb_eArgError, "must pass string as second argument");
34
+ }
25
35
 
26
- VALUE *string1_ruby = RARRAY_PTR(_string1);
27
- VALUE *string2_ruby = RARRAY_PTR(_string2);
28
36
  bool verbose = _verbose;
29
- int string1_length = (int) RARRAY_LEN(_string1);
30
- int string2_length = (int) RARRAY_LEN(_string2);
31
- // We name them as 'strings' but in C-land we're representing our strings as
32
- // arrays of `int`s, where each int represents a consistent (if unusual)
33
- // encoding of a grapheme cluster (a symbol for a phoneme).
37
+ int string1_length = (int) RSTRING_LEN(_string1);
38
+ int string2_length = (int) RSTRING_LEN(_string2);
39
+
40
+ // Given the input strings, we count the phonemes in each and store both the
41
+ // total and, in a phoneme_sizes array, the length of each.
42
+ int string1_phoneme_count = 0;
43
+ int string2_phoneme_count = 0;
44
+ int string1_phoneme_sizes[string1_length + 1];
45
+ int string2_phoneme_sizes[string2_length + 1];
34
46
  int string1[string1_length + 1];
35
47
  int string2[string2_length + 1];
36
48
 
@@ -42,86 +54,85 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
42
54
  insert, replace,
43
55
  cost;
44
56
  int i, j; // Frequently overwritten loop vars
45
-
46
- // Guard clause for two empty strings
47
- if (string1_length == 0 && string2_length == 0)
48
- return DBL2NUM(0.0);
49
-
50
- //
51
- // Intial data setup
52
- //
57
+ int string1_offset = 0;
58
+ int string2_offset = 0;
53
59
 
54
60
  for (i = 0; i < string1_length; i++) {
55
- string1[i] = NUM2INT(string1_ruby[i]);
56
- debug("string1[%d] = %d\n", i, string1[i]);
61
+ string1[i] = (RSTRING_PTR(_string1)[i] & 0xff);
57
62
  }
58
- for (j = 0; j < string2_length; j++) {
59
- string2[j] = NUM2INT(string2_ruby[j]);
60
- debug("string2[%d] = %d\n", i, string2[j]);
63
+ for (i = 0; i < string2_length; i++) {
64
+ string2[i] = RSTRING_PTR(_string2)[i] & 0xff;
61
65
  }
62
66
 
63
- // one-dimensional representation of 2 dimentional array len(string1)+1 *
64
- // len(string2)+1
65
- d = malloc((sizeof(double)) * (string1_length+1) * (string2_length+1));
67
+ find_phonemes(string1, string1_length, &string1_phoneme_count, string1_phoneme_sizes);
68
+ find_phonemes(string2, string2_length, &string2_phoneme_count, string2_phoneme_sizes);
66
69
 
67
- //
68
- // Fill in the (flattened) matrix using the Levenshtein algorithm so we can
69
- // pluck the lowest-cost edit distance (stored in the lower-right corner, in
70
- // this case the last spot in the array)
71
- //
70
+ // Guard clauses for empty strings
71
+ if (string1_phoneme_count == 0 && string2_phoneme_count == 0)
72
+ return DBL2NUM(0.0);
72
73
 
74
+ // one-dimensional representation of 2 dimensional array
75
+ d = malloc((sizeof(double)) * (string1_phoneme_count+1) * (string2_phoneme_count+1));
76
+
73
77
  // First, set the top row and left column of the matrix using the sequential
74
78
  // phonetic edit distance of string1 and string2, respectively
75
- set_initial(d, string1, string1_length, string2, string2_length, verbose);
79
+ set_initial(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
76
80
 
77
- debug("before:\n");
78
- print_matrix(d, string1, string1_length, string2, string2_length, verbose);
81
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
79
82
 
80
- // Then walk through the matrix and fill in each cell with the lowest-cost
81
- // phonetic edit distance for that matrix cell.
83
+ // Then fill in the (flattened) matrix using the Levenshtein algorithm so we can
84
+ // pluck the lowest-cost edit distance (stored in the lower-right corner, in
85
+ // this case the last spot in the array).
86
+ // We'll use phonetic distance instead of '1' as the edit cost.
87
+ //
82
88
  // (Skipping i=0 and j=0 because set_initial filled in all cells where i
83
89
  // or j are zero-valued)
84
- for (j = 1; j <= string2_length; j++){
85
- for (i = 1; i <= string1_length; i++){
90
+ for (j = 1; j <= string2_phoneme_count; j++){
91
+
92
+ for (i = 1; i <= string1_phoneme_count; i++){
86
93
 
87
94
  // The cost of deletion or addition is the Levenshtein distance
88
95
  // calculation (the value in the cell to the left, upper-left, or above)
89
96
  // plus the phonetic distance between the sound we're moving from to the
90
97
  // new one.
91
98
 
92
- debug("------- %d/%d (%d) \n", i, j, j*(string1_length+1) + i);
99
+ debug("------- %d/%d (%d) \n", i, j, j*(string1_phoneme_count+1) + i);
93
100
 
94
- cost = phonetic_cost(string1[i-1], string2[j-1]);
95
- debug("phonetic cost of %d to %d is %f\n", string1[i-1], string2[j-1], cost);
101
+ // TODO: increment i and j by the phoneme lengths
102
+ cost = phonetic_cost(string1, string1_offset, string1_phoneme_sizes[i-1], string2, string2_offset, string2_phoneme_sizes[j-1]);
96
103
 
97
- insert = d[j*(string1_length+1) + i-1];
104
+ insert = d[j*(string1_phoneme_count+1) + i-1];
98
105
  debug("insert proposes cell %d,%d - %f\n", i-1, j, insert);
99
106
  min = insert;
100
107
  debug("min (insert): %f\n", min);
101
108
 
102
- delete = d[(j-1)*(string1_length+1) + i];
109
+ delete = d[(j-1)*(string1_phoneme_count+1) + i];
103
110
  debug("delete proposes cell %d,%d - %f\n", i, j-1, delete);
104
111
  if (delete < min) {
105
112
  debug("delete is %f, better than %f for %d/%d\n", delete, min, i, j);
106
113
  min = delete;
107
114
  }
108
115
 
109
- replace = d[(j-1)*(string1_length+1) + i-1];
116
+ replace = d[(j-1)*(string1_phoneme_count+1) + i-1];
110
117
  debug("replace proposes cell %d,%d - %f\n", i-1, j-1, replace);
111
118
  if (replace < min) {
112
119
  debug("replace is %f, better than %f for %d/%d\n", replace, min, i, j);
113
120
  min = replace;
114
121
  }
115
122
 
116
- d[(j * (string1_length+1)) + i] = min + cost;
123
+ d[(j * (string1_phoneme_count+1)) + i] = min + cost;
117
124
  debug("\n");
118
- print_matrix(d, string1, string1_length, string2, string2_length, verbose);
125
+ print_matrix(d, string1, string1_phoneme_count, string1_phoneme_sizes, string2, string2_phoneme_count, string2_phoneme_sizes, verbose);
126
+
127
+ string1_offset += string1_phoneme_sizes[i-1];
119
128
  }
129
+ string1_offset = 0;
130
+ string2_offset += string2_phoneme_sizes[j-1];
120
131
  }
121
132
 
122
133
  // The final element in the `d` array is the value of the shortest path from
123
134
  // the top-left to the bottom-right of the matrix.
124
- distance = d[(string1_length + 1) * (string2_length + 1) - 1];
135
+ distance = d[(string1_phoneme_count + 1) * (string2_phoneme_count + 1) - 1];
125
136
 
126
137
  free(d);
127
138
  debug("distance: %f\n", distance);
@@ -129,7 +140,6 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
129
140
  return DBL2NUM(distance);
130
141
  }
131
142
 
132
-
133
143
  // Set the minimum scores equal to the distance between each phoneme,
134
144
  // sequentially.
135
145
  //
@@ -139,12 +149,14 @@ VALUE method_internal_phonetic_distance(VALUE self, VALUE _string1, VALUE _strin
139
149
  // Subsequent values are the cumulative phonetic distance between each
140
150
  // phoneme within the same string.
141
151
  // "aek" -> [0.0, 1.0, 1.61, 2.61]
142
- void set_initial(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
152
+ void set_initial(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
143
153
 
144
154
  double initial_distance;
155
+ int string1_offset = 0;
156
+ int string2_offset = 0;
145
157
  int i, j;
146
158
 
147
- if (string1_length == 0 || string2_length == 0) {
159
+ if (string1_phoneme_count == 0 || string2_phoneme_count == 0) {
148
160
  initial_distance = 0.0;
149
161
  } else {
150
162
  initial_distance = 1.0;
@@ -152,51 +164,67 @@ void set_initial(double *d, int *string1, int string1_length, int *string2, int
152
164
 
153
165
  // The top-left is 0, the cell to the right and down are each 1 to start
154
166
  d[0] = (double) 0.0;
155
- if (string1_length > 0) {
167
+ if (string1_phoneme_count > 0) {
156
168
  d[1] = initial_distance;
157
169
  }
158
- if (string2_length > 0) {
159
- d[string1_length+1] = initial_distance;
170
+ if (string2_phoneme_count > 0) {
171
+ d[string1_phoneme_count+1] = initial_distance;
160
172
  }
161
173
 
162
- debug("string1 length: %d\n", string1_length);
163
- for (i=2; i <= string1_length; i++) {
174
+ debug("string1 phoneme count: %d\n", string1_phoneme_count);
175
+
176
+ for (i=2; i <= string1_phoneme_count; i++) {
164
177
  // The cost of adding the next phoneme is the cost so far plus the phonetic
165
178
  // distance between the previous one and the current one.
166
- d[i] = d[i-1] + phonetic_cost(string1[i-2], string1[i-1]);
179
+ d[i] = d[i-1] +
180
+ phonetic_cost(string1, string1_offset, string1_phoneme_sizes[i-2], string1, string1_offset + string1_phoneme_sizes[i-2], string1_phoneme_sizes[i-1]);
181
+ string1_offset += string1_phoneme_sizes[i-2];
167
182
  }
168
- debug("string2 length: %d\n", string2_length);
169
- for (j=2; j <= string2_length; j++) {
183
+
184
+ debug("string2 phoneme count: %d\n", string2_phoneme_count);
185
+
186
+ for (j=2; j <= string2_phoneme_count; j++) {
170
187
  // The same exact pattern down the left side of the matrix
171
- d[j * (string1_length+1)] = d[(j - 1) * (string1_length+1)] + phonetic_cost(string2[j-2], string2[j-1]);
188
+ d[j * (string1_phoneme_count+1)] = d[(j - 1) * (string1_phoneme_count+1)] +
189
+ phonetic_cost(string2, string2_offset, string2_phoneme_sizes[j-2], string2, string2_offset + string2_phoneme_sizes[j-2], string2_phoneme_sizes[j-1]);
190
+ string2_offset += string2_phoneme_sizes[j-1];
172
191
  }
173
192
 
174
- // And zero out the rest. If you're reading this please edit this to be
175
- // faster.
176
- for (j=1; j <= string2_length; j++) {
177
- for (i=1; i <= string1_length; i++) {
178
- d[j * (string1_length+1) + i] = (double) 0.0;
193
+ // And zero out the rest. If you're reading this please show me a way to do
194
+ // this faster.
195
+ for (j=1; j <= string2_phoneme_count; j++) {
196
+ for (i=1; i <= string1_phoneme_count; i++) {
197
+ d[j * (string1_phoneme_count+1) + i] = (double) 0.0;
179
198
  }
180
199
  }
181
200
  }
182
201
 
183
202
  // A handy visualization for developers
184
- void print_matrix(double *d, int *string1, int string1_length, int *string2, int string2_length, bool verbose) {
203
+ void print_matrix(double *d, int *string1, int string1_phoneme_count, int *string1_phoneme_sizes, int *string2, int string2_phoneme_count, int *string2_phoneme_sizes, bool verbose) {
204
+
185
205
  int i, j;
186
- debug(" ");
187
- for (i=0; i < string1_length; i++) {
188
- debug("%8.d ", string1[i]);
206
+ int string1_offset = 0;
207
+ int string2_offset = 0;
208
+
209
+ if (!verbose)
210
+ return;
211
+
212
+ printf(" ");
213
+ for (i=0; i < string1_phoneme_count; i++) {
214
+ print_phoneme(string1, string1_offset, string1_phoneme_sizes[i], 9);
215
+ string1_offset += string1_phoneme_sizes[i];
189
216
  }
190
- debug("\n");
191
- for (j=0; j <= string2_length; j++) {
217
+ printf("\n");
218
+ for (j=0; j <= string2_phoneme_count; j++) {
192
219
  if (j==0) {
193
- debug(" ");
220
+ printf(" ");
194
221
  } else {
195
- debug("%4.d ", string2[j-1]);
196
- }
197
- for (i=0; i <= string1_length; i++) {
198
- debug("%f ", d[j * (string1_length+1) + i]) ;
222
+ print_phoneme(string2, string2_offset, string2_phoneme_sizes[j-1], 2);
223
+ string2_offset += string2_phoneme_sizes[j-1];
224
+ }
225
+ for (i=0; i <= string1_phoneme_count; i++) {
226
+ printf("%f ", d[j * (string1_phoneme_count+1) + i]) ;
199
227
  }
200
- debug("\n");
228
+ printf("\n");
201
229
  }
202
230
  }
@@ -0,0 +1,1364 @@
1
+ // This is compiled from Ruby, in phonetics/lib/phonetics/code_generator.rb:221
2
+ int next_phoneme_length(int *string, int cursor, int length) {
3
+
4
+ int max_length;
5
+ max_length = length - cursor;
6
+
7
+ switch(string[cursor + 0]) {
8
+
9
+ case 105:
10
+ // Phoneme: "i", bytes: [105]
11
+ // vowel features: {"F1":240,"F2":2400,"rounded":false}
12
+ return 1;
13
+ break;
14
+ case 121:
15
+ // Phoneme: "y", bytes: [121]
16
+ // vowel features: {"F1":235,"F2":2100,"rounded":false}
17
+ return 1;
18
+ break;
19
+ case 201:
20
+ if (max_length > 1) {
21
+ switch(string[cursor + 1]) {
22
+
23
+ case 170:
24
+ // Phoneme: "ɪ", bytes: [201, 170]
25
+ // vowel features: {"F1":300,"F2":2100,"rounded":false}
26
+ return 2;
27
+ break;
28
+ case 155:
29
+ // Phoneme: "ɛ", bytes: [201, 155]
30
+ // vowel features: {"F1":610,"F2":1900,"rounded":false}
31
+ return 2;
32
+ break;
33
+ case 182:
34
+ // Phoneme: "ɶ", bytes: [201, 182]
35
+ // vowel features: {"F1":820,"F2":1530,"rounded":true}
36
+ return 2;
37
+ break;
38
+ case 145:
39
+ // Phoneme: "ɑ", bytes: [201, 145]
40
+ // vowel features: {"F1":750,"F2":940,"rounded":false}
41
+ return 2;
42
+ break;
43
+ case 146:
44
+ // Phoneme: "ɒ", bytes: [201, 146]
45
+ // vowel features: {"F1":700,"F2":760,"rounded":true}
46
+ return 2;
47
+ break;
48
+ case 153:
49
+ // Phoneme: "ə", bytes: [201, 153]
50
+ // vowel features: {"F1":600,"F2":1170,"rounded":false}
51
+ return 2;
52
+ break;
53
+ case 157:
54
+ // Phoneme: "ɝ", bytes: [201, 157]
55
+ // vowel features: {"F1":600,"F2":1170,"rounded":false,"rhotic":true}
56
+ return 2;
57
+ break;
58
+ case 148:
59
+ // Phoneme: "ɔ", bytes: [201, 148]
60
+ // vowel features: {"F1":500,"F2":700,"rounded":true}
61
+ return 2;
62
+ break;
63
+ case 164:
64
+ // Phoneme: "ɤ", bytes: [201, 164]
65
+ // vowel features: {"F1":460,"F2":1310,"rounded":false}
66
+ return 2;
67
+ break;
68
+ case 175:
69
+ // Phoneme: "ɯ", bytes: [201, 175]
70
+ // vowel features: {"F1":300,"F2":1390,"rounded":false}
71
+ return 2;
72
+ break;
73
+ case 177:
74
+ // Phoneme: "ɱ", bytes: [201, 177]
75
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Nasal","voiced":true}
76
+ return 2;
77
+ break;
78
+ case 179:
79
+ // Phoneme: "ɳ", bytes: [201, 179]
80
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Nasal","voiced":true}
81
+ if (max_length > 2) {
82
+ switch(string[cursor + 2]) {
83
+
84
+ case 204:
85
+ if (max_length > 3) {
86
+ switch(string[cursor + 3]) {
87
+
88
+ case 138:
89
+ // Phoneme: "ɳ̊", bytes: [201, 179, 204, 138]
90
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Nasal","voiced":false}
91
+ return 4;
92
+ break;
93
+ }
94
+ } else {
95
+ return 3;
96
+ }
97
+ break;
98
+ default:
99
+ return 2;
100
+ }
101
+ } else {
102
+ return 2;
103
+ }
104
+ break;
105
+ case 178:
106
+ // Phoneme: "ɲ", bytes: [201, 178]
107
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Nasal","voiced":true}
108
+ if (max_length > 2) {
109
+ switch(string[cursor + 2]) {
110
+
111
+ case 204:
112
+ if (max_length > 3) {
113
+ switch(string[cursor + 3]) {
114
+
115
+ case 138:
116
+ // Phoneme: "ɲ̊", bytes: [201, 178, 204, 138]
117
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Nasal","voiced":false}
118
+ return 4;
119
+ break;
120
+ }
121
+ } else {
122
+ return 3;
123
+ }
124
+ break;
125
+ default:
126
+ return 2;
127
+ }
128
+ } else {
129
+ return 2;
130
+ }
131
+ break;
132
+ case 180:
133
+ // Phoneme: "ɴ", bytes: [201, 180]
134
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Nasal","voiced":true}
135
+ return 2;
136
+ break;
137
+ case 150:
138
+ // Phoneme: "ɖ", bytes: [201, 150]
139
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Stop","voiced":true}
140
+ return 2;
141
+ break;
142
+ case 159:
143
+ // Phoneme: "ɟ", bytes: [201, 159]
144
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Stop","voiced":true}
145
+ return 2;
146
+ break;
147
+ case 162:
148
+ // Phoneme: "ɢ", bytes: [201, 162]
149
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Stop","voiced":true}
150
+ if (max_length > 2) {
151
+ switch(string[cursor + 2]) {
152
+
153
+ case 204:
154
+ if (max_length > 3) {
155
+ switch(string[cursor + 3]) {
156
+
157
+ case 134:
158
+ // Phoneme: "ɢ̆", bytes: [201, 162, 204, 134]
159
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Tap/flap","voiced":true}
160
+ return 4;
161
+ break;
162
+ }
163
+ } else {
164
+ return 3;
165
+ }
166
+ break;
167
+ default:
168
+ return 2;
169
+ }
170
+ } else {
171
+ return 2;
172
+ }
173
+ break;
174
+ case 149:
175
+ // Phoneme: "ɕ", bytes: [201, 149]
176
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Sibilant fricative","voiced":false}
177
+ return 2;
178
+ break;
179
+ case 184:
180
+ // Phoneme: "ɸ", bytes: [201, 184]
181
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Non-sibilant fricative","voiced":false}
182
+ return 2;
183
+ break;
184
+ case 185:
185
+ // Phoneme: "ɹ", bytes: [201, 185]
186
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Approximant","voiced":true}
187
+ if (max_length > 2) {
188
+ switch(string[cursor + 2]) {
189
+
190
+ case 204:
191
+ if (max_length > 3) {
192
+ switch(string[cursor + 3]) {
193
+
194
+ case 160:
195
+ if (max_length > 4) {
196
+ switch(string[cursor + 4]) {
197
+
198
+ case 204:
199
+ if (max_length > 5) {
200
+ switch(string[cursor + 5]) {
201
+
202
+ case 138:
203
+ if (max_length > 6) {
204
+ switch(string[cursor + 6]) {
205
+
206
+ case 203:
207
+ if (max_length > 7) {
208
+ switch(string[cursor + 7]) {
209
+
210
+ case 148:
211
+ // Phoneme: "ɹ̠̊˔", bytes: [201, 185, 204, 160, 204, 138, 203, 148]
212
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Non-sibilant fricative","voiced":false}
213
+ return 8;
214
+ break;
215
+ }
216
+ } else {
217
+ return 7;
218
+ }
219
+ break;
220
+ }
221
+ } else {
222
+ return 6;
223
+ }
224
+ break;
225
+ }
226
+ } else {
227
+ return 5;
228
+ }
229
+ break;
230
+ case 203:
231
+ if (max_length > 5) {
232
+ switch(string[cursor + 5]) {
233
+
234
+ case 148:
235
+ // Phoneme: "ɹ̠˔", bytes: [201, 185, 204, 160, 203, 148]
236
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Non-sibilant fricative","voiced":true}
237
+ return 6;
238
+ break;
239
+ }
240
+ } else {
241
+ return 5;
242
+ }
243
+ break;
244
+ }
245
+ } else {
246
+ return 4;
247
+ }
248
+ break;
249
+ case 165:
250
+ // Phoneme: "ɹ̥", bytes: [201, 185, 204, 165]
251
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Approximant","voiced":false}
252
+ return 4;
253
+ break;
254
+ }
255
+ } else {
256
+ return 3;
257
+ }
258
+ break;
259
+ default:
260
+ return 2;
261
+ }
262
+ } else {
263
+ return 2;
264
+ }
265
+ break;
266
+ case 187:
267
+ // Phoneme: "ɻ", bytes: [201, 187]
268
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Approximant","voiced":true}
269
+ if (max_length > 2) {
270
+ switch(string[cursor + 2]) {
271
+
272
+ case 203:
273
+ if (max_length > 3) {
274
+ switch(string[cursor + 3]) {
275
+
276
+ case 148:
277
+ // Phoneme: "ɻ˔", bytes: [201, 187, 203, 148]
278
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Non-sibilant fricative","voiced":true}
279
+ return 4;
280
+ break;
281
+ }
282
+ } else {
283
+ return 3;
284
+ }
285
+ break;
286
+ case 204:
287
+ if (max_length > 3) {
288
+ switch(string[cursor + 3]) {
289
+
290
+ case 138:
291
+ // Phoneme: "ɻ̊", bytes: [201, 187, 204, 138]
292
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Approximant","voiced":false}
293
+ return 4;
294
+ break;
295
+ }
296
+ } else {
297
+ return 3;
298
+ }
299
+ break;
300
+ default:
301
+ return 2;
302
+ }
303
+ } else {
304
+ return 2;
305
+ }
306
+ break;
307
+ case 163:
308
+ // Phoneme: "ɣ", bytes: [201, 163]
309
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Non-sibilant fricative","voiced":true}
310
+ return 2;
311
+ break;
312
+ case 166:
313
+ // Phoneme: "ɦ", bytes: [201, 166]
314
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Non-sibilant fricative","voiced":true}
315
+ return 2;
316
+ break;
317
+ case 176:
318
+ // Phoneme: "ɰ", bytes: [201, 176]
319
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Approximant","voiced":true}
320
+ if (max_length > 2) {
321
+ switch(string[cursor + 2]) {
322
+
323
+ case 204:
324
+ if (max_length > 3) {
325
+ switch(string[cursor + 3]) {
326
+
327
+ case 138:
328
+ // Phoneme: "ɰ̊", bytes: [201, 176, 204, 138]
329
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Approximant","voiced":false}
330
+ return 4;
331
+ break;
332
+ }
333
+ } else {
334
+ return 3;
335
+ }
336
+ break;
337
+ default:
338
+ return 2;
339
+ }
340
+ } else {
341
+ return 2;
342
+ }
343
+ break;
344
+ case 190:
345
+ // Phoneme: "ɾ", bytes: [201, 190]
346
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Tap/flap","voiced":true}
347
+ if (max_length > 2) {
348
+ switch(string[cursor + 2]) {
349
+
350
+ case 204:
351
+ if (max_length > 3) {
352
+ switch(string[cursor + 3]) {
353
+
354
+ case 188:
355
+ // Phoneme: "ɾ̼", bytes: [201, 190, 204, 188]
356
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Tap/flap","voiced":true}
357
+ return 4;
358
+ break;
359
+ case 165:
360
+ // Phoneme: "ɾ̥", bytes: [201, 190, 204, 165]
361
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Tap/flap","voiced":false}
362
+ return 4;
363
+ break;
364
+ }
365
+ } else {
366
+ return 3;
367
+ }
368
+ break;
369
+ default:
370
+ return 2;
371
+ }
372
+ } else {
373
+ return 2;
374
+ }
375
+ break;
376
+ case 189:
377
+ // Phoneme: "ɽ", bytes: [201, 189]
378
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Tap/flap","voiced":true}
379
+ if (max_length > 2) {
380
+ switch(string[cursor + 2]) {
381
+
382
+ case 204:
383
+ if (max_length > 3) {
384
+ switch(string[cursor + 3]) {
385
+
386
+ case 138:
387
+ // Phoneme: "ɽ̊", bytes: [201, 189, 204, 138]
388
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Tap/flap","voiced":false}
389
+ return 4;
390
+ break;
391
+ }
392
+ } else {
393
+ return 3;
394
+ }
395
+ break;
396
+ default:
397
+ return 2;
398
+ }
399
+ } else {
400
+ return 2;
401
+ }
402
+ break;
403
+ case 172:
404
+ // Phoneme: "ɬ", bytes: [201, 172]
405
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral fricative","voiced":false}
406
+ return 2;
407
+ break;
408
+ case 174:
409
+ // Phoneme: "ɮ", bytes: [201, 174]
410
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral fricative","voiced":true}
411
+ return 2;
412
+ break;
413
+ case 173:
414
+ // Phoneme: "ɭ", bytes: [201, 173]
415
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral approximant","voiced":true}
416
+ if (max_length > 2) {
417
+ switch(string[cursor + 2]) {
418
+
419
+ case 204:
420
+ if (max_length > 3) {
421
+ switch(string[cursor + 3]) {
422
+
423
+ case 138:
424
+ // Phoneme: "ɭ̊", bytes: [201, 173, 204, 138]
425
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral approximant","voiced":false}
426
+ if (max_length > 4) {
427
+ switch(string[cursor + 4]) {
428
+
429
+ case 203:
430
+ if (max_length > 5) {
431
+ switch(string[cursor + 5]) {
432
+
433
+ case 148:
434
+ // Phoneme: "ɭ̊˔", bytes: [201, 173, 204, 138, 203, 148]
435
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral fricative","voiced":false}
436
+ return 6;
437
+ break;
438
+ }
439
+ } else {
440
+ return 5;
441
+ }
442
+ break;
443
+ default:
444
+ return 4;
445
+ }
446
+ } else {
447
+ return 4;
448
+ }
449
+ break;
450
+ case 134:
451
+ // Phoneme: "ɭ̆", bytes: [201, 173, 204, 134]
452
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral tap/flap","voiced":true}
453
+ return 4;
454
+ break;
455
+ }
456
+ } else {
457
+ return 3;
458
+ }
459
+ break;
460
+ case 203:
461
+ if (max_length > 3) {
462
+ switch(string[cursor + 3]) {
463
+
464
+ case 148:
465
+ // Phoneme: "ɭ˔", bytes: [201, 173, 203, 148]
466
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Lateral fricative","voiced":true}
467
+ return 4;
468
+ break;
469
+ }
470
+ } else {
471
+ return 3;
472
+ }
473
+ break;
474
+ default:
475
+ return 2;
476
+ }
477
+ } else {
478
+ return 2;
479
+ }
480
+ break;
481
+ case 186:
482
+ // Phoneme: "ɺ", bytes: [201, 186]
483
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral tap/flap","voiced":true}
484
+ return 2;
485
+ break;
486
+ }
487
+ } else {
488
+ return 1;
489
+ }
490
+ break;
491
+ case 101:
492
+ // Phoneme: "e", bytes: [101]
493
+ // vowel features: {"F1":390,"F2":2300,"rounded":false}
494
+ return 1;
495
+ break;
496
+ case 195:
497
+ if (max_length > 1) {
498
+ switch(string[cursor + 1]) {
499
+
500
+ case 184:
501
+ // Phoneme: "ø", bytes: [195, 184]
502
+ // vowel features: {"F1":370,"F2":1900,"rounded":true}
503
+ return 2;
504
+ break;
505
+ case 166:
506
+ // Phoneme: "æ", bytes: [195, 166]
507
+ // vowel features: {"F1":800,"F2":1900,"rounded":false}
508
+ return 2;
509
+ break;
510
+ case 176:
511
+ // Phoneme: "ð", bytes: [195, 176]
512
+ // consonant features: {"position":"Dental","position_index":4,"manner":"Non-sibilant fricative","voiced":true}
513
+ if (max_length > 2) {
514
+ switch(string[cursor + 2]) {
515
+
516
+ case 204:
517
+ if (max_length > 3) {
518
+ switch(string[cursor + 3]) {
519
+
520
+ case 188:
521
+ // Phoneme: "ð̼", bytes: [195, 176, 204, 188]
522
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Non-sibilant fricative","voiced":true}
523
+ return 4;
524
+ break;
525
+ case 160:
526
+ // Phoneme: "ð̠", bytes: [195, 176, 204, 160]
527
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Non-sibilant fricative","voiced":true}
528
+ return 4;
529
+ break;
530
+ }
531
+ } else {
532
+ return 3;
533
+ }
534
+ break;
535
+ default:
536
+ return 2;
537
+ }
538
+ } else {
539
+ return 2;
540
+ }
541
+ break;
542
+ case 167:
543
+ // Phoneme: "ç", bytes: [195, 167]
544
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Non-sibilant fricative","voiced":false}
545
+ return 2;
546
+ break;
547
+ }
548
+ } else {
549
+ return 1;
550
+ }
551
+ break;
552
+ case 197:
553
+ if (max_length > 1) {
554
+ switch(string[cursor + 1]) {
555
+
556
+ case 147:
557
+ // Phoneme: "œ", bytes: [197, 147]
558
+ // vowel features: {"F1":585,"F2":1710,"rounded":true}
559
+ return 2;
560
+ break;
561
+ case 139:
562
+ // Phoneme: "ŋ", bytes: [197, 139]
563
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Nasal","voiced":true}
564
+ if (max_length > 2) {
565
+ switch(string[cursor + 2]) {
566
+
567
+ case 204:
568
+ if (max_length > 3) {
569
+ switch(string[cursor + 3]) {
570
+
571
+ case 138:
572
+ // Phoneme: "ŋ̊", bytes: [197, 139, 204, 138]
573
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Nasal","voiced":false}
574
+ return 4;
575
+ break;
576
+ }
577
+ } else {
578
+ return 3;
579
+ }
580
+ break;
581
+ default:
582
+ return 2;
583
+ }
584
+ } else {
585
+ return 2;
586
+ }
587
+ break;
588
+ }
589
+ } else {
590
+ return 1;
591
+ }
592
+ break;
593
+ case 97:
594
+ // Phoneme: "a", bytes: [97]
595
+ // vowel features: {"F1":850,"F2":1610,"rounded":false}
596
+ return 1;
597
+ break;
598
+ case 202:
599
+ if (max_length > 1) {
600
+ switch(string[cursor + 1]) {
601
+
602
+ case 140:
603
+ // Phoneme: "ʌ", bytes: [202, 140]
604
+ // vowel features: {"F1":600,"F2":1170,"rounded":false}
605
+ return 2;
606
+ break;
607
+ case 138:
608
+ // Phoneme: "ʊ", bytes: [202, 138]
609
+ // vowel features: {"F1":350,"F2":650,"rounded":true}
610
+ return 2;
611
+ break;
612
+ case 136:
613
+ // Phoneme: "ʈ", bytes: [202, 136]
614
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Stop","voiced":false}
615
+ return 2;
616
+ break;
617
+ case 161:
618
+ // Phoneme: "ʡ", bytes: [202, 161]
619
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Stop","voiced":false}
620
+ if (max_length > 2) {
621
+ switch(string[cursor + 2]) {
622
+
623
+ case 204:
624
+ if (max_length > 3) {
625
+ switch(string[cursor + 3]) {
626
+
627
+ case 134:
628
+ // Phoneme: "ʡ̆", bytes: [202, 161, 204, 134]
629
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Tap/flap","voiced":true}
630
+ return 4;
631
+ break;
632
+ }
633
+ } else {
634
+ return 3;
635
+ }
636
+ break;
637
+ default:
638
+ return 2;
639
+ }
640
+ } else {
641
+ return 2;
642
+ }
643
+ break;
644
+ case 148:
645
+ // Phoneme: "ʔ", bytes: [202, 148]
646
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Stop","voiced":false}
647
+ if (max_length > 2) {
648
+ switch(string[cursor + 2]) {
649
+
650
+ case 204:
651
+ if (max_length > 3) {
652
+ switch(string[cursor + 3]) {
653
+
654
+ case 158:
655
+ // Phoneme: "ʔ̞", bytes: [202, 148, 204, 158]
656
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Approximant","voiced":true}
657
+ return 4;
658
+ break;
659
+ }
660
+ } else {
661
+ return 3;
662
+ }
663
+ break;
664
+ default:
665
+ return 2;
666
+ }
667
+ } else {
668
+ return 2;
669
+ }
670
+ break;
671
+ case 131:
672
+ // Phoneme: "ʃ", bytes: [202, 131]
673
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Sibilant fricative","voiced":false}
674
+ return 2;
675
+ break;
676
+ case 146:
677
+ // Phoneme: "ʒ", bytes: [202, 146]
678
+ // consonant features: {"position":"Post-alveolar","position_index":6,"manner":"Sibilant fricative","voiced":true}
679
+ return 2;
680
+ break;
681
+ case 130:
682
+ // Phoneme: "ʂ", bytes: [202, 130]
683
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Sibilant fricative","voiced":false}
684
+ return 2;
685
+ break;
686
+ case 144:
687
+ // Phoneme: "ʐ", bytes: [202, 144]
688
+ // consonant features: {"position":"Retro-flex","position_index":7,"manner":"Sibilant fricative","voiced":true}
689
+ return 2;
690
+ break;
691
+ case 145:
692
+ // Phoneme: "ʑ", bytes: [202, 145]
693
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Sibilant fricative","voiced":true}
694
+ return 2;
695
+ break;
696
+ case 157:
697
+ // Phoneme: "ʝ", bytes: [202, 157]
698
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Non-sibilant fricative","voiced":true}
699
+ return 2;
700
+ break;
701
+ case 129:
702
+ // Phoneme: "ʁ", bytes: [202, 129]
703
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Non-sibilant fricative","voiced":true}
704
+ return 2;
705
+ break;
706
+ case 149:
707
+ // Phoneme: "ʕ", bytes: [202, 149]
708
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Non-sibilant fricative","voiced":true}
709
+ return 2;
710
+ break;
711
+ case 139:
712
+ // Phoneme: "ʋ", bytes: [202, 139]
713
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Approximant","voiced":true}
714
+ if (max_length > 2) {
715
+ switch(string[cursor + 2]) {
716
+
717
+ case 204:
718
+ if (max_length > 3) {
719
+ switch(string[cursor + 3]) {
720
+
721
+ case 165:
722
+ // Phoneme: "ʋ̥", bytes: [202, 139, 204, 165]
723
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Approximant","voiced":false}
724
+ return 4;
725
+ break;
726
+ }
727
+ } else {
728
+ return 3;
729
+ }
730
+ break;
731
+ default:
732
+ return 2;
733
+ }
734
+ } else {
735
+ return 2;
736
+ }
737
+ break;
738
+ case 153:
739
+ // Phoneme: "ʙ", bytes: [202, 153]
740
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Trill","voiced":true}
741
+ if (max_length > 2) {
742
+ switch(string[cursor + 2]) {
743
+
744
+ case 204:
745
+ if (max_length > 3) {
746
+ switch(string[cursor + 3]) {
747
+
748
+ case 165:
749
+ // Phoneme: "ʙ̥", bytes: [202, 153, 204, 165]
750
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Trill","voiced":false}
751
+ return 4;
752
+ break;
753
+ }
754
+ } else {
755
+ return 3;
756
+ }
757
+ break;
758
+ default:
759
+ return 2;
760
+ }
761
+ } else {
762
+ return 2;
763
+ }
764
+ break;
765
+ case 128:
766
+ // Phoneme: "ʀ", bytes: [202, 128]
767
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Trill","voiced":true}
768
+ if (max_length > 2) {
769
+ switch(string[cursor + 2]) {
770
+
771
+ case 204:
772
+ if (max_length > 3) {
773
+ switch(string[cursor + 3]) {
774
+
775
+ case 165:
776
+ // Phoneme: "ʀ̥", bytes: [202, 128, 204, 165]
777
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Trill","voiced":false}
778
+ return 4;
779
+ break;
780
+ }
781
+ } else {
782
+ return 3;
783
+ }
784
+ break;
785
+ default:
786
+ return 2;
787
+ }
788
+ } else {
789
+ return 2;
790
+ }
791
+ break;
792
+ case 156:
793
+ // Phoneme: "ʜ", bytes: [202, 156]
794
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Trill","voiced":false}
795
+ return 2;
796
+ break;
797
+ case 162:
798
+ // Phoneme: "ʢ", bytes: [202, 162]
799
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Trill","voiced":true}
800
+ return 2;
801
+ break;
802
+ case 142:
803
+ // Phoneme: "ʎ", bytes: [202, 142]
804
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral approximant","voiced":true}
805
+ if (max_length > 2) {
806
+ switch(string[cursor + 2]) {
807
+
808
+ case 204:
809
+ if (max_length > 3) {
810
+ switch(string[cursor + 3]) {
811
+
812
+ case 157:
813
+ // Phoneme: "ʎ̝", bytes: [202, 142, 204, 157]
814
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral fricative","voiced":true}
815
+ if (max_length > 4) {
816
+ switch(string[cursor + 4]) {
817
+
818
+ case 204:
819
+ if (max_length > 5) {
820
+ switch(string[cursor + 5]) {
821
+
822
+ case 138:
823
+ // Phoneme: "ʎ̝̊", bytes: [202, 142, 204, 157, 204, 138]
824
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral fricative","voiced":false}
825
+ return 6;
826
+ break;
827
+ }
828
+ } else {
829
+ return 5;
830
+ }
831
+ break;
832
+ default:
833
+ return 4;
834
+ }
835
+ } else {
836
+ return 4;
837
+ }
838
+ break;
839
+ case 165:
840
+ // Phoneme: "ʎ̥", bytes: [202, 142, 204, 165]
841
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral approximant","voiced":false}
842
+ return 4;
843
+ break;
844
+ case 134:
845
+ // Phoneme: "ʎ̆", bytes: [202, 142, 204, 134]
846
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Lateral tap/flap","voiced":true}
847
+ return 4;
848
+ break;
849
+ }
850
+ } else {
851
+ return 3;
852
+ }
853
+ break;
854
+ default:
855
+ return 2;
856
+ }
857
+ } else {
858
+ return 2;
859
+ }
860
+ break;
861
+ case 159:
862
+ // Phoneme: "ʟ", bytes: [202, 159]
863
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral approximant","voiced":true}
864
+ if (max_length > 2) {
865
+ switch(string[cursor + 2]) {
866
+
867
+ case 204:
868
+ if (max_length > 3) {
869
+ switch(string[cursor + 3]) {
870
+
871
+ case 157:
872
+ // Phoneme: "ʟ̝", bytes: [202, 159, 204, 157]
873
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral fricative","voiced":true}
874
+ if (max_length > 4) {
875
+ switch(string[cursor + 4]) {
876
+
877
+ case 204:
878
+ if (max_length > 5) {
879
+ switch(string[cursor + 5]) {
880
+
881
+ case 138:
882
+ // Phoneme: "ʟ̝̊", bytes: [202, 159, 204, 157, 204, 138]
883
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral fricative","voiced":false}
884
+ return 6;
885
+ break;
886
+ }
887
+ } else {
888
+ return 5;
889
+ }
890
+ break;
891
+ default:
892
+ return 4;
893
+ }
894
+ } else {
895
+ return 4;
896
+ }
897
+ break;
898
+ case 165:
899
+ // Phoneme: "ʟ̥", bytes: [202, 159, 204, 165]
900
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral approximant","voiced":false}
901
+ return 4;
902
+ break;
903
+ case 160:
904
+ // Phoneme: "ʟ̠", bytes: [202, 159, 204, 160]
905
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Lateral approximant","voiced":true}
906
+ return 4;
907
+ break;
908
+ case 134:
909
+ // Phoneme: "ʟ̆", bytes: [202, 159, 204, 134]
910
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Lateral tap/flap","voiced":true}
911
+ return 4;
912
+ break;
913
+ }
914
+ } else {
915
+ return 3;
916
+ }
917
+ break;
918
+ default:
919
+ return 2;
920
+ }
921
+ } else {
922
+ return 2;
923
+ }
924
+ break;
925
+ }
926
+ } else {
927
+ return 1;
928
+ }
929
+ break;
930
+ case 111:
931
+ // Phoneme: "o", bytes: [111]
932
+ // vowel features: {"F1":360,"F2":640,"rounded":true}
933
+ return 1;
934
+ break;
935
+ case 117:
936
+ // Phoneme: "u", bytes: [117]
937
+ // vowel features: {"F1":350,"F2":650,"rounded":true}
938
+ return 1;
939
+ break;
940
+ case 109:
941
+ // Phoneme: "m", bytes: [109]
942
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Nasal","voiced":true}
943
+ if (max_length > 1) {
944
+ switch(string[cursor + 1]) {
945
+
946
+ case 204:
947
+ if (max_length > 2) {
948
+ switch(string[cursor + 2]) {
949
+
950
+ case 165:
951
+ // Phoneme: "m̥", bytes: [109, 204, 165]
952
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Nasal","voiced":false}
953
+ return 3;
954
+ break;
955
+ }
956
+ } else {
957
+ return 2;
958
+ }
959
+ break;
960
+ default:
961
+ return 1;
962
+ }
963
+ } else {
964
+ return 1;
965
+ }
966
+ break;
967
+ case 110:
968
+ // Phoneme: "n", bytes: [110]
969
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Nasal","voiced":true}
970
+ if (max_length > 1) {
971
+ switch(string[cursor + 1]) {
972
+
973
+ case 204:
974
+ if (max_length > 2) {
975
+ switch(string[cursor + 2]) {
976
+
977
+ case 188:
978
+ // Phoneme: "n̼", bytes: [110, 204, 188]
979
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Nasal","voiced":true}
980
+ return 3;
981
+ break;
982
+ case 165:
983
+ // Phoneme: "n̥", bytes: [110, 204, 165]
984
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Nasal","voiced":false}
985
+ return 3;
986
+ break;
987
+ }
988
+ } else {
989
+ return 2;
990
+ }
991
+ break;
992
+ default:
993
+ return 1;
994
+ }
995
+ } else {
996
+ return 1;
997
+ }
998
+ break;
999
+ case 112:
1000
+ // Phoneme: "p", bytes: [112]
1001
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Stop","voiced":false}
1002
+ if (max_length > 1) {
1003
+ switch(string[cursor + 1]) {
1004
+
1005
+ case 204:
1006
+ if (max_length > 2) {
1007
+ switch(string[cursor + 2]) {
1008
+
1009
+ case 170:
1010
+ // Phoneme: "p̪", bytes: [112, 204, 170]
1011
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Stop","voiced":false}
1012
+ return 3;
1013
+ break;
1014
+ }
1015
+ } else {
1016
+ return 2;
1017
+ }
1018
+ break;
1019
+ default:
1020
+ return 1;
1021
+ }
1022
+ } else {
1023
+ return 1;
1024
+ }
1025
+ break;
1026
+ case 98:
1027
+ // Phoneme: "b", bytes: [98]
1028
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Stop","voiced":true}
1029
+ if (max_length > 1) {
1030
+ switch(string[cursor + 1]) {
1031
+
1032
+ case 204:
1033
+ if (max_length > 2) {
1034
+ switch(string[cursor + 2]) {
1035
+
1036
+ case 170:
1037
+ // Phoneme: "b̪", bytes: [98, 204, 170]
1038
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Stop","voiced":true}
1039
+ return 3;
1040
+ break;
1041
+ }
1042
+ } else {
1043
+ return 2;
1044
+ }
1045
+ break;
1046
+ default:
1047
+ return 1;
1048
+ }
1049
+ } else {
1050
+ return 1;
1051
+ }
1052
+ break;
1053
+ case 116:
1054
+ // Phoneme: "t", bytes: [116]
1055
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Stop","voiced":false}
1056
+ if (max_length > 1) {
1057
+ switch(string[cursor + 1]) {
1058
+
1059
+ case 204:
1060
+ if (max_length > 2) {
1061
+ switch(string[cursor + 2]) {
1062
+
1063
+ case 188:
1064
+ // Phoneme: "t̼", bytes: [116, 204, 188]
1065
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Stop","voiced":false}
1066
+ return 3;
1067
+ break;
1068
+ }
1069
+ } else {
1070
+ return 2;
1071
+ }
1072
+ break;
1073
+ default:
1074
+ return 1;
1075
+ }
1076
+ } else {
1077
+ return 1;
1078
+ }
1079
+ break;
1080
+ case 100:
1081
+ // Phoneme: "d", bytes: [100]
1082
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Stop","voiced":true}
1083
+ if (max_length > 1) {
1084
+ switch(string[cursor + 1]) {
1085
+
1086
+ case 204:
1087
+ if (max_length > 2) {
1088
+ switch(string[cursor + 2]) {
1089
+
1090
+ case 188:
1091
+ // Phoneme: "d̼", bytes: [100, 204, 188]
1092
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Stop","voiced":true}
1093
+ return 3;
1094
+ break;
1095
+ }
1096
+ } else {
1097
+ return 2;
1098
+ }
1099
+ break;
1100
+ default:
1101
+ return 1;
1102
+ }
1103
+ } else {
1104
+ return 1;
1105
+ }
1106
+ break;
1107
+ case 99:
1108
+ // Phoneme: "c", bytes: [99]
1109
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Stop","voiced":false}
1110
+ return 1;
1111
+ break;
1112
+ case 107:
1113
+ // Phoneme: "k", bytes: [107]
1114
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Stop","voiced":false}
1115
+ return 1;
1116
+ break;
1117
+ case 103:
1118
+ // Phoneme: "g", bytes: [103]
1119
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Stop","voiced":true}
1120
+ return 1;
1121
+ break;
1122
+ case 113:
1123
+ // Phoneme: "q", bytes: [113]
1124
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Stop","voiced":false}
1125
+ return 1;
1126
+ break;
1127
+ case 115:
1128
+ // Phoneme: "s", bytes: [115]
1129
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Sibilant fricative","voiced":false}
1130
+ return 1;
1131
+ break;
1132
+ case 122:
1133
+ // Phoneme: "z", bytes: [122]
1134
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Sibilant fricative","voiced":true}
1135
+ return 1;
1136
+ break;
1137
+ case 206:
1138
+ if (max_length > 1) {
1139
+ switch(string[cursor + 1]) {
1140
+
1141
+ case 178:
1142
+ // Phoneme: "β", bytes: [206, 178]
1143
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Non-sibilant fricative","voiced":true}
1144
+ return 2;
1145
+ break;
1146
+ case 184:
1147
+ // Phoneme: "θ", bytes: [206, 184]
1148
+ // consonant features: {"position":"Dental","position_index":4,"manner":"Non-sibilant fricative","voiced":false}
1149
+ if (max_length > 2) {
1150
+ switch(string[cursor + 2]) {
1151
+
1152
+ case 204:
1153
+ if (max_length > 3) {
1154
+ switch(string[cursor + 3]) {
1155
+
1156
+ case 188:
1157
+ // Phoneme: "θ̼", bytes: [206, 184, 204, 188]
1158
+ // consonant features: {"position":"Linguo-labial","position_index":3,"manner":"Non-sibilant fricative","voiced":false}
1159
+ return 4;
1160
+ break;
1161
+ case 160:
1162
+ // Phoneme: "θ̠", bytes: [206, 184, 204, 160]
1163
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Non-sibilant fricative","voiced":false}
1164
+ return 4;
1165
+ break;
1166
+ }
1167
+ } else {
1168
+ return 3;
1169
+ }
1170
+ break;
1171
+ default:
1172
+ return 2;
1173
+ }
1174
+ } else {
1175
+ return 2;
1176
+ }
1177
+ break;
1178
+ }
1179
+ } else {
1180
+ return 1;
1181
+ }
1182
+ break;
1183
+ case 102:
1184
+ // Phoneme: "f", bytes: [102]
1185
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Non-sibilant fricative","voiced":false}
1186
+ return 1;
1187
+ break;
1188
+ case 118:
1189
+ // Phoneme: "v", bytes: [118]
1190
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Non-sibilant fricative","voiced":true}
1191
+ return 1;
1192
+ break;
1193
+ case 120:
1194
+ // Phoneme: "x", bytes: [120]
1195
+ // consonant features: {"position":"Velar","position_index":9,"manner":"Non-sibilant fricative","voiced":false}
1196
+ return 1;
1197
+ break;
1198
+ case 207:
1199
+ if (max_length > 1) {
1200
+ switch(string[cursor + 1]) {
1201
+
1202
+ case 135:
1203
+ // Phoneme: "χ", bytes: [207, 135]
1204
+ // consonant features: {"position":"Uvular","position_index":10,"manner":"Non-sibilant fricative","voiced":false}
1205
+ return 2;
1206
+ break;
1207
+ }
1208
+ } else {
1209
+ return 1;
1210
+ }
1211
+ break;
1212
+ case 196:
1213
+ if (max_length > 1) {
1214
+ switch(string[cursor + 1]) {
1215
+
1216
+ case 167:
1217
+ // Phoneme: "ħ", bytes: [196, 167]
1218
+ // consonant features: {"position":"Pharyngeal","position_index":11,"manner":"Non-sibilant fricative","voiced":false}
1219
+ return 2;
1220
+ break;
1221
+ }
1222
+ } else {
1223
+ return 1;
1224
+ }
1225
+ break;
1226
+ case 104:
1227
+ // Phoneme: "h", bytes: [104]
1228
+ // consonant features: {"position":"Glottal","position_index":12,"manner":"Non-sibilant fricative","voiced":false}
1229
+ return 1;
1230
+ break;
1231
+ case 119:
1232
+ // Phoneme: "w", bytes: [119]
1233
+ // consonant features: {"position":"Labio-velar","position_index":0,"manner":"Approximant","voiced":true}
1234
+ return 1;
1235
+ break;
1236
+ case 106:
1237
+ // Phoneme: "j", bytes: [106]
1238
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Approximant","voiced":true}
1239
+ if (max_length > 1) {
1240
+ switch(string[cursor + 1]) {
1241
+
1242
+ case 204:
1243
+ if (max_length > 2) {
1244
+ switch(string[cursor + 2]) {
1245
+
1246
+ case 138:
1247
+ // Phoneme: "j̊", bytes: [106, 204, 138]
1248
+ // consonant features: {"position":"Palatal","position_index":8,"manner":"Approximant","voiced":false}
1249
+ return 3;
1250
+ break;
1251
+ }
1252
+ } else {
1253
+ return 2;
1254
+ }
1255
+ break;
1256
+ default:
1257
+ return 1;
1258
+ }
1259
+ } else {
1260
+ return 1;
1261
+ }
1262
+ break;
1263
+ case 226:
1264
+ if (max_length > 1) {
1265
+ switch(string[cursor + 1]) {
1266
+
1267
+ case 177:
1268
+ if (max_length > 2) {
1269
+ switch(string[cursor + 2]) {
1270
+
1271
+ case 177:
1272
+ // Phoneme: "ⱱ", bytes: [226, 177, 177]
1273
+ // consonant features: {"position":"Labio-dental","position_index":2,"manner":"Tap/flap","voiced":true}
1274
+ if (max_length > 3) {
1275
+ switch(string[cursor + 3]) {
1276
+
1277
+ case 204:
1278
+ if (max_length > 4) {
1279
+ switch(string[cursor + 4]) {
1280
+
1281
+ case 159:
1282
+ // Phoneme: "ⱱ̟", bytes: [226, 177, 177, 204, 159]
1283
+ // consonant features: {"position":"Bi-labial","position_index":1,"manner":"Tap/flap","voiced":true}
1284
+ return 5;
1285
+ break;
1286
+ }
1287
+ } else {
1288
+ return 4;
1289
+ }
1290
+ break;
1291
+ default:
1292
+ return 3;
1293
+ }
1294
+ } else {
1295
+ return 3;
1296
+ }
1297
+ break;
1298
+ }
1299
+ } else {
1300
+ return 2;
1301
+ }
1302
+ break;
1303
+ }
1304
+ } else {
1305
+ return 1;
1306
+ }
1307
+ break;
1308
+ case 114:
1309
+ // Phoneme: "r", bytes: [114]
1310
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Trill","voiced":true}
1311
+ if (max_length > 1) {
1312
+ switch(string[cursor + 1]) {
1313
+
1314
+ case 204:
1315
+ if (max_length > 2) {
1316
+ switch(string[cursor + 2]) {
1317
+
1318
+ case 165:
1319
+ // Phoneme: "r̥", bytes: [114, 204, 165]
1320
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Trill","voiced":false}
1321
+ return 3;
1322
+ break;
1323
+ }
1324
+ } else {
1325
+ return 2;
1326
+ }
1327
+ break;
1328
+ default:
1329
+ return 1;
1330
+ }
1331
+ } else {
1332
+ return 1;
1333
+ }
1334
+ break;
1335
+ case 108:
1336
+ // Phoneme: "l", bytes: [108]
1337
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral approximant","voiced":true}
1338
+ if (max_length > 1) {
1339
+ switch(string[cursor + 1]) {
1340
+
1341
+ case 204:
1342
+ if (max_length > 2) {
1343
+ switch(string[cursor + 2]) {
1344
+
1345
+ case 165:
1346
+ // Phoneme: "l̥", bytes: [108, 204, 165]
1347
+ // consonant features: {"position":"Alveolar","position_index":5,"manner":"Lateral approximant","voiced":false}
1348
+ return 3;
1349
+ break;
1350
+ }
1351
+ } else {
1352
+ return 2;
1353
+ }
1354
+ break;
1355
+ default:
1356
+ return 1;
1357
+ }
1358
+ } else {
1359
+ return 1;
1360
+ }
1361
+ break;
1362
+ }
1363
+ return 0;
1364
+ }