@ingglish/deseret 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +298 -0
- package/dist/index.d.cts +3 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +273 -0
- package/package.json +51 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
registerDeseret: () => registerDeseret
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(index_exports);
|
|
26
|
+
var import_phonemes2 = require("@ingglish/phonemes");
|
|
27
|
+
|
|
28
|
+
// src/from-deseret.ts
|
|
29
|
+
var import_dictionary = require("@ingglish/dictionary");
|
|
30
|
+
|
|
31
|
+
// src/deseret-maps.ts
|
|
32
|
+
var DESERET_CONSONANT_MAP = {
|
|
33
|
+
B: "\u{1043A}",
|
|
34
|
+
// U+1043A Bee
|
|
35
|
+
CH: "\u{1043D}",
|
|
36
|
+
// U+1043D Chee
|
|
37
|
+
D: "\u{1043C}",
|
|
38
|
+
// U+1043C Dee
|
|
39
|
+
DH: "\u{10444}",
|
|
40
|
+
// U+10444 Thee
|
|
41
|
+
F: "\u{10441}",
|
|
42
|
+
// U+10441 Ef
|
|
43
|
+
G: "\u{10440}",
|
|
44
|
+
// U+10440 Gay
|
|
45
|
+
HH: "\u{10438}",
|
|
46
|
+
// U+10438 H
|
|
47
|
+
JH: "\u{1043E}",
|
|
48
|
+
// U+1043E Jee
|
|
49
|
+
K: "\u{1043F}",
|
|
50
|
+
// U+1043F Kay
|
|
51
|
+
L: "\u{1044A}",
|
|
52
|
+
// U+1044A El
|
|
53
|
+
M: "\u{1044B}",
|
|
54
|
+
// U+1044B Em
|
|
55
|
+
N: "\u{1044C}",
|
|
56
|
+
// U+1044C En
|
|
57
|
+
NG: "\u{1044D}",
|
|
58
|
+
// U+1044D Eng
|
|
59
|
+
P: "\u{10439}",
|
|
60
|
+
// U+10439 Pee
|
|
61
|
+
R: "\u{10449}",
|
|
62
|
+
// U+10449 Er
|
|
63
|
+
S: "\u{10445}",
|
|
64
|
+
// U+10445 Es
|
|
65
|
+
SH: "\u{10447}",
|
|
66
|
+
// U+10447 Esh
|
|
67
|
+
T: "\u{1043B}",
|
|
68
|
+
// U+1043B Tee
|
|
69
|
+
TH: "\u{10443}",
|
|
70
|
+
// U+10443 Eth
|
|
71
|
+
V: "\u{10442}",
|
|
72
|
+
// U+10442 Vee
|
|
73
|
+
W: "\u{10436}",
|
|
74
|
+
// U+10436 Wu
|
|
75
|
+
Y: "\u{10437}",
|
|
76
|
+
// U+10437 Yee
|
|
77
|
+
Z: "\u{10446}",
|
|
78
|
+
// U+10446 Zee
|
|
79
|
+
ZH: "\u{10448}"
|
|
80
|
+
// U+10448 Zhee
|
|
81
|
+
};
|
|
82
|
+
var DESERET_VOWEL_MAP = {
|
|
83
|
+
AA: "\u{1042A}",
|
|
84
|
+
// U+1042A Long A (palm)
|
|
85
|
+
AE: "\u{10430}",
|
|
86
|
+
// U+10430 Short A (trap)
|
|
87
|
+
AH: "\u{10432}",
|
|
88
|
+
// U+10432 Short O (strut, stressed default)
|
|
89
|
+
AO: "\u{1042B}",
|
|
90
|
+
// U+1042B Long Ah (thought)
|
|
91
|
+
AW: "\u{10435}",
|
|
92
|
+
// U+10435 Ow (mouth)
|
|
93
|
+
// Keys are in alphabetical ARPABET order, so monophthongs and diphthongs are interleaved.
|
|
94
|
+
AY: "\u{10434}",
|
|
95
|
+
// U+10434 Ay (price)
|
|
96
|
+
EH: "\u{1042F}",
|
|
97
|
+
// U+1042F Short E (dress)
|
|
98
|
+
EY: "\u{10429}",
|
|
99
|
+
// U+10429 Long E (face)
|
|
100
|
+
IH: "\u{1042E}",
|
|
101
|
+
// U+1042E Short I (kit)
|
|
102
|
+
IY: "\u{10428}",
|
|
103
|
+
// U+10428 Long I (fleece)
|
|
104
|
+
OW: "\u{1042C}",
|
|
105
|
+
// U+1042C Long O (goat)
|
|
106
|
+
OY: "\u{1044E}",
|
|
107
|
+
// U+1044E Oi (choice)
|
|
108
|
+
UH: "\u{10433}",
|
|
109
|
+
// U+10433 Short Oo (foot)
|
|
110
|
+
UW: "\u{1042D}"
|
|
111
|
+
// U+1042D Long Oo (goose)
|
|
112
|
+
};
|
|
113
|
+
var DESERET_SCHWA = "\u{10431}";
|
|
114
|
+
var DESERET_EW = "\u{1044F}";
|
|
115
|
+
var ARPABET_TO_DESERET_MAP = {
|
|
116
|
+
...DESERET_VOWEL_MAP,
|
|
117
|
+
...DESERET_CONSONANT_MAP
|
|
118
|
+
};
|
|
119
|
+
var DESERET_TO_ARPABET_MAP = {
|
|
120
|
+
// Long vowels
|
|
121
|
+
"\u{10428}": ["IY"],
|
|
122
|
+
"\u{10429}": ["EY"],
|
|
123
|
+
"\u{1042A}": ["AA"],
|
|
124
|
+
"\u{1042B}": ["AO"],
|
|
125
|
+
"\u{1042C}": ["OW"],
|
|
126
|
+
"\u{1042D}": ["UW"],
|
|
127
|
+
// Short vowels
|
|
128
|
+
"\u{1042E}": ["IH"],
|
|
129
|
+
"\u{1042F}": ["EH"],
|
|
130
|
+
"\u{10430}": ["AE"],
|
|
131
|
+
"\u{10431}": ["AH"],
|
|
132
|
+
"\u{10432}": ["AH"],
|
|
133
|
+
"\u{10433}": ["UH"],
|
|
134
|
+
// Diphthongs
|
|
135
|
+
"\u{10434}": ["AY"],
|
|
136
|
+
"\u{10435}": ["AW"],
|
|
137
|
+
// Consonants (U+10436-U+1044D), followed in code point order by the Oi vowel (U+1044E)
|
|
138
|
+
"\u{10436}": ["W"],
|
|
139
|
+
"\u{10437}": ["Y"],
|
|
140
|
+
"\u{10438}": ["HH"],
|
|
141
|
+
"\u{10439}": ["P"],
|
|
142
|
+
"\u{1043A}": ["B"],
|
|
143
|
+
"\u{1043B}": ["T"],
|
|
144
|
+
"\u{1043C}": ["D"],
|
|
145
|
+
"\u{1043D}": ["CH"],
|
|
146
|
+
"\u{1043E}": ["JH"],
|
|
147
|
+
"\u{1043F}": ["K"],
|
|
148
|
+
"\u{10440}": ["G"],
|
|
149
|
+
"\u{10441}": ["F"],
|
|
150
|
+
"\u{10442}": ["V"],
|
|
151
|
+
"\u{10443}": ["TH"],
|
|
152
|
+
"\u{10444}": ["DH"],
|
|
153
|
+
"\u{10445}": ["S"],
|
|
154
|
+
"\u{10446}": ["Z"],
|
|
155
|
+
"\u{10447}": ["SH"],
|
|
156
|
+
"\u{10448}": ["ZH"],
|
|
157
|
+
"\u{10449}": ["R"],
|
|
158
|
+
"\u{1044A}": ["L"],
|
|
159
|
+
"\u{1044B}": ["M"],
|
|
160
|
+
"\u{1044C}": ["N"],
|
|
161
|
+
"\u{1044D}": ["NG"],
|
|
162
|
+
"\u{1044E}": ["OY"],
|
|
163
|
+
// Ew (expands to Y + UW)
|
|
164
|
+
"\u{1044F}": ["Y", "UW"]
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
// src/tokenize.ts
|
|
168
|
+
/**
 * Reports whether a string begins with a code point from the Unicode Deseret
 * block (U+10400 through U+1044F).
 *
 * @param {string} char - Text to inspect; only its first code point is read.
 * @returns {boolean} True when the first code point lies inside the block,
 *   false otherwise (including for the empty string).
 */
function isDeseretChar(char) {
  // Inclusive bounds of the Deseret block (decimal 66560 and 66639).
  const DESERET_BLOCK_FIRST = 0x10400;
  const DESERET_BLOCK_LAST = 0x1044f;
  const codePoint = char.codePointAt(0);
  // codePointAt(0) yields undefined when the string is empty.
  return codePoint !== undefined && codePoint >= DESERET_BLOCK_FIRST && codePoint <= DESERET_BLOCK_LAST;
}
|
|
175
|
+
/**
 * Splits text into maximal runs of Deseret and non-Deseret code points.
 *
 * @param {string} text - Text to split; iterated one code point at a time.
 * @returns {Array<{isWord: boolean, text: string}>} Runs in input order.
 *   `isWord` is true for a run of Deseret code points. Empty input gives [].
 */
function tokenizeDeseret(text) {
  const runs = [];
  let buffer = "";
  let bufferIsDeseret = false;
  // Emits the pending run, if any characters have been collected.
  const flush = () => {
    if (buffer.length > 0) {
      runs.push({ isWord: bufferIsDeseret, text: buffer });
    }
  };
  for (const symbol of text) {
    const symbolIsDeseret = isDeseretChar(symbol);
    if (symbolIsDeseret !== bufferIsDeseret) {
      // The script changed: close the current run and start a new one.
      flush();
      buffer = "";
      bufferIsDeseret = symbolIsDeseret;
    }
    buffer += symbol;
  }
  flush();
  return runs;
}
|
|
196
|
+
|
|
197
|
+
// src/from-deseret.ts
|
|
198
|
+
/**
 * Converts Deseret text into a flat list of ARPABET phonemes.
 *
 * Code points outside the Deseret block are ignored. Code points below
 * U+10428 (the capital letters) are shifted up by 40 onto their small-letter
 * counterparts before the DESERET_TO_ARPABET_MAP lookup.
 *
 * @param {string} text - Text to convert.
 * @returns {string[] | null} Phonemes in input order, or null when none were
 *   produced.
 */
function deseretToArpabet(text) {
  const FIRST_SMALL_LETTER = 0x10428; // decimal 66600
  const CAPITAL_TO_SMALL_OFFSET = 0x28; // decimal 40
  const collected = [];
  for (const symbol of text) {
    if (isDeseretChar(symbol)) {
      const codePoint = symbol.codePointAt(0);
      const small = codePoint >= FIRST_SMALL_LETTER ? symbol : String.fromCodePoint(codePoint + CAPITAL_TO_SMALL_OFFSET);
      const mapped = DESERET_TO_ARPABET_MAP[small];
      if (mapped !== undefined) {
        // A single letter may expand to several phonemes.
        collected.push(...mapped);
      }
    }
  }
  return collected.length > 0 ? collected : null;
}
|
|
213
|
+
/**
 * Translates the Deseret runs of a text and leaves every other run unchanged.
 *
 * @param {string} text - Mixed text that may contain Deseret words.
 * @returns {string} The text with each Deseret run replaced by the first
 *   candidate from reverseTranslateDeseretWord, or by the run itself when
 *   that candidate is null or undefined.
 */
function reverseTranslateDeseretText(text) {
  const pieces = [];
  for (const token of tokenizeDeseret(text)) {
    if (!token.isWord) {
      pieces.push(token.text);
      continue;
    }
    const candidates = reverseTranslateDeseretWord(token.text);
    pieces.push(candidates[0] ?? token.text);
  }
  return pieces.join("");
}
|
|
223
|
+
/**
 * Translates a text run by run and reports, for each run, what it became.
 *
 * @param {string} text - Mixed text that may contain Deseret words.
 * @returns {Array<{isWord: boolean, matched: boolean, original: string, translated: string}>}
 *   One entry per run from tokenizeDeseret. Non-Deseret runs are passed
 *   through with `matched: true`; Deseret runs have `matched` set only when
 *   the translation differs from the original run.
 */
function reverseTranslateDeseretTextWithMapping(text) {
  const describeToken = (token) => {
    const original = token.text;
    if (!token.isWord) {
      return { isWord: false, matched: true, original, translated: original };
    }
    const candidates = reverseTranslateDeseretWord(original);
    const translated = candidates[0] ?? original;
    return {
      isWord: true,
      matched: translated !== original,
      original,
      translated
    };
  };
  return tokenizeDeseret(text).map((token) => describeToken(token));
}
|
|
239
|
+
/**
 * Looks up the candidate spellings for a single Deseret word.
 *
 * The word is converted to ARPABET, the phonemes are joined with single
 * spaces, and that key is passed to lookupPhonemeKey.
 *
 * @param {string} word - A Deseret word.
 * @returns {string[]} `[word]` when the word yields no phonemes or the lookup
 *   returns nothing; otherwise the lookup result, passed through
 *   sortByFrequency when there is more than one candidate.
 *   NOTE(review): the ordering sortByFrequency produces is not visible here.
 */
function reverseTranslateDeseretWord(word) {
  const phonemes = deseretToArpabet(word);
  if (phonemes) {
    const candidates = (0, import_dictionary.lookupPhonemeKey)(phonemes.join(" "));
    if (candidates && candidates.length !== 0) {
      return candidates.length > 1 ? (0, import_dictionary.sortByFrequency)(candidates) : candidates;
    }
  }
  // Fall back to the untranslated word so callers always get a candidate.
  return [word];
}
|
|
251
|
+
|
|
252
|
+
// src/to-deseret.ts
|
|
253
|
+
var import_phonemes = require("@ingglish/phonemes");
|
|
254
|
+
/**
 * Renders a sequence of ARPABET phonemes as Deseret text.
 *
 * Special cases, checked in this order against the stress-stripped base
 * returned by stripStress:
 *  - "Y" immediately followed by "UW" becomes the single letter DESERET_EW.
 *  - "ER" becomes a vowel plus the R letter; the vowel is DESERET_SCHWA when
 *    the phoneme ends in "0", otherwise the mapped AH letter.
 *  - "AH" ending in "0" becomes DESERET_SCHWA.
 * Everything else is looked up in ARPABET_TO_DESERET_MAP; phonemes with no
 * entry contribute nothing.
 *
 * @param {string[]} arpabet - Phoneme strings, optionally carrying a trailing
 *   stress digit.
 * @returns {string} The concatenated Deseret letters.
 */
function arpabetToDeseret(arpabet) {
  const UNSTRESSED_DIGIT = 48; // code point of "0"
  // True when the phoneme's last character is the "0" stress marker.
  const endsUnstressed = (phoneme) => phoneme.codePointAt(phoneme.length - 1) === UNSTRESSED_DIGIT;
  const count = arpabet.length;
  let output = "";
  for (let i = 0; i < count; i++) {
    const phoneme = arpabet[i];
    const base = (0, import_phonemes.stripStress)(phoneme);
    if (base === "Y" && i + 1 < count && (0, import_phonemes.stripStress)(arpabet[i + 1]) === "UW") {
      output += DESERET_EW;
      i++; // the UW has been consumed together with the Y
      continue;
    }
    if (base === "ER") {
      output += endsUnstressed(phoneme) ? DESERET_SCHWA : ARPABET_TO_DESERET_MAP.AH;
      output += ARPABET_TO_DESERET_MAP.R;
      continue;
    }
    if (base === "AH" && endsUnstressed(phoneme)) {
      output += DESERET_SCHWA;
      continue;
    }
    // Own-property check: a plain `map[base]` would also find inherited
    // members such as "constructor" and append a function's source text.
    if (Object.prototype.hasOwnProperty.call(ARPABET_TO_DESERET_MAP, base)) {
      output += ARPABET_TO_DESERET_MAP[base] ?? "";
    }
  }
  return output;
}
|
|
282
|
+
|
|
283
|
+
// src/index.ts
|
|
284
|
+
/**
 * Registers the Deseret converters under the format name "deseret" by
 * calling registerFormat from @ingglish/phonemes.
 *
 * @returns {void}
 */
function registerDeseret() {
  const deseretFormat = {
    forward: arpabetToDeseret,
    isLatinScript: false,
    label: "Deseret",
    nativeLabel: "\u{10414}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B}",
    preservesCase: false,
    reverseText: reverseTranslateDeseretText,
    reverseTextWithMapping: reverseTranslateDeseretTextWithMapping
  };
  (0, import_phonemes2.registerFormat)("deseret", deseretFormat);
}
|
|
295
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
296
|
+
0 && (module.exports = {
|
|
297
|
+
registerDeseret
|
|
298
|
+
});
|
package/dist/index.d.cts
ADDED
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import { registerFormat } from "@ingglish/phonemes";
|
|
3
|
+
|
|
4
|
+
// src/from-deseret.ts
|
|
5
|
+
import { lookupPhonemeKey, sortByFrequency } from "@ingglish/dictionary";
|
|
6
|
+
|
|
7
|
+
// src/deseret-maps.ts
|
|
8
|
+
var DESERET_CONSONANT_MAP = {
|
|
9
|
+
B: "\u{1043A}",
|
|
10
|
+
// U+1043A Bee
|
|
11
|
+
CH: "\u{1043D}",
|
|
12
|
+
// U+1043D Chee
|
|
13
|
+
D: "\u{1043C}",
|
|
14
|
+
// U+1043C Dee
|
|
15
|
+
DH: "\u{10444}",
|
|
16
|
+
// U+10444 Thee
|
|
17
|
+
F: "\u{10441}",
|
|
18
|
+
// U+10441 Ef
|
|
19
|
+
G: "\u{10440}",
|
|
20
|
+
// U+10440 Gay
|
|
21
|
+
HH: "\u{10438}",
|
|
22
|
+
// U+10438 H
|
|
23
|
+
JH: "\u{1043E}",
|
|
24
|
+
// U+1043E Jee
|
|
25
|
+
K: "\u{1043F}",
|
|
26
|
+
// U+1043F Kay
|
|
27
|
+
L: "\u{1044A}",
|
|
28
|
+
// U+1044A El
|
|
29
|
+
M: "\u{1044B}",
|
|
30
|
+
// U+1044B Em
|
|
31
|
+
N: "\u{1044C}",
|
|
32
|
+
// U+1044C En
|
|
33
|
+
NG: "\u{1044D}",
|
|
34
|
+
// U+1044D Eng
|
|
35
|
+
P: "\u{10439}",
|
|
36
|
+
// U+10439 Pee
|
|
37
|
+
R: "\u{10449}",
|
|
38
|
+
// U+10449 Er
|
|
39
|
+
S: "\u{10445}",
|
|
40
|
+
// U+10445 Es
|
|
41
|
+
SH: "\u{10447}",
|
|
42
|
+
// U+10447 Esh
|
|
43
|
+
T: "\u{1043B}",
|
|
44
|
+
// U+1043B Tee
|
|
45
|
+
TH: "\u{10443}",
|
|
46
|
+
// U+10443 Eth
|
|
47
|
+
V: "\u{10442}",
|
|
48
|
+
// U+10442 Vee
|
|
49
|
+
W: "\u{10436}",
|
|
50
|
+
// U+10436 Wu
|
|
51
|
+
Y: "\u{10437}",
|
|
52
|
+
// U+10437 Yee
|
|
53
|
+
Z: "\u{10446}",
|
|
54
|
+
// U+10446 Zee
|
|
55
|
+
ZH: "\u{10448}"
|
|
56
|
+
// U+10448 Zhee
|
|
57
|
+
};
|
|
58
|
+
var DESERET_VOWEL_MAP = {
|
|
59
|
+
AA: "\u{1042A}",
|
|
60
|
+
// U+1042A Long A (palm)
|
|
61
|
+
AE: "\u{10430}",
|
|
62
|
+
// U+10430 Short A (trap)
|
|
63
|
+
AH: "\u{10432}",
|
|
64
|
+
// U+10432 Short O (strut, stressed default)
|
|
65
|
+
AO: "\u{1042B}",
|
|
66
|
+
// U+1042B Long Ah (thought)
|
|
67
|
+
AW: "\u{10435}",
|
|
68
|
+
// U+10435 Ow (mouth)
|
|
69
|
+
// Keys are in alphabetical ARPABET order, so monophthongs and diphthongs are interleaved.
|
|
70
|
+
AY: "\u{10434}",
|
|
71
|
+
// U+10434 Ay (price)
|
|
72
|
+
EH: "\u{1042F}",
|
|
73
|
+
// U+1042F Short E (dress)
|
|
74
|
+
EY: "\u{10429}",
|
|
75
|
+
// U+10429 Long E (face)
|
|
76
|
+
IH: "\u{1042E}",
|
|
77
|
+
// U+1042E Short I (kit)
|
|
78
|
+
IY: "\u{10428}",
|
|
79
|
+
// U+10428 Long I (fleece)
|
|
80
|
+
OW: "\u{1042C}",
|
|
81
|
+
// U+1042C Long O (goat)
|
|
82
|
+
OY: "\u{1044E}",
|
|
83
|
+
// U+1044E Oi (choice)
|
|
84
|
+
UH: "\u{10433}",
|
|
85
|
+
// U+10433 Short Oo (foot)
|
|
86
|
+
UW: "\u{1042D}"
|
|
87
|
+
// U+1042D Long Oo (goose)
|
|
88
|
+
};
|
|
89
|
+
var DESERET_SCHWA = "\u{10431}";
|
|
90
|
+
var DESERET_EW = "\u{1044F}";
|
|
91
|
+
var ARPABET_TO_DESERET_MAP = {
|
|
92
|
+
...DESERET_VOWEL_MAP,
|
|
93
|
+
...DESERET_CONSONANT_MAP
|
|
94
|
+
};
|
|
95
|
+
var DESERET_TO_ARPABET_MAP = {
|
|
96
|
+
// Long vowels
|
|
97
|
+
"\u{10428}": ["IY"],
|
|
98
|
+
"\u{10429}": ["EY"],
|
|
99
|
+
"\u{1042A}": ["AA"],
|
|
100
|
+
"\u{1042B}": ["AO"],
|
|
101
|
+
"\u{1042C}": ["OW"],
|
|
102
|
+
"\u{1042D}": ["UW"],
|
|
103
|
+
// Short vowels
|
|
104
|
+
"\u{1042E}": ["IH"],
|
|
105
|
+
"\u{1042F}": ["EH"],
|
|
106
|
+
"\u{10430}": ["AE"],
|
|
107
|
+
"\u{10431}": ["AH"],
|
|
108
|
+
"\u{10432}": ["AH"],
|
|
109
|
+
"\u{10433}": ["UH"],
|
|
110
|
+
// Diphthongs
|
|
111
|
+
"\u{10434}": ["AY"],
|
|
112
|
+
"\u{10435}": ["AW"],
|
|
113
|
+
// Consonants (U+10436-U+1044D), followed in code point order by the Oi vowel (U+1044E)
|
|
114
|
+
"\u{10436}": ["W"],
|
|
115
|
+
"\u{10437}": ["Y"],
|
|
116
|
+
"\u{10438}": ["HH"],
|
|
117
|
+
"\u{10439}": ["P"],
|
|
118
|
+
"\u{1043A}": ["B"],
|
|
119
|
+
"\u{1043B}": ["T"],
|
|
120
|
+
"\u{1043C}": ["D"],
|
|
121
|
+
"\u{1043D}": ["CH"],
|
|
122
|
+
"\u{1043E}": ["JH"],
|
|
123
|
+
"\u{1043F}": ["K"],
|
|
124
|
+
"\u{10440}": ["G"],
|
|
125
|
+
"\u{10441}": ["F"],
|
|
126
|
+
"\u{10442}": ["V"],
|
|
127
|
+
"\u{10443}": ["TH"],
|
|
128
|
+
"\u{10444}": ["DH"],
|
|
129
|
+
"\u{10445}": ["S"],
|
|
130
|
+
"\u{10446}": ["Z"],
|
|
131
|
+
"\u{10447}": ["SH"],
|
|
132
|
+
"\u{10448}": ["ZH"],
|
|
133
|
+
"\u{10449}": ["R"],
|
|
134
|
+
"\u{1044A}": ["L"],
|
|
135
|
+
"\u{1044B}": ["M"],
|
|
136
|
+
"\u{1044C}": ["N"],
|
|
137
|
+
"\u{1044D}": ["NG"],
|
|
138
|
+
"\u{1044E}": ["OY"],
|
|
139
|
+
// Ew (expands to Y + UW)
|
|
140
|
+
"\u{1044F}": ["Y", "UW"]
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
// src/tokenize.ts
|
|
144
|
+
/**
 * Reports whether a string begins with a code point from the Unicode Deseret
 * block (U+10400 through U+1044F).
 *
 * @param {string} char - Text to inspect; only its first code point is read.
 * @returns {boolean} True when the first code point lies inside the block,
 *   false otherwise (including for the empty string).
 */
function isDeseretChar(char) {
  // Inclusive bounds of the Deseret block (decimal 66560 and 66639).
  const DESERET_BLOCK_FIRST = 0x10400;
  const DESERET_BLOCK_LAST = 0x1044f;
  const codePoint = char.codePointAt(0);
  // codePointAt(0) yields undefined when the string is empty.
  return codePoint !== undefined && codePoint >= DESERET_BLOCK_FIRST && codePoint <= DESERET_BLOCK_LAST;
}
|
|
151
|
+
/**
 * Splits text into maximal runs of Deseret and non-Deseret code points.
 *
 * @param {string} text - Text to split; iterated one code point at a time.
 * @returns {Array<{isWord: boolean, text: string}>} Runs in input order.
 *   `isWord` is true for a run of Deseret code points. Empty input gives [].
 */
function tokenizeDeseret(text) {
  const runs = [];
  let buffer = "";
  let bufferIsDeseret = false;
  // Emits the pending run, if any characters have been collected.
  const flush = () => {
    if (buffer.length > 0) {
      runs.push({ isWord: bufferIsDeseret, text: buffer });
    }
  };
  for (const symbol of text) {
    const symbolIsDeseret = isDeseretChar(symbol);
    if (symbolIsDeseret !== bufferIsDeseret) {
      // The script changed: close the current run and start a new one.
      flush();
      buffer = "";
      bufferIsDeseret = symbolIsDeseret;
    }
    buffer += symbol;
  }
  flush();
  return runs;
}
|
|
172
|
+
|
|
173
|
+
// src/from-deseret.ts
|
|
174
|
+
/**
 * Converts Deseret text into a flat list of ARPABET phonemes.
 *
 * Code points outside the Deseret block are ignored. Code points below
 * U+10428 (the capital letters) are shifted up by 40 onto their small-letter
 * counterparts before the DESERET_TO_ARPABET_MAP lookup.
 *
 * @param {string} text - Text to convert.
 * @returns {string[] | null} Phonemes in input order, or null when none were
 *   produced.
 */
function deseretToArpabet(text) {
  const FIRST_SMALL_LETTER = 0x10428; // decimal 66600
  const CAPITAL_TO_SMALL_OFFSET = 0x28; // decimal 40
  const collected = [];
  for (const symbol of text) {
    if (isDeseretChar(symbol)) {
      const codePoint = symbol.codePointAt(0);
      const small = codePoint >= FIRST_SMALL_LETTER ? symbol : String.fromCodePoint(codePoint + CAPITAL_TO_SMALL_OFFSET);
      const mapped = DESERET_TO_ARPABET_MAP[small];
      if (mapped !== undefined) {
        // A single letter may expand to several phonemes.
        collected.push(...mapped);
      }
    }
  }
  return collected.length > 0 ? collected : null;
}
|
|
189
|
+
/**
 * Translates the Deseret runs of a text and leaves every other run unchanged.
 *
 * @param {string} text - Mixed text that may contain Deseret words.
 * @returns {string} The text with each Deseret run replaced by the first
 *   candidate from reverseTranslateDeseretWord, or by the run itself when
 *   that candidate is null or undefined.
 */
function reverseTranslateDeseretText(text) {
  const pieces = [];
  for (const token of tokenizeDeseret(text)) {
    if (!token.isWord) {
      pieces.push(token.text);
      continue;
    }
    const candidates = reverseTranslateDeseretWord(token.text);
    pieces.push(candidates[0] ?? token.text);
  }
  return pieces.join("");
}
|
|
199
|
+
/**
 * Translates a text run by run and reports, for each run, what it became.
 *
 * @param {string} text - Mixed text that may contain Deseret words.
 * @returns {Array<{isWord: boolean, matched: boolean, original: string, translated: string}>}
 *   One entry per run from tokenizeDeseret. Non-Deseret runs are passed
 *   through with `matched: true`; Deseret runs have `matched` set only when
 *   the translation differs from the original run.
 */
function reverseTranslateDeseretTextWithMapping(text) {
  const describeToken = (token) => {
    const original = token.text;
    if (!token.isWord) {
      return { isWord: false, matched: true, original, translated: original };
    }
    const candidates = reverseTranslateDeseretWord(original);
    const translated = candidates[0] ?? original;
    return {
      isWord: true,
      matched: translated !== original,
      original,
      translated
    };
  };
  return tokenizeDeseret(text).map((token) => describeToken(token));
}
|
|
215
|
+
/**
 * Looks up the candidate spellings for a single Deseret word.
 *
 * The word is converted to ARPABET, the phonemes are joined with single
 * spaces, and that key is passed to lookupPhonemeKey.
 *
 * @param {string} word - A Deseret word.
 * @returns {string[]} `[word]` when the word yields no phonemes or the lookup
 *   returns nothing; otherwise the lookup result, passed through
 *   sortByFrequency when there is more than one candidate.
 *   NOTE(review): the ordering sortByFrequency produces is not visible here.
 */
function reverseTranslateDeseretWord(word) {
  const phonemes = deseretToArpabet(word);
  if (phonemes) {
    const candidates = lookupPhonemeKey(phonemes.join(" "));
    if (candidates && candidates.length !== 0) {
      return candidates.length > 1 ? sortByFrequency(candidates) : candidates;
    }
  }
  // Fall back to the untranslated word so callers always get a candidate.
  return [word];
}
|
|
227
|
+
|
|
228
|
+
// src/to-deseret.ts
|
|
229
|
+
import { stripStress } from "@ingglish/phonemes";
|
|
230
|
+
/**
 * Renders a sequence of ARPABET phonemes as Deseret text.
 *
 * Special cases, checked in this order against the stress-stripped base
 * returned by stripStress:
 *  - "Y" immediately followed by "UW" becomes the single letter DESERET_EW.
 *  - "ER" becomes a vowel plus the R letter; the vowel is DESERET_SCHWA when
 *    the phoneme ends in "0", otherwise the mapped AH letter.
 *  - "AH" ending in "0" becomes DESERET_SCHWA.
 * Everything else is looked up in ARPABET_TO_DESERET_MAP; phonemes with no
 * entry contribute nothing.
 *
 * @param {string[]} arpabet - Phoneme strings, optionally carrying a trailing
 *   stress digit.
 * @returns {string} The concatenated Deseret letters.
 */
function arpabetToDeseret(arpabet) {
  const UNSTRESSED_DIGIT = 48; // code point of "0"
  // True when the phoneme's last character is the "0" stress marker.
  const endsUnstressed = (phoneme) => phoneme.codePointAt(phoneme.length - 1) === UNSTRESSED_DIGIT;
  const count = arpabet.length;
  let output = "";
  for (let i = 0; i < count; i++) {
    const phoneme = arpabet[i];
    const base = stripStress(phoneme);
    if (base === "Y" && i + 1 < count && stripStress(arpabet[i + 1]) === "UW") {
      output += DESERET_EW;
      i++; // the UW has been consumed together with the Y
      continue;
    }
    if (base === "ER") {
      output += endsUnstressed(phoneme) ? DESERET_SCHWA : ARPABET_TO_DESERET_MAP.AH;
      output += ARPABET_TO_DESERET_MAP.R;
      continue;
    }
    if (base === "AH" && endsUnstressed(phoneme)) {
      output += DESERET_SCHWA;
      continue;
    }
    // Own-property check: a plain `map[base]` would also find inherited
    // members such as "constructor" and append a function's source text.
    if (Object.prototype.hasOwnProperty.call(ARPABET_TO_DESERET_MAP, base)) {
      output += ARPABET_TO_DESERET_MAP[base] ?? "";
    }
  }
  return output;
}
|
|
258
|
+
|
|
259
|
+
// src/index.ts
|
|
260
|
+
/**
 * Registers the Deseret converters under the format name "deseret" by
 * calling registerFormat from @ingglish/phonemes.
 *
 * @returns {void}
 */
function registerDeseret() {
  const deseretFormat = {
    forward: arpabetToDeseret,
    isLatinScript: false,
    label: "Deseret",
    nativeLabel: "\u{10414}\u{1042F}\u{10445}\u{10428}\u{10449}\u{1042F}\u{1043B}",
    preservesCase: false,
    reverseText: reverseTranslateDeseretText,
    reverseTextWithMapping: reverseTranslateDeseretTextWithMapping
  };
  registerFormat("deseret", deseretFormat);
}
|
|
271
|
+
export {
|
|
272
|
+
registerDeseret
|
|
273
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ingglish/deseret",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Deseret alphabet conversion for Ingglish",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"source": "./src/index.ts",
|
|
12
|
+
"import": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"default": "./dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"require": {
|
|
17
|
+
"types": "./dist/index.d.cts",
|
|
18
|
+
"default": "./dist/index.cjs"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"files": [
|
|
23
|
+
"dist"
|
|
24
|
+
],
|
|
25
|
+
"sideEffects": false,
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=16"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsup",
|
|
31
|
+
"build:fast": "tsup src/index.ts --format esm",
|
|
32
|
+
"lint": "eslint --cache src",
|
|
33
|
+
"test": "vitest run --no-color",
|
|
34
|
+
"prepublishOnly": "npm run build"
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"@ingglish/phonemes": "^0.1.0",
|
|
38
|
+
"@ingglish/dictionary": "^0.1.0"
|
|
39
|
+
},
|
|
40
|
+
"author": "Paul Tarjan",
|
|
41
|
+
"license": "MIT",
|
|
42
|
+
"repository": {
|
|
43
|
+
"type": "git",
|
|
44
|
+
"url": "git+https://github.com/ptarjan/ingglish.git",
|
|
45
|
+
"directory": "packages/deseret"
|
|
46
|
+
},
|
|
47
|
+
"homepage": "https://github.com/ptarjan/ingglish#readme",
|
|
48
|
+
"bugs": {
|
|
49
|
+
"url": "https://github.com/ptarjan/ingglish/issues"
|
|
50
|
+
}
|
|
51
|
+
}
|