@ingglish/shavian 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +288 -0
- package/dist/index.d.cts +3 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +263 -0
- package/package.json +51 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helpers for the CommonJS build (presumably bundler-generated).
// They install getters so that reading a property on the exported object
// evaluates the underlying binding at access time.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`.
 * Each value in `all` is used directly as the getter function.
 */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copies the own properties of `from` onto `to` as getters, skipping keys
 * that `to` already owns and the single key named by `except`.
 * Enumerability follows the source descriptor (enumerable when none exists).
 * `from` is ignored unless it is a non-null object or a function.
 * Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (isCopyable) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) {
        continue;
      }
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, {
        get: () => from[key],
        enumerable: !desc || desc.enumerable,
      });
    }
  }
  return to;
};

/**
 * Builds a fresh object carrying a non-enumerable `__esModule: true` marker
 * and getter copies of every own property of `mod`.
 */
const __toCommonJS = (mod) => {
  const target = __defProp({}, "__esModule", { value: true });
  return __copyProps(target, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
// Export table for the CommonJS build. Each entry is installed as a getter,
// so `registerShavian` is looked up when the property is read rather than
// when this statement runs.
const index_exports = {};
__export(index_exports, {
  registerShavian: () => registerShavian,
});
module.exports = __toCommonJS(index_exports);
|
|
26
|
+
var import_phonemes2 = require("@ingglish/phonemes");
|
|
27
|
+
|
|
28
|
+
// src/from-shavian.ts
|
|
29
|
+
var import_dictionary = require("@ingglish/dictionary");
|
|
30
|
+
|
|
31
|
+
// src/shavian-maps.ts
|
|
32
|
+
// ARPABET consonant -> Shavian glyph.
const SHAVIAN_CONSONANT_MAP = {
  B: "\u{1045A}",
  CH: "\u{10457}",
  D: "\u{1045B}",
  DH: "\u{1045E}",
  F: "\u{10453}",
  G: "\u{1045C}",
  HH: "\u{10463}",
  JH: "\u{10461}",
  K: "\u{10452}",
  L: "\u{10464}",
  M: "\u{10465}",
  N: "\u{1046F}",
  NG: "\u{10459}",
  P: "\u{10450}",
  R: "\u{1046E}",
  S: "\u{10455}",
  SH: "\u{10456}",
  T: "\u{10451}",
  TH: "\u{10454}",
  V: "\u{1045D}",
  W: "\u{10462}",
  Y: "\u{10458}",
  Z: "\u{1045F}",
  ZH: "\u{10460}",
};

// ARPABET vowel (stress digit removed) -> Shavian glyph, with example words.
const SHAVIAN_VOWEL_MAP = {
  AA: "\u{1046D}", // palm, father
  AE: "\u{10468}", // trap, cat
  AH: "\u{10473}", // strut, cup (the default for AH; see SHAVIAN_SCHWA)
  AO: "\u{10477}", // thought, law
  AW: "\u{1046C}", // mouth, cow (diphthong)
  AY: "\u{10472}", // price, my (diphthong)
  EH: "\u{10467}", // dress, bed
  ER: "\u{1047B}", // nurse, bird (ligature)
  EY: "\u{10471}", // face, say
  IH: "\u{10466}", // kit, sit
  IY: "\u{10470}", // fleece, see
  OW: "\u{10474}", // goat, go
  OY: "\u{10476}", // choice, boy
  UH: "\u{1046B}", // foot, put
  UW: "\u{10475}", // goose, too
};

// Combined forward table; vowels are spread first, consonants second.
const ARPABET_TO_SHAVIAN_MAP = {
  ...SHAVIAN_VOWEL_MAP,
  ...SHAVIAN_CONSONANT_MAP,
};

// Glyph emitted for AH carrying stress digit 0.
const SHAVIAN_SCHWA = "\u{10469}";

// Single glyphs that stand for a vowel immediately followed by R.
// The AH entry is keyed with its stress digit, so only AH0 + R qualifies.
const SHAVIAN_R_COLORED = {
  AA: "\u{10478}", // start (AA+R)
  AH0: "\u{1047C}", // letter (unstressed AH+R)
  AO: "\u{10479}", // north/force (AO+R)
  EH: "\u{1047A}", // square (EH+R)
  IH: "\u{1047D}", // near (IH+R)
};

// Reverse table: Shavian glyph -> ARPABET phoneme(s) without stress digits.
// Entries are listed in ascending code-point order. U+1046A, U+1047E and
// U+1047F fall inside the range accepted by isShavianChar but have no entry.
const SHAVIAN_TO_ARPABET_MAP = {
  // U+10450..U+10465: consonants
  "\u{10450}": ["P"],
  "\u{10451}": ["T"],
  "\u{10452}": ["K"],
  "\u{10453}": ["F"],
  "\u{10454}": ["TH"],
  "\u{10455}": ["S"],
  "\u{10456}": ["SH"],
  "\u{10457}": ["CH"],
  "\u{10458}": ["Y"],
  "\u{10459}": ["NG"],
  "\u{1045A}": ["B"],
  "\u{1045B}": ["D"],
  "\u{1045C}": ["G"],
  "\u{1045D}": ["V"],
  "\u{1045E}": ["DH"],
  "\u{1045F}": ["Z"],
  "\u{10460}": ["ZH"],
  "\u{10461}": ["JH"],
  "\u{10462}": ["W"],
  "\u{10463}": ["HH"],
  "\u{10464}": ["L"],
  "\u{10465}": ["M"],
  // U+10466..U+1046D: vowels (U+10469 is the schwa glyph; it and U+10473
  // both come back as plain AH)
  "\u{10466}": ["IH"],
  "\u{10467}": ["EH"],
  "\u{10468}": ["AE"],
  "\u{10469}": ["AH"],
  "\u{1046B}": ["UH"],
  "\u{1046C}": ["AW"],
  "\u{1046D}": ["AA"],
  // U+1046E..U+1046F: two more consonants
  "\u{1046E}": ["R"],
  "\u{1046F}": ["N"],
  // U+10470..U+10477: vowels and diphthongs
  "\u{10470}": ["IY"],
  "\u{10471}": ["EY"],
  "\u{10472}": ["AY"],
  "\u{10473}": ["AH"],
  "\u{10474}": ["OW"],
  "\u{10475}": ["UW"],
  "\u{10476}": ["OY"],
  "\u{10477}": ["AO"],
  // U+10478..U+1047D: R-coloured ligatures, expanded to vowel + R
  // (U+1047B stays the single phoneme ER)
  "\u{10478}": ["AA", "R"],
  "\u{10479}": ["AO", "R"],
  "\u{1047A}": ["EH", "R"],
  "\u{1047B}": ["ER"],
  "\u{1047C}": ["AH", "R"],
  "\u{1047D}": ["IH", "R"],
};
|
|
160
|
+
|
|
161
|
+
// src/tokenize.ts
|
|
162
|
+
/**
 * Reports whether the first code point of `char` lies in U+10450..U+1047F,
 * the range this module treats as Shavian.
 *
 * @param {string} char - String whose first code point is inspected.
 * @returns {boolean} False for an empty string or a code point outside the range.
 */
function isShavianChar(char) {
  const codePoint = char.codePointAt(0);
  return codePoint !== undefined && codePoint >= 0x10450 && codePoint <= 0x1047f;
}
|
|
169
|
+
/**
 * Splits text into maximal runs of Shavian and non-Shavian code points.
 *
 * @param {string} text - Input text, iterated by code point.
 * @returns {{isWord: boolean, text: string}[]} Runs in input order; `isWord`
 *   is true for Shavian runs. Empty input yields an empty array.
 */
function tokenizeShavian(text) {
  const tokens = [];
  let run = null;
  for (const char of text) {
    const shavian = isShavianChar(char);
    if (run !== null && run.isWord === shavian) {
      // Same category as the run in progress: extend it.
      run.text += char;
    } else {
      // Category changed (or first character): open a new run.
      run = { isWord: shavian, text: char };
      tokens.push(run);
    }
  }
  return tokens;
}
|
|
190
|
+
|
|
191
|
+
// src/from-shavian.ts
|
|
192
|
+
/**
 * Rewrites text by replacing each Shavian run with the first candidate from
 * reverseTranslateShavianWord; all other runs are copied through unchanged.
 *
 * @param {string} text - Text that may contain Shavian runs.
 * @returns {string} The reassembled text.
 */
function reverseTranslateShavianText(text) {
  let output = "";
  for (const { isWord, text: piece } of tokenizeShavian(text)) {
    if (!isWord) {
      output += piece;
      continue;
    }
    const [best] = reverseTranslateShavianWord(piece);
    // Fall back to the untouched run when no candidate is available.
    output += best ?? piece;
  }
  return output;
}
|
|
202
|
+
/**
 * Like reverseTranslateShavianText, but returns one record per run instead
 * of a joined string.
 *
 * @param {string} text - Text that may contain Shavian runs.
 * @returns {{isWord: boolean, matched: boolean, original: string, translated: string}[]}
 *   For Shavian runs, `matched` is true only when the translation differs
 *   from the original; non-Shavian runs are always reported as matched.
 */
function reverseTranslateShavianTextWithMapping(text) {
  const records = [];
  for (const { isWord, text: original } of tokenizeShavian(text)) {
    if (!isWord) {
      records.push({ isWord: false, matched: true, original, translated: original });
      continue;
    }
    const [best] = reverseTranslateShavianWord(original);
    const translated = best ?? original;
    records.push({
      isWord: true,
      matched: translated !== original,
      original,
      translated,
    });
  }
  return records;
}
|
|
218
|
+
/**
 * Looks up candidate spellings for one Shavian word.
 *
 * The word is transcribed to ARPABET, the phonemes are joined with single
 * spaces, and that key is passed to the dictionary lookup.
 *
 * @param {string} word - A Shavian word.
 * @returns {string[]} `[word]` when transcription or lookup yields nothing;
 *   a single match exactly as the lookup returned it; otherwise the matches
 *   passed through sortByFrequency (presumably most frequent first — confirm
 *   against @ingglish/dictionary).
 */
function reverseTranslateShavianWord(word) {
  const phonemes = shavianToArpabet(word);
  if (!phonemes) {
    return [word];
  }
  const candidates = (0, import_dictionary.lookupPhonemeKey)(phonemes.join(" "));
  if (!candidates || candidates.length === 0) {
    return [word];
  }
  if (candidates.length > 1) {
    return (0, import_dictionary.sortByFrequency)(candidates);
  }
  return candidates;
}
|
|
230
|
+
/**
 * Transcribes a run of Shavian letters into ARPABET phonemes (no stress digits).
 *
 * Characters outside the range accepted by isShavianChar are skipped. A
 * character inside that range with no entry in SHAVIAN_TO_ARPABET_MAP makes
 * the whole transcription fail: dropping it silently would yield a shorter
 * phoneme sequence that can match an unrelated dictionary word.
 *
 * @param {string} text - Text to transcribe, iterated by code point.
 * @returns {string[] | null} The phonemes in order, or null when nothing was
 *   transcribed or an unmapped Shavian letter was encountered.
 */
function shavianToArpabet(text) {
  const result = [];
  for (const char of text) {
    if (!isShavianChar(char)) {
      continue;
    }
    const phonemes = SHAVIAN_TO_ARPABET_MAP[char];
    if (phonemes === undefined) {
      // In-range letter without a table entry: refuse rather than guess.
      return null;
    }
    result.push(...phonemes);
  }
  return result.length > 0 ? result : null;
}
|
|
243
|
+
|
|
244
|
+
// src/to-shavian.ts
|
|
245
|
+
var import_phonemes = require("@ingglish/phonemes");
|
|
246
|
+
/**
 * Renders a sequence of ARPABET phonemes as Shavian text.
 *
 * Rules, applied per phoneme in this order:
 *  1. If the next phoneme is exactly "R" and SHAVIAN_R_COLORED has a ligature
 *     for this vowel, emit the ligature and consume both phonemes. AH is
 *     looked up with its stress digit, so only AH0 + R forms a ligature.
 *  2. AH whose last character is "0" becomes SHAVIAN_SCHWA.
 *  3. Otherwise the stress-stripped phoneme is looked up in
 *     ARPABET_TO_SHAVIAN_MAP; phonemes without an entry emit nothing.
 *
 * @param {string[]} arpabet - Phonemes, optionally carrying stress digits.
 * @returns {string} The concatenated Shavian glyphs.
 */
function arpabetToShavian(arpabet) {
  const total = arpabet.length;
  let output = "";
  let index = 0;
  while (index < total) {
    const phoneme = arpabet[index];
    const base = (0, import_phonemes.stripStress)(phoneme);
    const followedByR = index + 1 < total && arpabet[index + 1] === "R";
    if (followedByR) {
      const stressKey = base === "AH" ? phoneme : base;
      const ligature = SHAVIAN_R_COLORED[stressKey] ?? SHAVIAN_R_COLORED[base];
      if (ligature !== undefined) {
        output += ligature;
        index += 2; // the R was absorbed into the ligature
        continue;
      }
    }
    const isUnstressedAh = base === "AH" && phoneme.endsWith("0");
    output += isUnstressedAh ? SHAVIAN_SCHWA : (ARPABET_TO_SHAVIAN_MAP[base] ?? "");
    index += 1;
  }
  return output;
}
|
|
272
|
+
|
|
273
|
+
// src/index.ts
|
|
274
|
+
/**
 * Registers the Shavian format under the id "shavian" via registerFormat
 * from @ingglish/phonemes, wiring in this module's forward and reverse
 * converters. Returns nothing.
 */
function registerShavian() {
  const shavianFormat = {
    forward: arpabetToShavian,
    isLatinScript: false,
    label: "Shavian",
    nativeLabel: "\u{10456}\u{10471}\u{1045D}\u{1047E}\u{1046F}",
    preservesCase: false,
    reverseText: reverseTranslateShavianText,
    reverseTextWithMapping: reverseTranslateShavianTextWithMapping,
  };
  (0, import_phonemes2.registerFormat)("shavian", shavianFormat);
}
|
|
285
|
+
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so the assignment below never executes;
// the statement matters only as source text.
// NOTE(review): presumably this exists for Node's static detection of
// CommonJS named exports — confirm against the bundler/Node documentation
// before changing its shape.
|
|
286
|
+
0 && (module.exports = {
|
|
287
|
+
registerShavian
|
|
288
|
+
});
|
package/dist/index.d.cts
ADDED
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import { registerFormat } from "@ingglish/phonemes";
|
|
3
|
+
|
|
4
|
+
// src/from-shavian.ts
|
|
5
|
+
import { lookupPhonemeKey, sortByFrequency } from "@ingglish/dictionary";
|
|
6
|
+
|
|
7
|
+
// src/shavian-maps.ts
|
|
8
|
+
// ARPABET consonant -> Shavian glyph.
const SHAVIAN_CONSONANT_MAP = {
  B: "\u{1045A}",
  CH: "\u{10457}",
  D: "\u{1045B}",
  DH: "\u{1045E}",
  F: "\u{10453}",
  G: "\u{1045C}",
  HH: "\u{10463}",
  JH: "\u{10461}",
  K: "\u{10452}",
  L: "\u{10464}",
  M: "\u{10465}",
  N: "\u{1046F}",
  NG: "\u{10459}",
  P: "\u{10450}",
  R: "\u{1046E}",
  S: "\u{10455}",
  SH: "\u{10456}",
  T: "\u{10451}",
  TH: "\u{10454}",
  V: "\u{1045D}",
  W: "\u{10462}",
  Y: "\u{10458}",
  Z: "\u{1045F}",
  ZH: "\u{10460}",
};

// ARPABET vowel (stress digit removed) -> Shavian glyph, with example words.
const SHAVIAN_VOWEL_MAP = {
  AA: "\u{1046D}", // palm, father
  AE: "\u{10468}", // trap, cat
  AH: "\u{10473}", // strut, cup (the default for AH; see SHAVIAN_SCHWA)
  AO: "\u{10477}", // thought, law
  AW: "\u{1046C}", // mouth, cow (diphthong)
  AY: "\u{10472}", // price, my (diphthong)
  EH: "\u{10467}", // dress, bed
  ER: "\u{1047B}", // nurse, bird (ligature)
  EY: "\u{10471}", // face, say
  IH: "\u{10466}", // kit, sit
  IY: "\u{10470}", // fleece, see
  OW: "\u{10474}", // goat, go
  OY: "\u{10476}", // choice, boy
  UH: "\u{1046B}", // foot, put
  UW: "\u{10475}", // goose, too
};

// Combined forward table; vowels are spread first, consonants second.
const ARPABET_TO_SHAVIAN_MAP = {
  ...SHAVIAN_VOWEL_MAP,
  ...SHAVIAN_CONSONANT_MAP,
};

// Glyph emitted for AH carrying stress digit 0.
const SHAVIAN_SCHWA = "\u{10469}";

// Single glyphs that stand for a vowel immediately followed by R.
// The AH entry is keyed with its stress digit, so only AH0 + R qualifies.
const SHAVIAN_R_COLORED = {
  AA: "\u{10478}", // start (AA+R)
  AH0: "\u{1047C}", // letter (unstressed AH+R)
  AO: "\u{10479}", // north/force (AO+R)
  EH: "\u{1047A}", // square (EH+R)
  IH: "\u{1047D}", // near (IH+R)
};

// Reverse table: Shavian glyph -> ARPABET phoneme(s) without stress digits.
// Entries are listed in ascending code-point order. U+1046A, U+1047E and
// U+1047F fall inside the range accepted by isShavianChar but have no entry.
const SHAVIAN_TO_ARPABET_MAP = {
  // U+10450..U+10465: consonants
  "\u{10450}": ["P"],
  "\u{10451}": ["T"],
  "\u{10452}": ["K"],
  "\u{10453}": ["F"],
  "\u{10454}": ["TH"],
  "\u{10455}": ["S"],
  "\u{10456}": ["SH"],
  "\u{10457}": ["CH"],
  "\u{10458}": ["Y"],
  "\u{10459}": ["NG"],
  "\u{1045A}": ["B"],
  "\u{1045B}": ["D"],
  "\u{1045C}": ["G"],
  "\u{1045D}": ["V"],
  "\u{1045E}": ["DH"],
  "\u{1045F}": ["Z"],
  "\u{10460}": ["ZH"],
  "\u{10461}": ["JH"],
  "\u{10462}": ["W"],
  "\u{10463}": ["HH"],
  "\u{10464}": ["L"],
  "\u{10465}": ["M"],
  // U+10466..U+1046D: vowels (U+10469 is the schwa glyph; it and U+10473
  // both come back as plain AH)
  "\u{10466}": ["IH"],
  "\u{10467}": ["EH"],
  "\u{10468}": ["AE"],
  "\u{10469}": ["AH"],
  "\u{1046B}": ["UH"],
  "\u{1046C}": ["AW"],
  "\u{1046D}": ["AA"],
  // U+1046E..U+1046F: two more consonants
  "\u{1046E}": ["R"],
  "\u{1046F}": ["N"],
  // U+10470..U+10477: vowels and diphthongs
  "\u{10470}": ["IY"],
  "\u{10471}": ["EY"],
  "\u{10472}": ["AY"],
  "\u{10473}": ["AH"],
  "\u{10474}": ["OW"],
  "\u{10475}": ["UW"],
  "\u{10476}": ["OY"],
  "\u{10477}": ["AO"],
  // U+10478..U+1047D: R-coloured ligatures, expanded to vowel + R
  // (U+1047B stays the single phoneme ER)
  "\u{10478}": ["AA", "R"],
  "\u{10479}": ["AO", "R"],
  "\u{1047A}": ["EH", "R"],
  "\u{1047B}": ["ER"],
  "\u{1047C}": ["AH", "R"],
  "\u{1047D}": ["IH", "R"],
};
|
|
136
|
+
|
|
137
|
+
// src/tokenize.ts
|
|
138
|
+
/**
 * Reports whether the first code point of `char` lies in U+10450..U+1047F,
 * the range this module treats as Shavian.
 *
 * @param {string} char - String whose first code point is inspected.
 * @returns {boolean} False for an empty string or a code point outside the range.
 */
function isShavianChar(char) {
  const codePoint = char.codePointAt(0);
  return codePoint !== undefined && codePoint >= 0x10450 && codePoint <= 0x1047f;
}
|
|
145
|
+
/**
 * Splits text into maximal runs of Shavian and non-Shavian code points.
 *
 * @param {string} text - Input text, iterated by code point.
 * @returns {{isWord: boolean, text: string}[]} Runs in input order; `isWord`
 *   is true for Shavian runs. Empty input yields an empty array.
 */
function tokenizeShavian(text) {
  const tokens = [];
  let run = null;
  for (const char of text) {
    const shavian = isShavianChar(char);
    if (run !== null && run.isWord === shavian) {
      // Same category as the run in progress: extend it.
      run.text += char;
    } else {
      // Category changed (or first character): open a new run.
      run = { isWord: shavian, text: char };
      tokens.push(run);
    }
  }
  return tokens;
}
|
|
166
|
+
|
|
167
|
+
// src/from-shavian.ts
|
|
168
|
+
/**
 * Rewrites text by replacing each Shavian run with the first candidate from
 * reverseTranslateShavianWord; all other runs are copied through unchanged.
 *
 * @param {string} text - Text that may contain Shavian runs.
 * @returns {string} The reassembled text.
 */
function reverseTranslateShavianText(text) {
  let output = "";
  for (const { isWord, text: piece } of tokenizeShavian(text)) {
    if (!isWord) {
      output += piece;
      continue;
    }
    const [best] = reverseTranslateShavianWord(piece);
    // Fall back to the untouched run when no candidate is available.
    output += best ?? piece;
  }
  return output;
}
|
|
178
|
+
/**
 * Like reverseTranslateShavianText, but returns one record per run instead
 * of a joined string.
 *
 * @param {string} text - Text that may contain Shavian runs.
 * @returns {{isWord: boolean, matched: boolean, original: string, translated: string}[]}
 *   For Shavian runs, `matched` is true only when the translation differs
 *   from the original; non-Shavian runs are always reported as matched.
 */
function reverseTranslateShavianTextWithMapping(text) {
  const records = [];
  for (const { isWord, text: original } of tokenizeShavian(text)) {
    if (!isWord) {
      records.push({ isWord: false, matched: true, original, translated: original });
      continue;
    }
    const [best] = reverseTranslateShavianWord(original);
    const translated = best ?? original;
    records.push({
      isWord: true,
      matched: translated !== original,
      original,
      translated,
    });
  }
  return records;
}
|
|
194
|
+
/**
 * Looks up candidate spellings for one Shavian word.
 *
 * The word is transcribed to ARPABET, the phonemes are joined with single
 * spaces, and that key is passed to the dictionary lookup.
 *
 * @param {string} word - A Shavian word.
 * @returns {string[]} `[word]` when transcription or lookup yields nothing;
 *   a single match exactly as the lookup returned it; otherwise the matches
 *   passed through sortByFrequency (presumably most frequent first — confirm
 *   against @ingglish/dictionary).
 */
function reverseTranslateShavianWord(word) {
  const phonemes = shavianToArpabet(word);
  if (!phonemes) {
    return [word];
  }
  const candidates = lookupPhonemeKey(phonemes.join(" "));
  if (!candidates || candidates.length === 0) {
    return [word];
  }
  if (candidates.length > 1) {
    return sortByFrequency(candidates);
  }
  return candidates;
}
|
|
206
|
+
/**
 * Transcribes a run of Shavian letters into ARPABET phonemes (no stress digits).
 *
 * Characters outside the range accepted by isShavianChar are skipped. A
 * character inside that range with no entry in SHAVIAN_TO_ARPABET_MAP makes
 * the whole transcription fail: dropping it silently would yield a shorter
 * phoneme sequence that can match an unrelated dictionary word.
 *
 * @param {string} text - Text to transcribe, iterated by code point.
 * @returns {string[] | null} The phonemes in order, or null when nothing was
 *   transcribed or an unmapped Shavian letter was encountered.
 */
function shavianToArpabet(text) {
  const result = [];
  for (const char of text) {
    if (!isShavianChar(char)) {
      continue;
    }
    const phonemes = SHAVIAN_TO_ARPABET_MAP[char];
    if (phonemes === undefined) {
      // In-range letter without a table entry: refuse rather than guess.
      return null;
    }
    result.push(...phonemes);
  }
  return result.length > 0 ? result : null;
}
|
|
219
|
+
|
|
220
|
+
// src/to-shavian.ts
|
|
221
|
+
import { stripStress } from "@ingglish/phonemes";
|
|
222
|
+
/**
 * Renders a sequence of ARPABET phonemes as Shavian text.
 *
 * Rules, applied per phoneme in this order:
 *  1. If the next phoneme is exactly "R" and SHAVIAN_R_COLORED has a ligature
 *     for this vowel, emit the ligature and consume both phonemes. AH is
 *     looked up with its stress digit, so only AH0 + R forms a ligature.
 *  2. AH whose last character is "0" becomes SHAVIAN_SCHWA.
 *  3. Otherwise the stress-stripped phoneme is looked up in
 *     ARPABET_TO_SHAVIAN_MAP; phonemes without an entry emit nothing.
 *
 * @param {string[]} arpabet - Phonemes, optionally carrying stress digits.
 * @returns {string} The concatenated Shavian glyphs.
 */
function arpabetToShavian(arpabet) {
  const total = arpabet.length;
  let output = "";
  let index = 0;
  while (index < total) {
    const phoneme = arpabet[index];
    const base = stripStress(phoneme);
    const followedByR = index + 1 < total && arpabet[index + 1] === "R";
    if (followedByR) {
      const stressKey = base === "AH" ? phoneme : base;
      const ligature = SHAVIAN_R_COLORED[stressKey] ?? SHAVIAN_R_COLORED[base];
      if (ligature !== undefined) {
        output += ligature;
        index += 2; // the R was absorbed into the ligature
        continue;
      }
    }
    const isUnstressedAh = base === "AH" && phoneme.endsWith("0");
    output += isUnstressedAh ? SHAVIAN_SCHWA : (ARPABET_TO_SHAVIAN_MAP[base] ?? "");
    index += 1;
  }
  return output;
}
|
|
248
|
+
|
|
249
|
+
// src/index.ts
|
|
250
|
+
/**
 * Registers the Shavian format under the id "shavian" via registerFormat
 * from @ingglish/phonemes, wiring in this module's forward and reverse
 * converters. Returns nothing.
 */
function registerShavian() {
  const shavianFormat = {
    forward: arpabetToShavian,
    isLatinScript: false,
    label: "Shavian",
    nativeLabel: "\u{10456}\u{10471}\u{1045D}\u{1047E}\u{1046F}",
    preservesCase: false,
    reverseText: reverseTranslateShavianText,
    reverseTextWithMapping: reverseTranslateShavianTextWithMapping,
  };
  registerFormat("shavian", shavianFormat);
}
|
|
261
|
+
export {
|
|
262
|
+
registerShavian
|
|
263
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ingglish/shavian",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Shavian alphabet conversion for Ingglish",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"source": "./src/index.ts",
|
|
12
|
+
"import": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"default": "./dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"require": {
|
|
17
|
+
"types": "./dist/index.d.cts",
|
|
18
|
+
"default": "./dist/index.cjs"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"files": [
|
|
23
|
+
"dist"
|
|
24
|
+
],
|
|
25
|
+
"sideEffects": false,
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=16"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsup",
|
|
31
|
+
"build:fast": "tsup src/index.ts --format esm",
|
|
32
|
+
"lint": "eslint --cache src",
|
|
33
|
+
"test": "vitest run --no-color",
|
|
34
|
+
"prepublishOnly": "npm run build"
|
|
35
|
+
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"@ingglish/phonemes": "^0.1.0",
|
|
38
|
+
"@ingglish/dictionary": "^0.1.0"
|
|
39
|
+
},
|
|
40
|
+
"author": "Paul Tarjan",
|
|
41
|
+
"license": "MIT",
|
|
42
|
+
"repository": {
|
|
43
|
+
"type": "git",
|
|
44
|
+
"url": "git+https://github.com/ptarjan/ingglish.git",
|
|
45
|
+
"directory": "packages/shavian"
|
|
46
|
+
},
|
|
47
|
+
"homepage": "https://github.com/ptarjan/ingglish#readme",
|
|
48
|
+
"bugs": {
|
|
49
|
+
"url": "https://github.com/ptarjan/ingglish/issues"
|
|
50
|
+
}
|
|
51
|
+
}
|