@ingglish/shavian 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,288 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty; // CommonJS interop helpers, presumably emitted by the bundler (package.json builds with tsup): cached Object built-ins
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => { // defines every entry of `all` on `target` as an enumerable getter
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => { // exposes the own properties of `from` on `to` through getters, then returns `to`
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except) // never overwrite a key `to` already owns; skip the `except` key
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // getter re-reads `from[key]` on each access; enumerability mirrors the source descriptor
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // fresh object flagged `__esModule` that forwards to `mod`'s properties
19
+
20
+ // src/index.ts
21
+ var index_exports = {}; // export table; filled with getters by __export below
22
+ __export(index_exports, {
23
+ registerShavian: () => registerShavian
24
+ });
25
+ module.exports = __toCommonJS(index_exports); // the module's only export is registerShavian
26
+ var import_phonemes2 = require("@ingglish/phonemes");
27
+
28
+ // src/from-shavian.ts
29
+ var import_dictionary = require("@ingglish/dictionary");
30
+
31
+ // src/shavian-maps.ts
32
/**
 * ARPABET consonant symbol -> Shavian letter (one code point per consonant).
 * The trailing comment on each entry is the Unicode name of the letter.
 */
var SHAVIAN_CONSONANT_MAP = {
  B: "\u{1045A}", // bib
  CH: "\u{10457}", // church
  D: "\u{1045B}", // dead
  DH: "\u{1045E}", // they
  F: "\u{10453}", // fee
  G: "\u{1045C}", // gag
  HH: "\u{10463}", // ha-ha
  JH: "\u{10461}", // judge
  K: "\u{10452}", // kick
  L: "\u{10464}", // loll
  M: "\u{10465}", // mime
  N: "\u{1046F}", // nun
  NG: "\u{10459}", // hung
  P: "\u{10450}", // peep
  R: "\u{1046E}", // roar
  S: "\u{10455}", // so
  SH: "\u{10456}", // sure
  T: "\u{10451}", // tot
  TH: "\u{10454}", // thigh
  V: "\u{1045D}", // vow
  W: "\u{10462}", // woe
  Y: "\u{10458}", // yea
  Z: "\u{1045F}", // zoo
  ZH: "\u{10460}" // measure
};
58
/**
 * ARPABET vowel symbol (no stress digit) -> Shavian letter.
 * Each trailing comment gives the Unicode letter name, then example words.
 * AW, AY, EY, OW and OY are the diphthongs; ER is a single r-coloured letter.
 */
var SHAVIAN_VOWEL_MAP = {
  AA: "\u{1046D}", // ah: palm, father
  AE: "\u{10468}", // ash: trap, cat
  AH: "\u{10473}", // up: strut, cup (stressed default)
  AO: "\u{10477}", // awe: thought, law
  AW: "\u{1046C}", // out: mouth, cow
  AY: "\u{10472}", // ice: price, my
  EH: "\u{10467}", // egg: dress, bed
  ER: "\u{1047B}", // err: nurse, bird (ligature)
  EY: "\u{10471}", // age: face, say
  IH: "\u{10466}", // if: kit, sit
  IY: "\u{10470}", // eat: fleece, see
  OW: "\u{10474}", // oak: goat, go
  OY: "\u{10476}", // oil: choice, boy
  UH: "\u{1046B}", // wool: foot, put
  UW: "\u{10475}" // ooze: goose, too
};
91
/**
 * Combined forward table: every ARPABET symbol (vowels first, then
 * consonants) -> its Shavian letter. The two source tables share no keys.
 */
var ARPABET_TO_SHAVIAN_MAP = Object.assign({}, SHAVIAN_VOWEL_MAP, SHAVIAN_CONSONANT_MAP);
95
/** Shavian letter "ado", the schwa. */
var SHAVIAN_SCHWA = "\u{10469}";

/**
 * Vowel + R ligatures. Keys are ARPABET vowels without a stress digit,
 * except `AH0`, which keeps its digit so that only unstressed AH + R
 * selects the "array" ligature.
 */
var SHAVIAN_R_COLORED = {
  AA: "\u{10478}", // are: start (AA+R)
  AH0: "\u{1047C}", // array: letter (unstressed AH+R)
  AO: "\u{10479}", // or: north/force (AO+R)
  EH: "\u{1047A}", // air: square (EH+R)
  IH: "\u{1047D}" // ear: near (IH+R)
};
108
/**
 * Reverse table: Shavian letter -> ARPABET phonemes (no stress digits).
 *
 * Entries are listed in code-point order and cover the whole Shavian block
 * U+10450..U+1047F. Ligatures expand to more than one phoneme. The letters
 * "on" (U+1046A), "ian" (U+1047E) and "yew" (U+1047F) were previously absent,
 * so words containing them could not be decoded.
 */
var SHAVIAN_TO_ARPABET_MAP = {
  // Tall/deep consonant pairs
  "\u{10450}": ["P"], // peep
  "\u{10451}": ["T"], // tot
  "\u{10452}": ["K"], // kick
  "\u{10453}": ["F"], // fee
  "\u{10454}": ["TH"], // thigh
  "\u{10455}": ["S"], // so
  "\u{10456}": ["SH"], // sure
  "\u{10457}": ["CH"], // church
  "\u{10458}": ["Y"], // yea
  "\u{10459}": ["NG"], // hung
  "\u{1045A}": ["B"], // bib
  "\u{1045B}": ["D"], // dead
  "\u{1045C}": ["G"], // gag
  "\u{1045D}": ["V"], // vow
  "\u{1045E}": ["DH"], // they
  "\u{1045F}": ["Z"], // zoo
  "\u{10460}": ["ZH"], // measure
  "\u{10461}": ["JH"], // judge
  "\u{10462}": ["W"], // woe
  "\u{10463}": ["HH"], // ha-ha
  // Short letters: L/M, then vowels
  "\u{10464}": ["L"], // loll
  "\u{10465}": ["M"], // mime
  "\u{10466}": ["IH"], // if
  "\u{10467}": ["EH"], // egg
  "\u{10468}": ["AE"], // ash
  "\u{10469}": ["AH"], // ado (schwa)
  // "on" is the LOT vowel; ARPABET has no separate symbol, so use AA
  "\u{1046A}": ["AA"], // on
  "\u{1046B}": ["UH"], // wool
  "\u{1046C}": ["AW"], // out
  "\u{1046D}": ["AA"], // ah
  "\u{1046E}": ["R"], // roar
  "\u{1046F}": ["N"], // nun
  "\u{10470}": ["IY"], // eat
  "\u{10471}": ["EY"], // age
  "\u{10472}": ["AY"], // ice
  "\u{10473}": ["AH"], // up
  "\u{10474}": ["OW"], // oak
  "\u{10475}": ["UW"], // ooze
  "\u{10476}": ["OY"], // oil
  "\u{10477}": ["AO"], // awe
  // Ligatures (most expand to two phonemes)
  "\u{10478}": ["AA", "R"], // are
  "\u{10479}": ["AO", "R"], // or
  "\u{1047A}": ["EH", "R"], // air
  "\u{1047B}": ["ER"], // err
  "\u{1047C}": ["AH", "R"], // array
  "\u{1047D}": ["IH", "R"], // ear
  "\u{1047E}": ["IY", "AH"], // ian
  "\u{1047F}": ["Y", "UW"] // yew
};
160
+
161
+ // src/tokenize.ts
162
/**
 * Tells whether a string starts with a code point in the Shavian block
 * (U+10450..U+1047F, i.e. 66640..66687).
 *
 * @param {string} char - string whose first code point is inspected
 * @returns {boolean} false for an empty string or any other script
 */
function isShavianChar(char) {
  const codePoint = char.codePointAt(0);
  // codePointAt(0) is undefined when the string is empty.
  return codePoint !== undefined && codePoint >= 0x10450 && codePoint <= 0x1047f;
}
169
/**
 * Splits text into maximal runs of Shavian and non-Shavian code points.
 *
 * @param {string} text - input to split
 * @returns {{isWord: boolean, text: string}[]} runs in input order;
 *   `isWord` is true for Shavian runs. Concatenating every `text` gives
 *   back the input. An empty input yields an empty array.
 */
function tokenizeShavian(text) {
  const runs = [];
  let buffer = "";
  let bufferIsWord = false;

  // Emits the pending run, if there is one.
  const flush = () => {
    if (buffer.length > 0) {
      runs.push({ isWord: bufferIsWord, text: buffer });
    }
  };

  // for...of walks code points, so surrogate pairs stay intact.
  for (const symbol of text) {
    const symbolIsWord = isShavianChar(symbol);
    if (symbolIsWord !== bufferIsWord) {
      flush();
      buffer = "";
      bufferIsWord = symbolIsWord;
    }
    buffer += symbol;
  }
  flush();
  return runs;
}
190
+
191
+ // src/from-shavian.ts
192
/**
 * Converts Shavian text back to words, run by run.
 *
 * Shavian runs are replaced by the first candidate returned by
 * reverseTranslateShavianWord; everything else is copied unchanged.
 *
 * @param {string} text - text that may mix Shavian with other characters
 * @returns {string} the converted text
 */
function reverseTranslateShavianText(text) {
  let output = "";
  for (const token of tokenizeShavian(text)) {
    if (!token.isWord) {
      output += token.text;
      continue;
    }
    const candidates = reverseTranslateShavianWord(token.text);
    // Keep the original run if no candidate came back.
    output += candidates[0] ?? token.text;
  }
  return output;
}
202
/**
 * Same conversion as reverseTranslateShavianText, but returns one record
 * per run instead of a joined string.
 *
 * @param {string} text - text that may mix Shavian with other characters
 * @returns {{isWord: boolean, matched: boolean, original: string, translated: string}[]}
 *   For Shavian runs `matched` is true only when the translation differs
 *   from the original run. Non-Shavian runs pass through with `matched: true`.
 */
function reverseTranslateShavianTextWithMapping(text) {
  const mapping = [];
  for (const token of tokenizeShavian(text)) {
    const original = token.text;
    if (!token.isWord) {
      mapping.push({ isWord: false, matched: true, original, translated: original });
      continue;
    }
    const candidates = reverseTranslateShavianWord(original);
    const translated = candidates[0] ?? original;
    mapping.push({
      isWord: true,
      matched: translated !== original,
      original,
      translated
    });
  }
  return mapping;
}
218
/**
 * Finds candidate spellings for one Shavian word.
 *
 * The word is turned into a space-separated ARPABET key and handed to
 * lookupPhonemeKey from @ingglish/dictionary (presumably a pronunciation
 * lookup; confirm against that package).
 *
 * @param {string} word - a run of Shavian letters
 * @returns {string[]} the lookup results, passed through sortByFrequency when
 *   there are several; `[word]` when the word has no phonemes or no match
 */
function reverseTranslateShavianWord(word) {
  const phonemes = shavianToArpabet(word);
  const candidates = phonemes
    ? (0, import_dictionary.lookupPhonemeKey)(phonemes.join(" "))
    : null;
  if (!candidates || candidates.length === 0) {
    return [word];
  }
  if (candidates.length > 1) {
    return (0, import_dictionary.sortByFrequency)(candidates);
  }
  return candidates;
}
230
/**
 * Expands a Shavian string into ARPABET phonemes (no stress digits).
 *
 * Code points outside the Shavian block are skipped. A Shavian letter that
 * has no entry in SHAVIAN_TO_ARPABET_MAP makes the whole result null:
 * dropping it silently would produce the phoneme key of a different,
 * shorter word and could match the wrong word.
 *
 * @param {string} text - text to convert, normally a single Shavian word
 * @returns {string[] | null} phonemes in order, or null when the text holds
 *   no Shavian letters or holds one that cannot be mapped
 */
function shavianToArpabet(text) {
  const phonemes = [];
  for (const char of text) {
    if (!isShavianChar(char)) {
      continue;
    }
    const mapped = SHAVIAN_TO_ARPABET_MAP[char];
    if (mapped === undefined) {
      // Unknown letter: the word cannot be decoded faithfully.
      return null;
    }
    phonemes.push(...mapped);
  }
  return phonemes.length > 0 ? phonemes : null;
}
243
+
244
+ // src/to-shavian.ts
245
+ var import_phonemes = require("@ingglish/phonemes");
246
/**
 * Spells a sequence of ARPABET phonemes in Shavian letters.
 *
 * - A vowel directly followed by "R" becomes one ligature when
 *   SHAVIAN_R_COLORED has an entry for it, and the "R" is consumed.
 * - Unstressed AH ("AH0") becomes the schwa letter.
 * - Everything else is looked up by its stress-free symbol; symbols with no
 *   entry contribute nothing.
 *
 * NOTE(review): ER maps to the same letter at every stress level; confirm
 * whether unstressed ER0 should instead use the "array" ligature.
 *
 * @param {string[]} arpabet - phonemes, optionally carrying stress digits
 * @returns {string} the Shavian spelling ("" for an empty list)
 */
function arpabetToShavian(arpabet) {
  const glyphs = [];
  const total = arpabet.length;
  let index = 0;
  while (index < total) {
    const symbol = arpabet[index];
    const bare = (0, import_phonemes.stripStress)(symbol);
    index += 1; // index now points at the following phoneme

    if (index < total && arpabet[index] === "R") {
      // Only AH is looked up with its stress digit (key "AH0").
      const lookupKey = bare === "AH" ? symbol : bare;
      const ligature = SHAVIAN_R_COLORED[lookupKey] ?? SHAVIAN_R_COLORED[bare];
      if (ligature !== undefined) {
        glyphs.push(ligature);
        index += 1; // the ligature already covers the R
        continue;
      }
    }

    if (bare === "AH" && symbol.endsWith("0")) {
      glyphs.push(SHAVIAN_SCHWA);
      continue;
    }

    glyphs.push(ARPABET_TO_SHAVIAN_MAP[bare] ?? "");
  }
  return glyphs.join("");
}
272
+
273
+ // src/index.ts
274
/**
 * Registers the "shavian" format with @ingglish/phonemes.
 *
 * The descriptor supplies the forward converter (ARPABET -> Shavian) and
 * both reverse converters. `nativeLabel` is the name spelled with the
 * letters sure, age, vow, ian, nun.
 */
function registerShavian() {
  const shavianFormat = {
    forward: arpabetToShavian,
    isLatinScript: false,
    label: "Shavian",
    nativeLabel: "\u{10456}\u{10471}\u{1045D}\u{1047E}\u{1046F}",
    preservesCase: false,
    reverseText: reverseTranslateShavianText,
    reverseTextWithMapping: reverseTranslateShavianTextWithMapping
  };
  (0, import_phonemes2.registerFormat)("shavian", shavianFormat);
}
285
+ // Annotate the CommonJS export names for ESM import in node:
286
+ 0 && (module.exports = {
287
+ registerShavian
288
+ });
@@ -0,0 +1,3 @@
1
+ declare function registerShavian(): void; // Registers the "shavian" format through registerFormat from @ingglish/phonemes; takes no arguments and returns nothing.
2
+
3
+ export { registerShavian };
@@ -0,0 +1,3 @@
1
+ declare function registerShavian(): void; // Registers the "shavian" format through registerFormat from @ingglish/phonemes; takes no arguments and returns nothing.
2
+
3
+ export { registerShavian };
package/dist/index.js ADDED
@@ -0,0 +1,263 @@
1
+ // src/index.ts
2
+ import { registerFormat } from "@ingglish/phonemes";
3
+
4
+ // src/from-shavian.ts
5
+ import { lookupPhonemeKey, sortByFrequency } from "@ingglish/dictionary";
6
+
7
+ // src/shavian-maps.ts
8
/**
 * ARPABET consonant symbol -> Shavian letter (one code point per consonant).
 * The trailing comment on each entry is the Unicode name of the letter.
 */
var SHAVIAN_CONSONANT_MAP = {
  B: "\u{1045A}", // bib
  CH: "\u{10457}", // church
  D: "\u{1045B}", // dead
  DH: "\u{1045E}", // they
  F: "\u{10453}", // fee
  G: "\u{1045C}", // gag
  HH: "\u{10463}", // ha-ha
  JH: "\u{10461}", // judge
  K: "\u{10452}", // kick
  L: "\u{10464}", // loll
  M: "\u{10465}", // mime
  N: "\u{1046F}", // nun
  NG: "\u{10459}", // hung
  P: "\u{10450}", // peep
  R: "\u{1046E}", // roar
  S: "\u{10455}", // so
  SH: "\u{10456}", // sure
  T: "\u{10451}", // tot
  TH: "\u{10454}", // thigh
  V: "\u{1045D}", // vow
  W: "\u{10462}", // woe
  Y: "\u{10458}", // yea
  Z: "\u{1045F}", // zoo
  ZH: "\u{10460}" // measure
};
34
/**
 * ARPABET vowel symbol (no stress digit) -> Shavian letter.
 * Each trailing comment gives the Unicode letter name, then example words.
 * AW, AY, EY, OW and OY are the diphthongs; ER is a single r-coloured letter.
 */
var SHAVIAN_VOWEL_MAP = {
  AA: "\u{1046D}", // ah: palm, father
  AE: "\u{10468}", // ash: trap, cat
  AH: "\u{10473}", // up: strut, cup (stressed default)
  AO: "\u{10477}", // awe: thought, law
  AW: "\u{1046C}", // out: mouth, cow
  AY: "\u{10472}", // ice: price, my
  EH: "\u{10467}", // egg: dress, bed
  ER: "\u{1047B}", // err: nurse, bird (ligature)
  EY: "\u{10471}", // age: face, say
  IH: "\u{10466}", // if: kit, sit
  IY: "\u{10470}", // eat: fleece, see
  OW: "\u{10474}", // oak: goat, go
  OY: "\u{10476}", // oil: choice, boy
  UH: "\u{1046B}", // wool: foot, put
  UW: "\u{10475}" // ooze: goose, too
};
67
/**
 * Combined forward table: every ARPABET symbol (vowels first, then
 * consonants) -> its Shavian letter. The two source tables share no keys.
 */
var ARPABET_TO_SHAVIAN_MAP = Object.assign({}, SHAVIAN_VOWEL_MAP, SHAVIAN_CONSONANT_MAP);
71
/** Shavian letter "ado", the schwa. */
var SHAVIAN_SCHWA = "\u{10469}";

/**
 * Vowel + R ligatures. Keys are ARPABET vowels without a stress digit,
 * except `AH0`, which keeps its digit so that only unstressed AH + R
 * selects the "array" ligature.
 */
var SHAVIAN_R_COLORED = {
  AA: "\u{10478}", // are: start (AA+R)
  AH0: "\u{1047C}", // array: letter (unstressed AH+R)
  AO: "\u{10479}", // or: north/force (AO+R)
  EH: "\u{1047A}", // air: square (EH+R)
  IH: "\u{1047D}" // ear: near (IH+R)
};
84
/**
 * Reverse table: Shavian letter -> ARPABET phonemes (no stress digits).
 *
 * Entries are listed in code-point order and cover the whole Shavian block
 * U+10450..U+1047F. Ligatures expand to more than one phoneme. The letters
 * "on" (U+1046A), "ian" (U+1047E) and "yew" (U+1047F) were previously absent,
 * so words containing them could not be decoded.
 */
var SHAVIAN_TO_ARPABET_MAP = {
  // Tall/deep consonant pairs
  "\u{10450}": ["P"], // peep
  "\u{10451}": ["T"], // tot
  "\u{10452}": ["K"], // kick
  "\u{10453}": ["F"], // fee
  "\u{10454}": ["TH"], // thigh
  "\u{10455}": ["S"], // so
  "\u{10456}": ["SH"], // sure
  "\u{10457}": ["CH"], // church
  "\u{10458}": ["Y"], // yea
  "\u{10459}": ["NG"], // hung
  "\u{1045A}": ["B"], // bib
  "\u{1045B}": ["D"], // dead
  "\u{1045C}": ["G"], // gag
  "\u{1045D}": ["V"], // vow
  "\u{1045E}": ["DH"], // they
  "\u{1045F}": ["Z"], // zoo
  "\u{10460}": ["ZH"], // measure
  "\u{10461}": ["JH"], // judge
  "\u{10462}": ["W"], // woe
  "\u{10463}": ["HH"], // ha-ha
  // Short letters: L/M, then vowels
  "\u{10464}": ["L"], // loll
  "\u{10465}": ["M"], // mime
  "\u{10466}": ["IH"], // if
  "\u{10467}": ["EH"], // egg
  "\u{10468}": ["AE"], // ash
  "\u{10469}": ["AH"], // ado (schwa)
  // "on" is the LOT vowel; ARPABET has no separate symbol, so use AA
  "\u{1046A}": ["AA"], // on
  "\u{1046B}": ["UH"], // wool
  "\u{1046C}": ["AW"], // out
  "\u{1046D}": ["AA"], // ah
  "\u{1046E}": ["R"], // roar
  "\u{1046F}": ["N"], // nun
  "\u{10470}": ["IY"], // eat
  "\u{10471}": ["EY"], // age
  "\u{10472}": ["AY"], // ice
  "\u{10473}": ["AH"], // up
  "\u{10474}": ["OW"], // oak
  "\u{10475}": ["UW"], // ooze
  "\u{10476}": ["OY"], // oil
  "\u{10477}": ["AO"], // awe
  // Ligatures (most expand to two phonemes)
  "\u{10478}": ["AA", "R"], // are
  "\u{10479}": ["AO", "R"], // or
  "\u{1047A}": ["EH", "R"], // air
  "\u{1047B}": ["ER"], // err
  "\u{1047C}": ["AH", "R"], // array
  "\u{1047D}": ["IH", "R"], // ear
  "\u{1047E}": ["IY", "AH"], // ian
  "\u{1047F}": ["Y", "UW"] // yew
};
136
+
137
+ // src/tokenize.ts
138
/**
 * Tells whether a string starts with a code point in the Shavian block
 * (U+10450..U+1047F, i.e. 66640..66687).
 *
 * @param {string} char - string whose first code point is inspected
 * @returns {boolean} false for an empty string or any other script
 */
function isShavianChar(char) {
  const codePoint = char.codePointAt(0);
  // codePointAt(0) is undefined when the string is empty.
  return codePoint !== undefined && codePoint >= 0x10450 && codePoint <= 0x1047f;
}
145
/**
 * Splits text into maximal runs of Shavian and non-Shavian code points.
 *
 * @param {string} text - input to split
 * @returns {{isWord: boolean, text: string}[]} runs in input order;
 *   `isWord` is true for Shavian runs. Concatenating every `text` gives
 *   back the input. An empty input yields an empty array.
 */
function tokenizeShavian(text) {
  const runs = [];
  let buffer = "";
  let bufferIsWord = false;

  // Emits the pending run, if there is one.
  const flush = () => {
    if (buffer.length > 0) {
      runs.push({ isWord: bufferIsWord, text: buffer });
    }
  };

  // for...of walks code points, so surrogate pairs stay intact.
  for (const symbol of text) {
    const symbolIsWord = isShavianChar(symbol);
    if (symbolIsWord !== bufferIsWord) {
      flush();
      buffer = "";
      bufferIsWord = symbolIsWord;
    }
    buffer += symbol;
  }
  flush();
  return runs;
}
166
+
167
+ // src/from-shavian.ts
168
/**
 * Converts Shavian text back to words, run by run.
 *
 * Shavian runs are replaced by the first candidate returned by
 * reverseTranslateShavianWord; everything else is copied unchanged.
 *
 * @param {string} text - text that may mix Shavian with other characters
 * @returns {string} the converted text
 */
function reverseTranslateShavianText(text) {
  let output = "";
  for (const token of tokenizeShavian(text)) {
    if (!token.isWord) {
      output += token.text;
      continue;
    }
    const candidates = reverseTranslateShavianWord(token.text);
    // Keep the original run if no candidate came back.
    output += candidates[0] ?? token.text;
  }
  return output;
}
178
/**
 * Same conversion as reverseTranslateShavianText, but returns one record
 * per run instead of a joined string.
 *
 * @param {string} text - text that may mix Shavian with other characters
 * @returns {{isWord: boolean, matched: boolean, original: string, translated: string}[]}
 *   For Shavian runs `matched` is true only when the translation differs
 *   from the original run. Non-Shavian runs pass through with `matched: true`.
 */
function reverseTranslateShavianTextWithMapping(text) {
  const mapping = [];
  for (const token of tokenizeShavian(text)) {
    const original = token.text;
    if (!token.isWord) {
      mapping.push({ isWord: false, matched: true, original, translated: original });
      continue;
    }
    const candidates = reverseTranslateShavianWord(original);
    const translated = candidates[0] ?? original;
    mapping.push({
      isWord: true,
      matched: translated !== original,
      original,
      translated
    });
  }
  return mapping;
}
194
/**
 * Finds candidate spellings for one Shavian word.
 *
 * The word is turned into a space-separated ARPABET key and handed to
 * lookupPhonemeKey from @ingglish/dictionary (presumably a pronunciation
 * lookup; confirm against that package).
 *
 * @param {string} word - a run of Shavian letters
 * @returns {string[]} the lookup results, passed through sortByFrequency when
 *   there are several; `[word]` when the word has no phonemes or no match
 */
function reverseTranslateShavianWord(word) {
  const phonemes = shavianToArpabet(word);
  const candidates = phonemes ? lookupPhonemeKey(phonemes.join(" ")) : null;
  if (!candidates || candidates.length === 0) {
    return [word];
  }
  if (candidates.length > 1) {
    return sortByFrequency(candidates);
  }
  return candidates;
}
206
/**
 * Expands a Shavian string into ARPABET phonemes (no stress digits).
 *
 * Code points outside the Shavian block are skipped. A Shavian letter that
 * has no entry in SHAVIAN_TO_ARPABET_MAP makes the whole result null:
 * dropping it silently would produce the phoneme key of a different,
 * shorter word and could match the wrong word.
 *
 * @param {string} text - text to convert, normally a single Shavian word
 * @returns {string[] | null} phonemes in order, or null when the text holds
 *   no Shavian letters or holds one that cannot be mapped
 */
function shavianToArpabet(text) {
  const phonemes = [];
  for (const char of text) {
    if (!isShavianChar(char)) {
      continue;
    }
    const mapped = SHAVIAN_TO_ARPABET_MAP[char];
    if (mapped === undefined) {
      // Unknown letter: the word cannot be decoded faithfully.
      return null;
    }
    phonemes.push(...mapped);
  }
  return phonemes.length > 0 ? phonemes : null;
}
219
+
220
+ // src/to-shavian.ts
221
+ import { stripStress } from "@ingglish/phonemes";
222
/**
 * Spells a sequence of ARPABET phonemes in Shavian letters.
 *
 * - A vowel directly followed by "R" becomes one ligature when
 *   SHAVIAN_R_COLORED has an entry for it, and the "R" is consumed.
 * - Unstressed AH ("AH0") becomes the schwa letter.
 * - Everything else is looked up by its stress-free symbol; symbols with no
 *   entry contribute nothing.
 *
 * NOTE(review): ER maps to the same letter at every stress level; confirm
 * whether unstressed ER0 should instead use the "array" ligature.
 *
 * @param {string[]} arpabet - phonemes, optionally carrying stress digits
 * @returns {string} the Shavian spelling ("" for an empty list)
 */
function arpabetToShavian(arpabet) {
  const glyphs = [];
  const total = arpabet.length;
  let index = 0;
  while (index < total) {
    const symbol = arpabet[index];
    const bare = stripStress(symbol);
    index += 1; // index now points at the following phoneme

    if (index < total && arpabet[index] === "R") {
      // Only AH is looked up with its stress digit (key "AH0").
      const lookupKey = bare === "AH" ? symbol : bare;
      const ligature = SHAVIAN_R_COLORED[lookupKey] ?? SHAVIAN_R_COLORED[bare];
      if (ligature !== undefined) {
        glyphs.push(ligature);
        index += 1; // the ligature already covers the R
        continue;
      }
    }

    if (bare === "AH" && symbol.endsWith("0")) {
      glyphs.push(SHAVIAN_SCHWA);
      continue;
    }

    glyphs.push(ARPABET_TO_SHAVIAN_MAP[bare] ?? "");
  }
  return glyphs.join("");
}
248
+
249
+ // src/index.ts
250
/**
 * Registers the "shavian" format with @ingglish/phonemes.
 *
 * The descriptor supplies the forward converter (ARPABET -> Shavian) and
 * both reverse converters. `nativeLabel` is the name spelled with the
 * letters sure, age, vow, ian, nun.
 */
function registerShavian() {
  const shavianFormat = {
    forward: arpabetToShavian,
    isLatinScript: false,
    label: "Shavian",
    nativeLabel: "\u{10456}\u{10471}\u{1045D}\u{1047E}\u{1046F}",
    preservesCase: false,
    reverseText: reverseTranslateShavianText,
    reverseTextWithMapping: reverseTranslateShavianTextWithMapping
  };
  registerFormat("shavian", shavianFormat);
}
261
+ export {
262
+ registerShavian
263
+ };
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@ingglish/shavian",
3
+ "version": "0.1.0",
4
+ "description": "Shavian alphabet conversion for Ingglish",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "source": "./src/index.ts",
12
+ "import": {
13
+ "types": "./dist/index.d.ts",
14
+ "default": "./dist/index.js"
15
+ },
16
+ "require": {
17
+ "types": "./dist/index.d.cts",
18
+ "default": "./dist/index.cjs"
19
+ }
20
+ }
21
+ },
22
+ "files": [
23
+ "dist"
24
+ ],
25
+ "sideEffects": false,
26
+ "engines": {
27
+ "node": ">=16"
28
+ },
29
+ "scripts": {
30
+ "build": "tsup",
31
+ "build:fast": "tsup src/index.ts --format esm",
32
+ "lint": "eslint --cache src",
33
+ "test": "vitest run --no-color",
34
+ "prepublishOnly": "npm run build"
35
+ },
36
+ "dependencies": {
37
+ "@ingglish/phonemes": "^0.1.0",
38
+ "@ingglish/dictionary": "^0.1.0"
39
+ },
40
+ "author": "Paul Tarjan",
41
+ "license": "MIT",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "git+https://github.com/ptarjan/ingglish.git",
45
+ "directory": "packages/shavian"
46
+ },
47
+ "homepage": "https://github.com/ptarjan/ingglish#readme",
48
+ "bugs": {
49
+ "url": "https://github.com/ptarjan/ingglish/issues"
50
+ }
51
+ }