glost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/nodes.ts ADDED
@@ -0,0 +1,301 @@
1
+ import type {
2
+ LanguageCode,
3
+ LinguisticLevel,
4
+ LinguisticMetadata,
5
+ GLOSTExtras,
6
+ GLOSTParagraph,
7
+ GLOSTPunctuation,
8
+ GLOSTRoot,
9
+ GLOSTSentence,
10
+ GLOSTSymbol,
11
+ GLOSTText,
12
+ GLOSTWhiteSpace,
13
+ GLOSTWord,
14
+ ScriptSystem,
15
+ TransliterationData,
16
+ } from "./types";
17
+
18
+ // ============================================================================
19
+ // Node Factory Functions
20
+ // ============================================================================
21
+
/**
 * Build a GLOST word node whose surface text is held in a single text child.
 *
 * @param value - Surface text of the word; wrapped in one `TextNode` child.
 * @param transcription - Transcription data stored on the node unchanged.
 * @param metadata - Linguistic metadata stored on the node unchanged.
 * @param level - Linguistic level (defaults to `"word"`). Currently not
 *   written to the returned node.
 * @param lang - Optional language code copied onto the node.
 * @param script - Optional script system copied onto the node.
 * @param extras - Optional extension data copied onto the node.
 * @returns A node of type `"WordNode"`.
 */
export function createGLOSTWordNode(
  value: string,
  transcription: TransliterationData,
  metadata: LinguisticMetadata,
  level: LinguisticLevel = "word",
  lang?: LanguageCode,
  script?: ScriptSystem,
  extras?: GLOSTExtras,
): GLOSTWord {
  const textChild = createGLOSTTextNode(value);

  const wordNode: GLOSTWord = {
    type: "WordNode",
    lang,
    script,
    transcription,
    metadata,
    extras,
    children: [textChild],
  };
  return wordNode;
}
44
+
/**
 * Build a GLOST sentence node.
 *
 * @param originalText - The sentence's text, stored on the node as given.
 * @param lang - Language code of the sentence.
 * @param script - Script system of the sentence.
 * @param children - Word nodes that make up the sentence (defaults to an
 *   empty array). The array is stored by reference, not copied.
 * @param transcription - Optional sentence-level transcription data.
 * @param extras - Optional extension data.
 * @returns A node of type `"SentenceNode"`.
 */
export function createGLOSTSentenceNode(
  originalText: string,
  lang: LanguageCode,
  script: ScriptSystem,
  children: GLOSTWord[] = [],
  transcription?: TransliterationData,
  extras?: GLOSTExtras,
): GLOSTSentence {
  const sentenceNode: GLOSTSentence = {
    type: "SentenceNode",
    originalText,
    lang,
    script,
    transcription,
    children,
    extras,
  };
  return sentenceNode;
}
66
+
/**
 * Build a GLOST paragraph node.
 *
 * @param children - Sentence nodes in the paragraph (defaults to an empty
 *   array). The array is stored by reference, not copied.
 * @param extras - Optional extension data.
 * @returns A node of type `"ParagraphNode"`. Its `position` key is always
 *   present and set to `undefined`.
 */
export function createGLOSTParagraphNode(
  children: GLOSTSentence[] = [],
  extras?: GLOSTExtras,
): GLOSTParagraph {
  const paragraphNode: GLOSTParagraph = {
    type: "ParagraphNode",
    children,
    // No position is known at construction time; the key is still emitted.
    position: undefined,
    extras,
  };
  return paragraphNode;
}
82
+
/**
 * Build a GLOST root (document) node.
 *
 * @param lang - Language code of the document.
 * @param script - Script system of the document.
 * @param children - Paragraph nodes in the document (defaults to an empty
 *   array). The array is stored by reference, not copied.
 * @param metadata - Optional document metadata (title, author, date,
 *   description), stored on the node as given.
 * @param extras - Optional extension data.
 * @returns A node of type `"RootNode"`. Its `position` key is always present
 *   and set to `undefined`.
 */
export function createGLOSTRootNode(
  lang: LanguageCode,
  script: ScriptSystem,
  children: GLOSTParagraph[] = [],
  metadata?: {
    title?: string;
    author?: string;
    date?: string;
    description?: string;
  },
  extras?: GLOSTExtras,
): GLOSTRoot {
  const rootNode: GLOSTRoot = {
    type: "RootNode",
    lang,
    script,
    metadata,
    children,
    // No position is known at construction time; the key is still emitted.
    position: undefined,
    extras,
  };
  return rootNode;
}
108
+
109
+ // ============================================================================
110
+ // Helper Functions for Common Patterns
111
+ // ============================================================================
112
+
/**
 * Convenience factory: build a word node carrying one transcription entry
 * and a part-of-speech tag.
 *
 * @param text - Surface text of the word. Also used as the entry's single
 *   syllable.
 * @param transliteration - Transcribed form stored as the entry's `text`.
 * @param system - Name of the transcription system (defaults to `"ipa"`).
 *   Used both as the key in the transcription record and as the entry's
 *   `system` value.
 * @param partOfSpeech - Part-of-speech label (defaults to `"unknown"`).
 * @param level - Linguistic level forwarded to `createGLOSTWordNode`
 *   (defaults to `"word"`).
 * @returns The word node produced by `createGLOSTWordNode`, with no language,
 *   script, or extras set.
 */
export function createSimpleWord(
  text: string,
  transliteration: string,
  system: string = "ipa",
  partOfSpeech: string = "unknown",
  level: LinguisticLevel = "word",
): GLOSTWord {
  const entry = {
    text: transliteration,
    // `system` is an arbitrary string here, so the cast bypasses whatever
    // narrower type the transcription entry declares.
    system: system as any,
    syllables: [text],
  };
  const transcription: TransliterationData = { [system]: entry };
  const metadata: LinguisticMetadata = { partOfSpeech };

  return createGLOSTWordNode(text, transcription, metadata, level);
}
137
+
/**
 * Convenience factory: build a Thai word node with an RTGS transcription.
 *
 * The node is created at level `"word"` with language `"th"` and script
 * `"thai"`.
 *
 * @param text - Thai surface text of the word.
 * @param rtgs - RTGS romanization stored as the entry's `text`.
 * @param partOfSpeech - Part-of-speech label (defaults to `"unknown"`).
 * @param tone - Optional tone number stored on the RTGS entry.
 * @param syllables - Optional syllable breakdown; when omitted, the whole
 *   `text` is used as a single syllable.
 * @returns The word node produced by `createGLOSTWordNode`.
 */
export function createThaiWord(
  text: string,
  rtgs: string,
  partOfSpeech: string = "unknown",
  tone?: number,
  syllables?: string[],
): GLOSTWord {
  const syllableList = syllables ? syllables : [text];

  const transcription: TransliterationData = {
    rtgs: { text: rtgs, system: "rtgs", tone, syllables: syllableList },
  };
  const metadata: LinguisticMetadata = { partOfSpeech };

  return createGLOSTWordNode(text, transcription, metadata, "word", "th", "thai");
}
170
+
/**
 * Convenience factory: build a Japanese word node with a romaji
 * transcription and, optionally, a furigana one.
 *
 * The node is created at level `"word"` with language `"ja"` and script
 * `"mixed"`. Each transcription entry uses the whole `text` as its single
 * syllable.
 *
 * @param text - Japanese surface text of the word.
 * @param romaji - Romaji reading stored under the `romaji` key.
 * @param partOfSpeech - Part-of-speech label (defaults to `"unknown"`).
 * @param furigana - Optional furigana reading; stored under the `furigana`
 *   key only when it is a non-empty string.
 * @returns The word node produced by `createGLOSTWordNode`.
 */
export function createJapaneseWord(
  text: string,
  romaji: string,
  partOfSpeech: string = "unknown",
  furigana?: string,
): GLOSTWord {
  const transcription: TransliterationData = {
    romaji: { text: romaji, system: "romaji", syllables: [text] },
  };

  // An empty-string reading is treated the same as no reading at all.
  if (furigana) {
    transcription.furigana = {
      text: furigana,
      system: "furigana",
      syllables: [text],
    };
  }

  const metadata: LinguisticMetadata = { partOfSpeech };

  return createGLOSTWordNode(text, transcription, metadata, "word", "ja", "mixed");
}
209
+
/**
 * Build a sentence node from an array of word nodes.
 *
 * When `originalText` is missing or empty, the sentence text is derived by
 * concatenating, in order and without separators, the values of every
 * `TextNode` child of every word. Words without a `TextNode` child
 * contribute an empty string.
 *
 * @param words - Word nodes that become the sentence's children.
 * @param lang - Language code of the sentence.
 * @param script - Script system of the sentence.
 * @param originalText - Optional explicit sentence text; takes precedence
 *   over the derived text when non-empty.
 * @returns The sentence node produced by `createGLOSTSentenceNode`, with no
 *   transcription or extras set.
 */
export function createSentenceFromWords(
  words: GLOSTWord[],
  lang: LanguageCode,
  script: ScriptSystem,
  originalText?: string,
): GLOSTSentence {
  // `||` means an empty `originalText` also falls back to the derived text.
  const text =
    originalText ||
    words
      .map((word) =>
        // Collect every TextNode child, not just the first, so a word split
        // across several text children is not truncated.
        word.children
          .filter((child) => child.type === "TextNode")
          .map((child) => (child as GLOSTText).value)
          .join(""),
      )
      .join("");

  return createGLOSTSentenceNode(text, lang, script, words);
}
232
+
/**
 * Wrap an array of sentence nodes in a paragraph node.
 *
 * @param sentences - Sentence nodes that become the paragraph's children.
 * @returns The paragraph node produced by `createGLOSTParagraphNode`, with
 *   no extras set.
 */
export function createParagraphFromSentences(
  sentences: GLOSTSentence[],
): GLOSTParagraph {
  const paragraph = createGLOSTParagraphNode(sentences);
  return paragraph;
}
241
+
/**
 * Wrap an array of paragraph nodes in a root (document) node.
 *
 * Note the argument order: paragraphs come first here, whereas
 * `createGLOSTRootNode` takes them after `lang` and `script`.
 *
 * @param paragraphs - Paragraph nodes that become the document's children.
 * @param lang - Language code of the document.
 * @param script - Script system of the document.
 * @param metadata - Optional document metadata (title, author, date,
 *   description).
 * @returns The root node produced by `createGLOSTRootNode`, with no extras
 *   set.
 */
export function createDocumentFromParagraphs(
  paragraphs: GLOSTParagraph[],
  lang: LanguageCode,
  script: ScriptSystem,
  metadata?: {
    title?: string;
    author?: string;
    date?: string;
    description?: string;
  },
): GLOSTRoot {
  const root = createGLOSTRootNode(lang, script, paragraphs, metadata);
  return root;
}
258
+
259
+ // ============================================================================
260
+ // NLCST Node Factory Functions
261
+ // ============================================================================
262
+
/**
 * Build a GLOST punctuation node.
 *
 * @param value - The punctuation text, stored on the node as given.
 * @returns A node of type `"PunctuationNode"`.
 */
export function createGLOSTPunctuationNode(value: string): GLOSTPunctuation {
  const node: GLOSTPunctuation = { type: "PunctuationNode", value };
  return node;
}
272
+
/**
 * Build a GLOST whitespace node.
 *
 * @param value - The whitespace text, stored on the node as given.
 * @returns A node of type `"WhiteSpaceNode"`.
 */
export function createGLOSTWhiteSpaceNode(value: string): GLOSTWhiteSpace {
  const node: GLOSTWhiteSpace = { type: "WhiteSpaceNode", value };
  return node;
}
282
+
/**
 * Build a GLOST symbol node.
 *
 * @param value - The symbol text, stored on the node as given.
 * @returns A node of type `"SymbolNode"`.
 */
export function createGLOSTSymbolNode(value: string): GLOSTSymbol {
  const node: GLOSTSymbol = { type: "SymbolNode", value };
  return node;
}
292
+
/**
 * Build a GLOST text node.
 *
 * @param value - The text content, stored on the node as given.
 * @returns A node of type `"TextNode"`.
 */
export function createGLOSTTextNode(value: string): GLOSTText {
  const node: GLOSTText = { type: "TextNode", value };
  return node;
}