glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
package/src/nodes.ts ADDED
@@ -0,0 +1,409 @@
1
+ import type {
2
+ LanguageCode,
3
+ LinguisticMetadata,
4
+ GLOSTExtras,
5
+ GLOSTParagraph,
6
+ GLOSTPunctuation,
7
+ GLOSTRoot,
8
+ GLOSTSentence,
9
+ GLOSTSymbol,
10
+ GLOSTText,
11
+ GLOSTWhiteSpace,
12
+ GLOSTWord,
13
+ ScriptSystem,
14
+ TransliterationData,
15
+ } from "./types.js";
16
+
17
+ // ============================================================================
18
+ // Options Interfaces
19
+ // ============================================================================
20
+
/**
 * Input accepted by `createGLOSTWordNode`.
 *
 * Only `value` is required. Every other field is copied onto the resulting
 * word node unchanged, so an omitted field ends up as `undefined` on the node.
 */
export interface CreateWordNodeOptions {
  /** Surface text of the word; it is wrapped in the word's text child. */
  value: string;

  /** Language code (ISO-639-1, ISO-639-3, or BCP-47). */
  lang?: LanguageCode;
  /** Script system the word is written in. */
  script?: ScriptSystem;

  /** Transcription data (IPA, romanization, etc.). */
  transcription?: TransliterationData;
  /** Linguistic metadata (part of speech, etc.). */
  metadata?: LinguisticMetadata;

  /** Additional extension data. */
  extras?: GLOSTExtras;
}
38
+
/**
 * Input accepted by `createGLOSTSentenceNode`.
 *
 * `originalText`, `lang` and `script` are required; `children` falls back to
 * an empty array inside the factory when it is left out.
 */
export interface CreateSentenceNodeOptions {
  /** Language code of the sentence. */
  lang: LanguageCode;
  /** Script system the sentence is written in. */
  script: ScriptSystem;
  /** The original, unsegmented text of the sentence. */
  originalText: string;

  /** Word nodes that make up the sentence. */
  children?: GLOSTWord[];
  /** Sentence-level transcription data. */
  transcription?: TransliterationData;
  /** Additional extension data. */
  extras?: GLOSTExtras;
}
56
+
/**
 * Input accepted by `createGLOSTRootNode`.
 *
 * `lang` and `script` are required; `children` falls back to an empty array
 * inside the factory when it is left out.
 */
export interface CreateRootNodeOptions {
  /** Language code of the document. */
  lang: LanguageCode;
  /** Script system the document is written in. */
  script: ScriptSystem;

  /** Paragraph nodes that make up the document. */
  children?: GLOSTParagraph[];

  /** Descriptive document metadata; every field is optional. */
  metadata?: {
    title?: string;
    author?: string;
    date?: string;
    description?: string;
  };

  /** Additional extension data. */
  extras?: GLOSTExtras;
}
77
+
/**
 * Input accepted by `createSimpleWord`.
 */
export interface CreateSimpleWordOptions {
  /** The text value of the word. */
  text: string;
  /** Transliteration text stored under the chosen transcription system. */
  transliteration: string;
  /** Transcription system used as the transcription key (default: "ipa"). */
  system?: string;
  /** Part of speech recorded in the word metadata (default: "unknown"). */
  partOfSpeech?: string;
}
91
+
92
+
93
+ // ============================================================================
94
+ // Node Factory Functions
95
+ // ============================================================================
96
+
/**
 * Build a GLOST word node from an options object.
 *
 * The word's `value` is wrapped in a single text node (via
 * `createGLOSTTextNode`) that becomes the only child. All other options are
 * copied onto the node as-is, so omitted ones appear as `undefined`.
 *
 * @param options - Word text plus optional language, script, transcription,
 *   metadata and extension data.
 * @returns The newly created word node.
 *
 * @example
 * ```typescript
 * const word = createGLOSTWordNode({
 *   value: "hello",
 *   transcription: { ipa: { text: "həˈloʊ", system: "ipa" } },
 *   metadata: { partOfSpeech: "interjection" },
 *   lang: "en",
 *   script: "latin"
 * });
 * ```
 */
export function createGLOSTWordNode(options: CreateWordNodeOptions): GLOSTWord {
  const textChild = createGLOSTTextNode(options.value);

  const wordNode: GLOSTWord = {
    type: "WordNode",
    lang: options.lang,
    script: options.script,
    transcription: options.transcription,
    metadata: options.metadata,
    extras: options.extras,
    children: [textChild],
  };
  return wordNode;
}
123
+
/**
 * Build a GLOST sentence node from an options object.
 *
 * @param options - Sentence text, language and script, plus optional word
 *   children, transcription and extension data.
 * @returns The newly created sentence node. `children` is an empty array when
 *   the option is omitted; the supplied array is used by reference otherwise.
 *
 * @example
 * ```typescript
 * const sentence = createGLOSTSentenceNode({
 *   originalText: "Hello world",
 *   lang: "en",
 *   script: "latin",
 *   children: [wordNode1, wordNode2]
 * });
 * ```
 */
export function createGLOSTSentenceNode(
  options: CreateSentenceNodeOptions,
): GLOSTSentence {
  // Only a missing (undefined) `children` triggers the empty-array default.
  const wordNodes = options.children === undefined ? [] : options.children;

  const sentenceNode: GLOSTSentence = {
    type: "SentenceNode",
    originalText: options.originalText,
    lang: options.lang,
    script: options.script,
    transcription: options.transcription,
    children: wordNodes,
    extras: options.extras,
  };
  return sentenceNode;
}
158
+
/**
 * Build a GLOST paragraph node.
 *
 * @param children - Sentence nodes of the paragraph (defaults to an empty array).
 * @param extras - Optional extension data stored on the node.
 * @returns The newly created paragraph node; its `position` is explicitly
 *   set to `undefined`.
 */
export function createGLOSTParagraphNode(
  children: GLOSTSentence[] = [],
  extras?: GLOSTExtras,
): GLOSTParagraph {
  const paragraphNode: GLOSTParagraph = {
    type: "ParagraphNode",
    children,
    position: undefined,
    extras,
  };
  return paragraphNode;
}
173
+
/**
 * Build a GLOST root (document) node from an options object.
 *
 * @param options - Document language and script, plus optional paragraph
 *   children, document metadata and extension data.
 * @returns The newly created root node. `children` is an empty array when the
 *   option is omitted, and `position` is explicitly set to `undefined`.
 *
 * @example
 * ```typescript
 * const root = createGLOSTRootNode({
 *   lang: "en",
 *   script: "latin",
 *   children: [paragraphNode],
 *   metadata: { title: "My Document" }
 * });
 * ```
 */
export function createGLOSTRootNode(options: CreateRootNodeOptions): GLOSTRoot {
  // Only a missing (undefined) `children` triggers the empty-array default.
  const paragraphs = options.children === undefined ? [] : options.children;

  const rootNode: GLOSTRoot = {
    type: "RootNode",
    lang: options.lang,
    script: options.script,
    metadata: options.metadata,
    children: paragraphs,
    position: undefined,
    extras: options.extras,
  };
  return rootNode;
}
199
+
200
+ // ============================================================================
201
+ // Helper Functions for Common Patterns
202
+ // ============================================================================
203
+
/**
 * Convenience factory: a word node carrying one transcription entry and a
 * part-of-speech tag.
 *
 * The transcription is keyed by `system` (default "ipa"); its entry holds the
 * transliteration text and a single-element `syllables` array containing the
 * word text. No language or script is set on the resulting node.
 *
 * @param options - Word text, transliteration, and optional system /
 *   part of speech (default "unknown").
 * @returns The word node produced by `createGLOSTWordNode`.
 *
 * @example
 * ```typescript
 * const word = createSimpleWord({
 *   text: "hello",
 *   transliteration: "həˈloʊ",
 *   system: "ipa",
 *   partOfSpeech: "interjection"
 * });
 * ```
 */
export function createSimpleWord(options: CreateSimpleWordOptions): GLOSTWord {
  const systemKey = options.system === undefined ? "ipa" : options.system;
  const pos =
    options.partOfSpeech === undefined ? "unknown" : options.partOfSpeech;

  const entry = {
    text: options.transliteration,
    syllables: [options.text],
  };
  const transcription: TransliterationData = { [systemKey]: entry };
  const metadata: LinguisticMetadata = { partOfSpeech: pos };

  return createGLOSTWordNode({
    value: options.text,
    transcription,
    metadata,
  });
}
233
+
234
+
/**
 * Create a sentence node from an array of word nodes.
 *
 * When `originalText` is missing or an empty string, the sentence text is
 * derived from the words: the value of each word's first text child (or ""
 * when a word has none) is concatenated with no separator.
 *
 * @param words - Word nodes that become the sentence's children.
 * @param lang - Language code of the sentence.
 * @param script - Script system of the sentence.
 * @param originalText - Original sentence text; derived from `words` when falsy.
 * @returns The sentence node produced by `createGLOSTSentenceNode`.
 */
export function createSentenceFromWords(
  words: GLOSTWord[],
  lang: LanguageCode,
  script: ScriptSystem,
  originalText?: string,
): GLOSTSentence {
  // Value of the first text child of a word, or "" when there is none.
  const firstTextOf = (word: GLOSTWord): string => {
    for (const child of word.children) {
      if (child.type === "TextNode") {
        return (child as GLOSTText).value;
      }
    }
    return "";
  };

  let sentenceText = originalText;
  if (!sentenceText) {
    sentenceText = words.map((word) => firstTextOf(word)).join("");
  }

  return createGLOSTSentenceNode({
    originalText: sentenceText,
    lang,
    script,
    children: words,
  });
}
257
+
/**
 * Wrap an array of sentence nodes in a paragraph node.
 *
 * Thin wrapper around `createGLOSTParagraphNode` that passes no extras.
 *
 * @param sentences - Sentence nodes that become the paragraph's children.
 * @returns The newly created paragraph node.
 */
export function createParagraphFromSentences(
  sentences: GLOSTSentence[],
): GLOSTParagraph {
  const paragraph = createGLOSTParagraphNode(sentences);
  return paragraph;
}
266
+
/**
 * Wrap an array of paragraph nodes in a root (document) node.
 *
 * @param paragraphs - Paragraph nodes that become the document's children.
 * @param lang - Language code of the document.
 * @param script - Script system of the document.
 * @param metadata - Optional document metadata.
 * @returns The root node produced by `createGLOSTRootNode`.
 */
export function createDocumentFromParagraphs(
  paragraphs: GLOSTParagraph[],
  lang: LanguageCode,
  script: ScriptSystem,
  metadata?: {
    title?: string;
    author?: string;
    date?: string;
    description?: string;
  },
): GLOSTRoot {
  const document = createGLOSTRootNode({
    lang,
    script,
    children: paragraphs,
    metadata,
  });
  return document;
}
283
+
/**
 * Create a document from an array of sentences (simplified helper).
 *
 * All sentences are placed in one paragraph, which becomes the document's
 * only child. Useful when explicit paragraph structure is not needed.
 *
 * @param sentences - Array of sentences
 * @param lang - Language code
 * @param script - Script system (required; it is passed through unchanged)
 * @param metadata - Optional document metadata
 * @returns GLOST root document
 *
 * @example
 * ```typescript
 * const sentences = [
 *   createSentenceFromWords([word1, word2], "th", "thai", "สวัสดี"),
 *   createSentenceFromWords([word3, word4], "th", "thai", "ขอบคุณ")
 * ];
 * const doc = createDocumentFromSentences(sentences, "th", "thai");
 * ```
 */
export function createDocumentFromSentences(
  sentences: GLOSTSentence[],
  lang: LanguageCode,
  script: ScriptSystem,
  metadata?: {
    title?: string;
    author?: string;
    date?: string;
    description?: string;
  },
): GLOSTRoot {
  const paragraphs = [createParagraphFromSentences(sentences)];
  return createDocumentFromParagraphs(paragraphs, lang, script, metadata);
}
319
+
/**
 * Create a simple document from an array of words (simplified helper).
 *
 * The words are wrapped in one sentence, the sentence in one paragraph, and
 * the paragraph in a root node. Handy for tests, quick prototypes, and other
 * simple use cases.
 *
 * @param words - Array of word nodes
 * @param lang - Language code
 * @param script - Script system (required; it is passed through unchanged)
 * @param options - Optional configuration
 * @param options.sentenceText - Original text of the sentence (derived from
 *   the words when not provided)
 * @param options.metadata - Optional document metadata
 * @returns GLOST root document
 *
 * @example
 * ```typescript
 * const words = [
 *   createSimpleWord({ text: "สวัสดี", transliteration: "sawatdi" }),
 *   createSimpleWord({ text: "ครับ", transliteration: "khrap" })
 * ];
 * const doc = createSimpleDocument(words, "th", "thai", {
 *   sentenceText: "สวัสดีครับ"
 * });
 * ```
 */
export function createSimpleDocument(
  words: GLOSTWord[],
  lang: LanguageCode,
  script: ScriptSystem,
  options?: {
    sentenceText?: string;
    metadata?: {
      title?: string;
      author?: string;
      date?: string;
      description?: string;
    };
  },
): GLOSTRoot {
  const { sentenceText, metadata } = options ?? {};

  const sentence = createSentenceFromWords(words, lang, script, sentenceText);
  const paragraphs = [createParagraphFromSentences([sentence])];

  return createDocumentFromParagraphs(paragraphs, lang, script, metadata);
}
366
+
367
+ // ============================================================================
368
+ // NLCST Node Factory Functions
369
+ // ============================================================================
370
+
/**
 * Build a GLOST punctuation node.
 *
 * @param value - The punctuation text stored on the node.
 * @returns A node of type "PunctuationNode" holding `value`.
 */
export function createGLOSTPunctuationNode(value: string): GLOSTPunctuation {
  const punctuation: GLOSTPunctuation = { type: "PunctuationNode", value };
  return punctuation;
}
380
+
/**
 * Build a GLOST whitespace node.
 *
 * @param value - The whitespace text stored on the node.
 * @returns A node of type "WhiteSpaceNode" holding `value`.
 */
export function createGLOSTWhiteSpaceNode(value: string): GLOSTWhiteSpace {
  const whiteSpace: GLOSTWhiteSpace = { type: "WhiteSpaceNode", value };
  return whiteSpace;
}
390
+
/**
 * Build a GLOST symbol node.
 *
 * @param value - The symbol text stored on the node.
 * @returns A node of type "SymbolNode" holding `value`.
 */
export function createGLOSTSymbolNode(value: string): GLOSTSymbol {
  const symbol: GLOSTSymbol = { type: "SymbolNode", value };
  return symbol;
}
400
+
/**
 * Build a GLOST text node.
 *
 * @param value - The text stored on the node.
 * @returns A node of type "TextNode" holding `value`.
 */
export function createGLOSTTextNode(value: string): GLOSTText {
  const textNode: GLOSTText = { type: "TextNode", value };
  return textNode;
}