glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
package/src/types.ts ADDED
@@ -0,0 +1,633 @@
1
+ import type { Literal as NlcstLiteral, Paragraph as NlcstParagraph, Punctuation as NlcstPunctuation, Root as NlcstRoot, Sentence as NlcstSentence, Source as NlcstSource, Symbol as NlcstSymbol, Text as NlcstText, WhiteSpace as NlcstWhiteSpace, Word as NlcstWord } from "nlcst";
2
+
3
+ // ============================================================================
4
+ // Node Type Constants
5
+ // ============================================================================
6
+
7
+ /**
8
+ * Standard GLOST node type constants: each key maps to the string used in a node's `type` field
9
+ *
10
+ * Use these constants instead of string literals for type checking to prevent
11
+ * typos and enable autocomplete. The object is declared `as const`, so every value keeps its string-literal type.
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { NODE_TYPES } from "glost";
16
+ *
17
+ * if (node.type === NODE_TYPES.WORD) {
18
+ * // Handle word node with autocomplete and type safety
19
+ * }
20
+ * ```
21
+ */
22
+ export const NODE_TYPES = {
23
+ /** Root document node */
24
+ ROOT: "RootNode",
25
+ /** Paragraph node */
26
+ PARAGRAPH: "ParagraphNode",
27
+ /** Sentence node */
28
+ SENTENCE: "SentenceNode",
29
+ /** Word node */
30
+ WORD: "WordNode",
31
+ /** Text node (leaf node containing actual text) */
32
+ TEXT: "TextNode",
33
+ /** Whitespace node */
34
+ WHITESPACE: "WhiteSpaceNode",
35
+ /** Punctuation node */
36
+ PUNCTUATION: "PunctuationNode",
37
+ /** Symbol node */
38
+ SYMBOL: "SymbolNode",
39
+ /** Source node */
40
+ SOURCE: "SourceNode",
41
+ /** Clause node (created by transformers) */
42
+ CLAUSE: "ClauseNode",
43
+ /** Phrase node (created by transformers) */
44
+ PHRASE: "PhraseNode",
45
+ /** Syllable node (see GLOSTSyllable) */
46
+ SYLLABLE: "SyllableNode",
47
+ /** Character node (see GLOSTCharacter) */
48
+ CHARACTER: "CharacterNode",
49
+ } as const;
50
+
51
+ /**
52
+ * Union of the string-literal values of NODE_TYPES (for example "WordNode"), i.e. any standard GLOST node type string
53
+ */
54
+ export type NodeType = typeof NODE_TYPES[keyof typeof NODE_TYPES];
55
+
56
+
57
+
58
+
59
+ // ============================================================================
60
+ // Core GLOST Types
61
+ // ============================================================================
62
+
63
+ /**
64
+ * Linguistic level of a text segment, listed from the smallest unit (character) to the largest (paragraph)
65
+ */
66
+ export type LinguisticLevel =
67
+ | "character"
68
+ | "syllable"
69
+ | "word"
70
+ | "phrase"
71
+ | "sentence"
72
+ | "paragraph";
73
+
74
+ /**
75
+ * Context in which a pronunciation variant is used (the type of PronunciationVariant.context); a closed set of labels
76
+ */
77
+ export type PronunciationContext =
78
+ | "formal"
79
+ | "informal"
80
+ | "historical"
81
+ | "regional"
82
+ | "dialectal";
83
+
84
+ /**
85
+ * Transcription system identifiers. Because the union ends in `string`, any custom name is accepted and the compiler treats the whole type as plain `string`; the named literals document the well-known systems.
86
+ */
87
+ export type TranscriptionSystem =
88
+ | "rtgs" // Royal Thai General System
89
+ | "aua" // AUA (American University Alumni)
90
+ | "paiboon" // Paiboon system
91
+ | "romaji" // Japanese romanization
92
+ | "furigana" // Japanese furigana
93
+ | "ipa" // International Phonetic Alphabet
94
+ | "pinyin" // Chinese pinyin
95
+ | "hangul" // Korean hangul
96
+ | string; // Allow custom systems (this member widens the union to string)
97
+
98
+ /**
98
+ * Language codes following BCP-47 format (RFC 5646)
99
+ * Format: language[-script][-region][-variant]
100
+ * Examples: th-TH, ja-JP, zh-CN, ko-KR, en-US, fr-FR, de-DE. The final `string` member accepts any other tag, so the compiler treats this type as plain `string`.
101
+ */
102
+ export type LanguageCode =
103
+ // Thai
104
+ | "th-TH" // Thai (Thailand)
105
+ | "th" // Thai (generic)
106
+
107
+ // Japanese
108
+ | "ja-JP" // Japanese (Japan)
109
+ | "ja" // Japanese (generic)
110
+
111
+ // Chinese
112
+ | "zh-CN" // Chinese (Simplified, China)
113
+ | "zh-TW" // Chinese (Traditional, Taiwan)
114
+ | "zh-HK" // Chinese (Hong Kong)
115
+ | "zh" // Chinese (generic)
116
+
117
+ // Korean
118
+ | "ko-KR" // Korean (South Korea)
119
+ | "ko-KP" // Korean (North Korea)
120
+ | "ko" // Korean (generic)
121
+
122
+ // English
123
+ | "en-US" // English (United States)
124
+ | "en-GB" // English (United Kingdom)
125
+ | "en-CA" // English (Canada)
126
+ | "en-AU" // English (Australia)
127
+ | "en" // English (generic)
128
+
129
+ // French
130
+ | "fr-FR" // French (France)
131
+ | "fr-CA" // French (Canada)
132
+ | "fr-BE" // French (Belgium)
133
+ | "fr" // French (generic)
134
+
135
+ // German
136
+ | "de-DE" // German (Germany)
137
+ | "de-AT" // German (Austria)
138
+ | "de-CH" // German (Switzerland)
139
+ | "de" // German (generic)
140
+
141
+ // Spanish
142
+ | "es-ES" // Spanish (Spain)
143
+ | "es-MX" // Spanish (Mexico)
144
+ | "es-AR" // Spanish (Argentina)
145
+ | "es" // Spanish (generic)
146
+
147
+ // Italian
148
+ | "it-IT" // Italian (Italy)
149
+ | "it-CH" // Italian (Switzerland)
150
+ | "it" // Italian (generic)
151
+
152
+ // Portuguese
153
+ | "pt-PT" // Portuguese (Portugal)
154
+ | "pt-BR" // Portuguese (Brazil)
155
+ | "pt" // Portuguese (generic)
156
+
157
+ // Russian
158
+ | "ru-RU" // Russian (Russia)
159
+ | "ru" // Russian (generic)
160
+
161
+ // Arabic
162
+ | "ar-SA" // Arabic (Saudi Arabia)
163
+ | "ar-EG" // Arabic (Egypt)
164
+ | "ar" // Arabic (generic)
165
+
166
+ // Hindi
167
+ | "hi-IN" // Hindi (India)
168
+ | "hi" // Hindi (generic)
169
+
170
+ // Allow custom BCP-47 language tags (the type does not validate the tag format; any string is accepted)
171
+ | string;
173
+
174
+ /**
175
+ * Script system identifiers. Not a closed set: the final `string` member accepts any other script name, so the compiler treats this type as plain `string`.
176
+ */
177
+ export type ScriptSystem =
178
+ | "thai" // Thai script
179
+ | "hiragana" // Japanese hiragana
180
+ | "katakana" // Japanese katakana
181
+ | "kanji" // Japanese/Chinese characters
182
+ | "hanzi" // Chinese characters
183
+ | "hangul" // Korean hangul
184
+ | "latin" // Latin alphabet
185
+ | "mixed" // Mixed scripts
186
+ | string; // Allow other scripts
187
+
188
+ // ============================================================================
189
+ // Extras Field Types for i18n and Extensions
190
+ // ============================================================================
191
+
192
+ /**
193
+ * Quick translations keyed by BCP-47 language tag; each value is the translated string and every key is optional
194
+ */
195
+ export type QuickTranslations = {
196
+ /** English translations */
197
+ "en-US"?: string; // English (United States)
198
+ "en-GB"?: string; // English (United Kingdom)
199
+ "en"?: string; // English (generic)
200
+
201
+ /** Thai translations */
202
+ "th-TH"?: string; // Thai (Thailand)
203
+ "th"?: string; // Thai (generic)
204
+
205
+ /** Japanese translations */
206
+ "ja-JP"?: string; // Japanese (Japan)
207
+ "ja"?: string; // Japanese (generic)
208
+
209
+ /** Chinese translations */
210
+ "zh-CN"?: string; // Chinese (Simplified, China)
211
+ "zh-TW"?: string; // Chinese (Traditional, Taiwan)
212
+ "zh"?: string; // Chinese (generic)
213
+
214
+ /** Korean translations */
215
+ "ko-KR"?: string; // Korean (South Korea)
216
+ "ko"?: string; // Korean (generic)
217
+
218
+ /** French translations */
219
+ "fr-FR"?: string; // French (France)
220
+ "fr-CA"?: string; // French (Canada)
221
+ "fr"?: string; // French (generic)
222
+
223
+ /** German translations */
224
+ "de-DE"?: string; // German (Germany)
225
+ "de"?: string; // German (generic)
226
+
227
+ /** Spanish translations */
228
+ "es-ES"?: string; // Spanish (Spain)
229
+ "es-MX"?: string; // Spanish (Mexico)
230
+ "es"?: string; // Spanish (generic)
231
+
232
+ /** Translations for any other language; keys are meant to be BCP-47 tags but the type accepts any string key */
233
+ [lang: string]: string | undefined;
234
+ };
235
+
236
+ /**
237
+ * Extended metadata for enhanced functionality (the type of GLOSTExtras.metadata); every field is optional
238
+ */
239
+ export type ExtendedMetadata = {
240
+ /** Quick translations in multiple languages */
241
+ translations?: QuickTranslations;
242
+ /** Difficulty level for learners: a named level, a number from 1 to 5, or any other string */
243
+ difficulty?: "beginner" | "intermediate" | "advanced" | 1 | 2 | 3 | 4 | 5 | string;
244
+ /** Frequency in common usage */
245
+ frequency?: "rare" | "uncommon" | "common" | "very-common";
246
+ /** Cultural notes */
247
+ culturalNotes?: string;
248
+ /** Related words or concepts */
249
+ related?: string[];
250
+ /** Example sentences */
251
+ examples?: string[];
252
+ /** Custom extensions (values are typed `any`, so extra keys are not type-checked) */
253
+ [key: string]: any;
254
+ };
255
+
256
+ /**
257
+ * Shape of the optional `extras` field that GLOST nodes carry for extension data
258
+ *
259
+ * This interface can be augmented by extension packages via declaration merging.
260
+ *
261
+ * @example
262
+ * ```typescript
263
+ * // In an extension package
264
+ * declare module "glost" {
265
+ * interface GLOSTExtras {
266
+ * frequency?: {
267
+ * rank: number;
268
+ * category: "very-common" | "common" | "uncommon" | "rare";
269
+ * };
270
+ * }
271
+ * }
272
+ * ```
273
+ */
274
+ export interface GLOSTExtras {
275
+ /** Quick translations */
276
+ translations?: QuickTranslations;
277
+ /** Extended metadata */
278
+ metadata?: ExtendedMetadata;
279
+ /** Custom extensions - allows any string key; values are `unknown` and must be narrowed before use */
280
+ [key: string]: unknown;
281
+ }
282
+
283
+ // ============================================================================
284
+ // Transcription and Pronunciation Types
285
+ // ============================================================================
286
+
287
+ /**
288
+ * One alternative pronunciation of a text segment (an element of TranscriptionInfo.variants)
289
+ */
290
+ export type PronunciationVariant = {
291
+ /** The variant text in the transcription system */
292
+ text: string;
293
+ /** Context where this variant is used (required) */
294
+ context: PronunciationContext;
295
+ /** Additional notes about this variant */
296
+ notes?: string;
297
+ };
298
+
299
+ /**
300
+ * Transcription information for a text segment in a single transcription system
301
+ *
302
+ * Note: The transcription system is not stored in this object.
303
+ * It is the key in the TransliterationData record.
304
+ */
305
+ export type TranscriptionInfo = {
306
+ /** The transcription text (the only required field) */
307
+ text: string;
308
+ /** Pronunciation variants */
309
+ variants?: PronunciationVariant[];
310
+ /** Tone information (for tonal languages); the numbering scheme is not defined by this type */
311
+ tone?: number;
312
+ /** Syllable breakdown, one string per syllable */
313
+ syllables?: string[];
314
+ /** Additional phonetic information */
315
+ phonetic?: string;
316
+ };
317
+
318
+ /**
319
+ * Transliteration data for a text segment: a record from transcription system name to that system's TranscriptionInfo
320
+ */
321
+ export type TransliterationData = {
322
+ /** Map of transcription systems to their data (keys are plain strings) */
323
+ [system: string]: TranscriptionInfo;
324
+ };
325
+
326
+ // ============================================================================
327
+ // Linguistic Metadata Types
328
+ // ============================================================================
329
+
330
+ /**
331
+ * Linguistic metadata for a text segment (used as GLOSTWord.metadata)
332
+ */
333
+ export type LinguisticMetadata = {
334
+ /** @deprecated Use extras.translations instead */
335
+ meaning?: string;
336
+ /** Part of speech (the only required field of this type) */
337
+ partOfSpeech: string;
338
+ /** Usage notes */
339
+ usage?: string;
340
+ /** Etymology information */
341
+ etymology?: string;
342
+ /** Example usage */
343
+ examples?: string[];
344
+ /** Frequency information (a three-step scale; ExtendedMetadata.frequency uses different labels) */
345
+ frequency?: "high" | "medium" | "low";
346
+ /** Formality level */
347
+ formality?: "formal" | "neutral" | "informal";
348
+ /** Register (academic, colloquial, etc.) */
349
+ register?: string;
350
+ /** @deprecated Use extras.translations instead */
351
+ shortDefinition?: string;
352
+ /** @deprecated Use extras.translations instead */
353
+ fullDefinition?: string;
354
+ /** @deprecated Use metadata enrichment extensions instead */
355
+ difficulty?: "beginner" | "intermediate" | "advanced" | 1 | 2 | 3 | 4 | 5 | string;
356
+ };
357
+
358
+ // ============================================================================
359
+ // Extended Node Types
360
+ // ============================================================================
361
+
362
+ /**
363
+ * Union of the concrete GLOST node types declared in this file (GLOSTLiteral is not a member)
364
+ */
365
+ export type GLOSTNode =
366
+ | GLOSTWord
367
+ | GLOSTSentence
368
+ | GLOSTParagraph
369
+ | GLOSTRoot
370
+ | GLOSTText
371
+ | GLOSTSymbol
372
+ | GLOSTPunctuation
373
+ | GLOSTWhiteSpace
374
+ | GLOSTSource
375
+ // Transformer node types (clause, phrase, syllable, character)
376
+ | GLOSTClause
377
+ | GLOSTPhrase
378
+ | GLOSTSyllable
379
+ | GLOSTCharacter;
380
+
381
+ /**
382
+ * nlcst Literal extended with optional GLOST fields. NOTE(review): the literal node aliases in this file (GLOSTText, GLOSTSymbol, ...) are not built from this type - confirm whether that is intended.
383
+ */
384
+ export type GLOSTLiteral = NlcstLiteral & {
385
+ /** Language code for this node */
386
+ lang?: LanguageCode;
387
+ /** Script system used */
388
+ script?: ScriptSystem;
389
+ /** Linguistic level of this segment */
390
+ level?: LinguisticLevel;
391
+ /** Extras field for extensions */
392
+ extras?: GLOSTExtras;
393
+ };
394
+
395
+ /**
396
+ * GLOST Punctuation node: currently the nlcst Punctuation type unchanged (the empty `& {}` adds no fields)
397
+ */
398
+ export type GLOSTPunctuation = NlcstPunctuation & {};
399
+
400
+ /**
401
+ * GLOST WhiteSpace node: currently the nlcst WhiteSpace type unchanged (the empty `& {}` adds no fields)
402
+ */
403
+ export type GLOSTWhiteSpace = NlcstWhiteSpace & {};
404
+
405
+ /**
406
+ * GLOST Symbol node: currently the nlcst Symbol type unchanged (the empty `& {}` adds no fields)
407
+ */
408
+ export type GLOSTSymbol = NlcstSymbol & {};
409
+
410
+ /**
411
+ * GLOST Text node: currently the nlcst Text type unchanged (the intersected object type declares no fields)
412
+ */
413
+ export type GLOSTText = NlcstText & {
414
+ // Placeholder: character-level information could be added here later
415
+ };
416
+
417
+ /**
418
+ * GLOST Source node: currently the nlcst Source type unchanged (the intersected object type declares no fields)
419
+ */
420
+ export type GLOSTSource = NlcstSource & {
421
+
422
+ };
423
+
424
+ /**
425
+ * Extended word node with transcription support
426
+ * Takes the nlcst Word type minus its `children` and adds GLOST-specific properties plus a GLOST-typed `children`
427
+ */
428
+ export type GLOSTWord = Omit<NlcstWord, "children"> & {
429
+ /** Transcription data (optional - can be added by extensions) */
430
+ transcription?: TransliterationData;
431
+ /** Linguistic metadata (optional - can be added by extensions) */
432
+ metadata?: LinguisticMetadata;
433
+ /** @deprecated Use extras.translations instead */
434
+ shortDefinition?: string;
435
+ /** @deprecated Use extras.translations instead */
436
+ fullDefinition?: string;
437
+ /** @deprecated Use metadata enrichment extensions instead */
438
+ difficulty?: "beginner" | "intermediate" | "advanced" | 1 | 2 | 3 | 4 | 5 | string;
439
+ /** Language code for this node */
440
+ lang?: LanguageCode;
441
+ /** Script system used */
442
+ script?: ScriptSystem;
443
+ /** Linguistic level of this segment */
444
+ level?: LinguisticLevel;
445
+ /** Extras field for extensions */
446
+ extras?: GLOSTExtras;
447
+ /** Children nodes - expected to contain at least one Text node (not enforced by the type: an empty array still type-checks) */
448
+ children: GLOSTWordContent[];
449
+ };
450
+
451
+ /**
452
+ * Extended sentence node
453
+ * Takes the nlcst Sentence type minus its `children` and adds GLOST-specific properties; lang, script and originalText are required
454
+ */
455
+ export type GLOSTSentence = Omit<NlcstSentence, "children"> & {
456
+ /** Language of the sentence */
457
+ lang: LanguageCode;
458
+ /** Script system used */
459
+ script: ScriptSystem;
460
+ /** Original text */
461
+ originalText: string;
462
+ /** Transcription data for the entire sentence */
463
+ transcription?: TransliterationData;
464
+ /** Extras field for extensions */
465
+ extras?: GLOSTExtras;
466
+ /** Children nodes - nlcst-derived nodes plus GLOSTClause, which is not an nlcst type (see GLOSTSentenceContent) */
467
+ children: GLOSTSentenceContent[];
468
+ };
469
+
470
+ /**
471
+ * Extended paragraph node: the nlcst Paragraph type minus its `children`, plus optional GLOST fields and GLOST-typed children
472
+ */
473
+ export type GLOSTParagraph = Omit<NlcstParagraph, "children"> & {
474
+ /** Language of the paragraph */
475
+ lang?: LanguageCode;
476
+ /** Script system used */
477
+ script?: ScriptSystem;
478
+ /** Extras field for extensions */
479
+ extras?: GLOSTExtras;
480
+ /** Children nodes - must be nlcst-compliant (see GLOSTParagraphContent for the allowed types) */
481
+ children: GLOSTParagraphContent[];
482
+ };
483
+
484
+ /**
485
+ * Extended root node: the nlcst Root type minus its `children`, plus GLOST fields; lang and script are required
486
+ */
487
+ export type GLOSTRoot = Omit<NlcstRoot, "children"> & {
488
+ /** Primary language of the document */
489
+ lang: LanguageCode;
490
+ /** Primary script system */
491
+ script: ScriptSystem;
492
+ /** Extras field for extensions */
493
+ extras?: GLOSTExtras;
494
+ /** Document metadata (all fields optional plain strings) */
495
+ metadata?: {
496
+ title?: string;
497
+ author?: string;
498
+ date?: string;
499
+ description?: string;
500
+ };
501
+ /** Children nodes - must be nlcst-compliant (see GLOSTRootContent for the allowed types) */
502
+ children: GLOSTRootContent[];
503
+ };
504
+
505
+ // ============================================================================
506
+ // Transformer Node Types
507
+ // ============================================================================
508
+
509
+ /**
510
+ * Clause node - represents grammatical clauses within sentences (a standalone object type, not derived from an nlcst type)
511
+ * Created by ClauseSegmenterExtension transformer
512
+ */
513
+ export type GLOSTClause = {
514
+ type: "ClauseNode";
515
+ /** Type of clause (required) */
516
+ clauseType: "main" | "subordinate" | "relative" | "adverbial";
517
+ /** Children nodes - phrases, words, or punctuation */
518
+ children: (GLOSTPhrase | GLOSTWord | GLOSTPunctuation)[];
519
+ /** Language code for this clause */
520
+ lang?: LanguageCode;
521
+ /** Script system used */
522
+ script?: ScriptSystem;
523
+ /** Extras field for extensions, with clause-specific optional keys added */
524
+ extras?: GLOSTExtras & {
525
+ /** Whether this clause has been negated */
526
+ isNegated?: boolean;
527
+ /** Grammatical mood */
528
+ mood?: "declarative" | "interrogative" | "imperative" | "conditional";
529
+ /** Tense information */
530
+ tense?: string;
531
+ /** Original form before transformation */
532
+ originalForm?: string;
533
+ };
534
+ };
535
+
536
+ /**
537
+ * Phrase node - groups words into grammatical phrases (a standalone object type, not derived from an nlcst type)
538
+ * Created by PhraseSegmenterExtension transformer
539
+ */
540
+ export type GLOSTPhrase = {
541
+ type: "PhraseNode";
542
+ /** Type of phrase (required) */
543
+ phraseType: "noun" | "verb" | "prepositional" | "adjectival" | "adverbial";
544
+ /** Main word of the phrase (head), stored as a string rather than a reference to a child node */
545
+ headWord?: string;
546
+ /** Children nodes - words or punctuation */
547
+ children: (GLOSTWord | GLOSTPunctuation)[];
548
+ /** Language code for this phrase */
549
+ lang?: LanguageCode;
550
+ /** Script system used */
551
+ script?: ScriptSystem;
552
+ /** Extras field for extensions, with a phrase-specific optional key added */
553
+ extras?: GLOSTExtras & {
554
+ /** Grammatical role in the clause/sentence */
555
+ role?: "subject" | "object" | "complement" | "modifier";
556
+ };
557
+ };
558
+
559
+ /**
560
+ * Syllable node - represents phonological syllable structure (a standalone object type, not derived from an nlcst type)
561
+ * Created by SyllableSegmenterExtension transformer (language-specific)
562
+ */
563
+ export type GLOSTSyllable = {
564
+ type: "SyllableNode";
565
+ /** Syllable structure information (required; only `nucleus` is mandatory inside it) */
566
+ structure: {
567
+ /** Initial consonant(s) - Generic (e.g., "h" in "hello") */
568
+ onset?: string;
569
+ /** Vowel - Generic (e.g., "e" in "hello") */
570
+ nucleus: string;
571
+ /** Final consonant(s) - Generic (e.g., "l" in "hello") */
572
+ coda?: string;
573
+
574
+ // Thai-specific structure (optional, alongside the generic fields above)
575
+ /** Initial consonant (Thai: พยัญชนะต้น) */
576
+ Ci?: string;
577
+ /** Vowel (Thai: สระ) */
578
+ V?: string;
579
+ /** Final consonant (Thai: ตัวสะกด) */
580
+ Cf?: string;
581
+ /** Tone mark (Thai: วรรณยุกต์) */
582
+ T?: string;
583
+ };
584
+ /** Children nodes - individual characters */
585
+ children: GLOSTCharacter[];
586
+ /** Language code for this syllable */
587
+ lang?: LanguageCode;
588
+ /** Script system used */
589
+ script?: ScriptSystem;
590
+ /** Tone number (for tonal languages like Thai, Mandarin) */
591
+ tone?: number;
592
+ /** Stress level (for stress languages like English) */
593
+ stress?: "primary" | "secondary" | "unstressed";
594
+ /** Extras field for extensions */
595
+ extras?: GLOSTExtras;
596
+ };
597
+
598
+ /**
599
+ * Character node - represents individual characters with linguistic roles (a leaf type: it has `value` and no `children`)
600
+ * Created by SyllableSegmenterExtension or CharacterSegmenterExtension
601
+ */
602
+ export type GLOSTCharacter = {
603
+ type: "CharacterNode";
604
+ /** The character value (intended to be a single character; the type only requires a string) */
605
+ value: string;
606
+ /** Linguistic role of the character */
607
+ role?: "consonant" | "vowel" | "tone" | "diacritic" | "modifier";
608
+ /** Placement in the syllable/word (renamed from 'position' to avoid conflict with unist Position) */
609
+ placement?: "initial" | "medial" | "final" | "above" | "below" | "before" | "after";
610
+ /** Language code for this character */
611
+ lang?: LanguageCode;
612
+ /** Script system used */
613
+ script?: ScriptSystem;
614
+ /** Extras field for extensions, with character-specific optional keys added */
615
+ extras?: GLOSTExtras & {
616
+ /** Unicode code point (e.g., "U+0E04") */
617
+ unicode?: string;
618
+ /** Thai consonant class (high/mid/low) */
619
+ class?: "high" | "mid" | "low";
620
+ /** Phonological sound class */
621
+ soundClass?: string;
622
+ };
623
+ };
624
+
625
+ export type GLOSTRootContent = GLOSTParagraph | GLOSTSentence | GLOSTWord | GLOSTText | GLOSTSymbol | GLOSTPunctuation | GLOSTWhiteSpace | GLOSTSource; // Node types allowed in GLOSTRoot.children
626
+ export type GLOSTParagraphContent = GLOSTSentence | GLOSTPunctuation | GLOSTSymbol | GLOSTWhiteSpace | GLOSTSource; // Node types allowed in GLOSTParagraph.children
627
+ export type GLOSTSentenceContent = GLOSTClause | GLOSTWord | GLOSTPunctuation | GLOSTSymbol | GLOSTWhiteSpace | GLOSTSource; // Node types allowed in GLOSTSentence.children (GLOSTPhrase only appears nested inside a GLOSTClause)
628
+ export type GLOSTWordContent = GLOSTSyllable | GLOSTText | GLOSTSymbol | GLOSTPunctuation | GLOSTWhiteSpace | GLOSTSource; // Node types allowed in GLOSTWord.children
629
+ // ============================================================================
630
+ // Utility Types
631
+ // ============================================================================
632
+
633
+ // Type guards are now implemented in utils.ts using unist-util-is