glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
@@ -0,0 +1,247 @@
1
+ import type { LanguageCode, GLOSTCharacter, GLOSTClause, GLOSTNode, GLOSTParagraph, GLOSTPhrase, GLOSTRoot, GLOSTSentence, GLOSTSyllable, GLOSTWord, TranscriptionSystem } from "./types.js";
2
+ /**
3
+ * Parse a BCP-47 language tag into its components
4
+ * Format: language[-script][-region][-variant]
5
+ */
6
+ export declare function parseLanguageTag(tag: string): {
7
+ language: string;
8
+ script?: string;
9
+ region?: string;
10
+ variant?: string;
11
+ fullTag: string;
12
+ };
13
+ /**
14
+ * Get the base language from a BCP-47 tag
15
+ * Examples: "en-US" -> "en", "zh-CN" -> "zh"
16
+ */
17
+ export declare function getBaseLanguage(tag: string): string;
18
+ /**
19
+ * Check if two language tags are compatible (same base language)
20
+ * Examples: "en-US" and "en-GB" are compatible
21
+ */
22
+ export declare function areLanguagesCompatible(tag1: string, tag2: string): boolean;
23
+ /**
24
+ * Find the best matching language tag from available options
25
+ * Prioritizes exact matches, then region matches, then base language matches
26
+ */
27
+ export declare function findBestLanguageMatch(target: string, available: string[]): string | null;
28
+ /**
29
+ * Get a fallback language tag when the exact one isn't available
30
+ * Examples: "en-US" -> "en", "zh-CN" -> "zh"
31
+ */
32
+ export declare function getLanguageFallback(tag: string): string;
33
+ /**
34
+ * Normalize a language tag to standard format
35
+ * Converts to lowercase and ensures proper formatting
36
+ */
37
+ export declare function normalizeLanguageTag(tag: string): string;
38
+ /**
39
+ * Check if a language tag is valid BCP-47 format
40
+ */
41
+ export declare function isValidLanguageTag(tag: string): boolean;
42
+ /**
43
+ * Get all word nodes from an GLOST tree
44
+ */
45
+ export declare function getAllWords(node: GLOSTNode): GLOSTWord[];
46
+ /**
47
+ * Get the first word from a document
48
+ *
49
+ * Convenience helper for accessing the first word in document order.
50
+ * Returns undefined if no words are found.
51
+ *
52
+ * @param document - GLOST document root
53
+ * @returns First word node or undefined
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * const doc = createSimpleDocument([word1, word2], "en");
58
+ * const firstWord = getFirstWord(doc);
59
+ * if (firstWord) {
60
+ * console.log(getWordText(firstWord));
61
+ * }
62
+ * ```
63
+ */
64
+ export declare function getFirstWord(document: GLOSTRoot): GLOSTWord | undefined;
65
+ /**
66
+ * Get word at specific path in document
67
+ *
68
+ * Navigate document hierarchy using paragraph, sentence, and word indices.
69
+ * Returns undefined if path is invalid or doesn't exist.
70
+ *
71
+ * @param document - GLOST document root
72
+ * @param path - Path specifying paragraph, sentence, and word indices (0-based)
73
+ * @returns Word node at path or undefined
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * // Get the first word of the second sentence in the first paragraph
78
+ * const word = getWordAtPath(doc, {
79
+ * paragraph: 0,
80
+ * sentence: 1,
81
+ * word: 0
82
+ * });
83
+ * ```
84
+ */
85
+ export declare function getWordAtPath(document: GLOSTRoot, path: {
86
+ paragraph: number;
87
+ sentence: number;
88
+ word: number;
89
+ }): GLOSTWord | undefined;
90
+ /**
91
+ * Get all sentence nodes from an GLOST tree
92
+ */
93
+ export declare function getAllSentences(node: GLOSTNode): GLOSTSentence[];
94
+ /**
95
+ * Get all paragraph nodes from an GLOST tree
96
+ */
97
+ export declare function getAllParagraphs(node: GLOSTNode): GLOSTParagraph[];
98
+ /**
99
+ * Get all clause nodes from an GLOST tree
100
+ */
101
+ export declare function getAllClauses(node: GLOSTNode): GLOSTClause[];
102
+ /**
103
+ * Get all phrase nodes from an GLOST tree
104
+ */
105
+ export declare function getAllPhrases(node: GLOSTNode): GLOSTPhrase[];
106
+ /**
107
+ * Get all syllable nodes from an GLOST tree
108
+ */
109
+ export declare function getAllSyllables(node: GLOSTNode): GLOSTSyllable[];
110
+ /**
111
+ * Get all character nodes from an GLOST tree
112
+ */
113
+ export declare function getAllCharacters(node: GLOSTNode): GLOSTCharacter[];
114
+ /**
115
+ * Find nodes by type with better typing
116
+ */
117
+ export declare function findNodesByType<T extends GLOSTNode>(node: GLOSTNode, type: string): T[];
118
+ /**
119
+ * Get all words from a document with proper typing
120
+ */
121
+ export declare function getWordsFromDocument(doc: GLOSTRoot): GLOSTWord[];
122
+ /**
123
+ * Get the first sentence from a document
124
+ */
125
+ export declare function getFirstSentence(doc: GLOSTRoot): GLOSTSentence | null;
126
+ /**
127
+ * Get words from a specific sentence
128
+ */
129
+ export declare function getWordsFromSentence(sentence: GLOSTSentence): GLOSTWord[];
130
+ /**
131
+ * Get words from a specific paragraph
132
+ */
133
+ export declare function getWordsFromParagraph(paragraph: GLOSTParagraph): GLOSTWord[];
134
+ /**
135
+ * Find word nodes with specific language
136
+ */
137
+ export declare function findWordsByLanguage(node: GLOSTNode, lang: LanguageCode): GLOSTWord[];
138
+ /**
139
+ * Find word nodes with specific transcription system
140
+ */
141
+ export declare function findWordsByTranscriptionSystem(node: GLOSTNode, system: TranscriptionSystem): GLOSTWord[];
142
+ /**
143
+ * Enhanced type guards for the new GLOST types
144
+ */
145
+ export declare function isGLOSTWord(node: any): node is GLOSTWord;
146
+ export declare function isGLOSTSentence(node: any): node is GLOSTSentence;
147
+ export declare function isGLOSTParagraph(node: any): node is GLOSTParagraph;
148
+ export declare function isGLOSTRoot(node: any): node is GLOSTRoot;
149
+ /**
150
+ * Type guard for GLOSTClause nodes
151
+ */
152
+ export declare function isGLOSTClause(node: any): node is GLOSTClause;
153
+ /**
154
+ * Type guard for GLOSTPhrase nodes
155
+ */
156
+ export declare function isGLOSTPhrase(node: any): node is GLOSTPhrase;
157
+ /**
158
+ * Type guard for GLOSTSyllable nodes
159
+ */
160
+ export declare function isGLOSTSyllable(node: any): node is GLOSTSyllable;
161
+ /**
162
+ * Type guard for GLOSTCharacter nodes
163
+ */
164
+ export declare function isGLOSTCharacter(node: any): node is GLOSTCharacter;
165
+ /**
166
+ * Extract text value from a word node
167
+ */
168
+ export declare function getWordText(word: GLOSTWord): string;
169
+ /**
170
+ * Get transcription for a specific system
171
+ */
172
+ export declare function getWordTranscription(word: GLOSTWord, system: TranscriptionSystem): string | null;
173
+ /**
174
+ * Check if a word has transcription for a specific system
175
+ */
176
+ export declare function hasWordTranscription(word: GLOSTWord, system: TranscriptionSystem): boolean;
177
+ /**
178
+ * Get word translation for a specific language
179
+ * @param word - The word node
180
+ * @param language - Target language code (default: "en-US")
181
+ * @returns Translation string or empty string if not found
182
+ */
183
+ export declare function getWordTranslation(word: GLOSTWord, language?: string): string;
184
+ /**
185
+ * Get word meaning/definition
186
+ * @deprecated Use getWordTranslation for multi-language support.
187
+ * This function is kept for backward compatibility.
188
+ */
189
+ export declare function getWordMeaning(word: GLOSTWord): string;
190
+ /**
191
+ * Get word part of speech
192
+ */
193
+ export declare function getWordPartOfSpeech(word: GLOSTWord): string;
194
+ /**
195
+ * Get word difficulty
196
+ */
197
+ export declare function getWordDifficulty(word: GLOSTWord): string | number;
198
+ /**
199
+ * Get sentence translation
200
+ */
201
+ export declare function getSentenceTranslation(sentence: GLOSTSentence, language?: string): string | null;
202
+ /**
203
+ * Generic paragraph structure for word count calculation
204
+ * This interface allows converting external paragraph structures to GLOST format
205
+ */
206
+ export type ParagraphLike = {
207
+ sentences: Array<{
208
+ sentence: string;
209
+ translation?: string;
210
+ }>;
211
+ };
212
+ /**
213
+ * Convert a paragraph-like structure to GLOST format for word count calculation
214
+ * This is a minimal adapter that only converts what's needed for word counting
215
+ *
216
+ * @param paragraph - Paragraph structure with sentences containing text and optional translations
217
+ * @returns GLOST paragraph node
218
+ *
219
+ * @example
220
+ * ```ts
221
+ * const paragraph = {
222
+ * sentences: [
223
+ * { sentence: "Hello", translation: "สวัสดี" },
224
+ * { sentence: "World", translation: "โลก" }
225
+ * ]
226
+ * };
227
+ * const glostParagraph = adaptParagraphLikeToGLOST(paragraph);
228
+ * const wordCount = getGLOSTWordCount(glostParagraph);
229
+ * ```
230
+ */
231
+ export declare function adaptParagraphLikeToGLOST(paragraph: ParagraphLike): GLOSTParagraph;
232
+ /**
233
+ * Calculate word count from GLOST content
234
+ * Counts words from sentence translations or original text
235
+ *
236
+ * @param content - GLOST paragraph, sentence, or root node
237
+ * @param language - Optional language code for translation preference (default: 'en')
238
+ * @returns Word count as a number, or undefined if content is empty
239
+ *
240
+ * @example
241
+ * ```ts
242
+ * const wordCount = getGLOSTWordCount(paragraph, 'en');
243
+ * // Returns: 245
244
+ * ```
245
+ */
246
+ export declare function getGLOSTWordCount(content: GLOSTParagraph | GLOSTSentence | GLOSTRoot, language?: string): number | undefined;
247
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,YAAY,EACZ,cAAc,EACd,WAAW,EACX,SAAS,EACT,cAAc,EACd,WAAW,EACX,SAAS,EACT,aAAa,EACb,aAAa,EACb,SAAS,EACT,mBAAmB,EACpB,MAAM,YAAY,CAAC;AAMpB;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG;IAC7C,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;CACjB,CAgCA;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAGnD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAE1E;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EAAE,GAClB,MAAM,GAAG,IAAI,CA2Bf;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAGvD;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAwCxD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAKvD;AAMD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,SAAS,GAAG,SAAS,EAAE,CAUxD;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,YAAY,CAAC,QAAQ,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAWvE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,SAAS,EACnB,IAAI,EAAE;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAC1D,SAAS,GAAG,SAAS,CAsBvB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,SAAS,GAAG,aAAa,EAAE,CAUhE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,SAAS,GAAG,cAAc,EAAE,CAUlE;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,SAAS,GAAG,WAAW,EAAE,CAU5D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,SAAS,GAAG,WAAW,EAAE,CAU5D;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,SAAS,GAAG,aAAa,EAAE,CAUhE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,SAAS,GAAG,cAAc,EAAE,CAUlE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,CAAC,SAAS,SAAS,EACjD,IAAI,EAAE,SAAS,EACf,IAAI,EAAE,MAAM,GACX,CAAC,EAAE,CAQL;AAMD;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,EAAE,SAAS,GAAG,SAAS,EAAE,CAEhE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,SAAS,GAAG,aAAa,GAAG,IAAI,CAYrE;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,aAAa,GAA
G,SAAS,EAAE,CAEzE;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,SAAS,EAAE,cAAc,GAAG,SAAS,EAAE,CAU5E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,SAAS,EACf,IAAI,EAAE,YAAY,GACjB,SAAS,EAAE,CAGb;AAED;;GAEG;AACH,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,SAAS,EACf,MAAM,EAAE,mBAAmB,GAC1B,SAAS,EAAE,CAKb;AAMD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,SAAS,CAIxD;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,aAAa,CAOhE;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,cAAc,CAElE;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,SAAS,CAExD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,WAAW,CAE5D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,WAAW,CAE5D;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,aAAa,CAEhE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,GAAG,GAAG,IAAI,IAAI,cAAc,CAElE;AAMD;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,CAGnD;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,SAAS,EACf,MAAM,EAAE,mBAAmB,GAC1B,MAAM,GAAG,IAAI,CAEf;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,SAAS,EACf,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAET;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,SAAS,EACf,QAAQ,SAAU,GACjB,MAAM,CAWR;AAED;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,CAQtD;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,CAE3D;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,SAAS,GAAG,MAAM,GAAG,MAAM,CAElE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,aAAa,EACvB,QAAQ,SAAO,GACd,MAAM,GAAG,IAAI,CAaf;AAMD;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG;IAC1B,SAAS,EAAE,KAAK,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACJ,CAAC;AAEF;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,yBAAyB,CACvC,SAAS,EAAE,aAAa,GACvB,cAAc,CAgBhB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,cAAc,GAAG,aAAa,GAAG,SAAS,EACnD,QAAQ,SAAO,GACd,MAAM,GAAG,SAAS,CAoCpB"}