glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validators.d.ts","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB;;GAEG;AACH,eAAO,MAAM,qBAAqB,iFAOhC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,0BAA0B,0EAMrC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,yBAAyB,sNAUpC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,kBAAkB,+HAO7B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,kBAAkB,yNAU7B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,0BAA0B;;;;;;;;;;;;EAIrC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAOlC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAGrC,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,wBAAwB;IACnC,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;EASlD,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;EAM9B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;QAzB9B,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAgClD,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAOlC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,wBAAwB;;;;;;;;;;;;;;;;;;;;;;EAKnC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAW9B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;QA3E1B,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAgFlD,CAAC;AAMH;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAGhG;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAGxG;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAAC,CAG1G;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,IAAI,CAAC,CAAC,KAAK,CA
AC,OAAO,mBAAmB,CAAC,CAGhG;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,eAAe,CAAC,CAGxF;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAE3C;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAE/C;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAEnD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,OAAO;;;;;;;;;;;;;;GAEpD;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;GAE/C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,EAAE,CA8CzD;AAMD,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;QAxNlB,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAAlD,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAAlD,kDAAkD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAuOnD,CAAC"}
@@ -0,0 +1,302 @@
1
+ import { z } from 'zod';
2
+ // ============================================================================
3
+ // Zod Schemas for GLOST Validation
4
+ // ============================================================================
5
+ /**
6
+ * Linguistic level schema: an enum that accepts only the six level names listed below.
7
+ */
8
+ export const LinguisticLevelSchema = z.enum([
9
+ 'character',
10
+ 'syllable',
11
+ 'word',
12
+ 'phrase',
13
+ 'sentence',
14
+ 'paragraph'
15
+ ]);
16
+ /**
17
+ * Pronunciation context schema: an enum that accepts only the five context names listed below.
18
+ */
19
+ export const PronunciationContextSchema = z.enum([
20
+ 'formal',
21
+ 'informal',
22
+ 'historical',
23
+ 'regional',
24
+ 'dialectal'
25
+ ]);
26
+ /**
27
+ * Transcription system schema: the literals name the known systems, but the trailing z.string() member lets any string through.
28
+ */
29
+ export const TranscriptionSystemSchema = z.union([
30
+ z.literal('rtgs'),
31
+ z.literal('aua'),
32
+ z.literal('paiboon'),
33
+ z.literal('romaji'),
34
+ z.literal('furigana'),
35
+ z.literal('ipa'),
36
+ z.literal('pinyin'),
37
+ z.literal('hangul'),
38
+ z.string()
39
+ ]);
40
+ /**
41
+ * Language code schema: the literals name the known codes, but the trailing z.string() member lets any string through.
42
+ */
43
+ export const LanguageCodeSchema = z.union([
44
+ z.literal('th'),
45
+ z.literal('ja'),
46
+ z.literal('zh'),
47
+ z.literal('ko'),
48
+ z.literal('en'),
49
+ z.string()
50
+ ]);
51
+ /**
52
+ * Script system schema: the literals name the known scripts, but the trailing z.string() member lets any string through.
53
+ */
54
+ export const ScriptSystemSchema = z.union([
55
+ z.literal('thai'),
56
+ z.literal('hiragana'),
57
+ z.literal('katakana'),
58
+ z.literal('kanji'),
59
+ z.literal('hanzi'),
60
+ z.literal('hangul'),
61
+ z.literal('latin'),
62
+ z.literal('mixed'),
63
+ z.string()
64
+ ]);
65
+ /**
66
+ * Pronunciation variant schema: text and context are required, notes is an optional string.
67
+ */
68
+ export const PronunciationVariantSchema = z.object({
69
+ text: z.string(),
70
+ context: PronunciationContextSchema,
71
+ notes: z.string().optional()
72
+ });
73
+ /**
74
+ * Transcription info schema: text and system are required; variants, tone, syllables and phonetic are optional.
75
+ */
76
+ export const TranscriptionInfoSchema = z.object({
77
+ text: z.string(),
78
+ system: TranscriptionSystemSchema,
79
+ variants: z.array(PronunciationVariantSchema).optional(),
80
+ tone: z.number().optional(),
81
+ syllables: z.array(z.string()).optional(),
82
+ phonetic: z.string().optional()
83
+ });
84
+ /**
85
+ * Transliteration data schema: a record from arbitrary string keys to TranscriptionInfoSchema entries.
86
+ */
87
+ export const TransliterationDataSchema = z.record(z.string(), TranscriptionInfoSchema);
88
+ /**
89
+ * Linguistic metadata schema: partOfSpeech is the only required field; every other field is optional.
90
+ */
91
+ export const LinguisticMetadataSchema = z.object({
92
+ /** @deprecated Use extras.translations instead */
93
+ meaning: z.string().optional(),
94
+ partOfSpeech: z.string(),
95
+ usage: z.string().optional(),
96
+ etymology: z.string().optional(),
97
+ examples: z.array(z.string()).optional(),
98
+ frequency: z.enum(['high', 'medium', 'low']).optional(),
99
+ formality: z.enum(['formal', 'neutral', 'informal']).optional(),
100
+ register: z.string().optional()
101
+ });
102
+ /**
103
+ * Base GLOST node schema: only type is required; lang, script and level are optional, and position accepts any value. The node schemas below extend it.
104
+ */
105
+ export const GLOSTNodeBaseSchema = z.object({
106
+ type: z.string(),
107
+ lang: LanguageCodeSchema.optional(),
108
+ script: ScriptSystemSchema.optional(),
109
+ level: LinguisticLevelSchema.optional(),
110
+ position: z.any().optional()
111
+ });
112
+ /**
113
+ * GLOST word node schema: type must be 'GLOSTWordNode'; value, transcription, metadata and level are required; children are unchecked (z.any()) and default to [].
114
+ */
115
+ export const GLOSTWordNodeSchema = GLOSTNodeBaseSchema.extend({
116
+ type: z.literal('GLOSTWordNode'),
117
+ value: z.string(),
118
+ transcription: TransliterationDataSchema,
119
+ metadata: LinguisticMetadataSchema,
120
+ level: LinguisticLevelSchema,
121
+ children: z.array(z.any()).default([])
122
+ });
123
+ /**
124
+ * GLOST sentence node schema: type must be 'GLOSTSentenceNode'; originalText, lang and script are required; transcription is optional; children are unchecked (z.any()) and default to [].
125
+ */
126
+ export const GLOSTSentenceNodeSchema = GLOSTNodeBaseSchema.extend({
127
+ type: z.literal('GLOSTSentenceNode'),
128
+ originalText: z.string(),
129
+ lang: LanguageCodeSchema,
130
+ script: ScriptSystemSchema,
131
+ transcription: TransliterationDataSchema.optional(),
132
+ children: z.array(z.any()).default([])
133
+ });
134
+ /**
135
+ * GLOST paragraph node schema: type must be 'GLOSTParagraphNode'; lang and script are required; children are unchecked (z.any()) and default to [].
136
+ */
137
+ export const GLOSTParagraphNodeSchema = GLOSTNodeBaseSchema.extend({
138
+ type: z.literal('GLOSTParagraphNode'),
139
+ lang: LanguageCodeSchema,
140
+ script: ScriptSystemSchema,
141
+ children: z.array(z.any()).default([])
142
+ });
143
+ /**
144
+ * GLOST root node schema: type must be 'GLOSTRootNode'; lang and script are required; the document metadata object and each of its fields are optional; children are unchecked (z.any()) and default to [].
145
+ */
146
+ export const GLOSTRootNodeSchema = GLOSTNodeBaseSchema.extend({
147
+ type: z.literal('GLOSTRootNode'),
148
+ lang: LanguageCodeSchema,
149
+ script: ScriptSystemSchema,
150
+ metadata: z.object({
151
+ title: z.string().optional(),
152
+ author: z.string().optional(),
153
+ date: z.string().optional(),
154
+ description: z.string().optional()
155
+ }).optional(),
156
+ children: z.array(z.any()).default([])
157
+ });
158
+ /**
159
+ * Union schema for all GLOST node types: a value is accepted when it matches the word, sentence, paragraph or root schema.
160
+ */
161
+ export const GLOSTNodeSchema = z.union([
162
+ GLOSTWordNodeSchema,
163
+ GLOSTSentenceNodeSchema,
164
+ GLOSTParagraphNodeSchema,
165
+ GLOSTRootNodeSchema
166
+ ]);
167
+ // ============================================================================
168
+ // Validation Functions
169
+ // ============================================================================
170
/**
 * Validate a GLOST word node.
 *
 * @param {unknown} data - Value to check.
 * @returns {boolean} `true` when `data` satisfies `GLOSTWordNodeSchema`, otherwise `false`.
 */
export function validateGLOSTWordNode(data) {
    return GLOSTWordNodeSchema.safeParse(data).success;
}
177
/**
 * Validate a GLOST sentence node.
 *
 * @param {unknown} data - Value to check.
 * @returns {boolean} `true` when `data` satisfies `GLOSTSentenceNodeSchema`, otherwise `false`.
 */
export function validateGLOSTSentenceNode(data) {
    return GLOSTSentenceNodeSchema.safeParse(data).success;
}
184
/**
 * Validate a GLOST paragraph node.
 *
 * @param {unknown} data - Value to check.
 * @returns {boolean} `true` when `data` satisfies `GLOSTParagraphNodeSchema`, otherwise `false`.
 */
export function validateGLOSTParagraphNode(data) {
    return GLOSTParagraphNodeSchema.safeParse(data).success;
}
191
/**
 * Validate a GLOST root node.
 *
 * @param {unknown} data - Value to check.
 * @returns {boolean} `true` when `data` satisfies `GLOSTRootNodeSchema`, otherwise `false`.
 */
export function validateGLOSTRootNode(data) {
    return GLOSTRootNodeSchema.safeParse(data).success;
}
198
/**
 * Validate any GLOST node.
 *
 * @param {unknown} data - Value to check.
 * @returns {boolean} `true` when `data` satisfies `GLOSTNodeSchema` (the union of
 *   the word, sentence, paragraph and root schemas), otherwise `false`.
 */
export function validateGLOSTNode(data) {
    return GLOSTNodeSchema.safeParse(data).success;
}
205
/**
 * Parse a value as any GLOST node, keeping the failure details.
 *
 * @param {unknown} data - Value to parse.
 * @returns Whatever `GLOSTNodeSchema.safeParse` returns for `data`; nothing is thrown here.
 */
export function parseGLOSTNode(data) {
    const outcome = GLOSTNodeSchema.safeParse(data);
    return outcome;
}
211
/**
 * Parse a value as a GLOST word node, keeping the failure details.
 *
 * @param {unknown} data - Value to parse.
 * @returns Whatever `GLOSTWordNodeSchema.safeParse` returns for `data`; nothing is thrown here.
 */
export function parseGLOSTWordNode(data) {
    const outcome = GLOSTWordNodeSchema.safeParse(data);
    return outcome;
}
217
/**
 * Parse a value as a GLOST sentence node, keeping the failure details.
 *
 * @param {unknown} data - Value to parse.
 * @returns Whatever `GLOSTSentenceNodeSchema.safeParse` returns for `data`; nothing is thrown here.
 */
export function parseGLOSTSentenceNode(data) {
    const outcome = GLOSTSentenceNodeSchema.safeParse(data);
    return outcome;
}
223
/**
 * Parse a value as a GLOST paragraph node, keeping the failure details.
 *
 * @param {unknown} data - Value to parse.
 * @returns Whatever `GLOSTParagraphNodeSchema.safeParse` returns for `data`; nothing is thrown here.
 */
export function parseGLOSTParagraphNode(data) {
    const outcome = GLOSTParagraphNodeSchema.safeParse(data);
    return outcome;
}
229
/**
 * Parse a value as a GLOST root node, keeping the failure details.
 *
 * @param {unknown} data - Value to parse.
 * @returns Whatever `GLOSTRootNodeSchema.safeParse` returns for `data`; nothing is thrown here.
 */
export function parseGLOSTRootNode(data) {
    const outcome = GLOSTRootNodeSchema.safeParse(data);
    return outcome;
}
235
+ /**
236
+ * Validate an entire GLOST tree/document
237
+ */
238
+ export function validateGLOSTTree(data) {
239
+ const errors = [];
240
+ // Validate root node
241
+ const rootResult = GLOSTRootNodeSchema.safeParse(data);
242
+ if (!rootResult.success) {
243
+ errors.push(`Root validation failed: ${rootResult.error.message}`);
244
+ return errors;
245
+ }
246
+ const root = rootResult.data;
247
+ // Validate all paragraphs
248
+ for (let i = 0; i < root.children.length; i++) {
249
+ const child = root.children[i];
250
+ if (child.type === 'ParagraphNode') {
251
+ const paragraphResult = GLOSTParagraphNodeSchema.safeParse(child);
252
+ if (!paragraphResult.success) {
253
+ errors.push(`Paragraph ${i} validation failed: ${paragraphResult.error.message}`);
254
+ }
255
+ else {
256
+ // Validate all sentences in paragraph
257
+ for (let j = 0; j < child.children.length; j++) {
258
+ const sentence = child.children[j];
259
+ if (sentence.type === 'SentenceNode') {
260
+ const sentenceResult = GLOSTSentenceNodeSchema.safeParse(sentence);
261
+ if (!sentenceResult.success) {
262
+ errors.push(`Sentence ${i}.${j} validation failed: ${sentenceResult.error.message}`);
263
+ }
264
+ else {
265
+ // Validate all words in sentence
266
+ for (let k = 0; k < sentence.children.length; k++) {
267
+ const word = sentence.children[k];
268
+ if (word.type === 'WordNode') {
269
+ const wordResult = GLOSTWordNodeSchema.safeParse(word);
270
+ if (!wordResult.success) {
271
+ errors.push(`Word ${i}.${j}.${k} validation failed: ${wordResult.error.message}`);
272
+ }
273
+ }
274
+ }
275
+ }
276
+ }
277
+ }
278
+ }
279
+ }
280
+ }
281
+ return errors;
282
+ }
283
+ // ============================================================================
284
+ // Schema Export
285
+ // ============================================================================
286
+ export const schemas = { // name -> schema lookup for the schemas defined in this module; GLOSTNodeBaseSchema is not listed
287
+ LinguisticLevel: LinguisticLevelSchema,
288
+ PronunciationContext: PronunciationContextSchema,
289
+ TranscriptionSystem: TranscriptionSystemSchema,
290
+ LanguageCode: LanguageCodeSchema,
291
+ ScriptSystem: ScriptSystemSchema,
292
+ PronunciationVariant: PronunciationVariantSchema,
293
+ TranscriptionInfo: TranscriptionInfoSchema,
294
+ TransliterationData: TransliterationDataSchema,
295
+ LinguisticMetadata: LinguisticMetadataSchema,
296
+ GLOSTNode: GLOSTNodeSchema,
297
+ GLOSTWordNode: GLOSTWordNodeSchema,
298
+ GLOSTSentenceNode: GLOSTSentenceNodeSchema,
299
+ GLOSTParagraphNode: GLOSTParagraphNodeSchema,
300
+ GLOSTRootNode: GLOSTRootNodeSchema
301
+ };
302
+ //# sourceMappingURL=validators.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validators.js","sourceRoot":"","sources":["../src/validators.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,+EAA+E;AAC/E,mCAAmC;AACnC,+EAA+E;AAE/E;;GAEG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAC,IAAI,CAAC;IAC1C,WAAW;IACX,UAAU;IACV,MAAM;IACN,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,IAAI,CAAC;IAC/C,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,UAAU;IACV,WAAW;CACZ,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC,CAAC,KAAK,CAAC;IAC/C,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;IACjB,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAChB,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC;IACpB,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACnB,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC;IACrB,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAChB,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACnB,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACnB,CAAC,CAAC,MAAM,EAAE;CACX,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,CAAC,KAAK,CAAC;IACxC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;IACf,CAAC,CAAC,MAAM,EAAE;CACX,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,CAAC,KAAK,CAAC;IACxC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;IACjB,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC;IACrB,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC;IACrB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;IAClB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;IAClB,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC;IACnB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;IAClB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;IAClB,CAAC,CAAC,MAAM,EAAE;CACX,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,MAAM,CAAC;IACjD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,OAAO,EAAE,0BAA0B;IACnC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAC7B,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC9C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,MAAM,EAAE,yBAAyB;IACjC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC,QAAQ,EAAE;IACxD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC3B,SAAS,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,E
AAE,CAAC,CAAC,QAAQ,EAAE;IACzC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAChC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAC/C,CAAC,CAAC,MAAM,EAAE,EACV,uBAAuB,CACxB,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/C,kDAAkD;IAClD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC9B,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE;IACxB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC5B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAChC,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IACxC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,QAAQ,EAAE;IACvD,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC/D,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAChC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC1C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,IAAI,EAAE,kBAAkB,CAAC,QAAQ,EAAE;IACnC,MAAM,EAAE,kBAAkB,CAAC,QAAQ,EAAE;IACrC,KAAK,EAAE,qBAAqB,CAAC,QAAQ,EAAE;IACvC,QAAQ,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE;CAC7B,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,mBAAmB,CAAC,MAAM,CAAC;IAC5D,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC;IAChC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,aAAa,EAAE,yBAAyB;IACxC,QAAQ,EAAE,wBAAwB;IAClC,KAAK,EAAE,qBAAqB;IAC5B,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACvC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,mBAAmB,CAAC,MAAM,CAAC;IAChE,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,mBAAmB,CAAC;IACpC,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE;IACxB,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,kBAAkB;IAC1B,aAAa,EAAE,yBAAyB,CAAC,QAAQ,EAAE;IACnD,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACvC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG,mBAAmB,CAAC,MAAM,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,oBAAoB,CAAC;IACrC,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACvC,CAAC,CAAC;AAEH;;GAEG;AACH,MA
AM,CAAC,MAAM,mBAAmB,GAAG,mBAAmB,CAAC,MAAM,CAAC;IAC5D,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC;IAChC,IAAI,EAAE,kBAAkB;IACxB,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC;QACjB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC5B,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC7B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC3B,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KACnC,CAAC,CAAC,QAAQ,EAAE;IACb,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;CACvC,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC;IACrC,mBAAmB;IACnB,uBAAuB;IACvB,wBAAwB;IACxB,mBAAmB;CACpB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,uBAAuB;AACvB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAa;IACjD,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACnD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CAAC,IAAa;IACrD,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACvD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,0BAA0B,CAAC,IAAa;IACtD,MAAM,MAAM,GAAG,wBAAwB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAa;IACjD,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACnD,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAa;IAC7C,MAAM,MAAM,GAAG,eAAe,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAC/C,OAAO,MAAM,CAAC,OAAO,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAa;IAC1C,OAAO,eAAe,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAa;IAC9C,OAAO,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAa;IAClD,OAAO,uBAAuB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AACjD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAa;IACnD,OAAO,wBAAwB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAa;IAC9C,OAAO,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAa;IAC7C,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,qBAAqB;IACrB,MAAM,UAAU,GAAG
,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACvD,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,2BAA2B,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACnE,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC;IAE7B,0BAA0B;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,KAAK,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;YACnC,MAAM,eAAe,GAAG,wBAAwB,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;YAClE,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC;gBAC7B,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,uBAAuB,eAAe,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;YACpF,CAAC;iBAAM,CAAC;gBACN,sCAAsC;gBACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/C,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;oBACnC,IAAI,QAAQ,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;wBACrC,MAAM,cAAc,GAAG,uBAAuB,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;wBACnE,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;4BAC5B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,uBAAuB,cAAc,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;wBACvF,CAAC;6BAAM,CAAC;4BACN,iCAAiC;4BACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gCAClD,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gCAClC,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;oCAC7B,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;oCACvD,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;wCACxB,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,uBAAuB,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;oCACpF,CAAC;gCACH,CAAC;4BACH,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,CAAC,MAAM,OAAO,GAAG;IACrB,eAAe,EAAE,qBAAqB;IACtC,oBAAoB,EAAE,0BAA0B;IAChD,mBAAmB,EAAE,yBAAyB;IAC9C,YAAY,EAAE,kBAAkB;IAChC,YAAY,EAAE,kBAAkB;IAChC,oBAAoB,EAAE,0BAA0B;IAChD,iBAAiB,EAAE,uBAAuB;IAC1C,mBAAmB,EAAE,yBAAyB;IAC9C,kBAAkB,EAAE,wBAAwB;IAC5C,SAAS,EAAE,eAAe;IAC1B,aAAa,EAAE,mBAAmB;IAClC,iBAAiB,EAAE,uBAAuB;IAC1C,kBAAkB,EAAE,wBAAwB
;IAC5C,aAAa,EAAE,mBAAmB;CACnC,CAAC"}
package/package.json ADDED
@@ -0,0 +1,73 @@
1
+ {
2
+ "name": "glost-core",
3
+ "version": "0.5.0",
4
+ "description": "Core types, nodes, and utilities for GLOST (Glossed Syntax Tree)",
5
+ "author": "fustilio",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/fustilio/glost.git",
10
+ "directory": "packages/core"
11
+ },
12
+ "type": "module",
13
+ "main": "./src/index.ts",
14
+ "types": "./src/index.ts",
15
+ "bin": {
16
+ "glost": "./dist/cli/migrate.js"
17
+ },
18
+ "exports": {
19
+ ".": {
20
+ "types": "./src/index.ts",
21
+ "default": "./src/index.ts"
22
+ },
23
+ "./nodes": {
24
+ "types": "./src/nodes.ts",
25
+ "default": "./src/nodes.ts"
26
+ },
27
+ "./utils": {
28
+ "types": "./src/utils.ts",
29
+ "default": "./src/utils.ts"
30
+ },
31
+ "./validators": {
32
+ "types": "./src/validators.ts",
33
+ "default": "./src/validators.ts"
34
+ },
35
+ "./guards": {
36
+ "types": "./src/guards.ts",
37
+ "default": "./src/guards.ts"
38
+ }
39
+ },
40
+ "dependencies": {
41
+ "unist-util-filter": "^5.0.1",
42
+ "unist-util-find": "^3.0.0",
43
+ "unist-util-flatmap": "^1.0.0",
44
+ "unist-util-is": "^6.0.0",
45
+ "unist-util-map": "^4.0.0",
46
+ "unist-util-modify-children": "^4.0.0",
47
+ "unist-util-remove": "^4.0.0",
48
+ "unist-util-select": "^5.1.0",
49
+ "unist-util-visit": "^5.0.0",
50
+ "zod": "^3.23.8",
51
+ "glost-common": "0.2.0"
52
+ },
53
+ "devDependencies": {
54
+ "@types/nlcst": "^2.0.3",
55
+ "typescript": "^5.8.3",
56
+ "vitest": "^3.0.5"
57
+ },
58
+ "keywords": [
59
+ "glost",
60
+ "glossed-syntax-tree",
61
+ "nlcst",
62
+ "multilingual",
63
+ "language-learning",
64
+ "linguistics",
65
+ "syntax-tree",
66
+ "unist"
67
+ ],
68
+ "scripts": {
69
+ "build": "tsc",
70
+ "typecheck": "tsc --noEmit",
71
+ "test": "vitest --passWithNoTests"
72
+ }
73
+ }
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Document Creation Performance Benchmarks
3
+ *
4
+ * Compares different approaches to creating GLOST documents
5
+ */
6
+
7
+ import { bench, describe } from 'vitest';
8
+ import {
9
+ createSimpleDocument,
10
+ createGLOSTWordNode,
11
+ createDocumentFromSentences,
12
+ createSentenceFromWords,
13
+ createParagraphFromSentences,
14
+ createDocumentFromParagraphs,
15
+ } from '../index.js';
16
+
17
+ describe('Document Creation: Comparing Approaches', () => {
18
+ // Setup: build the 100 word nodes once, outside the bench callbacks, so creating them is not part of the timed work
19
+ const words100 = Array.from({ length: 100 }, (_, i) =>
20
+ createGLOSTWordNode({ value: `word${i}` })
21
+ );
22
+
23
+ describe('Compare: Simple vs Full Hierarchy (100 words)', () => {
24
+ bench('createSimpleDocument (recommended)', () => {
25
+ createSimpleDocument(words100, "en", "latin");
26
+ });
27
+
28
+ bench('createDocumentFromSentences', () => {
29
+ const sentence = createSentenceFromWords(words100, "en", "latin", "text");
30
+ createDocumentFromSentences([sentence], "en", "latin");
31
+ });
32
+
33
+ bench('createDocumentFromParagraphs (full hierarchy)', () => {
34
+ const sentence = createSentenceFromWords(words100, "en", "latin", "text");
35
+ const paragraph = createParagraphFromSentences([sentence]);
36
+ createDocumentFromParagraphs([paragraph], "en", "latin");
37
+ });
38
+ });
39
+
40
+ describe('Compare: Word Node Creation Approaches', () => {
41
+ bench('minimal word node', () => {
42
+ createGLOSTWordNode({ value: "test" });
43
+ });
44
+
45
+ bench('word with transcription', () => {
46
+ createGLOSTWordNode({
47
+ value: "test",
48
+ transcription: {
49
+ ipa: { text: "test", syllables: ["test"] }
50
+ }
51
+ });
52
+ });
53
+
54
+ bench('word with full metadata', () => {
55
+ createGLOSTWordNode({
56
+ value: "test",
57
+ transcription: {
58
+ ipa: { text: "test", syllables: ["test"] }
59
+ },
60
+ metadata: {
61
+ partOfSpeech: "noun",
62
+ meaning: "a test",
63
+ usage: "testing"
64
+ },
65
+ lang: "en",
66
+ script: "latin",
67
+ extras: {
68
+ translations: { th: "ทดสอบ" },
69
+ metadata: { frequency: "common" }
70
+ }
71
+ });
72
+ });
73
+ });
74
+
75
+ describe('Compare: Sentence Creation Strategies', () => {
76
+ const words10 = Array.from({ length: 10 }, (_, i) =>
77
+ createGLOSTWordNode({ value: `word${i}` })
78
+ );
79
+
80
+ bench('createDocumentFromSentences - multiple small sentences', () => {
81
+ const sentences = Array.from({ length: 10 }, (_, i) => {
82
+ const sentenceWords = words10.slice(i, i + 5); // overlapping windows; those starting at i >= 6 run past the end of words10 and hold fewer than 5 words
83
+ return createSentenceFromWords(sentenceWords, "en", "latin", `Sentence ${i}`);
84
+ });
85
+ createDocumentFromSentences(sentences, "en", "latin");
86
+ });
87
+
88
+ bench('createSimpleDocument - single sentence', () => {
89
+ createSimpleDocument(words10, "en", "latin");
90
+ });
91
+ });
92
+ });
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Document Traversal Performance Benchmarks
3
+ *
4
+ * Compares different approaches to traversing and accessing GLOST documents
5
+ */
6
+
7
+ import { bench, describe } from 'vitest';
8
+ import {
9
+ createSimpleDocument,
10
+ createGLOSTWordNode,
11
+ getAllWords,
12
+ getFirstWord,
13
+ getWordAtPath,
14
+ getAllSentences,
15
+ getAllParagraphs,
16
+ } from '../index.js';
17
+ import { visit } from 'unist-util-visit';
18
+ import { NODE_TYPES } from '../types.js';
19
+
20
/**
 * Build a benchmark document holding `wordCount` word nodes.
 *
 * Word `i` has the value `word${i}`, an `ipa` transcription entry, and a
 * part of speech that alternates between "noun" (even `i`) and "verb" (odd `i`).
 * The words are handed to `createSimpleDocument` with language "en" and
 * script "latin".
 *
 * @param wordCount - Number of word nodes to generate.
 * @returns The document produced by `createSimpleDocument`.
 */
function createTestDocument(wordCount: number) {
  const buildWord = (index: number) => {
    const partOfSpeech = index % 2 === 0 ? "noun" : "verb";
    return createGLOSTWordNode({
      value: `word${index}`,
      transcription: {
        ipa: { text: `wɜːrd${index}`, syllables: [`word${index}`] }
      },
      metadata: { partOfSpeech }
    });
  };
  const generatedWords = Array.from({ length: wordCount }, (_, index) => buildWord(index));
  return createSimpleDocument(generatedWords, "en", "latin");
}
33
+
34
+ describe('Traversal: Comparing Access Patterns', () => {
35
+ const doc1000 = createTestDocument(1000); // built once at describe time and shared by every bench in this suite
36
+
37
+ describe('Compare: Getting All Words', () => {
38
+ bench('getAllWords (helper)', () => {
39
+ getAllWords(doc1000);
40
+ });
41
+
42
+ bench('visit with type filter', () => {
43
+ const words: any[] = [];
44
+ visit(doc1000, NODE_TYPES.WORD, (node) => {
45
+ words.push(node);
46
+ });
47
+ });
48
+
49
+ bench('manual recursive traversal', () => {
50
+ const words: any[] = [];
51
+ function traverse(node: any) {
52
+ if (node.type === NODE_TYPES.WORD) {
53
+ words.push(node);
54
+ }
55
+ if (node.children) {
56
+ node.children.forEach(traverse);
57
+ }
58
+ }
59
+ traverse(doc1000);
60
+ });
61
+ });
62
+
63
+ describe('Compare: Finding First Word', () => {
64
+ bench('getFirstWord (optimized with SKIP)', () => {
65
+ getFirstWord(doc1000);
66
+ });
67
+
68
+ bench('getAllWords then [0]', () => {
69
+ getAllWords(doc1000)[0];
70
+ });
71
+
72
+ bench('manual early-exit traversal', () => {
73
+ let found: any;
74
+ function traverse(node: any): boolean {
75
+ if (node.type === NODE_TYPES.WORD) {
76
+ found = node;
77
+ return true; // exit early: the true result propagates up through every enclosing call below
78
+ }
79
+ if (node.children) {
80
+ for (const child of node.children) {
81
+ if (traverse(child)) return true;
82
+ }
83
+ }
84
+ return false;
85
+ }
86
+ traverse(doc1000);
87
+ });
88
+ });
89
+
90
+ describe('Compare: Direct Path Access', () => {
91
+ bench('getWordAtPath (direct indexing)', () => {
92
+ getWordAtPath(doc1000, { paragraph: 0, sentence: 0, word: 500 });
93
+ });
94
+
95
+ bench('getAllWords then filter by index', () => {
96
+ getAllWords(doc1000)[500]; // despite the label this is a plain index lookup on the collected array, not a filter
97
+ });
98
+ });
99
+ });
100
+
101
+ describe('Traversal: Comparing Filter Strategies', () => {
102
+ const doc500 = createTestDocument(500); // built once at describe time and shared by every bench in this suite
103
+
104
+ describe('Compare: Filter by Part of Speech', () => {
105
+ bench('getAllWords + array filter', () => {
106
+ const words = getAllWords(doc500);
107
+ words.filter(w => w.metadata?.partOfSpeech === "noun");
108
+ });
109
+
110
+ bench('visit with conditional push', () => {
111
+ const nouns: any[] = [];
112
+ visit(doc500, NODE_TYPES.WORD, (node: any) => {
113
+ if (node.metadata?.partOfSpeech === "noun") {
114
+ nouns.push(node);
115
+ }
116
+ });
117
+ });
118
+ });
119
+
120
+ describe('Compare: Extract Word Text', () => {
121
+ const words = getAllWords(doc500); // collected once, so the benches below time only the array operation
122
+
123
+ bench('map with type guard', () => {
124
+ words.map(w => {
125
+ const textNode = w.children[0];
126
+ return textNode && 'value' in textNode ? textNode.value : ''; // yields '' when the first child has no value
127
+ });
128
+ });
129
+
130
+ bench('reduce to array', () => {
131
+ words.reduce((acc, w) => {
132
+ const textNode = w.children[0];
133
+ if (textNode && 'value' in textNode) { // unlike the map variant, words without a value are omitted rather than mapped to ''
134
+ acc.push(textNode.value);
135
+ }
136
+ return acc;
137
+ }, [] as string[]);
138
+ });
139
+ });
140
+
141
+ describe('Compare: Count with Condition', () => {
142
+ const words = getAllWords(doc500); // collected once, so the benches below time only the counting
143
+
144
+ bench('filter then length', () => {
145
+ words.filter(w => w.transcription !== undefined).length;
146
+ });
147
+
148
+ bench('reduce with counter', () => {
149
+ words.reduce((count, w) => count + (w.transcription ? 1 : 0), 0);
150
+ });
151
+ });
152
+ });
@@ -0,0 +1,20 @@
1
+ # Test Fixtures
2
+
3
+ This directory contains mock data and fixtures for testing purposes only.
4
+
5
+ **Important:** Files in this directory should **NOT** be imported by production code to avoid bundling test data with the main library.
6
+
7
+ ## Usage in Tests
8
+
9
+ ```typescript
10
+ // In your test files
11
+ import { thaiDocumentWithExtras, japaneseDocumentWithExtras } from './mock-data';
12
+ ```
13
+
14
+ ## Available Fixtures
15
+
16
+ - `mock-data.ts` - Sample GLOST documents with enhanced Thai and Japanese examples including:
17
+ - Word-level annotations with multiple transcription systems
18
+ - Translations in multiple languages
19
+ - Cultural notes and metadata
20
+ - Example sentences and usage patterns