glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,63 @@
1
+ # glost
2
+
3
+ ## 0.4.0
4
+
5
+ ### Major Changes
6
+
7
+ - **ESM Imports Fixed**: Added `.js` extensions to all barrel exports for proper Node.js ESM support
8
+ - **BCP-47 Language Standard**: All language codes now use BCP-47 format (e.g., `en-US`, `th-TH`)
9
+ - **Transcription Schema Cleanup**: Removed redundant `system` field from TranscriptionInfo type
10
+ - **Translation API Renamed**: Changed `sourceLanguage/targetLanguage` to `from/to` for clarity
11
+
12
+ ### Breaking Changes
13
+
14
+ - Language codes standardized on BCP-47 (use migration CLI: `npx glost migrate v0.3-to-v0.4 ./src`)
15
+ - Translation extension API parameter names changed
16
+ - Transcription schema simplified
17
+
18
+ ### New Features
19
+
20
+ - **Typed Extras**: Extensions can augment `GLOSTExtras` interface via declaration merging for full type safety
21
+ - **Standard Provider Interface**: New `GLOSTDataProvider<TInput, TOutput>` with batch processing and caching support
22
+ - **Better Error Messages**: Comprehensive error classes with context, suggestions, and documentation links
23
+ - **Language Utilities**: `normalizeLanguageCode()`, `matchLanguage()`, `parseLanguageCode()`, `findBestMatch()`, etc.
24
+ - **Migration CLI**: Built-in `npx glost migrate` tool for automated upgrades
25
+
26
+ ### Patch Changes
27
+
28
+ - Updated dependencies
29
+
30
+ ## 0.3.0
31
+
32
+ ### Minor Changes
33
+
34
+ - Externalized language-specific helpers and data-dependent extensions into dedicated packages
35
+ - Extensions now require explicit data providers instead of fallback data
36
+
37
+ ### Patch Changes
38
+
39
+ - Updated dependencies
40
+
41
+ ## 0.2.0
42
+
43
+ ### Breaking Changes
44
+
45
+ - Removed `createThaiWord()` and `createJapaneseWord()` - moved to `glost-th` and `glost-ja` packages
46
+ - Language-specific helper functions externalized to dedicated packages
47
+
48
+ ### Internal Changes
49
+
50
+ - Fixed circular dependencies
51
+ - Reduced package size by ~30%
52
+ - Improved build performance and modularity
53
+
54
+ ### Patch Changes
55
+
56
+ - Updated dependencies
57
+
58
+ ## 0.1.1
59
+
60
+ ### Patch Changes
61
+
62
+ - Fix package names
63
+ - Updated dependencies
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 GLOST Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,199 @@
1
+ # glost
2
+
3
+ Core types and node creation for GLOST (Glossed Syntax Tree).
4
+
5
+ ## What is GLOST?
6
+
7
+ GLOST (Glossed Syntax Tree) is a Concrete Syntax Tree format that extends nlcst to support language learning annotations:
8
+
9
+ - **Translations and glosses** in multiple languages
10
+ - **Difficulty levels** and word frequency data
11
+ - **Pronunciation guides** (IPA, romanization, transcription systems)
12
+ - **Cultural context** and usage notes
13
+ - **Part-of-speech** tagging
14
+ - **Grammar metadata** for language learners
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pnpm add glost
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ### Simple Document Creation (Recommended)
25
+
26
+ ```typescript
27
+ import { createSimpleDocument, createGLOSTWordNode, getAllWords, NODE_TYPES } from "glost";
28
+ import type { GLOSTWord, GLOSTRoot } from "glost";
29
+
30
+ // Create a simple document from words
31
+ const words = [
32
+ createGLOSTWordNode({ value: "hello", lang: "en", script: "latin" }),
33
+ createGLOSTWordNode({ value: "world", lang: "en", script: "latin" })
34
+ ];
35
+
36
+ const document = createSimpleDocument(words, "en", "latin", {
37
+ sentenceText: "hello world"
38
+ });
39
+
40
+ // Access words with type-safe helpers
41
+ const allWords = getAllWords(document);
42
+ console.log(allWords.length); // 2
43
+ ```
44
+
45
+ ### Manual Word Creation
46
+
47
+ ```typescript
48
+ import { createGLOSTWordNode, createGLOSTRootNode } from "glost";
49
+ import type { GLOSTWord, GLOSTRoot } from "glost";
50
+
51
+ // Create a word node with annotations
52
+ // Language codes: ISO-639-1, ISO-639-3, or BCP-47 all work
53
+ const word = createGLOSTWordNode({
54
+ value: "สวัสดี", // Thai: hello
55
+ transcription: {
56
+ rtgs: { text: "sà-wàt-dii" },
56
+ ipa: { text: "sa.wàt.diː" }
58
+ },
59
+ metadata: {
60
+ partOfSpeech: "interjection",
61
+ usage: "greeting"
62
+ },
63
+ lang: "th", // Can also use "tha" (ISO-639-3) or "th-TH" (BCP-47)
64
+ script: "thai"
65
+ });
66
+ ```
67
+
68
+ ## API
69
+
70
+ ### Node Factory Functions
71
+
72
+ All factory functions accept a single options object for better readability and extensibility.
73
+
74
+ #### `createGLOSTWordNode(options)`
75
+
76
+ Create a word node with transcription and metadata.
77
+
78
+ ```typescript
79
+ const word = createGLOSTWordNode({
80
+ value: "hello",
81
+ transcription: { ipa: { text: "həˈloʊ" } },
82
+ metadata: { partOfSpeech: "interjection" },
83
+ lang: "en", // optional
84
+ script: "latin", // optional
85
+ extras: {} // optional extension data
86
+ });
87
+ ```
88
+
89
+ #### `createGLOSTSentenceNode(options)`
90
+
91
+ Create a sentence node containing word nodes.
92
+
93
+ ```typescript
94
+ const sentence = createGLOSTSentenceNode({
95
+ originalText: "Hello world",
96
+ lang: "en",
97
+ script: "latin",
98
+ children: [wordNode1, wordNode2], // optional
99
+ transcription: {}, // optional
100
+ extras: {} // optional
101
+ });
102
+ ```
103
+
104
+ #### `createGLOSTRootNode(options)`
105
+
106
+ Create a root document node.
107
+
108
+ ```typescript
109
+ const root = createGLOSTRootNode({
110
+ lang: "en",
111
+ script: "latin",
112
+ children: [paragraphNode], // optional
113
+ metadata: { title: "My Document" }, // optional
114
+ extras: {} // optional
115
+ });
116
+ ```
117
+
118
+ ### Helper Functions
119
+
120
+ Convenience functions for common language patterns:
121
+
122
+ #### `createSimpleWord(options)`
123
+
124
+ ```typescript
125
+ const word = createSimpleWord({
126
+ text: "hello",
127
+ transliteration: "həˈloʊ",
128
+ system: "ipa", // default: "ipa"
129
+ partOfSpeech: "noun" // default: "unknown"
130
+ });
131
+ ```
132
+
133
+ ### Language-Specific Helpers
134
+
135
+ **Note:** As of v0.2.0, language-specific helpers have been moved to separate packages.
136
+
137
+ #### Thai Language Support
138
+
139
+ ```bash
140
+ npm install glost-th
141
+ ```
142
+
143
+ ```typescript
144
+ import { createThaiWord } from 'glost-th';
145
+
146
+ const word = createThaiWord({
147
+ text: "สวัสดี",
148
+ rtgs: "sawatdi",
149
+ partOfSpeech: "interjection",
150
+ tone: 2,
151
+ syllables: ["sa", "wat", "di"]
152
+ });
153
+ ```
154
+
155
+ See [glost-th documentation](../languages/th/README.md).
156
+
157
+ #### Japanese Language Support
158
+
159
+ ```bash
160
+ npm install glost-ja
161
+ ```
162
+
163
+ ```typescript
164
+ import { createJapaneseWord } from 'glost-ja';
165
+
166
+ const word = createJapaneseWord({
167
+ text: "こんにちは",
168
+ romaji: "konnichiwa",
169
+ partOfSpeech: "interjection",
170
+ furigana: "こんにちは"
171
+ });
172
+ ```
173
+
174
+ See [glost-ja documentation](../languages/ja/README.md).
175
+
176
+ **Migration:** See [MIGRATION.md](../../MIGRATION.md) for upgrading from v0.1.x.
177
+
178
+ ## Features
179
+
180
+ - TypeScript support
181
+ - Extends nlcst (Natural Language Concrete Syntax Tree)
182
+ - Aims for compatibility with the unist ecosystem
183
+ - Framework-agnostic
184
+ - Includes Zod validation schemas
185
+
186
+ ## Related Packages
187
+
188
+ ### Core Packages
189
+ - `glost-common` - Shared utilities and language configs
190
+ - `glost-extensions` - Extension system for transforming GLOST trees
191
+ - `glost-utils` - Utilities for working with GLOST documents
192
+
193
+ ### Language Packages
194
+ - `glost-th` - Thai language support
195
+ - `glost-ja` - Japanese language support
196
+
197
+ ## Documentation
198
+
199
+ See the main GLOST repository for full documentation.
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Document Creation Performance Benchmarks
3
+ *
4
+ * Compares different approaches to creating GLOST documents
5
+ */
6
+ export {};
7
+ //# sourceMappingURL=document-creation.bench.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-creation.bench.d.ts","sourceRoot":"","sources":["../../src/__benchmarks__/document-creation.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Document Creation Performance Benchmarks
3
+ *
4
+ * Compares different approaches to creating GLOST documents
5
+ */
6
+ import { bench, describe } from 'vitest';
7
+ import { createSimpleDocument, createGLOSTWordNode, createDocumentFromSentences, createSentenceFromWords, createParagraphFromSentences, createDocumentFromParagraphs, } from '../index.js';
8
+ describe('Document Creation: Comparing Approaches', () => {
9
+ // Setup: build the 100 word nodes once, outside the bench callbacks, so word creation is not part of the measured work
10
+ const words100 = Array.from({ length: 100 }, (_, i) => createGLOSTWordNode({ value: `word${i}` }));
11
+ describe('Compare: Simple vs Full Hierarchy (100 words)', () => {
12
+ bench('createSimpleDocument (recommended)', () => {
13
+ createSimpleDocument(words100, "en", "latin");
14
+ });
15
+ bench('createDocumentFromSentences', () => {
16
+ const sentence = createSentenceFromWords(words100, "en", "latin", "text"); // all 100 words go into one sentence
17
+ createDocumentFromSentences([sentence], "en", "latin");
18
+ });
19
+ bench('createDocumentFromParagraphs (full hierarchy)', () => {
20
+ const sentence = createSentenceFromWords(words100, "en", "latin", "text");
21
+ const paragraph = createParagraphFromSentences([sentence]); // one paragraph wrapping the single sentence
22
+ createDocumentFromParagraphs([paragraph], "en", "latin");
23
+ });
24
+ });
25
+ describe('Compare: Word Node Creation Approaches', () => {
26
+ bench('minimal word node', () => {
27
+ createGLOSTWordNode({ value: "test" }); // only the required `value` option is supplied
28
+ });
29
+ bench('word with transcription', () => {
30
+ createGLOSTWordNode({
31
+ value: "test",
32
+ transcription: {
33
+ ipa: { text: "test", syllables: ["test"] }
34
+ }
35
+ });
36
+ });
37
+ bench('word with full metadata', () => {
38
+ createGLOSTWordNode({
39
+ value: "test",
40
+ transcription: {
41
+ ipa: { text: "test", syllables: ["test"] }
42
+ },
43
+ metadata: {
44
+ partOfSpeech: "noun",
45
+ meaning: "a test",
46
+ usage: "testing"
47
+ },
48
+ lang: "en",
49
+ script: "latin",
50
+ extras: {
51
+ translations: { th: "ทดสอบ" },
52
+ metadata: { frequency: "common" }
53
+ }
54
+ });
55
+ });
56
+ });
57
+ describe('Compare: Sentence Creation Strategies', () => {
58
+ const words10 = Array.from({ length: 10 }, (_, i) => createGLOSTWordNode({ value: `word${i}` })); // shared by both benches in this group
59
+ bench('createDocumentFromSentences - multiple small sentences', () => {
60
+ const sentences = Array.from({ length: 10 }, (_, i) => {
61
+ const sentenceWords = words10.slice(i, i + 5); // NOTE(review): words10 has 10 items, so for i > 5 the slice holds fewer than 5 words (1 word at i = 9) — confirm this is intended
62
+ return createSentenceFromWords(sentenceWords, "en", "latin", `Sentence ${i}`);
63
+ });
64
+ createDocumentFromSentences(sentences, "en", "latin");
65
+ });
66
+ bench('createSimpleDocument - single sentence', () => {
67
+ createSimpleDocument(words10, "en", "latin");
68
+ });
69
+ });
70
+ });
71
+ //# sourceMappingURL=document-creation.bench.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-creation.bench.js","sourceRoot":"","sources":["../../src/__benchmarks__/document-creation.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AACzC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,2BAA2B,EAC3B,uBAAuB,EACvB,4BAA4B,EAC5B,4BAA4B,GAC7B,MAAM,aAAa,CAAC;AAErB,QAAQ,CAAC,yCAAyC,EAAE,GAAG,EAAE;IACvD,+BAA+B;IAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACpD,mBAAmB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC3C,CAAC;IAEF,QAAQ,CAAC,+CAA+C,EAAE,GAAG,EAAE;QAC7D,KAAK,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC/C,oBAAoB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,MAAM,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAC1E,2BAA2B,CAAC,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,+CAA+C,EAAE,GAAG,EAAE;YAC1D,MAAM,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAC1E,MAAM,SAAS,GAAG,4BAA4B,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC3D,4BAA4B,CAAC,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,wCAAwC,EAAE,GAAG,EAAE;QACtD,KAAK,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC9B,mBAAmB,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACpC,mBAAmB,CAAC;gBAClB,KAAK,EAAE,MAAM;gBACb,aAAa,EAAE;oBACb,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,MAAM,CAAC,EAAE;iBAC3C;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACpC,mBAAmB,CAAC;gBAClB,KAAK,EAAE,MAAM;gBACb,aAAa,EAAE;oBACb,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,MAAM,CAAC,EAAE;iBAC3C;gBACD,QAAQ,EAAE;oBACR,YAAY,EAAE,MAAM;oBACpB,OAAO,EAAE,QAAQ;oBACjB,KAAK,EAAE,SAAS;iBACjB;gBACD,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,OAAO;gBACf,MAAM,EAAE;oBACN,YAAY,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE;oBAC7B,QAAQ,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;iBAClC;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ
,CAAC,uCAAuC,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClD,mBAAmB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC3C,CAAC;QAEF,KAAK,CAAC,wDAAwD,EAAE,GAAG,EAAE;YACnE,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACpD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9C,OAAO,uBAAuB,CAAC,aAAa,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC,EAAE,CAAC,CAAC;YAChF,CAAC,CAAC,CAAC;YACH,2BAA2B,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,wCAAwC,EAAE,GAAG,EAAE;YACnD,oBAAoB,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Document Traversal Performance Benchmarks
3
+ *
4
+ * Compares different approaches to traversing and accessing GLOST documents
5
+ */
6
+ export {};
7
+ //# sourceMappingURL=traversal.bench.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"traversal.bench.d.ts","sourceRoot":"","sources":["../../src/__benchmarks__/traversal.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Document Traversal Performance Benchmarks
3
+ *
4
+ * Compares different approaches to traversing and accessing GLOST documents
5
+ */
6
+ import { bench, describe } from 'vitest';
7
+ import { createSimpleDocument, createGLOSTWordNode, getAllWords, getFirstWord, getWordAtPath, } from '../index.js';
8
+ import { visit } from 'unist-util-visit';
9
+ import { NODE_TYPES } from '../types.js';
10
+ // Helper: builds an "en"/"latin" document via createSimpleDocument from `wordCount` generated word nodes (value `word<i>`, one IPA transcription, part of speech alternating by index)
11
+ function createTestDocument(wordCount) {
12
+ const words = Array.from({ length: wordCount }, (_, i) => createGLOSTWordNode({
13
+ value: `word${i}`,
14
+ transcription: {
15
+ ipa: { text: `wɜːrd${i}`, syllables: [`word${i}`] }
16
+ },
17
+ metadata: { partOfSpeech: i % 2 === 0 ? "noun" : "verb" } // even index -> "noun", odd index -> "verb"
18
+ }));
19
+ return createSimpleDocument(words, "en", "latin");
20
+ }
21
+ describe('Traversal: Comparing Access Patterns', () => {
22
+ const doc1000 = createTestDocument(1000); // built once and shared by every bench in this suite
23
+ describe('Compare: Getting All Words', () => {
24
+ bench('getAllWords (helper)', () => {
25
+ getAllWords(doc1000);
26
+ });
27
+ bench('visit with type filter', () => {
28
+ const words = []; // collected so the visitor does real work; never read afterwards
29
+ visit(doc1000, NODE_TYPES.WORD, (node) => {
30
+ words.push(node);
31
+ });
32
+ });
33
+ bench('manual recursive traversal', () => {
34
+ const words = [];
35
+ function traverse(node) {
36
+ if (node.type === NODE_TYPES.WORD) {
37
+ words.push(node);
38
+ }
39
+ if (node.children) {
40
+ node.children.forEach(traverse); // recurses into every child, including children of word nodes
41
+ }
42
+ }
43
+ traverse(doc1000);
44
+ });
45
+ });
46
+ describe('Compare: Finding First Word', () => {
47
+ bench('getFirstWord (optimized with SKIP)', () => {
48
+ getFirstWord(doc1000);
49
+ });
50
+ bench('getAllWords then [0]', () => {
51
+ getAllWords(doc1000)[0];
52
+ });
53
+ bench('manual early-exit traversal', () => {
54
+ let found; // assigned by traverse() when a word node is hit; not read afterwards
55
+ function traverse(node) {
56
+ if (node.type === NODE_TYPES.WORD) {
57
+ found = node;
58
+ return true; // stop at the first word; `true` propagates up through the callers to halt the recursion
59
+ }
60
+ if (node.children) {
61
+ for (const child of node.children) {
62
+ if (traverse(child))
63
+ return true;
64
+ }
65
+ }
66
+ return false;
67
+ }
68
+ traverse(doc1000);
69
+ });
70
+ });
71
+ describe('Compare: Direct Path Access', () => {
72
+ bench('getWordAtPath (direct indexing)', () => {
73
+ getWordAtPath(doc1000, { paragraph: 0, sentence: 0, word: 500 });
74
+ });
75
+ bench('getAllWords then filter by index', () => {
76
+ getAllWords(doc1000)[500]; // despite the bench label, this is plain array indexing, not a filter() call
77
+ });
78
+ });
79
+ });
80
+ describe('Traversal: Comparing Filter Strategies', () => {
81
+ const doc500 = createTestDocument(500); // built once and shared by every bench in this suite
82
+ describe('Compare: Filter by Part of Speech', () => {
83
+ bench('getAllWords + array filter', () => {
84
+ const words = getAllWords(doc500);
85
+ words.filter(w => w.metadata?.partOfSpeech === "noun"); // result is discarded; only the work itself is timed
86
+ });
87
+ bench('visit with conditional push', () => {
88
+ const nouns = [];
89
+ visit(doc500, NODE_TYPES.WORD, (node) => {
90
+ if (node.metadata?.partOfSpeech === "noun") {
91
+ nouns.push(node);
92
+ }
93
+ });
94
+ });
95
+ });
96
+ describe('Compare: Extract Word Text', () => {
97
+ const words = getAllWords(doc500); // extracted once, outside the bench callbacks, so only the map/reduce step is timed
98
+ bench('map with type guard', () => {
99
+ words.map(w => {
100
+ const textNode = w.children[0];
101
+ return textNode && 'value' in textNode ? textNode.value : ''; // falls back to '' when the first child is missing or has no `value`
102
+ });
103
+ });
104
+ bench('reduce to array', () => {
105
+ words.reduce((acc, w) => {
106
+ const textNode = w.children[0];
107
+ if (textNode && 'value' in textNode) {
108
+ acc.push(textNode.value); // unlike the map bench, words without a usable first child are skipped rather than mapped to ''
109
+ }
110
+ return acc;
111
+ }, []);
112
+ });
113
+ });
114
+ describe('Compare: Count with Condition', () => {
115
+ const words = getAllWords(doc500); // extracted once, outside the bench callbacks
116
+ bench('filter then length', () => {
117
+ words.filter(w => w.transcription !== undefined).length;
118
+ });
119
+ bench('reduce with counter', () => {
120
+ words.reduce((count, w) => count + (w.transcription ? 1 : 0), 0); // NOTE: truthiness check, while the filter bench tests `!== undefined`; they differ only for falsy non-undefined values such as null
121
+ });
122
+ });
123
+ });
124
+ //# sourceMappingURL=traversal.bench.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"traversal.bench.js","sourceRoot":"","sources":["../../src/__benchmarks__/traversal.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AACzC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,YAAY,EACZ,aAAa,GAGd,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,kCAAkC;AAClC,SAAS,kBAAkB,CAAC,SAAiB;IAC3C,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACvD,mBAAmB,CAAC;QAClB,KAAK,EAAE,OAAO,CAAC,EAAE;QACjB,aAAa,EAAE;YACb,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;SACpD;QACD,QAAQ,EAAE,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE;KAC1D,CAAC,CACH,CAAC;IACF,OAAO,oBAAoB,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACpD,CAAC;AAED,QAAQ,CAAC,sCAAsC,EAAE,GAAG,EAAE;IACpD,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAEzC,QAAQ,CAAC,4BAA4B,EAAE,GAAG,EAAE;QAC1C,KAAK,CAAC,sBAAsB,EAAE,GAAG,EAAE;YACjC,WAAW,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,wBAAwB,EAAE,GAAG,EAAE;YACnC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,KAAK,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE;gBACvC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,SAAS,QAAQ,CAAC,IAAS;gBACzB,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC;oBAClC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnB,CAAC;gBACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAClC,CAAC;YACH,CAAC;YACD,QAAQ,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,KAAK,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC/C,YAAY,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,sBAAsB,EAAE,GAAG,EAAE;YACjC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,IAAI,KAAU,CAAC;YACf,SAAS,QAAQ,CAAC,IAAS;gBA
CzB,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC;oBAClC,KAAK,GAAG,IAAI,CAAC;oBACb,OAAO,IAAI,CAAC,CAAC,aAAa;gBAC5B,CAAC;gBACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;wBAClC,IAAI,QAAQ,CAAC,KAAK,CAAC;4BAAE,OAAO,IAAI,CAAC;oBACnC,CAAC;gBACH,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,QAAQ,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,KAAK,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC5C,aAAa,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC7C,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,wCAAwC,EAAE,GAAG,EAAE;IACtD,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IAEvC,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;QACjD,KAAK,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;YAClC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE,YAAY,KAAK,MAAM,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,CAAC,IAAS,EAAE,EAAE;gBAC3C,IAAI,IAAI,CAAC,QAAQ,EAAE,YAAY,KAAK,MAAM,EAAE,CAAC;oBAC3C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,4BAA4B,EAAE,GAAG,EAAE;QAC1C,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAElC,KAAK,CAAC,qBAAqB,EAAE,GAAG,EAAE;YAChC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;gBACZ,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC/B,OAAO,QAAQ,IAAI,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/D,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,iBAAiB,EAAE,GAAG,EAAE;YAC5B,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;gBACtB,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC/B,IAAI,QAAQ,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACpC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAC3B,CAAC;gBACD,OAAO,GAAG,CAAC;YACb,CAAC,EA
AE,EAAc,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;QAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAElC,KAAK,CAAC,oBAAoB,EAAE,GAAG,EAAE;YAC/B,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,qBAAqB,EAAE,GAAG,EAAE;YAChC,KAAK,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * GLOST Migration CLI
4
+ *
5
+ * Command-line interface for migrating GLOST documents
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=migrate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"migrate.d.ts","sourceRoot":"","sources":["../../src/cli/migrate.ts"],"names":[],"mappings":";AACA;;;;GAIG"}