glost-core 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/LICENSE +21 -0
- package/README.md +199 -0
- package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
- package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
- package/dist/__benchmarks__/document-creation.bench.js +71 -0
- package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
- package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
- package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
- package/dist/__benchmarks__/traversal.bench.js +124 -0
- package/dist/__benchmarks__/traversal.bench.js.map +1 -0
- package/dist/cli/migrate.d.ts +8 -0
- package/dist/cli/migrate.d.ts.map +1 -0
- package/dist/cli/migrate.js +229 -0
- package/dist/cli/migrate.js.map +1 -0
- package/dist/errors.d.ts +168 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +300 -0
- package/dist/errors.js.map +1 -0
- package/dist/guards.d.ts +103 -0
- package/dist/guards.d.ts.map +1 -0
- package/dist/guards.js +264 -0
- package/dist/guards.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/nodes.d.ts +227 -0
- package/dist/nodes.d.ts.map +1 -0
- package/dist/nodes.js +243 -0
- package/dist/nodes.js.map +1 -0
- package/dist/types.d.ts +442 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +51 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +247 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +564 -0
- package/dist/utils.js.map +1 -0
- package/dist/validators.d.ts +1876 -0
- package/dist/validators.d.ts.map +1 -0
- package/dist/validators.js +302 -0
- package/dist/validators.js.map +1 -0
- package/package.json +73 -0
- package/src/__benchmarks__/document-creation.bench.ts +92 -0
- package/src/__benchmarks__/traversal.bench.ts +152 -0
- package/src/__tests__/README.md +20 -0
- package/src/__tests__/example.test.ts +43 -0
- package/src/__tests__/example.ts +186 -0
- package/src/__tests__/helpers.test.ts +178 -0
- package/src/__tests__/mock-data.ts +624 -0
- package/src/__tests__/performance.test.ts +317 -0
- package/src/__tests__/traversal.test.ts +170 -0
- package/src/cli/migrate.ts +294 -0
- package/src/errors.ts +394 -0
- package/src/guards.ts +341 -0
- package/src/index.ts +69 -0
- package/src/nodes.ts +409 -0
- package/src/types.ts +633 -0
- package/src/utils.ts +730 -0
- package/src/validators.ts +336 -0
- package/tsconfig.json +9 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# glost
|
|
2
|
+
|
|
3
|
+
## 0.4.0
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- **ESM Imports Fixed**: Added `.js` extensions to all barrel exports for proper Node.js ESM support
|
|
8
|
+
- **BCP-47 Language Standard**: All language codes now use BCP-47 format (e.g., `en-US`, `th-TH`)
|
|
9
|
+
- **Transcription Schema Cleanup**: Removed redundant `system` field from TranscriptionInfo type
|
|
10
|
+
- **Translation API Renamed**: Changed `sourceLanguage/targetLanguage` to `from/to` for clarity
|
|
11
|
+
|
|
12
|
+
### Breaking Changes
|
|
13
|
+
|
|
14
|
+
- Language codes standardized on BCP-47 (use migration CLI: `npx glost migrate v0.3-to-v0.4 ./src`)
|
|
15
|
+
- Translation extension API parameter names changed
|
|
16
|
+
- Transcription schema simplified
|
|
17
|
+
|
|
18
|
+
### New Features
|
|
19
|
+
|
|
20
|
+
- **Typed Extras**: Extensions can augment `GLOSTExtras` interface via declaration merging for full type safety
|
|
21
|
+
- **Standard Provider Interface**: New `GLOSTDataProvider<TInput, TOutput>` with batch processing and caching support
|
|
22
|
+
- **Better Error Messages**: Comprehensive error classes with context, suggestions, and documentation links
|
|
23
|
+
- **Language Utilities**: `normalizeLanguageCode()`, `matchLanguage()`, `parseLanguageCode()`, `findBestMatch()`, etc.
|
|
24
|
+
- **Migration CLI**: Built-in `npx glost migrate` tool for automated upgrades
|
|
25
|
+
|
|
26
|
+
### Patch Changes
|
|
27
|
+
|
|
28
|
+
- Updated dependencies
|
|
29
|
+
|
|
30
|
+
## 0.3.0
|
|
31
|
+
|
|
32
|
+
### Minor Changes
|
|
33
|
+
|
|
34
|
+
- Externalized language-specific helpers and data-dependent extensions into dedicated packages
|
|
35
|
+
- Extensions now require explicit data providers instead of fallback data
|
|
36
|
+
|
|
37
|
+
### Patch Changes
|
|
38
|
+
|
|
39
|
+
- Updated dependencies
|
|
40
|
+
|
|
41
|
+
## 0.2.0
|
|
42
|
+
|
|
43
|
+
### Breaking Changes
|
|
44
|
+
|
|
45
|
+
- Removed `createThaiWord()` and `createJapaneseWord()` - moved to `glost-th` and `glost-ja` packages
|
|
46
|
+
- Language-specific helper functions externalized to dedicated packages
|
|
47
|
+
|
|
48
|
+
### Internal Changes
|
|
49
|
+
|
|
50
|
+
- Fixed circular dependencies
|
|
51
|
+
- Reduced package size by ~30%
|
|
52
|
+
- Improved build performance and modularity
|
|
53
|
+
|
|
54
|
+
### Patch Changes
|
|
55
|
+
|
|
56
|
+
- Updated dependencies
|
|
57
|
+
|
|
58
|
+
## 0.1.1
|
|
59
|
+
|
|
60
|
+
### Patch Changes
|
|
61
|
+
|
|
62
|
+
- Fix package names
|
|
63
|
+
- Updated dependencies
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 GLOST Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# glost
|
|
2
|
+
|
|
3
|
+
Core types and node creation for GLOST (Glossed Syntax Tree).
|
|
4
|
+
|
|
5
|
+
## What is GLOST?
|
|
6
|
+
|
|
7
|
+
GLOST (Glossed Syntax Tree) is a Concrete Syntax Tree format that extends nlcst to support language learning annotations:
|
|
8
|
+
|
|
9
|
+
- **Translations and glosses** in multiple languages
|
|
10
|
+
- **Difficulty levels** and word frequency data
|
|
11
|
+
- **Pronunciation guides** (IPA, romanization, transcription systems)
|
|
12
|
+
- **Cultural context** and usage notes
|
|
13
|
+
- **Part-of-speech** tagging
|
|
14
|
+
- **Grammar metadata** for language learners
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pnpm add glost
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
### Simple Document Creation (Recommended)
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
import { createSimpleDocument, createGLOSTWordNode, getAllWords, NODE_TYPES } from "glost";
|
|
28
|
+
import type { GLOSTWord, GLOSTRoot } from "glost";
|
|
29
|
+
|
|
30
|
+
// Create a simple document from words
|
|
31
|
+
const words = [
|
|
32
|
+
createGLOSTWordNode({ value: "hello", lang: "en", script: "latin" }),
|
|
33
|
+
createGLOSTWordNode({ value: "world", lang: "en", script: "latin" })
|
|
34
|
+
];
|
|
35
|
+
|
|
36
|
+
const document = createSimpleDocument(words, "en", "latin", {
|
|
37
|
+
sentenceText: "hello world"
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
// Access words with type-safe helpers
|
|
41
|
+
const allWords = getAllWords(document);
|
|
42
|
+
console.log(allWords.length); // 2
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Manual Word Creation
|
|
46
|
+
|
|
47
|
+
```typescript
|
|
48
|
+
import { createGLOSTWordNode, createGLOSTRootNode } from "glost";
|
|
49
|
+
import type { GLOSTWord, GLOSTRoot } from "glost";
|
|
50
|
+
|
|
51
|
+
// Create a word node with annotations
|
|
52
|
+
// Language codes: ISO-639-1, ISO-639-3, or BCP-47 all work
|
|
53
|
+
const word = createGLOSTWordNode({
|
|
54
|
+
value: "สวัสดี", // Thai: hello
|
|
55
|
+
transcription: {
|
|
56
|
+
rtgs: { text: "sà-wàt-dii" },
|
|
57
|
+
ipa: { text: "sa.wàt.diː" }
|
|
58
|
+
},
|
|
59
|
+
metadata: {
|
|
60
|
+
partOfSpeech: "interjection",
|
|
61
|
+
usage: "greeting"
|
|
62
|
+
},
|
|
63
|
+
lang: "th", // Can also use "tha" (ISO-639-3) or "th-TH" (BCP-47)
|
|
64
|
+
script: "thai"
|
|
65
|
+
});
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## API
|
|
69
|
+
|
|
70
|
+
### Node Factory Functions
|
|
71
|
+
|
|
72
|
+
All factory functions accept a single options object for better readability and extensibility.
|
|
73
|
+
|
|
74
|
+
#### `createGLOSTWordNode(options)`
|
|
75
|
+
|
|
76
|
+
Create a word node with transcription and metadata.
|
|
77
|
+
|
|
78
|
+
```typescript
|
|
79
|
+
const word = createGLOSTWordNode({
|
|
80
|
+
value: "hello",
|
|
81
|
+
transcription: { ipa: { text: "həˈloʊ" } },
|
|
82
|
+
metadata: { partOfSpeech: "interjection" },
|
|
83
|
+
lang: "en", // optional
|
|
84
|
+
script: "latin", // optional
|
|
85
|
+
extras: {} // optional extension data
|
|
86
|
+
});
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
#### `createGLOSTSentenceNode(options)`
|
|
90
|
+
|
|
91
|
+
Create a sentence node containing word nodes.
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
const sentence = createGLOSTSentenceNode({
|
|
95
|
+
originalText: "Hello world",
|
|
96
|
+
lang: "en",
|
|
97
|
+
script: "latin",
|
|
98
|
+
children: [wordNode1, wordNode2], // optional
|
|
99
|
+
transcription: {}, // optional
|
|
100
|
+
extras: {} // optional
|
|
101
|
+
});
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
#### `createGLOSTRootNode(options)`
|
|
105
|
+
|
|
106
|
+
Create a root document node.
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
const root = createGLOSTRootNode({
|
|
110
|
+
lang: "en",
|
|
111
|
+
script: "latin",
|
|
112
|
+
children: [paragraphNode], // optional
|
|
113
|
+
metadata: { title: "My Document" }, // optional
|
|
114
|
+
extras: {} // optional
|
|
115
|
+
});
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Helper Functions
|
|
119
|
+
|
|
120
|
+
Convenience functions for common language patterns:
|
|
121
|
+
|
|
122
|
+
#### `createSimpleWord(options)`
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
const word = createSimpleWord({
|
|
126
|
+
text: "hello",
|
|
127
|
+
transliteration: "həˈloʊ",
|
|
128
|
+
system: "ipa", // default: "ipa"
|
|
129
|
+
partOfSpeech: "noun" // default: "unknown"
|
|
130
|
+
});
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Language-Specific Helpers
|
|
134
|
+
|
|
135
|
+
**Note:** As of v0.2.0, language-specific helpers have been moved to separate packages.
|
|
136
|
+
|
|
137
|
+
#### Thai Language Support
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
npm install glost-th
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
import { createThaiWord } from 'glost-th';
|
|
145
|
+
|
|
146
|
+
const word = createThaiWord({
|
|
147
|
+
text: "สวัสดี",
|
|
148
|
+
rtgs: "sawatdi",
|
|
149
|
+
partOfSpeech: "interjection",
|
|
150
|
+
tone: 2,
|
|
151
|
+
syllables: ["sa", "wat", "di"]
|
|
152
|
+
});
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
See [glost-th documentation](../languages/th/README.md).
|
|
156
|
+
|
|
157
|
+
#### Japanese Language Support
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
npm install glost-ja
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
```typescript
|
|
164
|
+
import { createJapaneseWord } from 'glost-ja';
|
|
165
|
+
|
|
166
|
+
const word = createJapaneseWord({
|
|
167
|
+
text: "こんにちは",
|
|
168
|
+
romaji: "konnichiwa",
|
|
169
|
+
partOfSpeech: "interjection",
|
|
170
|
+
furigana: "こんにちは"
|
|
171
|
+
});
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
See [glost-ja documentation](../languages/ja/README.md).
|
|
175
|
+
|
|
176
|
+
**Migration:** See [MIGRATION.md](../../MIGRATION.md) for upgrading from v0.1.x.
|
|
177
|
+
|
|
178
|
+
## Features
|
|
179
|
+
|
|
180
|
+
- TypeScript support
|
|
181
|
+
- Extends nlcst (Natural Language Concrete Syntax Tree)
|
|
182
|
+
- Aims for compatibility with the unist ecosystem
|
|
183
|
+
- Framework-agnostic
|
|
184
|
+
- Includes Zod validation schemas
|
|
185
|
+
|
|
186
|
+
## Related Packages
|
|
187
|
+
|
|
188
|
+
### Core Packages
|
|
189
|
+
- `glost-common` - Shared utilities and language configs
|
|
190
|
+
- `glost-extensions` - Extension system for transforming GLOST trees
|
|
191
|
+
- `glost-utils` - Utilities for working with GLOST documents
|
|
192
|
+
|
|
193
|
+
### Language Packages
|
|
194
|
+
- `glost-th` - Thai language support
|
|
195
|
+
- `glost-ja` - Japanese language support
|
|
196
|
+
|
|
197
|
+
## Documentation
|
|
198
|
+
|
|
199
|
+
See the main GLOST repository for full documentation.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-creation.bench.d.ts","sourceRoot":"","sources":["../../src/__benchmarks__/document-creation.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Creation Performance Benchmarks
|
|
3
|
+
*
|
|
4
|
+
* Compares different approaches to creating GLOST documents
|
|
5
|
+
*/
|
|
6
|
+
import { bench, describe } from 'vitest';
|
|
7
|
+
import { createSimpleDocument, createGLOSTWordNode, createDocumentFromSentences, createSentenceFromWords, createParagraphFromSentences, createDocumentFromParagraphs, } from '../index.js';
|
|
8
|
+
describe('Document Creation: Comparing Approaches', () => {
|
|
9
|
+
// Setup: Create test data once
|
|
10
|
+
const words100 = Array.from({ length: 100 }, (_, i) => createGLOSTWordNode({ value: `word${i}` }));
|
|
11
|
+
describe('Compare: Simple vs Full Hierarchy (100 words)', () => {
|
|
12
|
+
bench('createSimpleDocument (recommended)', () => {
|
|
13
|
+
createSimpleDocument(words100, "en", "latin");
|
|
14
|
+
});
|
|
15
|
+
bench('createDocumentFromSentences', () => {
|
|
16
|
+
const sentence = createSentenceFromWords(words100, "en", "latin", "text");
|
|
17
|
+
createDocumentFromSentences([sentence], "en", "latin");
|
|
18
|
+
});
|
|
19
|
+
bench('createDocumentFromParagraphs (full hierarchy)', () => {
|
|
20
|
+
const sentence = createSentenceFromWords(words100, "en", "latin", "text");
|
|
21
|
+
const paragraph = createParagraphFromSentences([sentence]);
|
|
22
|
+
createDocumentFromParagraphs([paragraph], "en", "latin");
|
|
23
|
+
});
|
|
24
|
+
});
|
|
25
|
+
describe('Compare: Word Node Creation Approaches', () => {
|
|
26
|
+
bench('minimal word node', () => {
|
|
27
|
+
createGLOSTWordNode({ value: "test" });
|
|
28
|
+
});
|
|
29
|
+
bench('word with transcription', () => {
|
|
30
|
+
createGLOSTWordNode({
|
|
31
|
+
value: "test",
|
|
32
|
+
transcription: {
|
|
33
|
+
ipa: { text: "test", syllables: ["test"] }
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
});
|
|
37
|
+
bench('word with full metadata', () => {
|
|
38
|
+
createGLOSTWordNode({
|
|
39
|
+
value: "test",
|
|
40
|
+
transcription: {
|
|
41
|
+
ipa: { text: "test", syllables: ["test"] }
|
|
42
|
+
},
|
|
43
|
+
metadata: {
|
|
44
|
+
partOfSpeech: "noun",
|
|
45
|
+
meaning: "a test",
|
|
46
|
+
usage: "testing"
|
|
47
|
+
},
|
|
48
|
+
lang: "en",
|
|
49
|
+
script: "latin",
|
|
50
|
+
extras: {
|
|
51
|
+
translations: { th: "ทดสอบ" },
|
|
52
|
+
metadata: { frequency: "common" }
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
});
|
|
57
|
+
describe('Compare: Sentence Creation Strategies', () => {
|
|
58
|
+
const words10 = Array.from({ length: 10 }, (_, i) => createGLOSTWordNode({ value: `word${i}` }));
|
|
59
|
+
bench('createDocumentFromSentences - multiple small sentences', () => {
|
|
60
|
+
const sentences = Array.from({ length: 10 }, (_, i) => {
|
|
61
|
+
const sentenceWords = words10.slice(i, i + 5);
|
|
62
|
+
return createSentenceFromWords(sentenceWords, "en", "latin", `Sentence ${i}`);
|
|
63
|
+
});
|
|
64
|
+
createDocumentFromSentences(sentences, "en", "latin");
|
|
65
|
+
});
|
|
66
|
+
bench('createSimpleDocument - single sentence', () => {
|
|
67
|
+
createSimpleDocument(words10, "en", "latin");
|
|
68
|
+
});
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
//# sourceMappingURL=document-creation.bench.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document-creation.bench.js","sourceRoot":"","sources":["../../src/__benchmarks__/document-creation.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AACzC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,2BAA2B,EAC3B,uBAAuB,EACvB,4BAA4B,EAC5B,4BAA4B,GAC7B,MAAM,aAAa,CAAC;AAErB,QAAQ,CAAC,yCAAyC,EAAE,GAAG,EAAE;IACvD,+BAA+B;IAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACpD,mBAAmB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC3C,CAAC;IAEF,QAAQ,CAAC,+CAA+C,EAAE,GAAG,EAAE;QAC7D,KAAK,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC/C,oBAAoB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,MAAM,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAC1E,2BAA2B,CAAC,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,+CAA+C,EAAE,GAAG,EAAE;YAC1D,MAAM,QAAQ,GAAG,uBAAuB,CAAC,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;YAC1E,MAAM,SAAS,GAAG,4BAA4B,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC3D,4BAA4B,CAAC,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,wCAAwC,EAAE,GAAG,EAAE;QACtD,KAAK,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC9B,mBAAmB,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACpC,mBAAmB,CAAC;gBAClB,KAAK,EAAE,MAAM;gBACb,aAAa,EAAE;oBACb,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,MAAM,CAAC,EAAE;iBAC3C;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACpC,mBAAmB,CAAC;gBAClB,KAAK,EAAE,MAAM;gBACb,aAAa,EAAE;oBACb,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,MAAM,CAAC,EAAE;iBAC3C;gBACD,QAAQ,EAAE;oBACR,YAAY,EAAE,MAAM;oBACpB,OAAO,EAAE,QAAQ;oBACjB,KAAK,EAAE,SAAS;iBACjB;gBACD,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,OAAO;gBACf,MAAM,EAAE;oBACN,YAAY,EAAE,EAAE,EAAE,EAAE,OAAO,EAAE;oBAC7B,QAAQ,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;iBAClC;aACF,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,C
AAC,uCAAuC,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAClD,mBAAmB,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC3C,CAAC;QAEF,KAAK,CAAC,wDAAwD,EAAE,GAAG,EAAE;YACnE,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;gBACpD,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9C,OAAO,uBAAuB,CAAC,aAAa,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,CAAC,EAAE,CAAC,CAAC;YAChF,CAAC,CAAC,CAAC;YACH,2BAA2B,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACxD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,wCAAwC,EAAE,GAAG,EAAE;YACnD,oBAAoB,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"traversal.bench.d.ts","sourceRoot":"","sources":["../../src/__benchmarks__/traversal.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG"}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Traversal Performance Benchmarks
|
|
3
|
+
*
|
|
4
|
+
* Compares different approaches to traversing and accessing GLOST documents
|
|
5
|
+
*/
|
|
6
|
+
import { bench, describe } from 'vitest';
|
|
7
|
+
import { createSimpleDocument, createGLOSTWordNode, getAllWords, getFirstWord, getWordAtPath, } from '../index.js';
|
|
8
|
+
import { visit } from 'unist-util-visit';
|
|
9
|
+
import { NODE_TYPES } from '../types.js';
|
|
10
|
+
// Helper to create test documents
|
|
11
|
+
function createTestDocument(wordCount) {
|
|
12
|
+
const words = Array.from({ length: wordCount }, (_, i) => createGLOSTWordNode({
|
|
13
|
+
value: `word${i}`,
|
|
14
|
+
transcription: {
|
|
15
|
+
ipa: { text: `wɜːrd${i}`, syllables: [`word${i}`] }
|
|
16
|
+
},
|
|
17
|
+
metadata: { partOfSpeech: i % 2 === 0 ? "noun" : "verb" }
|
|
18
|
+
}));
|
|
19
|
+
return createSimpleDocument(words, "en", "latin");
|
|
20
|
+
}
|
|
21
|
+
describe('Traversal: Comparing Access Patterns', () => {
|
|
22
|
+
const doc1000 = createTestDocument(1000);
|
|
23
|
+
describe('Compare: Getting All Words', () => {
|
|
24
|
+
bench('getAllWords (helper)', () => {
|
|
25
|
+
getAllWords(doc1000);
|
|
26
|
+
});
|
|
27
|
+
bench('visit with type filter', () => {
|
|
28
|
+
const words = [];
|
|
29
|
+
visit(doc1000, NODE_TYPES.WORD, (node) => {
|
|
30
|
+
words.push(node);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
bench('manual recursive traversal', () => {
|
|
34
|
+
const words = [];
|
|
35
|
+
function traverse(node) {
|
|
36
|
+
if (node.type === NODE_TYPES.WORD) {
|
|
37
|
+
words.push(node);
|
|
38
|
+
}
|
|
39
|
+
if (node.children) {
|
|
40
|
+
node.children.forEach(traverse);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
traverse(doc1000);
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
describe('Compare: Finding First Word', () => {
|
|
47
|
+
bench('getFirstWord (optimized with SKIP)', () => {
|
|
48
|
+
getFirstWord(doc1000);
|
|
49
|
+
});
|
|
50
|
+
bench('getAllWords then [0]', () => {
|
|
51
|
+
getAllWords(doc1000)[0];
|
|
52
|
+
});
|
|
53
|
+
bench('manual early-exit traversal', () => {
|
|
54
|
+
let found;
|
|
55
|
+
function traverse(node) {
|
|
56
|
+
if (node.type === NODE_TYPES.WORD) {
|
|
57
|
+
found = node;
|
|
58
|
+
return true; // exit early
|
|
59
|
+
}
|
|
60
|
+
if (node.children) {
|
|
61
|
+
for (const child of node.children) {
|
|
62
|
+
if (traverse(child))
|
|
63
|
+
return true;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
return false;
|
|
67
|
+
}
|
|
68
|
+
traverse(doc1000);
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
describe('Compare: Direct Path Access', () => {
|
|
72
|
+
bench('getWordAtPath (direct indexing)', () => {
|
|
73
|
+
getWordAtPath(doc1000, { paragraph: 0, sentence: 0, word: 500 });
|
|
74
|
+
});
|
|
75
|
+
bench('getAllWords then filter by index', () => {
|
|
76
|
+
getAllWords(doc1000)[500];
|
|
77
|
+
});
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
describe('Traversal: Comparing Filter Strategies', () => {
|
|
81
|
+
const doc500 = createTestDocument(500);
|
|
82
|
+
describe('Compare: Filter by Part of Speech', () => {
|
|
83
|
+
bench('getAllWords + array filter', () => {
|
|
84
|
+
const words = getAllWords(doc500);
|
|
85
|
+
words.filter(w => w.metadata?.partOfSpeech === "noun");
|
|
86
|
+
});
|
|
87
|
+
bench('visit with conditional push', () => {
|
|
88
|
+
const nouns = [];
|
|
89
|
+
visit(doc500, NODE_TYPES.WORD, (node) => {
|
|
90
|
+
if (node.metadata?.partOfSpeech === "noun") {
|
|
91
|
+
nouns.push(node);
|
|
92
|
+
}
|
|
93
|
+
});
|
|
94
|
+
});
|
|
95
|
+
});
|
|
96
|
+
describe('Compare: Extract Word Text', () => {
|
|
97
|
+
const words = getAllWords(doc500);
|
|
98
|
+
bench('map with type guard', () => {
|
|
99
|
+
words.map(w => {
|
|
100
|
+
const textNode = w.children[0];
|
|
101
|
+
return textNode && 'value' in textNode ? textNode.value : '';
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
bench('reduce to array', () => {
|
|
105
|
+
words.reduce((acc, w) => {
|
|
106
|
+
const textNode = w.children[0];
|
|
107
|
+
if (textNode && 'value' in textNode) {
|
|
108
|
+
acc.push(textNode.value);
|
|
109
|
+
}
|
|
110
|
+
return acc;
|
|
111
|
+
}, []);
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
describe('Compare: Count with Condition', () => {
|
|
115
|
+
const words = getAllWords(doc500);
|
|
116
|
+
bench('filter then length', () => {
|
|
117
|
+
words.filter(w => w.transcription !== undefined).length;
|
|
118
|
+
});
|
|
119
|
+
bench('reduce with counter', () => {
|
|
120
|
+
words.reduce((count, w) => count + (w.transcription ? 1 : 0), 0);
|
|
121
|
+
});
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
//# sourceMappingURL=traversal.bench.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"traversal.bench.js","sourceRoot":"","sources":["../../src/__benchmarks__/traversal.bench.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,QAAQ,CAAC;AACzC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,WAAW,EACX,YAAY,EACZ,aAAa,GAGd,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,kCAAkC;AAClC,SAAS,kBAAkB,CAAC,SAAiB;IAC3C,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACvD,mBAAmB,CAAC;QAClB,KAAK,EAAE,OAAO,CAAC,EAAE;QACjB,aAAa,EAAE;YACb,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;SACpD;QACD,QAAQ,EAAE,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE;KAC1D,CAAC,CACH,CAAC;IACF,OAAO,oBAAoB,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACpD,CAAC;AAED,QAAQ,CAAC,sCAAsC,EAAE,GAAG,EAAE;IACpD,MAAM,OAAO,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;IAEzC,QAAQ,CAAC,4BAA4B,EAAE,GAAG,EAAE;QAC1C,KAAK,CAAC,sBAAsB,EAAE,GAAG,EAAE;YACjC,WAAW,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,wBAAwB,EAAE,GAAG,EAAE;YACnC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,KAAK,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE;gBACvC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,SAAS,QAAQ,CAAC,IAAS;gBACzB,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC;oBAClC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnB,CAAC;gBACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAClC,CAAC;YACH,CAAC;YACD,QAAQ,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,KAAK,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC/C,YAAY,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,sBAAsB,EAAE,GAAG,EAAE;YACjC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,IAAI,KAAU,CAAC;YACf,SAAS,QAAQ,CAAC,IAAS;gBACz
B,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC;oBAClC,KAAK,GAAG,IAAI,CAAC;oBACb,OAAO,IAAI,CAAC,CAAC,aAAa;gBAC5B,CAAC;gBACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;wBAClC,IAAI,QAAQ,CAAC,KAAK,CAAC;4BAAE,OAAO,IAAI,CAAC;oBACnC,CAAC;gBACH,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,QAAQ,CAAC,OAAO,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;QAC3C,KAAK,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC5C,aAAa,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC7C,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,wCAAwC,EAAE,GAAG,EAAE;IACtD,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IAEvC,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;QACjD,KAAK,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;YAClC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE,YAAY,KAAK,MAAM,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACxC,MAAM,KAAK,GAAU,EAAE,CAAC;YACxB,KAAK,CAAC,MAAM,EAAE,UAAU,CAAC,IAAI,EAAE,CAAC,IAAS,EAAE,EAAE;gBAC3C,IAAI,IAAI,CAAC,QAAQ,EAAE,YAAY,KAAK,MAAM,EAAE,CAAC;oBAC3C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,4BAA4B,EAAE,GAAG,EAAE;QAC1C,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAElC,KAAK,CAAC,qBAAqB,EAAE,GAAG,EAAE;YAChC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;gBACZ,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC/B,OAAO,QAAQ,IAAI,OAAO,IAAI,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/D,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,iBAAiB,EAAE,GAAG,EAAE;YAC5B,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;gBACtB,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;gBAC/B,IAAI,QAAQ,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACpC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAC3B,CAAC;gBACD,OAAO,GAAG,CAAC;YACb,CAAC,EAAE
,EAAc,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;QAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAElC,KAAK,CAAC,oBAAoB,EAAE,GAAG,EAAE;YAC/B,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,qBAAqB,EAAE,GAAG,EAAE;YAChC,KAAK,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"migrate.d.ts","sourceRoot":"","sources":["../../src/cli/migrate.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
|