glost-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/CHANGELOG.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +199 -0
  4. package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
  5. package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
  6. package/dist/__benchmarks__/document-creation.bench.js +71 -0
  7. package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
  8. package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
  9. package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
  10. package/dist/__benchmarks__/traversal.bench.js +124 -0
  11. package/dist/__benchmarks__/traversal.bench.js.map +1 -0
  12. package/dist/cli/migrate.d.ts +8 -0
  13. package/dist/cli/migrate.d.ts.map +1 -0
  14. package/dist/cli/migrate.js +229 -0
  15. package/dist/cli/migrate.js.map +1 -0
  16. package/dist/errors.d.ts +168 -0
  17. package/dist/errors.d.ts.map +1 -0
  18. package/dist/errors.js +300 -0
  19. package/dist/errors.js.map +1 -0
  20. package/dist/guards.d.ts +103 -0
  21. package/dist/guards.d.ts.map +1 -0
  22. package/dist/guards.js +264 -0
  23. package/dist/guards.js.map +1 -0
  24. package/dist/index.d.ts +9 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +25 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/nodes.d.ts +227 -0
  29. package/dist/nodes.d.ts.map +1 -0
  30. package/dist/nodes.js +243 -0
  31. package/dist/nodes.js.map +1 -0
  32. package/dist/types.d.ts +442 -0
  33. package/dist/types.d.ts.map +1 -0
  34. package/dist/types.js +51 -0
  35. package/dist/types.js.map +1 -0
  36. package/dist/utils.d.ts +247 -0
  37. package/dist/utils.d.ts.map +1 -0
  38. package/dist/utils.js +564 -0
  39. package/dist/utils.js.map +1 -0
  40. package/dist/validators.d.ts +1876 -0
  41. package/dist/validators.d.ts.map +1 -0
  42. package/dist/validators.js +302 -0
  43. package/dist/validators.js.map +1 -0
  44. package/package.json +73 -0
  45. package/src/__benchmarks__/document-creation.bench.ts +92 -0
  46. package/src/__benchmarks__/traversal.bench.ts +152 -0
  47. package/src/__tests__/README.md +20 -0
  48. package/src/__tests__/example.test.ts +43 -0
  49. package/src/__tests__/example.ts +186 -0
  50. package/src/__tests__/helpers.test.ts +178 -0
  51. package/src/__tests__/mock-data.ts +624 -0
  52. package/src/__tests__/performance.test.ts +317 -0
  53. package/src/__tests__/traversal.test.ts +170 -0
  54. package/src/cli/migrate.ts +294 -0
  55. package/src/errors.ts +394 -0
  56. package/src/guards.ts +341 -0
  57. package/src/index.ts +69 -0
  58. package/src/nodes.ts +409 -0
  59. package/src/types.ts +633 -0
  60. package/src/utils.ts +730 -0
  61. package/src/validators.ts +336 -0
  62. package/tsconfig.json +9 -0
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Performance Regression Tests
3
+ *
4
+ * Guards against performance regressions by setting baseline
5
+ * performance expectations for common operations.
6
+ *
7
+ * These tests should be run regularly to ensure performance
8
+ * improvements are maintained and no regressions are introduced.
9
+ */
10
+
11
+ import { describe, it, expect } from 'vitest';
12
+ import {
13
+ createSimpleDocument,
14
+ createGLOSTWordNode,
15
+ getAllWords,
16
+ getFirstWord,
17
+ getWordAtPath,
18
+ getAllSentences,
19
+ createSentenceFromWords,
20
+ createDocumentFromSentences,
21
+ } from '../index.js';
22
+
23
+ // Helper to create test document with realistic data
24
/**
 * Builds a test document from `wordCount` generated word nodes.
 *
 * Every word has the value `word<i>`. When `withMetadata` is true, each word
 * additionally receives an IPA transcription and a metadata object whose part
 * of speech alternates between "noun" (even index) and "verb" (odd index).
 *
 * The options are written as object literals passed straight to
 * `createGLOSTWordNode` (rather than an `any` bag mutated afterwards) so the
 * compiler checks them against the factory's parameter type.
 *
 * @param wordCount - Number of word nodes to generate.
 * @param withMetadata - Whether to attach transcription and metadata.
 * @returns The result of `createSimpleDocument(words, "en", "latin")`.
 */
function createTestDocument(wordCount: number, withMetadata = false) {
  const words = Array.from({ length: wordCount }, (_, i) =>
    withMetadata
      ? createGLOSTWordNode({
          value: `word${i}`,
          transcription: {
            ipa: { text: `wɜːrd${i}`, syllables: [`word${i}`] },
          },
          metadata: {
            partOfSpeech: i % 2 === 0 ? "noun" : "verb",
            meaning: `meaning of word ${i}`,
          },
        })
      : createGLOSTWordNode({ value: `word${i}` }),
  );
  return createSimpleDocument(words, "en", "latin");
}
42
+
43
+ describe('Performance Regression Tests', () => {
44
// Wall-clock budgets for building word nodes and wrapping them in a document.
describe('Document Creation Performance', () => {
  // Runs `work` once and returns the elapsed time in milliseconds.
  const elapsedMs = (work: () => void): number => {
    const begin = performance.now();
    work();
    return performance.now() - begin;
  };

  // Creates `count` plain word nodes and wraps them in an "en"/"latin" document.
  const buildPlainDocument = (count: number): void => {
    const nodes = Array.from({ length: count }, (_, idx) =>
      createGLOSTWordNode({ value: `word${idx}` })
    );
    createSimpleDocument(nodes, "en", "latin");
  };

  it('should create 100-word document in under 10ms', () => {
    const took = elapsedMs(() => buildPlainDocument(100));

    expect(took).toBeLessThan(10);
  });

  it('should create 1000-word document in under 50ms', () => {
    const took = elapsedMs(() => buildPlainDocument(1000));

    expect(took).toBeLessThan(50);
  });

  it('should create word with full metadata in under 1ms', () => {
    // A single node populated with every option this suite exercises.
    const took = elapsedMs(() => {
      createGLOSTWordNode({
        value: "test",
        transcription: {
          ipa: { text: "test", syllables: ["test"] }
        },
        metadata: {
          partOfSpeech: "noun",
          meaning: "a test",
          usage: "testing"
        },
        lang: "en",
        script: "latin",
        extras: {
          translations: { th: "ทดสอบ" },
          metadata: { frequency: "common" }
        }
      });
    });

    expect(took).toBeLessThan(1);
  });
});
91
+
92
// Timing budgets for the read-only traversal helpers. Fixture documents are
// built before the timer starts, so only the helper call itself is measured.
describe('Document Traversal Performance', () => {
  // Title now states the budget that is actually asserted (30ms); the limit
  // was relaxed from 20ms to tolerate slower machines.
  it('should traverse 1000-word document in under 30ms', () => {
    const doc = createTestDocument(1000);

    const start = performance.now();
    getAllWords(doc);
    const duration = performance.now() - start;

    expect(duration).toBeLessThan(30); // Adjusted for various system capabilities
  });

  // Title now states the budget that is actually asserted (150ms); the limit
  // was relaxed from 100ms to tolerate slower machines.
  it('should traverse 5000-word document in under 150ms', () => {
    const doc = createTestDocument(5000);

    const start = performance.now();
    getAllWords(doc);
    const duration = performance.now() - start;

    expect(duration).toBeLessThan(150); // Adjusted for various system capabilities
  });

  it('should traverse 10000-word document in under 200ms', () => {
    const doc = createTestDocument(10000);

    const start = performance.now();
    getAllWords(doc);
    const duration = performance.now() - start;

    expect(duration).toBeLessThan(200);
  }, 10000); // 10s timeout

  it('should find first word in 10000-word document instantly', () => {
    const doc = createTestDocument(10000);

    const start = performance.now();
    getFirstWord(doc);
    const duration = performance.now() - start;

    // Should be very fast with SKIP optimization.
    // NOTE(review): this ceiling equals the full 10000-word traversal budget
    // in the test above, so it works as a smoke check only and would not
    // catch the loss of an early exit.
    expect(duration).toBeLessThan(200); // Generous limit accounting for variance
  });

  it('should access word by path in constant time', () => {
    const doc = createTestDocument(5000);

    const start = performance.now();
    getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 2500 });
    const duration = performance.now() - start;

    // Direct access is expected to be O(1). A single sample can only confirm
    // that the lookup fits the 5ms budget, not the complexity class.
    expect(duration).toBeLessThan(5);
  });
});
145
+
146
// Timing budgets for common array operations over the result of getAllWords.
// Each computed result is kept and sanity-checked so the timed expression is
// neither a bare unused statement nor trivially removable as dead code.
describe('Complex Operations Performance', () => {
  it('should filter 5000 words by POS in under 50ms', () => {
    const doc = createTestDocument(5000, true);
    const words = getAllWords(doc);

    const start = performance.now();
    const nouns = words.filter(w => w.metadata?.partOfSpeech === "noun");
    const duration = performance.now() - start;

    // A filter can never yield more items than it was given.
    expect(nouns.length).toBeLessThanOrEqual(words.length);
    expect(duration).toBeLessThan(50);
  });

  it('should map 5000 words to text in under 30ms', () => {
    const doc = createTestDocument(5000);
    const words = getAllWords(doc);

    const start = performance.now();
    const texts = words.map(w => w.children[0]?.value);
    const duration = performance.now() - start;

    // map preserves length, so this confirms every word was visited.
    expect(texts).toHaveLength(words.length);
    expect(duration).toBeLessThan(30);
  });

  it('should count words with transcription in under 30ms', () => {
    const doc = createTestDocument(5000, true);
    const words = getAllWords(doc);

    const start = performance.now();
    const transcribedCount = words.filter(w => w.transcription !== undefined).length;
    const duration = performance.now() - start;

    expect(transcribedCount).toBeLessThanOrEqual(words.length);
    expect(duration).toBeLessThan(30);
  });
});
180
+
181
// Timing budgets for assembling a document from sentences and for reading
// the sentences back out of a document.
describe('Document Structure Performance', () => {
  // Builds sentence number `sentenceIdx`: five words named
  // `word<sentenceIdx>-<wordIdx>` plus the original text `Sentence <sentenceIdx>`.
  const makeSentence = (sentenceIdx: number) => {
    const sentenceWords = Array.from({ length: 5 }, (_, wordIdx) =>
      createGLOSTWordNode({ value: `word${sentenceIdx}-${wordIdx}` })
    );
    return createSentenceFromWords(
      sentenceWords,
      "en",
      "latin",
      `Sentence ${sentenceIdx}`
    );
  };

  it('should create document from 100 sentences in under 50ms', () => {
    // Fixture is prepared outside the timed region.
    const sentences = Array.from({ length: 100 }, (_, idx) => makeSentence(idx));

    const begin = performance.now();
    createDocumentFromSentences(sentences, "en", "latin");
    const took = performance.now() - begin;

    expect(took).toBeLessThan(50);
  });

  it('should get all sentences from 1000-word document in under 30ms', () => {
    const fixture = createTestDocument(1000);

    const begin = performance.now();
    getAllSentences(fixture);
    const took = performance.now() - begin;

    expect(took).toBeLessThan(30);
  });
});
207
+
208
// Repeats an operation many times and bounds the mean time per run.
// Only elapsed time is asserted here; heap usage is not inspected.
describe('Memory Efficiency', () => {
  // Executes `work` `rounds` times back-to-back and returns the mean
  // duration of one run in milliseconds.
  const meanMsPerRun = (rounds: number, work: () => void): number => {
    const begin = performance.now();
    for (let round = 0; round < rounds; round++) {
      work();
    }
    const total = performance.now() - begin;
    return total / rounds;
  };

  it('should handle repeated document creation without memory buildup', () => {
    const average = meanMsPerRun(100, () => {
      const nodes = Array.from({ length: 100 }, (_, idx) =>
        createGLOSTWordNode({ value: `word${idx}` })
      );
      createSimpleDocument(nodes, "en", "latin");
    });

    // Average should be reasonable
    expect(average).toBeLessThan(10);
  });

  it('should handle repeated traversal without performance degradation', () => {
    // The same document is traversed on every round.
    const fixture = createTestDocument(1000);

    const average = meanMsPerRun(100, () => {
      getAllWords(fixture);
    });

    expect(average).toBeLessThan(20);
  });
});
240
+
241
// Checks that cost grows roughly in proportion to input size by comparing
// timings taken at increasing sizes.
describe('Scaling Characteristics', () => {
  // Samples below this floor (in ms) are dominated by timer resolution. They
  // are clamped before being used as a denominator so that a 0ms or
  // near-0ms sample cannot turn a ratio into Infinity/NaN and fail the test
  // for reasons unrelated to scaling.
  const MIN_MEASURABLE_MS = 0.01;

  // Ratio of a larger-input timing to a smaller-input timing, with the
  // denominator clamped to MIN_MEASURABLE_MS.
  const growthFactor = (largerMs: number, smallerMs: number): number =>
    largerMs / Math.max(smallerMs, MIN_MEASURABLE_MS);

  it('should demonstrate linear scaling for document creation', () => {
    // Times creation of `size` words plus the wrapping document.
    const timeCreation = (size: number): number => {
      const start = performance.now();
      const words = Array.from({ length: size }, (_, i) =>
        createGLOSTWordNode({ value: `word${i}` })
      );
      createSimpleDocument(words, "en", "latin");
      return performance.now() - start;
    };

    // Measured in ascending size order, one sample each.
    const at100 = timeCreation(100);
    const at500 = timeCreation(500);
    const at1000 = timeCreation(1000);

    // Verify roughly linear scaling
    const ratio1 = growthFactor(at500, at100); // 500/100
    const ratio2 = growthFactor(at1000, at500); // 1000/500

    // Should scale reasonably (not exponential)
    // Allow for variance in performance measurement
    expect(ratio1).toBeLessThan(20); // Relaxed constraint
    expect(ratio2).toBeLessThan(10);
  });

  it('should demonstrate linear scaling for traversal', () => {
    // Builds the fixture untimed, then times a single getAllWords call.
    const timeTraversal = (size: number): number => {
      const doc = createTestDocument(size);
      const start = performance.now();
      getAllWords(doc);
      return performance.now() - start;
    };

    // Measured in ascending size order, one sample each.
    const at1000 = timeTraversal(1000);
    const at5000 = timeTraversal(5000);
    const at10000 = timeTraversal(10000);

    // Verify roughly linear scaling
    const ratio1 = growthFactor(at5000, at1000); // 5000/1000
    const ratio2 = growthFactor(at10000, at5000); // 10000/5000

    // Should scale linearly (5x and 2x)
    expect(ratio1).toBeLessThan(10);
    expect(ratio2).toBeLessThan(5);
  });
});
285
+
286
// Headline traversal budgets per document size class, expressed as a table.
// Every fixture is created with metadata enabled.
describe('Baseline Performance Targets', () => {
  const targets = [
    { title: 'meets target: small docs (10-50 words) < 10ms', wordCount: 50, limitMs: 10 },
    { title: 'meets target: medium docs (100-500 words) < 50ms', wordCount: 500, limitMs: 50 },
    { title: 'meets target: large docs (1000+ words) < 200ms', wordCount: 1000, limitMs: 200 },
  ] as const;

  // Tests are registered in table order.
  for (const { title, wordCount, limitMs } of targets) {
    it(title, () => {
      const fixture = createTestDocument(wordCount, true);

      // Only the traversal itself is timed.
      const begin = performance.now();
      getAllWords(fixture);
      const took = performance.now() - begin;

      expect(took).toBeLessThan(limitMs);
    });
  }
});
317
+ });
@@ -0,0 +1,170 @@
1
+ /**
2
+ * Tests for traversal helper functions
3
+ */
4
+
5
+ import { describe, it, expect } from "vitest";
6
+ import {
7
+ createSimpleDocument,
8
+ createGLOSTWordNode,
9
+ getAllWords,
10
+ getFirstWord,
11
+ getWordAtPath,
12
+ getWordText,
13
+ createSentenceFromWords,
14
+ createDocumentFromSentences,
15
+ } from "../index.js";
16
+
17
+ describe("Traversal Helper Functions", () => {
18
// getFirstWord: yields the first word of a document, or undefined when the
// document contains no words.
describe("getFirstWord", () => {
  // Shorthand for an English / Latin-script word node.
  const enWord = (value: string) =>
    createGLOSTWordNode({ value, lang: "en", script: "latin" });

  it("should return first word from document", () => {
    const doc = createSimpleDocument(
      [enWord("first"), enWord("second"), enWord("third")],
      "en",
      "latin",
    );

    const found = getFirstWord(doc);

    expect(found).toBeDefined();
    expect(getWordText(found!)).toBe("first");
  });

  it("should return undefined for empty document", () => {
    const emptyDoc = createSimpleDocument([], "en", "latin");

    expect(getFirstWord(emptyDoc)).toBeUndefined();
  });

  it("should return first word even with multiple sentences", () => {
    // Two one-word sentences; the expected result comes from the first.
    const leading = createSentenceFromWords([enWord("first")], "en", "latin");
    const trailing = createSentenceFromWords([enWord("second")], "en", "latin");
    const doc = createDocumentFromSentences([leading, trailing], "en", "latin");

    const found = getFirstWord(doc);

    expect(found).toBeDefined();
    expect(getWordText(found!)).toBe("first");
  });
});
58
+
59
// getWordAtPath: looks a word up by { paragraph, sentence, word } indices and
// yields undefined when any index is out of range.
describe("getWordAtPath", () => {
  it("should get word at valid path", () => {
    const words = [
      createGLOSTWordNode({ value: "one", lang: "en", script: "latin" }),
      createGLOSTWordNode({ value: "two", lang: "en", script: "latin" }),
      createGLOSTWordNode({ value: "three", lang: "en", script: "latin" }),
    ];

    const doc = createSimpleDocument(words, "en", "latin");

    // Get first word (paragraph 0, sentence 0, word 0)
    const word0 = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 0 });
    expect(word0).toBeDefined();
    expect(getWordText(word0!)).toBe("one");

    // Get second word
    const word1 = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 1 });
    expect(word1).toBeDefined();
    expect(getWordText(word1!)).toBe("two");

    // Get third word
    const word2 = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 2 });
    expect(word2).toBeDefined();
    expect(getWordText(word2!)).toBe("three");
  });

  it("should return undefined for invalid paragraph index", () => {
    const words = [
      createGLOSTWordNode({ value: "test", lang: "en", script: "latin" }),
    ];
    const doc = createSimpleDocument(words, "en", "latin");

    const word = getWordAtPath(doc, { paragraph: 99, sentence: 0, word: 0 });
    expect(word).toBeUndefined();
  });

  it("should return undefined for invalid sentence index", () => {
    const words = [
      createGLOSTWordNode({ value: "test", lang: "en", script: "latin" }),
    ];
    const doc = createSimpleDocument(words, "en", "latin");

    const word = getWordAtPath(doc, { paragraph: 0, sentence: 99, word: 0 });
    expect(word).toBeUndefined();
  });

  it("should return undefined for invalid word index", () => {
    const words = [
      createGLOSTWordNode({ value: "test", lang: "en", script: "latin" }),
    ];
    const doc = createSimpleDocument(words, "en", "latin");

    const word = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 99 });
    expect(word).toBeUndefined();
  });

  it("should handle multi-sentence documents", () => {
    const words1 = [
      createGLOSTWordNode({ value: "first", lang: "en", script: "latin" }),
    ];
    const words2 = [
      createGLOSTWordNode({ value: "second", lang: "en", script: "latin" }),
    ];

    const sentence1 = createSentenceFromWords(words1, "en", "latin");
    const sentence2 = createSentenceFromWords(words2, "en", "latin");
    const doc = createDocumentFromSentences([sentence1, sentence2], "en", "latin");

    // Get word from first sentence. The definedness check precedes the
    // non-null assertion, as in the other tests, so a failed lookup is
    // reported as "expected defined" rather than as an error from getWordText.
    const word1 = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 0 });
    expect(word1).toBeDefined();
    expect(getWordText(word1!)).toBe("first");

    // Get word from second sentence
    const word2 = getWordAtPath(doc, { paragraph: 0, sentence: 1, word: 0 });
    expect(word2).toBeDefined();
    expect(getWordText(word2!)).toBe("second");
  });
});
136
+
137
// Cross-checks the single-word helpers against getAllWords on one document.
describe("Helper integration", () => {
  // Wraps the given values, in order, as English / Latin-script words in a
  // simple document.
  const docOf = (...values: string[]) =>
    createSimpleDocument(
      values.map((value) =>
        createGLOSTWordNode({ value, lang: "en", script: "latin" }),
      ),
      "en",
      "latin",
    );

  it("getFirstWord should match first result from getAllWords", () => {
    const doc = docOf("alpha", "beta", "gamma");

    const head = getFirstWord(doc);
    const everyWord = getAllWords(doc);

    // Same node reference, and therefore the same text.
    expect(head).toBe(everyWord[0]);
    expect(getWordText(head!)).toBe(getWordText(everyWord[0]));
  });

  it("getWordAtPath should match getAllWords result by index", () => {
    const doc = docOf("one", "two", "three");

    const everyWord = getAllWords(doc);
    const second = getWordAtPath(doc, { paragraph: 0, sentence: 0, word: 1 });

    expect(second).toBe(everyWord[1]);
    expect(getWordText(second!)).toBe("two");
  });
});
170
+ });