glost-core 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/LICENSE +21 -0
- package/README.md +199 -0
- package/dist/__benchmarks__/document-creation.bench.d.ts +7 -0
- package/dist/__benchmarks__/document-creation.bench.d.ts.map +1 -0
- package/dist/__benchmarks__/document-creation.bench.js +71 -0
- package/dist/__benchmarks__/document-creation.bench.js.map +1 -0
- package/dist/__benchmarks__/traversal.bench.d.ts +7 -0
- package/dist/__benchmarks__/traversal.bench.d.ts.map +1 -0
- package/dist/__benchmarks__/traversal.bench.js +124 -0
- package/dist/__benchmarks__/traversal.bench.js.map +1 -0
- package/dist/cli/migrate.d.ts +8 -0
- package/dist/cli/migrate.d.ts.map +1 -0
- package/dist/cli/migrate.js +229 -0
- package/dist/cli/migrate.js.map +1 -0
- package/dist/errors.d.ts +168 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +300 -0
- package/dist/errors.js.map +1 -0
- package/dist/guards.d.ts +103 -0
- package/dist/guards.d.ts.map +1 -0
- package/dist/guards.js +264 -0
- package/dist/guards.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/nodes.d.ts +227 -0
- package/dist/nodes.d.ts.map +1 -0
- package/dist/nodes.js +243 -0
- package/dist/nodes.js.map +1 -0
- package/dist/types.d.ts +442 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +51 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +247 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +564 -0
- package/dist/utils.js.map +1 -0
- package/dist/validators.d.ts +1876 -0
- package/dist/validators.d.ts.map +1 -0
- package/dist/validators.js +302 -0
- package/dist/validators.js.map +1 -0
- package/package.json +73 -0
- package/src/__benchmarks__/document-creation.bench.ts +92 -0
- package/src/__benchmarks__/traversal.bench.ts +152 -0
- package/src/__tests__/README.md +20 -0
- package/src/__tests__/example.test.ts +43 -0
- package/src/__tests__/example.ts +186 -0
- package/src/__tests__/helpers.test.ts +178 -0
- package/src/__tests__/mock-data.ts +624 -0
- package/src/__tests__/performance.test.ts +317 -0
- package/src/__tests__/traversal.test.ts +170 -0
- package/src/cli/migrate.ts +294 -0
- package/src/errors.ts +394 -0
- package/src/guards.ts +341 -0
- package/src/index.ts +69 -0
- package/src/nodes.ts +409 -0
- package/src/types.ts +633 -0
- package/src/utils.ts +730 -0
- package/src/validators.ts +336 -0
- package/tsconfig.json +9 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// Example test file showing how to use fixtures
|
|
2
|
+
// This demonstrates that fixtures are only imported in test files
|
|
3
|
+
|
|
4
|
+
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import {
|
|
6
|
+
thaiDocumentWithExtras,
|
|
7
|
+
japaneseDocumentWithExtras,
|
|
8
|
+
getQuickTranslation,
|
|
9
|
+
getDifficulty
|
|
10
|
+
} from './mock-data.js';
|
|
11
|
+
import { getAllWords } from '../utils.js';
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
/**
 * Smoke tests showing how the shared mock-data fixtures are consumed.
 *
 * Each test reads a fixture document imported from `./mock-data.js` and checks
 * its root-level fields plus the result of `getAllWords`.
 */
describe('Using mock data fixtures', () => {
  it('should load Thai document with extras', () => {
    expect(thaiDocumentWithExtras.type).toBe('RootNode');
    expect(thaiDocumentWithExtras.lang).toBe('th');

    const words = getAllWords(thaiDocumentWithExtras);
    expect(words.length).toBeGreaterThan(0);
  });

  it('should load Japanese document with extras', () => {
    expect(japaneseDocumentWithExtras.type).toBe('RootNode');
    expect(japaneseDocumentWithExtras.lang).toBe('ja');

    const words = getAllWords(japaneseDocumentWithExtras);
    expect(words.length).toBeGreaterThan(0);
  });

  it('should access translation data from fixtures', () => {
    const words = getAllWords(thaiDocumentWithExtras);
    const firstWord = words[0];

    // Fail loudly when the fixture yields no words. Wrapping the assertions in
    // `if (firstWord) { ... }` would let this test pass without checking
    // anything if the fixture ever became empty.
    if (!firstWord) {
      throw new Error('Expected thaiDocumentWithExtras to contain at least one word');
    }

    const englishTranslation = getQuickTranslation(firstWord, 'en');
    expect(englishTranslation).toBeDefined();

    const difficulty = getDifficulty(firstWord);
    expect(difficulty).toBeDefined();
  });
});
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
// Example usage of the GLOST package
|
|
2
|
+
import {
|
|
3
|
+
createSentenceFromWords,
|
|
4
|
+
createParagraphFromSentences,
|
|
5
|
+
createDocumentFromParagraphs,
|
|
6
|
+
getAllWords,
|
|
7
|
+
getWordTranscription,
|
|
8
|
+
validateGLOSTTree,
|
|
9
|
+
getWordText
|
|
10
|
+
} from '../index';
|
|
11
|
+
import { createThaiWord } from 'glost-th';
|
|
12
|
+
import { createJapaneseWord } from 'glost-ja';
|
|
13
|
+
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Thai Example: "สวัสดีครับ ผมชื่อสมชาย" (Hello, my name is Somchai)
|
|
16
|
+
// ============================================================================
|
|
17
|
+
|
|
18
|
+
// Input specs for each word of the Thai example sentence, in sentence order.
// Typed against createThaiWord's first parameter so the literals are checked
// exactly as they would be when passed inline.
const thaiWordSpecs: Parameters<typeof createThaiWord>[0][] = [
  { text: 'สวัสดี', rtgs: 'sà-wàt-dii', partOfSpeech: 'interjection', tone: 2, syllables: ['sa', 'wat', 'dii'] },
  { text: 'ครับ', rtgs: 'khráp', partOfSpeech: 'particle', tone: 2, syllables: ['khrap'] },
  { text: 'ผม', rtgs: 'phǒm', partOfSpeech: 'pronoun', tone: 3, syllables: ['phom'] },
  { text: 'ชื่อ', rtgs: 'chûue', partOfSpeech: 'noun', tone: 3, syllables: ['chue'] },
  { text: 'สมชาย', rtgs: 'sǒm-chaai', partOfSpeech: 'proper noun', tone: 3, syllables: ['som', 'chaai'] },
];

// One word node per spec; the explicit arrow keeps createThaiWord from
// receiving map's extra (index, array) arguments.
const thaiWords = thaiWordSpecs.map((spec) => createThaiWord(spec));

// Sentence node built from the Thai words, tagged 'th' / 'thai', with the
// original sentence text passed as the fourth argument.
const thaiSentence = createSentenceFromWords(thaiWords, 'th', 'thai', 'สวัสดีครับ ผมชื่อสมชาย');
|
|
62
|
+
|
|
63
|
+
// ============================================================================
|
|
64
|
+
// Japanese Example: "私の名前は田中です。" (My name is Tanaka)
|
|
65
|
+
// ============================================================================
|
|
66
|
+
|
|
67
|
+
// Input specs for each word of the Japanese example sentence, in sentence
// order. Only some entries carry furigana; the rest omit the field.
const japaneseWordSpecs: Parameters<typeof createJapaneseWord>[0][] = [
  { text: '私', romaji: 'watashi', partOfSpeech: 'pronoun', furigana: 'わたし' },
  { text: 'の', romaji: 'no', partOfSpeech: 'particle' },
  { text: '名前', romaji: 'namae', partOfSpeech: 'noun', furigana: 'なまえ' },
  { text: 'は', romaji: 'wa', partOfSpeech: 'particle' },
  { text: '田中', romaji: 'tanaka', partOfSpeech: 'proper noun', furigana: 'たなか' },
  { text: 'です', romaji: 'desu', partOfSpeech: 'copula' },
];

// One word node per spec; the explicit arrow keeps createJapaneseWord from
// receiving map's extra (index, array) arguments.
const japaneseWords = japaneseWordSpecs.map((spec) => createJapaneseWord(spec));

// Sentence node built from the Japanese words, tagged 'ja' / 'mixed', with the
// original sentence text passed as the fourth argument.
const japaneseSentence = createSentenceFromWords(japaneseWords, 'ja', 'mixed', '私の名前は田中です。');
|
|
109
|
+
|
|
110
|
+
// ============================================================================
|
|
111
|
+
// Create Document Structure
|
|
112
|
+
// ============================================================================
|
|
113
|
+
|
|
114
|
+
// Each example sentence is wrapped in its own single-sentence paragraph.
const thaiParagraph = createParagraphFromSentences([thaiSentence]);
const japaneseParagraph = createParagraphFromSentences([japaneseSentence]);

// Metadata passed as the fourth argument when creating the document.
const documentMetadata = {
  title: 'Multilingual Greeting Examples',
  description: 'Examples of greetings in Thai and Japanese with transcriptions',
};

// Root document holding both paragraphs; language and script are both given
// as 'mixed'.
// NOTE(review): this binding is named `document`, which shadows the DOM global
// of the same name in browser-like environments; the name is kept because it
// is exported from this module.
const document = createDocumentFromParagraphs(
  [thaiParagraph, japaneseParagraph],
  'mixed',
  'mixed',
  documentMetadata,
);
|
|
126
|
+
|
|
127
|
+
// ============================================================================
|
|
128
|
+
// Demonstrate Utilities
|
|
129
|
+
// ============================================================================
|
|
130
|
+
|
|
131
|
+
/**
 * Prints a walkthrough of the example document to the console.
 *
 * In order, it logs: the total word count, every Thai word with its 'rtgs'
 * transcription, every Japanese word with its 'romaji' transcription, the
 * outcome of `validateGLOSTTree`, and a short structural summary (language,
 * script, paragraph / sentence / word counts). Returns nothing.
 */
export function demonstrateUtilities() {
  console.log('=== GLOST Package Demo ===\n');

  // Collect every word node once; the list is reused for the summary below.
  const allWords = getAllWords(document);
  console.log(`Total words: ${allWords.length}`);

  // Thai words alongside their RTGS transcription.
  const thaiWordsOnly = allWords.filter((word) => word.lang === 'th');
  console.log('\n=== Thai Words ===');
  for (const word of thaiWordsOnly) {
    const transcription = getWordTranscription(word, 'rtgs');
    console.log(`${getWordText(word)} → ${transcription} (${word.metadata.partOfSpeech})`);
  }

  // Japanese words alongside their romaji transcription.
  const japaneseWordsOnly = allWords.filter((word) => word.lang === 'ja');
  console.log('\n=== Japanese Words ===');
  for (const word of japaneseWordsOnly) {
    const transcription = getWordTranscription(word, 'romaji');
    console.log(`${getWordText(word)} → ${transcription} (${word.metadata.partOfSpeech})`);
  }

  // Report the validation result: either a success line or one line per error.
  const validationErrors = validateGLOSTTree(document);
  if (validationErrors.length > 0) {
    console.log('\n❌ Validation errors:');
    for (const error of validationErrors) {
      console.log(`  - ${error}`);
    }
  } else {
    console.log('\n✅ Document is valid!');
  }

  // Structural summary of the document.
  console.log('\n=== Document Structure ===');
  console.log(`Language: ${document.lang}`);
  console.log(`Script: ${document.script}`);
  console.log(`Paragraphs: ${document.children.length}`);

  // Only ParagraphNode children contribute to the sentence count; a paragraph
  // without a children array counts as zero.
  let sentenceCount = 0;
  for (const child of document.children) {
    if (child.type !== 'ParagraphNode') {
      continue;
    }
    sentenceCount += child.children?.length || 0;
  }
  console.log(`Sentences: ${sentenceCount}`);
  console.log(`Words: ${allWords.length}`);
}
|
|
176
|
+
|
|
177
|
+
// Export for use in other files
|
|
178
|
+
export {
|
|
179
|
+
thaiWords,
|
|
180
|
+
japaneseWords,
|
|
181
|
+
thaiSentence,
|
|
182
|
+
japaneseSentence,
|
|
183
|
+
thaiParagraph,
|
|
184
|
+
japaneseParagraph,
|
|
185
|
+
document
|
|
186
|
+
};
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for document creation helper functions
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, it, expect } from "vitest";
|
|
6
|
+
import {
|
|
7
|
+
createSimpleDocument,
|
|
8
|
+
createDocumentFromSentences,
|
|
9
|
+
createSentenceFromWords,
|
|
10
|
+
createGLOSTWordNode,
|
|
11
|
+
getAllWords,
|
|
12
|
+
} from "../index.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Exercises the document-building helpers (`createSimpleDocument`,
 * `createDocumentFromSentences`) and checks the node hierarchy they produce:
 * RootNode -> ParagraphNode -> SentenceNode -> word nodes.
 */
describe("Document Helper Functions", () => {
  // Every fixture word in this suite is English text in Latin script.
  const LANG = "en";
  const SCRIPT = "latin";

  /** Builds a single word node for `value` with the suite's language/script. */
  const makeWord = (value: string) =>
    createGLOSTWordNode({ value, lang: LANG, script: SCRIPT });

  /** Builds one word node per value, preserving order. */
  const makeWords = (...values: string[]) => values.map((value) => makeWord(value));

  describe("createSimpleDocument", () => {
    it("should create document from words with all required structure", () => {
      const doc = createSimpleDocument(makeWords("hello", "world"), LANG, SCRIPT);

      expect(doc.type).toBe("RootNode");
      expect(doc.lang).toBe("en");
      expect(doc.script).toBe("latin");
      expect(doc.children.length).toBe(1); // a single paragraph

      const paragraph = doc.children[0];
      expect(paragraph.type).toBe("ParagraphNode");
      expect(paragraph.children.length).toBe(1); // a single sentence

      const sentence = paragraph.children[0];
      expect(sentence.type).toBe("SentenceNode");
      expect(sentence.children.length).toBe(2); // both words
    });

    it("should accept custom sentence text", () => {
      const doc = createSimpleDocument(makeWords("hello"), LANG, SCRIPT, {
        sentenceText: "Hello!",
      });

      const sentence = doc.children[0].children[0];
      expect(sentence.originalText).toBe("Hello!");
    });

    it("should accept document metadata", () => {
      const metadata = {
        title: "Test Document",
        author: "Test Author",
      };

      const doc = createSimpleDocument(makeWords("test"), LANG, SCRIPT, { metadata });

      expect(doc.metadata?.title).toBe("Test Document");
      expect(doc.metadata?.author).toBe("Test Author");
    });

    it("should work with getAllWords utility", () => {
      const doc = createSimpleDocument(makeWords("one", "two", "three"), LANG, SCRIPT);
      const extractedWords = getAllWords(doc);

      expect(extractedWords.length).toBe(3);
      // The first child of each extracted word carries the word's text value.
      ["one", "two", "three"].forEach((expected, index) => {
        expect(extractedWords[index].children[0].value).toBe(expected);
      });
    });
  });

  describe("createDocumentFromSentences", () => {
    it("should create document from sentences with paragraph wrapper", () => {
      const sentence1 = createSentenceFromWords(makeWords("hello"), LANG, SCRIPT, "Hello");
      const sentence2 = createSentenceFromWords(makeWords("world"), LANG, SCRIPT, "World");

      const doc = createDocumentFromSentences([sentence1, sentence2], LANG, SCRIPT);

      expect(doc.type).toBe("RootNode");
      expect(doc.children.length).toBe(1); // a single paragraph

      const paragraph = doc.children[0];
      expect(paragraph.type).toBe("ParagraphNode");
      expect(paragraph.children.length).toBe(2); // both sentences
    });

    it("should accept document metadata", () => {
      const sentence = createSentenceFromWords(makeWords("test"), LANG, SCRIPT);

      const doc = createDocumentFromSentences([sentence], LANG, SCRIPT, {
        title: "Test",
        author: "Author",
      });

      expect(doc.metadata?.title).toBe("Test");
      expect(doc.metadata?.author).toBe("Author");
    });

    it("should preserve sentence structure", () => {
      const sentence1 = createSentenceFromWords(makeWords("first"), LANG, SCRIPT);
      const sentence2 = createSentenceFromWords(makeWords("second"), LANG, SCRIPT);

      const doc = createDocumentFromSentences([sentence1, sentence2], LANG, SCRIPT);

      const sentences = doc.children[0].children;
      expect(sentences.length).toBe(2);
      // sentence -> word -> text child
      expect(sentences[0].children[0].children[0].value).toBe("first");
      expect(sentences[1].children[0].children[0].value).toBe("second");
    });
  });

  describe("Helper comparison", () => {
    it("createSimpleDocument should produce same structure as manual creation", () => {
      const word = makeWord("test");

      // Document built through the one-step helper.
      const docHelper = createSimpleDocument([word], LANG, SCRIPT);

      // Equivalent document assembled by hand from a sentence.
      const sentence = createSentenceFromWords([word], LANG, SCRIPT);
      const docManual = createDocumentFromSentences([sentence], LANG, SCRIPT);

      expect(docHelper.type).toBe(docManual.type);
      expect(docHelper.lang).toBe(docManual.lang);
      expect(docHelper.script).toBe(docManual.script);
      expect(docHelper.children.length).toBe(docManual.children.length);
    });
  });
});
|