glost 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +62 -0
- package/dist/example.d.ts +10 -0
- package/dist/example.d.ts.map +1 -0
- package/dist/example.js +82 -0
- package/dist/example.js.map +1 -0
- package/dist/guards.d.ts +103 -0
- package/dist/guards.d.ts.map +1 -0
- package/dist/guards.js +264 -0
- package/dist/guards.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +26 -0
- package/dist/index.js.map +1 -0
- package/dist/mock-data.d.ts +35 -0
- package/dist/mock-data.d.ts.map +1 -0
- package/dist/mock-data.js +494 -0
- package/dist/mock-data.js.map +1 -0
- package/dist/nodes.d.ts +68 -0
- package/dist/nodes.d.ts.map +1 -0
- package/dist/nodes.js +181 -0
- package/dist/nodes.js.map +1 -0
- package/dist/types.d.ts +379 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +203 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +497 -0
- package/dist/utils.js.map +1 -0
- package/dist/validators.d.ts +1876 -0
- package/dist/validators.d.ts.map +1 -0
- package/dist/validators.js +302 -0
- package/dist/validators.js.map +1 -0
- package/package.json +67 -0
- package/src/example.ts +186 -0
- package/src/guards.ts +341 -0
- package/src/index.ts +69 -0
- package/src/mock-data.ts +635 -0
- package/src/nodes.ts +301 -0
- package/src/types.ts +565 -0
- package/src/utils.ts +653 -0
- package/src/validators.ts +336 -0
- package/tsconfig.json +9 -0
package/src/guards.ts
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
// ============================================================================
|
|
2
|
+
// GLOST Type Guards for Better Developer Experience
|
|
3
|
+
// ============================================================================
|
|
4
|
+
|
|
5
|
+
import type {
|
|
6
|
+
GLOSTParagraph,
|
|
7
|
+
GLOSTPunctuation,
|
|
8
|
+
GLOSTRoot,
|
|
9
|
+
GLOSTSentence,
|
|
10
|
+
GLOSTSource,
|
|
11
|
+
GLOSTSymbol,
|
|
12
|
+
GLOSTText,
|
|
13
|
+
GLOSTWhiteSpace,
|
|
14
|
+
GLOSTWord,
|
|
15
|
+
GLOSTNode,
|
|
16
|
+
} from "./types";
|
|
17
|
+
|
|
18
|
+
// ============================================================================
|
|
19
|
+
// Core Node Type Guards
|
|
20
|
+
// ============================================================================
|
|
21
|
+
|
|
22
|
+
/**
 * Type guard that checks whether a value is a GLOSTWord.
 *
 * A value passes when it is a non-null object whose `type` is "WordNode",
 * whose `children` is an array, and which carries the `lang`, `script`,
 * `level`, `metadata`, `transcription` and `extras` properties. Apart from
 * `type` and `children`, only the presence of the properties is checked,
 * not their values.
 *
 * @param node - Value to test.
 * @returns `true` when `node` has the shape described above.
 */
export function isGLOSTWord(node: unknown): node is GLOSTWord {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "WordNode") {
    return false;
  }
  // A word is a container node: a `children` value that is not an array
  // would break any caller that iterates it after narrowing.
  if (!("children" in node) || !Array.isArray(node.children)) {
    return false;
  }
  return (
    "lang" in node &&
    "script" in node &&
    "level" in node &&
    "metadata" in node &&
    "transcription" in node &&
    "extras" in node
  );
}
|
|
40
|
+
|
|
41
|
+
/**
 * Type guard that checks whether a value is a GLOSTSentence.
 *
 * A value passes when it is a non-null object whose `type` is
 * "SentenceNode", whose `children` is an array, and which carries the
 * `lang`, `script`, `level` and `metadata` properties. Apart from `type`
 * and `children`, only the presence of the properties is checked.
 *
 * @param node - Value to test.
 * @returns `true` when `node` has the shape described above.
 */
export function isGLOSTSentence(node: unknown): node is GLOSTSentence {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "SentenceNode") {
    return false;
  }
  // A sentence is a container node: reject a `children` value that
  // cannot be iterated as an array.
  if (!("children" in node) || !Array.isArray(node.children)) {
    return false;
  }
  return (
    "lang" in node &&
    "script" in node &&
    "level" in node &&
    "metadata" in node
  );
}
|
|
57
|
+
|
|
58
|
+
/**
 * Type guard that checks whether a value is a GLOSTParagraph.
 *
 * A value passes when it is a non-null object whose `type` is
 * "ParagraphNode", whose `children` is an array, and which carries the
 * `lang`, `script`, `level` and `metadata` properties. Apart from `type`
 * and `children`, only the presence of the properties is checked.
 *
 * @param node - Value to test.
 * @returns `true` when `node` has the shape described above.
 */
export function isGLOSTParagraph(node: unknown): node is GLOSTParagraph {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "ParagraphNode") {
    return false;
  }
  // A paragraph is a container node: reject a `children` value that
  // cannot be iterated as an array.
  if (!("children" in node) || !Array.isArray(node.children)) {
    return false;
  }
  return (
    "lang" in node &&
    "script" in node &&
    "level" in node &&
    "metadata" in node
  );
}
|
|
74
|
+
|
|
75
|
+
/**
 * Type guard that checks whether a value is a GLOSTRoot.
 *
 * A value passes when it is a non-null object whose `type` is "RootNode",
 * whose `children` is an array, and which carries the `lang`, `script`,
 * `level` and `metadata` properties. Apart from `type` and `children`,
 * only the presence of the properties is checked.
 *
 * @param node - Value to test.
 * @returns `true` when `node` has the shape described above.
 */
export function isGLOSTRoot(node: unknown): node is GLOSTRoot {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "RootNode") {
    return false;
  }
  // The root is a container node: reject a `children` value that
  // cannot be iterated as an array.
  if (!("children" in node) || !Array.isArray(node.children)) {
    return false;
  }
  return (
    "lang" in node &&
    "script" in node &&
    "level" in node &&
    "metadata" in node
  );
}
|
|
91
|
+
|
|
92
|
+
/**
 * Type guard that checks whether a value is a GLOSTText.
 *
 * Accepts non-null objects whose `type` is "TextNode" and whose `value`
 * is a string.
 *
 * @param node - Value to test.
 * @returns `true` when `node` matches that shape.
 */
export function isGLOSTText(node: unknown): node is GLOSTText {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "TextNode") {
    return false;
  }
  return "value" in node && typeof node.value === "string";
}
|
|
105
|
+
|
|
106
|
+
/**
 * Type guard that checks whether a value is a GLOSTPunctuation.
 *
 * Accepts non-null objects whose `type` is "PunctuationNode" and whose
 * `value` is a string.
 *
 * @param node - Value to test.
 * @returns `true` when `node` matches that shape.
 */
export function isGLOSTPunctuation(node: unknown): node is GLOSTPunctuation {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "PunctuationNode") {
    return false;
  }
  return "value" in node && typeof node.value === "string";
}
|
|
119
|
+
|
|
120
|
+
/**
 * Type guard that checks whether a value is a GLOSTSymbol.
 *
 * Accepts non-null objects whose `type` is "SymbolNode" and whose `value`
 * is a string.
 *
 * @param node - Value to test.
 * @returns `true` when `node` matches that shape.
 */
export function isGLOSTSymbol(node: unknown): node is GLOSTSymbol {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "SymbolNode") {
    return false;
  }
  return "value" in node && typeof node.value === "string";
}
|
|
133
|
+
|
|
134
|
+
/**
 * Type guard that checks whether a value is a GLOSTWhiteSpace.
 *
 * Accepts non-null objects whose `type` is "WhiteSpaceNode" and whose
 * `value` is a string.
 *
 * @param node - Value to test.
 * @returns `true` when `node` matches that shape.
 */
export function isGLOSTWhiteSpace(node: unknown): node is GLOSTWhiteSpace {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "WhiteSpaceNode") {
    return false;
  }
  return "value" in node && typeof node.value === "string";
}
|
|
147
|
+
|
|
148
|
+
/**
 * Type guard that checks whether a value is a GLOSTSource.
 *
 * Accepts non-null objects whose `type` is "SourceNode" and whose `value`
 * is a string.
 *
 * @param node - Value to test.
 * @returns `true` when `node` matches that shape.
 */
export function isGLOSTSource(node: unknown): node is GLOSTSource {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  if (!("type" in node) || node.type !== "SourceNode") {
    return false;
  }
  return "value" in node && typeof node.value === "string";
}
|
|
161
|
+
|
|
162
|
+
// ============================================================================
|
|
163
|
+
// Content Type Guards
|
|
164
|
+
// ============================================================================
|
|
165
|
+
|
|
166
|
+
/**
 * Type guard that checks whether a value is any of the GLOST node kinds
 * recognised by the guards in this module: word, sentence, paragraph,
 * text, punctuation, symbol, whitespace, source or root.
 *
 * @param node - Value to test.
 * @returns `true` as soon as one of those guards accepts `node`.
 */
export function isGLOSTNode(node: unknown): node is GLOSTNode {
  // Tried in order; `some` stops at the first guard that matches.
  const nodeGuards: ReadonlyArray<(candidate: unknown) => boolean> = [
    isGLOSTWord,
    isGLOSTSentence,
    isGLOSTParagraph,
    isGLOSTText,
    isGLOSTPunctuation,
    isGLOSTSymbol,
    isGLOSTWhiteSpace,
    isGLOSTSource,
    isGLOSTRoot,
  ];
  return nodeGuards.some((guard) => guard(node));
}
|
|
182
|
+
|
|
183
|
+
// ============================================================================
|
|
184
|
+
// Document Type Guards
|
|
185
|
+
// ============================================================================
|
|
186
|
+
|
|
187
|
+
/**
 * Type guard that checks whether a value is a GLOST document.
 *
 * This is an alias for `isGLOSTRoot`: only root nodes are accepted.
 * Content nodes (words, sentences, paragraphs, ...) are not treated as
 * documents by this check.
 *
 * @param node - Value to test.
 * @returns `true` when `isGLOSTRoot` accepts `node`.
 */
export function isGLOSTDocument(node: unknown): node is GLOSTRoot {
  return isGLOSTRoot(node);
}
|
|
193
|
+
|
|
194
|
+
// ============================================================================
|
|
195
|
+
// Array Type Guards
|
|
196
|
+
// ============================================================================
|
|
197
|
+
|
|
198
|
+
/**
 * Type guard that checks whether every element of an array is a GLOSTWord.
 *
 * @param nodes - Array to inspect.
 * @returns `true` when no element is rejected by `isGLOSTWord`; an empty
 *   array therefore yields `true`.
 */
export function isGLOSTWordArray(nodes: unknown[]): nodes is GLOSTWord[] {
  const hasNonWord = nodes.some((candidate) => !isGLOSTWord(candidate));
  return !hasNonWord;
}
|
|
204
|
+
|
|
205
|
+
/**
 * Type guard that checks whether every element of an array is a
 * GLOSTSentence.
 *
 * @param nodes - Array to inspect.
 * @returns `true` when no element is rejected by `isGLOSTSentence`; an
 *   empty array therefore yields `true`.
 */
export function isGLOSTSentenceArray(nodes: unknown[]): nodes is GLOSTSentence[] {
  const hasNonSentence = nodes.some((candidate) => !isGLOSTSentence(candidate));
  return !hasNonSentence;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Type guard that checks whether every element of an array is a
 * GLOSTParagraph.
 *
 * @param nodes - Array to inspect.
 * @returns `true` when no element is rejected by `isGLOSTParagraph`; an
 *   empty array therefore yields `true`.
 */
export function isGLOSTParagraphArray(nodes: unknown[]): nodes is GLOSTParagraph[] {
  const hasNonParagraph = nodes.some((candidate) => !isGLOSTParagraph(candidate));
  return !hasNonParagraph;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Type guard that checks whether every element of an array is a GLOST
 * content node: a word, sentence, paragraph, text, punctuation, symbol,
 * whitespace or source node. Root nodes are not accepted.
 *
 * @param nodes - Array to inspect.
 * @returns `true` when each element is accepted by at least one of the
 *   content guards; an empty array yields `true`.
 */
export function isGLOSTContentArray(nodes: unknown[]): nodes is (GLOSTWord | GLOSTSentence | GLOSTParagraph | GLOSTText | GLOSTPunctuation | GLOSTSymbol | GLOSTWhiteSpace | GLOSTSource)[] {
  // Tried in order for each element; `some` stops at the first match.
  const contentGuards: ReadonlyArray<(candidate: unknown) => boolean> = [
    isGLOSTWord,
    isGLOSTSentence,
    isGLOSTParagraph,
    isGLOSTText,
    isGLOSTPunctuation,
    isGLOSTSymbol,
    isGLOSTWhiteSpace,
    isGLOSTSource,
  ];
  return nodes.every((candidate) =>
    contentGuards.some((guard) => guard(candidate)),
  );
}
|
|
234
|
+
|
|
235
|
+
// ============================================================================
|
|
236
|
+
// Utility Type Guards
|
|
237
|
+
// ============================================================================
|
|
238
|
+
|
|
239
|
+
/**
 * Type guard that checks whether a value carries a `children` array.
 *
 * @param node - Value to test.
 * @returns `true` when `node` is a non-null object with a `children`
 *   property that is an array; the array's elements are not inspected.
 */
export function hasChildren(node: unknown): node is { children: unknown[] } {
  if (node === null || typeof node !== "object") {
    return false;
  }
  return "children" in node ? Array.isArray(node.children) : false;
}
|
|
250
|
+
|
|
251
|
+
/**
 * Type guard that checks whether a value carries a `metadata` object.
 *
 * @param node - Value to test.
 * @returns `true` when `node` is a non-null object whose `metadata`
 *   property is a non-null, non-array object.
 */
export function hasMetadata(node: unknown): node is { metadata: Record<string, unknown> } {
  if (typeof node !== "object" || node === null || !("metadata" in node)) {
    return false;
  }
  const metadata: unknown = node.metadata;
  // Arrays also report typeof "object" but are not string-keyed records,
  // so they must be excluded for the predicate to be truthful.
  return (
    typeof metadata === "object" &&
    metadata !== null &&
    !Array.isArray(metadata)
  );
}
|
|
263
|
+
|
|
264
|
+
/**
 * Type guard that checks whether a value carries a `transcription` object.
 *
 * @param node - Value to test.
 * @returns `true` when `node` is a non-null object whose `transcription`
 *   property is a non-null, non-array object.
 */
export function hasTranscription(node: unknown): node is { transcription: Record<string, unknown> } {
  if (typeof node !== "object" || node === null || !("transcription" in node)) {
    return false;
  }
  const transcription: unknown = node.transcription;
  // Arrays also report typeof "object" but are not string-keyed records,
  // so they must be excluded for the predicate to be truthful.
  return (
    typeof transcription === "object" &&
    transcription !== null &&
    !Array.isArray(transcription)
  );
}
|
|
276
|
+
|
|
277
|
+
/**
 * Type guard that checks whether a value carries an `extras` object.
 *
 * @param node - Value to test.
 * @returns `true` when `node` is a non-null object whose `extras`
 *   property is a non-null, non-array object.
 */
export function hasExtras(node: unknown): node is { extras: Record<string, unknown> } {
  if (typeof node !== "object" || node === null || !("extras" in node)) {
    return false;
  }
  const extras: unknown = node.extras;
  // Arrays also report typeof "object" but are not string-keyed records,
  // so they must be excluded for the predicate to be truthful.
  return (
    typeof extras === "object" &&
    extras !== null &&
    !Array.isArray(extras)
  );
}
|
|
289
|
+
|
|
290
|
+
// ============================================================================
|
|
291
|
+
// Validation Helpers
|
|
292
|
+
// ============================================================================
|
|
293
|
+
|
|
294
|
+
/**
 * Check that a value is a GLOST node and, when it has a `children`
 * property, that `children` is an array whose elements are all GLOST
 * nodes (type guard based validation).
 *
 * Only the direct children are inspected; grandchildren are not checked.
 * For schema-based validation, use validateGLOSTNode from validators.ts.
 *
 * @param node - Value to test.
 * @returns `false` when `node` is not a GLOST node, or when it has a
 *   `children` property that is not an array of GLOST nodes; `true`
 *   otherwise (including nodes without a `children` property).
 */
export function isGLOSTNodeWithValidChildren(node: unknown): node is GLOSTNode {
  if (!isGLOSTNode(node)) {
    return false;
  }

  // Nodes without a `children` property have nothing further to validate.
  if (!("children" in node)) {
    return true;
  }

  // A `children` property that is not an array is invalid rather than
  // "nothing to check", so it must not fall through to `true`.
  const children: unknown = node.children;
  return Array.isArray(children) && children.every(isGLOSTNode);
}
|
|
310
|
+
|
|
311
|
+
/**
 * Check that every element of an array passes
 * `isGLOSTNodeWithValidChildren` (type guard based).
 *
 * @param nodes - Array to inspect.
 * @returns `true` when no element is rejected; an empty array therefore
 *   yields `true`.
 */
export function isGLOSTNodeArrayValid(nodes: unknown[]): nodes is GLOSTNode[] {
  const hasInvalidNode = nodes.some(
    (candidate) => !isGLOSTNodeWithValidChildren(candidate),
  );
  return !hasInvalidNode;
}
|
|
317
|
+
|
|
318
|
+
/**
 * Check whether a node is one of the leaf kinds: text, punctuation,
 * symbol, whitespace or source.
 *
 * @param node - Node to classify.
 * @returns `true` when one of the leaf guards accepts `node`.
 */
export function isLeafNode(node: GLOSTNode): boolean {
  // Tried in order; `some` stops at the first guard that matches.
  const leafGuards: ReadonlyArray<(candidate: unknown) => boolean> = [
    isGLOSTText,
    isGLOSTPunctuation,
    isGLOSTSymbol,
    isGLOSTWhiteSpace,
    isGLOSTSource,
  ];
  return leafGuards.some((guard) => guard(node));
}
|
|
330
|
+
|
|
331
|
+
/**
 * Check whether a node is one of the container kinds: word, sentence,
 * paragraph or root.
 *
 * @param node - Node to classify.
 * @returns `true` when one of the container guards accepts `node`.
 */
export function isContainerNode(node: GLOSTNode): boolean {
  // Tried in order; `some` stops at the first guard that matches.
  const containerGuards: ReadonlyArray<(candidate: unknown) => boolean> = [
    isGLOSTWord,
    isGLOSTSentence,
    isGLOSTParagraph,
    isGLOSTRoot,
  ];
  return containerGuards.some((guard) => guard(node));
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// GLOST - Glossed Syntax Tree
|
|
2
|
+
// Extends nlcst for language learning with annotations
|
|
3
|
+
|
|
4
|
+
export * from "./types";
|
|
5
|
+
export * from "./nodes";
|
|
6
|
+
export * from "./utils";
|
|
7
|
+
export * from "./validators";
|
|
8
|
+
export * from "./guards";
|
|
9
|
+
// export * from './example';
|
|
10
|
+
export * from "./mock-data";
|
|
11
|
+
|
|
12
|
+
// Re-export utility types
|
|
13
|
+
export type { ParagraphLike } from "./utils";
|
|
14
|
+
|
|
15
|
+
// Re-export key utilities for transcription components
|
|
16
|
+
export {
|
|
17
|
+
// Tree traversal
|
|
18
|
+
getAllWords,
|
|
19
|
+
getAllSentences,
|
|
20
|
+
getAllParagraphs,
|
|
21
|
+
getAllClauses,
|
|
22
|
+
getAllPhrases,
|
|
23
|
+
getAllSyllables,
|
|
24
|
+
getAllCharacters,
|
|
25
|
+
getWordsFromDocument,
|
|
26
|
+
getFirstSentence,
|
|
27
|
+
getWordsFromSentence,
|
|
28
|
+
getWordsFromParagraph,
|
|
29
|
+
|
|
30
|
+
// Node finding
|
|
31
|
+
findNodesByType,
|
|
32
|
+
findWordsByLanguage,
|
|
33
|
+
findWordsByTranscriptionSystem,
|
|
34
|
+
|
|
35
|
+
// Type guards (most are now exported via guards.ts)
|
|
36
|
+
isGLOSTWord,
|
|
37
|
+
isGLOSTSentence,
|
|
38
|
+
isGLOSTParagraph,
|
|
39
|
+
isGLOSTRoot,
|
|
40
|
+
isGLOSTClause,
|
|
41
|
+
isGLOSTPhrase,
|
|
42
|
+
isGLOSTSyllable,
|
|
43
|
+
isGLOSTCharacter,
|
|
44
|
+
|
|
45
|
+
// Word utilities
|
|
46
|
+
getWordText,
|
|
47
|
+
getWordTranscription,
|
|
48
|
+
hasWordTranscription,
|
|
49
|
+
getWordTranslation,
|
|
50
|
+
getWordMeaning,
|
|
51
|
+
getWordPartOfSpeech,
|
|
52
|
+
getWordDifficulty,
|
|
53
|
+
|
|
54
|
+
// Sentence utilities
|
|
55
|
+
getSentenceTranslation,
|
|
56
|
+
|
|
57
|
+
// Content statistics utilities
|
|
58
|
+
getGLOSTWordCount,
|
|
59
|
+
adaptParagraphLikeToGLOST,
|
|
60
|
+
|
|
61
|
+
// BCP-47 Language utilities
|
|
62
|
+
parseLanguageTag,
|
|
63
|
+
getBaseLanguage,
|
|
64
|
+
areLanguagesCompatible,
|
|
65
|
+
findBestLanguageMatch,
|
|
66
|
+
getLanguageFallback,
|
|
67
|
+
normalizeLanguageTag,
|
|
68
|
+
isValidLanguageTag,
|
|
69
|
+
} from "./utils";
|