glost 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,336 @@
1
+ import { z } from 'zod';
2
+
3
+ // ============================================================================
4
+ // Zod Schemas for GLOST Validation
5
+ // ============================================================================
6
+
7
/**
 * Enumerates the linguistic granularities a node may be tagged with,
 * listed from smallest (`character`) to largest (`paragraph`).
 * Any other string is rejected.
 */
export const LinguisticLevelSchema = z.enum(
  ['character', 'syllable', 'word', 'phrase', 'sentence', 'paragraph']
);
18
+
19
/**
 * Enumerates the contexts a pronunciation variant can be labelled with.
 * Only the five listed strings are accepted.
 */
export const PronunciationContextSchema = z.enum(
  ['formal', 'informal', 'historical', 'regional', 'dialectal']
);
29
+
30
/**
 * Identifier of a transcription system.
 *
 * The literals name the systems this module knows about; the trailing
 * `z.string()` member means any other string is accepted as well, so the
 * schema only rejects non-string values.
 */
export const TranscriptionSystemSchema = z.union([
  // Known systems.
  z.literal('rtgs'),
  z.literal('aua'),
  z.literal('paiboon'),
  z.literal('romaji'),
  z.literal('furigana'),
  z.literal('ipa'),
  z.literal('pinyin'),
  z.literal('hangul'),
  // Open-ended fallback for any other system name.
  z.string(),
]);
44
+
45
/**
 * Language code of a node.
 *
 * Lists the codes this module names explicitly; because the union ends with
 * `z.string()`, every other string is accepted too.
 */
export const LanguageCodeSchema = z.union([
  // Explicitly named codes.
  z.literal('th'),
  z.literal('ja'),
  z.literal('zh'),
  z.literal('ko'),
  z.literal('en'),
  // Open-ended fallback for any other code.
  z.string(),
]);
56
+
57
/**
 * Writing system of a node.
 *
 * The literals name the scripts this module knows about; the final
 * `z.string()` member lets any other string through as well.
 */
export const ScriptSystemSchema = z.union([
  // Known scripts.
  z.literal('thai'),
  z.literal('hiragana'),
  z.literal('katakana'),
  z.literal('kanji'),
  z.literal('hanzi'),
  z.literal('hangul'),
  z.literal('latin'),
  z.literal('mixed'),
  // Open-ended fallback for any other script name.
  z.string(),
]);
71
+
72
/**
 * One alternative pronunciation: its text, the context it applies to
 * (see `PronunciationContextSchema`), and optional free-form notes.
 */
export const PronunciationVariantSchema = z.object({
  text: z.string(),
  context: PronunciationContextSchema,
  notes: z.string().optional(),
});
80
+
81
/**
 * A transcription of some text in one transcription system.
 *
 * `text` and `system` are required; the remaining fields are optional.
 */
export const TranscriptionInfoSchema = z.object({
  // Required fields.
  text: z.string(),
  system: TranscriptionSystemSchema,

  // Optional details.
  variants: z.array(PronunciationVariantSchema).optional(),
  tone: z.number().optional(),
  syllables: z.array(z.string()).optional(),
  phonetic: z.string().optional(),
});
92
+
93
/**
 * Map from an arbitrary string key to a `TranscriptionInfoSchema` value.
 */
export const TransliterationDataSchema = z.record(z.string(), TranscriptionInfoSchema);
100
+
101
/**
 * Linguistic metadata attached to a word node.
 *
 * `partOfSpeech` is the only required field; everything else is optional.
 */
export const LinguisticMetadataSchema = z.object({
  /** @deprecated Use extras.translations instead */
  meaning: z.string().optional(),

  // Required.
  partOfSpeech: z.string(),

  // Optional free-form descriptions.
  usage: z.string().optional(),
  etymology: z.string().optional(),
  examples: z.array(z.string()).optional(),

  // Optional classifications restricted to fixed sets of values.
  frequency: z.enum(['high', 'medium', 'low']).optional(),
  formality: z.enum(['formal', 'neutral', 'informal']).optional(),

  register: z.string().optional(),
});
115
+
116
/**
 * Fields shared by every GLOST node. The concrete node schemas extend this
 * one, narrowing `type` to a literal and making some optional fields required.
 *
 * Only `type` is mandatory here; `position` is accepted without any checking.
 */
export const GLOSTNodeBaseSchema = z.object({
  type: z.string(),
  lang: LanguageCodeSchema.optional(),
  script: ScriptSystemSchema.optional(),
  level: LinguisticLevelSchema.optional(),
  position: z.any().optional(),
});
126
+
127
/**
 * Schema for a word node (`type: 'GLOSTWordNode'`).
 *
 * Adds `value`, `transcription` and `metadata` to the base fields and makes
 * `level` required. `children` is not validated element by element and
 * becomes `[]` in the parsed output when it is missing from the input.
 */
export const GLOSTWordNodeSchema = GLOSTNodeBaseSchema.extend({
  type: z.literal('GLOSTWordNode'),
  value: z.string(),
  transcription: TransliterationDataSchema,
  metadata: LinguisticMetadataSchema,
  level: LinguisticLevelSchema,
  children: z.array(z.any()).default([]),
});
138
+
139
/**
 * Schema for a sentence node (`type: 'GLOSTSentenceNode'`).
 *
 * Requires `originalText`, `lang` and `script`; `transcription` is optional.
 * `children` is not validated element by element and becomes `[]` in the
 * parsed output when it is missing from the input.
 */
export const GLOSTSentenceNodeSchema = GLOSTNodeBaseSchema.extend({
  type: z.literal('GLOSTSentenceNode'),
  originalText: z.string(),
  lang: LanguageCodeSchema,
  script: ScriptSystemSchema,
  transcription: TransliterationDataSchema.optional(),
  children: z.array(z.any()).default([]),
});
150
+
151
/**
 * Schema for a paragraph node (`type: 'GLOSTParagraphNode'`).
 *
 * Requires `lang` and `script`. `children` is not validated element by
 * element and becomes `[]` in the parsed output when it is missing from the
 * input.
 */
export const GLOSTParagraphNodeSchema = GLOSTNodeBaseSchema.extend({
  type: z.literal('GLOSTParagraphNode'),
  lang: LanguageCodeSchema,
  script: ScriptSystemSchema,
  children: z.array(z.any()).default([]),
});
160
+
161
/**
 * Schema for the document root (`type: 'GLOSTRootNode'`).
 *
 * Requires `lang` and `script`. `metadata` is an optional object whose
 * fields are all optional strings. `children` is not validated element by
 * element and becomes `[]` in the parsed output when it is missing from the
 * input.
 */
export const GLOSTRootNodeSchema = GLOSTNodeBaseSchema.extend({
  type: z.literal('GLOSTRootNode'),
  lang: LanguageCodeSchema,
  script: ScriptSystemSchema,
  metadata: z
    .object({
      title: z.string().optional(),
      author: z.string().optional(),
      date: z.string().optional(),
      description: z.string().optional(),
    })
    .optional(),
  children: z.array(z.any()).default([]),
});
176
+
177
/**
 * Accepts a value that matches any one of the four concrete node schemas:
 * word, sentence, paragraph or root.
 */
export const GLOSTNodeSchema = z.union([
  GLOSTWordNodeSchema,
  GLOSTSentenceNodeSchema,
  GLOSTParagraphNodeSchema,
  GLOSTRootNodeSchema,
]);
186
+
187
+ // ============================================================================
188
+ // Validation Functions
189
+ // ============================================================================
190
+
191
/**
 * Type guard: checks `data` against `GLOSTWordNodeSchema`.
 *
 * @param data - Value of unknown shape to check.
 * @returns `true` when the schema accepts `data`, `false` otherwise. No
 *   error details are returned; use `parseGLOSTWordNode` to obtain them.
 */
export function validateGLOSTWordNode(data: unknown): data is z.infer<typeof GLOSTWordNodeSchema> {
  return GLOSTWordNodeSchema.safeParse(data).success;
}
198
+
199
/**
 * Type guard: checks `data` against `GLOSTSentenceNodeSchema`.
 *
 * @param data - Value of unknown shape to check.
 * @returns `true` when the schema accepts `data`, `false` otherwise. No
 *   error details are returned; use `parseGLOSTSentenceNode` to obtain them.
 */
export function validateGLOSTSentenceNode(data: unknown): data is z.infer<typeof GLOSTSentenceNodeSchema> {
  return GLOSTSentenceNodeSchema.safeParse(data).success;
}
206
+
207
/**
 * Type guard: checks `data` against `GLOSTParagraphNodeSchema`.
 *
 * @param data - Value of unknown shape to check.
 * @returns `true` when the schema accepts `data`, `false` otherwise. No
 *   error details are returned; use `parseGLOSTParagraphNode` to obtain them.
 */
export function validateGLOSTParagraphNode(data: unknown): data is z.infer<typeof GLOSTParagraphNodeSchema> {
  return GLOSTParagraphNodeSchema.safeParse(data).success;
}
214
+
215
/**
 * Type guard: checks `data` against `GLOSTRootNodeSchema`.
 *
 * @param data - Value of unknown shape to check.
 * @returns `true` when the schema accepts `data`, `false` otherwise. No
 *   error details are returned; use `parseGLOSTRootNode` to obtain them.
 */
export function validateGLOSTRootNode(data: unknown): data is z.infer<typeof GLOSTRootNodeSchema> {
  return GLOSTRootNodeSchema.safeParse(data).success;
}
222
+
223
/**
 * Type guard: checks `data` against `GLOSTNodeSchema`, i.e. whether it is a
 * valid word, sentence, paragraph or root node.
 *
 * @param data - Value of unknown shape to check.
 * @returns `true` when the schema accepts `data`, `false` otherwise. No
 *   error details are returned; use `parseGLOSTNode` to obtain them.
 */
export function validateGLOSTNode(data: unknown): data is z.infer<typeof GLOSTNodeSchema> {
  return GLOSTNodeSchema.safeParse(data).success;
}
230
+
231
/**
 * Runs `GLOSTNodeSchema.safeParse` on `data` and hands back its result
 * object unchanged, so callers can inspect either the parsed node or the
 * validation error. Does not throw on invalid input.
 *
 * @param data - Value of unknown shape to parse.
 * @returns The `safeParse` result for `GLOSTNodeSchema`.
 */
export function parseGLOSTNode(data: unknown) {
  const outcome = GLOSTNodeSchema.safeParse(data);
  return outcome;
}
237
+
238
/**
 * Runs `GLOSTWordNodeSchema.safeParse` on `data` and hands back its result
 * object unchanged, so callers can inspect either the parsed node or the
 * validation error. Does not throw on invalid input.
 *
 * @param data - Value of unknown shape to parse.
 * @returns The `safeParse` result for `GLOSTWordNodeSchema`.
 */
export function parseGLOSTWordNode(data: unknown) {
  const outcome = GLOSTWordNodeSchema.safeParse(data);
  return outcome;
}
244
+
245
/**
 * Runs `GLOSTSentenceNodeSchema.safeParse` on `data` and hands back its
 * result object unchanged, so callers can inspect either the parsed node or
 * the validation error. Does not throw on invalid input.
 *
 * @param data - Value of unknown shape to parse.
 * @returns The `safeParse` result for `GLOSTSentenceNodeSchema`.
 */
export function parseGLOSTSentenceNode(data: unknown) {
  const outcome = GLOSTSentenceNodeSchema.safeParse(data);
  return outcome;
}
251
+
252
/**
 * Runs `GLOSTParagraphNodeSchema.safeParse` on `data` and hands back its
 * result object unchanged, so callers can inspect either the parsed node or
 * the validation error. Does not throw on invalid input.
 *
 * @param data - Value of unknown shape to parse.
 * @returns The `safeParse` result for `GLOSTParagraphNodeSchema`.
 */
export function parseGLOSTParagraphNode(data: unknown) {
  const outcome = GLOSTParagraphNodeSchema.safeParse(data);
  return outcome;
}
258
+
259
/**
 * Runs `GLOSTRootNodeSchema.safeParse` on `data` and hands back its result
 * object unchanged, so callers can inspect either the parsed node or the
 * validation error. Does not throw on invalid input.
 *
 * @param data - Value of unknown shape to parse.
 * @returns The `safeParse` result for `GLOSTRootNodeSchema`.
 */
export function parseGLOSTRootNode(data: unknown) {
  const outcome = GLOSTRootNodeSchema.safeParse(data);
  return outcome;
}
265
+
266
/**
 * Validate an entire GLOST tree/document.
 *
 * The root is checked against `GLOSTRootNodeSchema`. If it is valid, the walk
 * descends three levels: paragraph children of the root, sentence children of
 * each valid paragraph, and word children of each valid sentence, each
 * checked against its own schema.
 *
 * Children whose `type` is not the one expected at that depth (including
 * non-object children) are skipped without an error, and the children of a
 * node that failed validation are not inspected.
 *
 * @param data - Candidate document of unknown shape.
 * @returns One message per failing node, labelled with the node kind and its
 *   dotted child indexes (e.g. `Word 0.2.1 validation failed: ...`). The array
 *   is empty when nothing failed. If the root itself is invalid, the array
 *   holds only the root failure.
 */
export function validateGLOSTTree(data: unknown): string[] {
  const errors: string[] = [];

  const rootResult = GLOSTRootNodeSchema.safeParse(data);
  if (!rootResult.success) {
    errors.push(`Root validation failed: ${rootResult.error.message}`);
    return errors;
  }

  // Always iterate the *parsed* children: the schemas default a missing
  // `children` to [], whereas the raw input may not have the property at all.
  const paragraphs = rootResult.data.children;
  for (let i = 0; i < paragraphs.length; i++) {
    const paragraph: unknown = paragraphs[i];
    if (!isGLOSTTreeNodeOfType(paragraph, 'GLOSTParagraphNode', 'ParagraphNode')) {
      continue;
    }

    const paragraphResult = GLOSTParagraphNodeSchema.safeParse(paragraph);
    if (!paragraphResult.success) {
      errors.push(`Paragraph ${i} validation failed: ${paragraphResult.error.message}`);
      continue;
    }

    const sentences = paragraphResult.data.children;
    for (let j = 0; j < sentences.length; j++) {
      const sentence: unknown = sentences[j];
      if (!isGLOSTTreeNodeOfType(sentence, 'GLOSTSentenceNode', 'SentenceNode')) {
        continue;
      }

      const sentenceResult = GLOSTSentenceNodeSchema.safeParse(sentence);
      if (!sentenceResult.success) {
        errors.push(`Sentence ${i}.${j} validation failed: ${sentenceResult.error.message}`);
        continue;
      }

      const words = sentenceResult.data.children;
      for (let k = 0; k < words.length; k++) {
        const word: unknown = words[k];
        if (!isGLOSTTreeNodeOfType(word, 'GLOSTWordNode', 'WordNode')) {
          continue;
        }

        const wordResult = GLOSTWordNodeSchema.safeParse(word);
        if (!wordResult.success) {
          errors.push(`Word ${i}.${j}.${k} validation failed: ${wordResult.error.message}`);
        }
      }
    }
  }

  return errors;
}

/**
 * Reports whether `node` is a non-null object whose `type` property is one of
 * `types`. Safe to call on any value, so `null` or primitive children do not
 * make the tree walk throw.
 *
 * `validateGLOSTTree` passes both the `GLOST`-prefixed type name that the
 * node schemas require and the un-prefixed name this walk originally matched.
 * Nodes carrying the un-prefixed name are therefore still run through the
 * schema (and reported, since they fail its `type` literal) instead of being
 * silently skipped.
 */
function isGLOSTTreeNodeOfType(node: unknown, ...types: string[]): boolean {
  if (typeof node !== 'object' || node === null) {
    return false;
  }
  const nodeType = (node as { type?: unknown }).type;
  return typeof nodeType === 'string' && types.includes(nodeType);
}
316
+
317
+ // ============================================================================
318
+ // Schema Export
319
+ // ============================================================================
320
+
321
/**
 * All schemas of this module collected in one object, keyed by the schema's
 * name without the `Schema` suffix. `GLOSTNodeBaseSchema` is not included.
 */
export const schemas = {
  // Enumerations and open-ended identifiers.
  LinguisticLevel: LinguisticLevelSchema,
  PronunciationContext: PronunciationContextSchema,
  TranscriptionSystem: TranscriptionSystemSchema,
  LanguageCode: LanguageCodeSchema,
  ScriptSystem: ScriptSystemSchema,

  // Transcription and metadata structures.
  PronunciationVariant: PronunciationVariantSchema,
  TranscriptionInfo: TranscriptionInfoSchema,
  TransliterationData: TransliterationDataSchema,
  LinguisticMetadata: LinguisticMetadataSchema,

  // Node schemas.
  GLOSTNode: GLOSTNodeSchema,
  GLOSTWordNode: GLOSTWordNodeSchema,
  GLOSTSentenceNode: GLOSTSentenceNodeSchema,
  GLOSTParagraphNode: GLOSTParagraphNodeSchema,
  GLOSTRootNode: GLOSTRootNodeSchema,
};
package/tsconfig.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "extends": "../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "rootDir": "./src",
5
+ "outDir": "./dist"
6
+ },
7
+ "include": ["src/**/*"],
8
+ "exclude": ["node_modules", "dist", "**/*.test.ts", "**/*.spec.ts"]
9
+ }