xcdn 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.js ADDED
@@ -0,0 +1,427 @@
1
+ /**
2
+ * xCDN Parser Module
3
+ * Recursive descent parser for the xCDN format
4
+ */
5
+
6
+ import { Lexer, TokenType, Token } from './lexer.js';
7
+ import { XCDNError, ErrorKind } from './error.js';
8
+ import {
9
+ Document, Directive, Node, Tag, Annotation,
10
+ Null, Bool, Int, Float, DecimalValue, XString, Bytes,
11
+ DateTime, Duration, Uuid, XArray, XObject
12
+ } from './ast.js';
13
+
14
/**
 * Decodes a Base64 string, accepting either the standard or the URL-safe alphabet.
 *
 * The input is first decoded as-is. If that fails, `-` and `_` are mapped to
 * `+` and `/`, missing `=` padding is appended, and decoding is retried.
 *
 * @param {string} str - Base64 text (standard or URL-safe, padding optional for URL-safe)
 * @returns {Uint8Array} The decoded bytes
 * @throws {Error} `Invalid base64: <input>` when both attempts fail; the failure
 *   of the second attempt is attached as `cause`.
 */
function decodeBase64(str) {
  // atob yields a "binary string" holding one byte per character.
  const toBytes = (encoded) => Uint8Array.from(atob(encoded), (ch) => ch.charCodeAt(0));

  try {
    return toBytes(str);
  } catch {
    // Not valid standard Base64; retry below with the URL-safe alphabet.
    try {
      const standardAlphabet = str.replace(/-/g, '+').replace(/_/g, '/');
      // Pad up to the next multiple of four characters.
      const padding = '='.repeat((4 - (standardAlphabet.length % 4)) % 4);
      return toBytes(standardAlphabet + padding);
    } catch (urlSafeError) {
      throw new Error(`Invalid base64: ${str}`, { cause: urlSafeError });
    }
  }
}
45
+
46
/**
 * Checks that a string is a UUID in 8-4-4-4-12 hexadecimal form and
 * returns it lower-cased.
 *
 * @param {string} str - Candidate UUID text (either letter case)
 * @returns {string} The same UUID in lower case
 * @throws {Error} `Invalid UUID: <input>` when the text does not match
 */
function parseUuid(str) {
  const isWellFormed =
    /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/.test(str);

  if (isWellFormed) {
    return str.toLowerCase();
  }
  throw new Error(`Invalid UUID: ${str}`);
}
58
+
59
/**
 * Parses an RFC 3339 timestamp (`YYYY-MM-DDTHH:MM:SS[.frac](Z|±HH:MM)`) into a Date.
 *
 * The text is checked against the RFC 3339 shape before it is handed to
 * `Date`, because `Date` on its own also accepts other formats (for example a
 * bare year) and may roll an impossible day such as February 30 forward
 * instead of rejecting it. Lower-case `t` and `z` are accepted and
 * normalised to upper case. A seconds value of 60 is rejected.
 *
 * @param {string} str - Timestamp text
 * @returns {Date} The instant described by the timestamp
 * @throws {Error} `Invalid datetime: <input>` when the text is not a valid
 *   RFC 3339 timestamp
 */
function parseDateTime(str) {
  const rfc3339 =
    /^(\d{4})-(\d{2})-(\d{2})[Tt](?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:\.\d+)?(?:[Zz]|[+-](?:[01]\d|2[0-3]):[0-5]\d)$/;

  const match = rfc3339.exec(str);
  if (match === null) {
    throw new Error(`Invalid datetime: ${str}`);
  }

  // Validate the calendar day ourselves rather than trusting Date to reject it.
  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysPerMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (month < 1 || month > 12 || day < 1 || day > daysPerMonth[month - 1]) {
    throw new Error(`Invalid datetime: ${str}`);
  }

  // The only letters in a matched timestamp are the T separator and Z suffix.
  const date = new Date(match[0].toUpperCase());
  if (Number.isNaN(date.getTime())) {
    throw new Error(`Invalid datetime: ${str}`);
  }
  return date;
}
73
+
74
/**
 * Recursive-descent parser for xCDN source text.
 *
 * Keeps exactly one token of lookahead in `current`; every `bump()` pulls the
 * next token from the lexer.
 */
export class Parser {
  /**
   * Builds the lexer and loads the first token into the lookahead slot.
   * @param {string} source - Source code
   */
  constructor(source) {
    this.lexer = new Lexer(source);
    this.current = this.lexer.nextToken();
  }

  /**
   * Looks at the lookahead token without advancing.
   * @returns {Token}
   */
  peek() {
    return this.current;
  }

  /**
   * Advances by one token.
   * @returns {Token} The token that was the lookahead before advancing
   */
  bump() {
    const consumed = this.current;
    this.current = this.lexer.nextToken();
    return consumed;
  }

  /**
   * Consumes the lookahead token if it has the given kind.
   * @param {string} kind - Expected type
   * @returns {Token} The consumed token
   * @throws {XCDNError} `ErrorKind.Expected` when the lookahead has another kind
   */
  expect(kind) {
    const lookahead = this.peek();
    if (lookahead.kind === kind) {
      return this.bump();
    }
    throw new XCDNError(
      ErrorKind.Expected,
      lookahead.span,
      `Expected ${kind}, found ${lookahead.kind}`
    );
  }

  /**
   * Consumes an IDENT or STRING token and yields its value.
   * @returns {string}
   * @throws {XCDNError} `ErrorKind.Expected` for any other token kind
   */
  parseIdentString() {
    const lookahead = this.peek();
    const isName =
      lookahead.kind === TokenType.IDENT || lookahead.kind === TokenType.STRING;

    if (!isName) {
      throw new XCDNError(
        ErrorKind.Expected,
        lookahead.span,
        `Expected identifier or string, found ${lookahead.kind}`
      );
    }

    this.bump();
    return lookahead.value;
  }

  /**
   * Parses an object key; keys follow the same rule as `parseIdentString`.
   * @returns {string}
   */
  parseKey() {
    return this.parseIdentString();
  }

  /**
   * Parses a whole document: leading `$name: value` directives, then either
   * an implicit top-level object (`key: value` pairs without braces) or a
   * stream of nodes up to end of input. A comma after a directive or after an
   * implicit-object entry is consumed when present but is not required.
   * @returns {Document}
   */
  parseDocument() {
    const prolog = [];
    while (this.peek().kind === TokenType.DOLLAR) {
      this.bump(); // the `$` marker
      const directiveName = this.parseIdentString();
      this.expect(TokenType.COLON);
      prolog.push(new Directive(directiveName, this.parseNode()));
      if (this.peek().kind === TokenType.COMMA) {
        this.bump();
      }
    }

    const values = [];

    // Nothing after the prolog: an empty body.
    if (this.peek().kind === TokenType.EOF) {
      return new Document(prolog, values);
    }

    if (!this.isImplicitObject()) {
      // Stream form: nodes back to back until the input ends.
      while (this.peek().kind !== TokenType.EOF) {
        values.push(this.parseNode());
      }
      return new Document(prolog, values);
    }

    // Implicit-object form: collect the pairs into a single undecorated node.
    const members = new Map();
    while (this.peek().kind !== TokenType.EOF) {
      const memberKey = this.parseKey();
      this.expect(TokenType.COLON);
      members.set(memberKey, this.parseNode());
      if (this.peek().kind === TokenType.COMMA) {
        this.bump();
      }
    }
    values.push(new Node([], [], new XObject(members)));
    return new Document(prolog, values);
  }

  /**
   * Reports whether the body starts with `IDENT :` or `STRING :`, i.e. looks
   * like a brace-less object. Looks one token past the lookahead, then puts
   * the lexer position fields and the lookahead back as they were.
   * @returns {boolean}
   */
  isImplicitObject() {
    const first = this.peek();
    const couldBeKey =
      first.kind === TokenType.IDENT || first.kind === TokenType.STRING;
    if (!couldBeKey) {
      return false;
    }

    // Snapshot everything the one-token probe below will disturb.
    const { pos, line, column } = this.lexer;
    const lookaheadBefore = this.current;

    this.bump();
    const followedByColon = this.peek().kind === TokenType.COLON;

    // Rewind so the caller re-reads the same tokens.
    Object.assign(this.lexer, { pos, line, column });
    this.current = lookaheadBefore;

    return followedByColon;
  }

  /**
   * Parses a value together with any decorations in front of it:
   * `@name` or `@name(arg, ...)` annotations and `#name` tags, in any order
   * and number.
   * @returns {Node}
   */
  parseNode() {
    const tags = [];
    const annotations = [];

    for (;;) {
      const marker = this.peek().kind;

      if (marker === TokenType.AT) {
        this.bump(); // the `@` marker
        const annotationName = this.parseIdentString();
        const args = [];

        if (this.peek().kind === TokenType.LPAREN) {
          this.bump(); // the `(`
          let expectArgument = this.peek().kind !== TokenType.RPAREN;
          while (expectArgument) {
            args.push(this.parseValue());
            const separator = this.peek();
            if (separator.kind === TokenType.COMMA) {
              this.bump();
            } else if (separator.kind === TokenType.RPAREN) {
              expectArgument = false;
            } else {
              throw new XCDNError(
                ErrorKind.Expected,
                separator.span,
                `Expected , or ), found ${separator.kind}`
              );
            }
          }
          this.expect(TokenType.RPAREN);
        }

        annotations.push(new Annotation(annotationName, args));
        continue;
      }

      if (marker === TokenType.HASH) {
        this.bump(); // the `#` marker
        tags.push(new Tag(this.parseIdentString()));
        continue;
      }

      break;
    }

    return new Node(tags, annotations, this.parseValue());
  }

  /**
   * Parses one undecorated value, chosen by the kind of the lookahead token.
   * @returns {*} An AST value instance
   * @throws {XCDNError} `InvalidBase64` / `InvalidUuid` / `InvalidDateTime`
   *   when a typed literal fails to convert, `Eof` at end of input, and
   *   `InvalidToken` for any other token kind
   */
  parseValue() {
    const token = this.peek();
    const kind = token.kind;

    // Converts a typed literal, reporting any failure as an XCDNError of the given kind.
    const convert = (errorKind, build) => {
      try {
        return build(token.value);
      } catch {
        throw new XCDNError(errorKind, token.span, token.value);
      }
    };

    if (kind === TokenType.LBRACE) {
      return this.parseObject();
    }
    if (kind === TokenType.LBRACKET) {
      return this.parseArray();
    }
    if (kind === TokenType.STRING || kind === TokenType.TRIPLE_STRING) {
      this.bump();
      return new XString(token.value);
    }
    if (kind === TokenType.TRUE) {
      this.bump();
      return new Bool(true);
    }
    if (kind === TokenType.FALSE) {
      this.bump();
      return new Bool(false);
    }
    if (kind === TokenType.NULL) {
      this.bump();
      return new Null();
    }
    if (kind === TokenType.INT) {
      this.bump();
      return new Int(token.value);
    }
    if (kind === TokenType.FLOAT) {
      this.bump();
      return new Float(token.value);
    }
    if (kind === TokenType.D_QUOTED) {
      this.bump();
      return new DecimalValue(token.value);
    }
    if (kind === TokenType.B_QUOTED) {
      this.bump();
      return convert(ErrorKind.InvalidBase64, (text) => new Bytes(decodeBase64(text)));
    }
    if (kind === TokenType.U_QUOTED) {
      this.bump();
      return convert(ErrorKind.InvalidUuid, (text) => new Uuid(parseUuid(text)));
    }
    if (kind === TokenType.T_QUOTED) {
      this.bump();
      return convert(ErrorKind.InvalidDateTime, (text) => new DateTime(parseDateTime(text)));
    }
    if (kind === TokenType.R_QUOTED) {
      this.bump();
      return new Duration(token.value);
    }
    if (kind === TokenType.EOF) {
      throw new XCDNError(ErrorKind.Eof, token.span, 'Unexpected end of input');
    }

    throw new XCDNError(
      ErrorKind.InvalidToken,
      token.span,
      `Unexpected token: ${token.kind}`
    );
  }

  /**
   * Parses a braced object `{key: value, ...}`. The comma after an entry is
   * consumed when present but is not required; a repeated key replaces the
   * earlier entry in the map.
   * @returns {XObject}
   */
  parseObject() {
    this.expect(TokenType.LBRACE);

    const members = new Map();
    for (;;) {
      if (this.peek().kind === TokenType.RBRACE) {
        break;
      }
      const memberKey = this.parseKey();
      this.expect(TokenType.COLON);
      members.set(memberKey, this.parseNode());
      if (this.peek().kind === TokenType.COMMA) {
        this.bump();
      }
    }

    this.expect(TokenType.RBRACE);
    return new XObject(members);
  }

  /**
   * Parses a bracketed array `[item, ...]`. The comma after an item is
   * consumed when present but is not required.
   * @returns {XArray}
   */
  parseArray() {
    this.expect(TokenType.LBRACKET);

    const items = [];
    for (;;) {
      if (this.peek().kind === TokenType.RBRACKET) {
        break;
      }
      items.push(this.parseNode());
      if (this.peek().kind === TokenType.COMMA) {
        this.bump();
      }
    }

    this.expect(TokenType.RBRACKET);
    return new XArray(items);
  }
}
408
+
409
/**
 * Parses xCDN text into a document by running a fresh `Parser` over it.
 * @param {string} source - Source code
 * @returns {Document}
 */
export function parseStr(source) {
  return new Parser(source).parseDocument();
}
418
+
419
/**
 * Parses xCDN text obtained from a reader object (kept for parity with the
 * Python API). The reader's `read()` is called once and its result is parsed
 * as a string.
 * @param {{read: () => string}} reader
 * @returns {Document}
 */
export function parseReader(reader) {
  return parseStr(reader.read());
}