@arkadia/ai-data-format 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +198 -0
  3. package/dist/config.d.ts +75 -0
  4. package/dist/config.d.ts.map +1 -0
  5. package/dist/config.js +28 -0
  6. package/dist/config.js.map +1 -0
  7. package/dist/core/Decoder.d.ts +86 -0
  8. package/dist/core/Decoder.d.ts.map +1 -0
  9. package/dist/core/Decoder.js +951 -0
  10. package/dist/core/Decoder.js.map +1 -0
  11. package/dist/core/Encoder.d.ts +26 -0
  12. package/dist/core/Encoder.d.ts.map +1 -0
  13. package/dist/core/Encoder.js +368 -0
  14. package/dist/core/Encoder.js.map +1 -0
  15. package/dist/core/Parser.d.ts +6 -0
  16. package/dist/core/Parser.d.ts.map +1 -0
  17. package/dist/core/Parser.js +132 -0
  18. package/dist/core/Parser.js.map +1 -0
  19. package/dist/index.d.ts +22 -0
  20. package/dist/index.d.ts.map +1 -0
  21. package/dist/index.js +46 -0
  22. package/dist/index.js.map +1 -0
  23. package/dist/models/Meta.d.ts +48 -0
  24. package/dist/models/Meta.d.ts.map +1 -0
  25. package/dist/models/Meta.js +113 -0
  26. package/dist/models/Meta.js.map +1 -0
  27. package/dist/models/Node.d.ts +42 -0
  28. package/dist/models/Node.d.ts.map +1 -0
  29. package/dist/models/Node.js +179 -0
  30. package/dist/models/Node.js.map +1 -0
  31. package/dist/models/Schema.d.ts +55 -0
  32. package/dist/models/Schema.d.ts.map +1 -0
  33. package/dist/models/Schema.js +175 -0
  34. package/dist/models/Schema.js.map +1 -0
  35. package/package.json +32 -0
  36. package/scripts/verify-build.js +202 -0
  37. package/src/config.ts +102 -0
  38. package/src/core/Decoder.ts +1057 -0
  39. package/src/core/Encoder.ts +443 -0
  40. package/src/core/Parser.ts +150 -0
  41. package/src/index.ts +46 -0
  42. package/src/models/Meta.ts +135 -0
  43. package/src/models/Node.ts +212 -0
  44. package/src/models/Schema.ts +222 -0
  45. package/tests/00.meta.test.ts +31 -0
  46. package/tests/00.node.test.ts +54 -0
  47. package/tests/00.primitive.test.ts +108 -0
  48. package/tests/00.schema.test.ts +41 -0
  49. package/tests/01.schema.test.ts +70 -0
  50. package/tests/02.data.test.ts +89 -0
  51. package/tests/03.errors.test.ts +71 -0
  52. package/tests/04.list.test.ts +225 -0
  53. package/tests/05.record.test.ts +82 -0
  54. package/tests/06.meta.test.ts +506 -0
  55. package/tests/utils.ts +69 -0
  56. package/tsconfig.json +46 -0
  57. package/vitest.config.ts +9 -0
@@ -0,0 +1,1057 @@
1
+ import { Node } from '../models/Node';
2
+ import { Schema, SchemaKind } from '../models/Schema';
3
+ import { MetaInfo } from '../models/Meta';
4
+
5
+ // --- ANSI Colors Helper ---
6
/**
 * ANSI escape sequences used to colorize the decoder's debug trace.
 *
 * The members are constants and are never reassigned, so they are declared
 * `readonly`; the explicit `string` annotation keeps them assignable to
 * plain string variables.
 */
class Ansi {
    static readonly RESET: string = "\x1b[0m";
    static readonly DIM: string = "\x1b[2m";
    static readonly BOLD: string = "\x1b[1m";
    static readonly CYAN: string = "\x1b[36m";
    static readonly YELLOW: string = "\x1b[33m";
    static readonly GREEN: string = "\x1b[32m";
    static readonly RED: string = "\x1b[31m";
    static readonly MAGENTA: string = "\x1b[35m";
}

/** Matches ANSI color sequences of the form `ESC [ <digits and ;> m`. */
const ANSI_RE = /\x1b\[[0-9;]*m/g;
18
+
19
+ // --- Error/Warning Data Structures ---
20
+
21
/**
 * A problem recorded by the decoder while parsing.
 *
 * Carries the message, the character offset at which it was recorded and,
 * optionally, the schema and node that were current at that moment.
 */
export class DecodeError {
    message: string;
    position: number;
    context: string = "";
    schema: Schema | null = null;
    node: Node | null = null;

    constructor(
        message: string,
        position: number,
        schema: Schema | null = null,
        node: Node | null = null,
    ) {
        this.message = message;
        this.position = position;
        this.schema = schema;
        this.node = node;
    }

    /** Renders the error as `[DecodeError] <message> (at pos <position>)`. */
    toString(): string {
        return "[DecodeError] " + this.message + " (at pos " + this.position + ")";
    }
}
40
+
41
/**
 * A non-fatal notice recorded by the decoder while parsing.
 *
 * Carries the message, the character offset at which it was recorded and,
 * optionally, the schema and node that were current at that moment.
 */
export class DecodeWarning {
    message: string;
    position: number;
    schema: Schema | null;
    node: Node | null;

    constructor(
        message: string,
        position: number,
        schema: Schema | null = null,
        node: Node | null = null,
    ) {
        this.message = message;
        this.position = position;
        this.schema = schema;
        this.node = node;
    }

    /** Renders the warning as `[DecodeWarn] <message> (at pos <position>)`. */
    toString(): string {
        return "[DecodeWarn] " + this.message + " (at pos " + this.position + ")";
    }
}
58
+
59
/** Everything `Decoder.decode()` returns for one input text. */
export interface DecodeResult {
    /** The root node that was parsed. */
    node: Node;
    /** The schema associated with the root node, if any. */
    schema: Schema | null;
    /** Errors recorded while decoding. */
    errors: Array<DecodeError>;
    /** Warnings recorded while decoding. */
    warnings: Array<DecodeWarning>;
}
65
+
66
+ // --- DECODER CLASS ---
67
+
68
+ export class Decoder {
69
/** Type names recognised as primitives inside schema definitions. */
private static PRIMITIVES = new Set([
    "string",
    "bool",
    "number",
    "null",
    "int",
    "float",
    "binary",
]);

/** Maps each recognised primitive name to the type name stored on the schema. */
private static PRIMITIVES_MAPPING: Record<string, string> = {
    "string": "string",
    "bool": "bool",
    "number": "number",
    "null": "null",
    "int": "number",
    "float": "number",
    "binary": "binary",
};

/** Upper bound on the number of errors that `addError` will record. */
private static MAX_ERRORS = 50;

private text: string;
private debug: boolean;

// Cursor state: offset into `text` plus the line/column shown in debug output.
private i = 0;
private line = 0;
private col = 0;

// Meta collected from inline modifiers/comments, waiting for a target.
private pendingMeta: MetaInfo = new MetaInfo();

// Hierarchy state and collected diagnostics.
private nodeStack: Node[] = [];
private schemaStack: Schema[] = [];
private errors: DecodeError[] = [];
private warnings: DecodeWarning[] = [];
private namedSchemas = new Map<string, Schema>();
93
+
94
/**
 * @param text             The data text to decode.
 * @param schema           Text prepended to `text` before decoding.
 * @param removeAnsiColors When true, ANSI color sequences are stripped from the combined text.
 * @param debug            When true, `_dbg` prints a trace to the console.
 */
constructor(text: string, schema: string = "", removeAnsiColors: boolean = false, debug: boolean = false) {
    const combined = schema + text;
    this.text = removeAnsiColors ? combined.replace(ANSI_RE, '') : combined;
    this.debug = debug;
}
102
+
103
+ // =========================================================
104
+ // ENTRY
105
+ // =========================================================
106
+
107
/**
 * Decodes the whole text: leading schema definitions first, then the root node.
 *
 * @returns The root node, the schema associated with it, and the errors
 *          and warnings collected along the way.
 */
public decode(): DecodeResult {
    this._dbg("decode() start");
    this.parseMeta();

    let rootSchemaContext: Schema | null = null;

    // 1. Consume leading schema definitions (`<...>`) and references (`@Name`).
    while (!this.eof()) {
        const ch = this.peek();
        if (ch !== '<' && ch !== '@') break;

        if (ch === '<') {
            // Inline definition <x:int>
            rootSchemaContext = this.parseSchemaBody();
            this.parseMeta();

            // Data starts right after the definition: stop scanning for schemas.
            const upcoming = this.peek();
            if (upcoming === '[' || upcoming === '{' || upcoming === '(') break;
        } else {
            // Named schema @Name
            const named = this.parseSchemaAtRef();
            this.parseMeta();

            // Another schema follows: keep scanning. Otherwise this one
            // becomes the root context.
            const upcoming = this.peek();
            if (upcoming !== '@' && upcoming !== '<') {
                rootSchemaContext = named;
                break;
            }
        }
    }

    // 2. Make the root schema current while the root node is parsed.
    if (rootSchemaContext) {
        this.pushSchema(rootSchemaContext);
    }

    // 3. Parse the root node (an empty input yields a null-valued node).
    const rootNode: Node = this.eof() ? this.createNode(null) : this.parseNode();

    // 4. Restore the schema stack and settle which schema is reported.
    if (rootSchemaContext) {
        this.popSchema();
        // Link the schema if the node ended up generic.
        if (!rootNode.schema || rootNode.schema.isAny) {
            rootNode.schema = rootSchemaContext;
        }
    } else {
        rootSchemaContext = rootNode.schema;
    }

    // Trailing meta after the root node is attached to the root node.
    this.parseMeta();
    this.applyMeta(rootNode);

    this.popNode(); // Just in case
    this._dbg("decode() end");

    return {
        node: rootNode,
        schema: rootSchemaContext,
        errors: this.errors,
        warnings: this.warnings,
    };
}
180
+
181
+ // =========================================================
182
+ // SCHEMA DEFINITION PARSING
183
+ // =========================================================
184
+
185
/**
 * Parses `@Name` and, when a `<` follows, the definition body as well.
 *
 * A definition is registered in `namedSchemas`. A bare reference returns
 * the registered schema, or a new RECORD schema carrying only the type
 * name when nothing is registered under that name.
 */
private parseSchemaAtRef(): Schema {
    this.advance(1); // @
    const typeName = this.parseIdent();
    this.skipWhitespace();

    if (this.peek() !== '<') {
        this._dbg(`referencing type ${typeName}`);
        const known = this.namedSchemas.get(typeName);
        return known !== undefined ? known : new Schema(SchemaKind.RECORD, { typeName });
    }

    this._dbg(`defining type ${typeName}`);
    const defined = this.parseSchemaBody(typeName);
    if (defined.isAny) {
        defined.kind = SchemaKind.RECORD;
    }
    this.namedSchemas.set(typeName, defined);
    return defined;
}
205
+
206
/**
 * Parses a `<...>` schema body.
 *
 * When the expected `<` is missing, an error is recorded by `expect` and
 * an ANY schema is returned. The created schema is pushed while its
 * content is parsed and popped again before returning.
 */
private parseSchemaBody(typeName: string = ""): Schema {
    const label = typeName ? `@${typeName}` : "";
    this._dbg(`START parse_schema_body '<' ${label}`);

    const opened = this.expect('<');
    const schema = this.createSchema(opened ? SchemaKind.RECORD : SchemaKind.ANY, typeName);

    if (!opened) {
        this.popSchema();
        return schema;
    }

    this.parseSchemaBodyContent(schema, '>');
    this.popSchema();

    this._dbg(`END parse_schema_body '>' ${label}`);
    return schema;
}
223
+
224
/**
 * Parses the inside of a schema body up to and including `endChar`.
 *
 * Handles three shapes: a list body (`[ ... ]`), a bare primitive name,
 * and a comma separated list of fields (`name` or `name: type`).
 * Meta blocks found here are applied to `schema`; pending inline meta is
 * applied to the most recent field, or to `schema` when there is none yet.
 */
private parseSchemaBodyContent(schema: Schema, endChar: string) {
    let lastField: Schema | null = null;

    while (!this.eof()) {
        this.parseMeta(schema); // meta blocks /.../ apply to the schema itself

        const ch = this.peek();

        if (ch === endChar) {
            this.advance(1);
            break;
        }

        // LIST schema: < [ ... ] >
        if (ch === '[') {
            this.advance(1);
            this._dbg("LIST schema begin");
            schema.kind = SchemaKind.LIST;
            schema.clearFields();

            this.applyMeta(schema); // attach whatever meta is pending

            const elementSchema = new Schema(SchemaKind.ANY);
            this.parseSchemaBodyContent(elementSchema, ']');
            schema.element = elementSchema;

            this.parseMeta(schema);
            if (this.peek() === endChar) {
                this.advance(1);
            }
            this.applyMeta(schema);
            return;
        }

        if (ch === ',') {
            this.applyMeta(lastField ?? schema);
            this.advance(1);
            continue;
        }

        const name = this.parseIdent();
        if (name === "") {
            this.addError("Expected identifier");
            this.advance(1);
            continue;
        }

        this.skipWhitespace();
        const hasExplicitType = this.peek() === ':';

        // A bare primitive name (no `:` after it) types the schema itself,
        // e.g. the `int` in `[ int ]`.
        if (!hasExplicitType && Decoder.PRIMITIVES.has(name)) {
            schema.kind = SchemaKind.PRIMITIVE;
            schema.typeName = Decoder.PRIMITIVES_MAPPING[name];
            continue;
        }

        let field: Schema;
        if (hasExplicitType) {
            this.advance(1);
            field = this.parseSchemaType();
        } else {
            field = new Schema(SchemaKind.PRIMITIVE, { typeName: "any" });
        }
        lastField = field;

        field.name = name;
        this.applyMeta(field);

        // Trailing comments/modifiers after the field belong to the field.
        this.parseMeta(schema);
        this.applyMeta(field);

        schema.addField(field);
    }

    this.applyMeta(lastField ?? schema);
}
296
+
297
/**
 * Parses the type that follows `name:` in a schema field.
 *
 * Accepted forms: `[type]` (list), `@Name` (reference, or a definition
 * when `<` follows), `<...>` (anonymous record), a primitive name, or a
 * bare identifier (a registered schema if one exists under that name,
 * otherwise a RECORD schema carrying the name). A missing identifier
 * yields an ANY schema.
 */
private parseSchemaType(): Schema {
    this.parseMeta(this.currentSchema);
    const ch = this.peek();

    if (ch === '[') {
        this.advance(1);
        const lst = new Schema(SchemaKind.LIST);
        this.applyMeta(lst);
        lst.element = this.parseSchemaType();
        // Allow whitespace between the element type and the closing
        // bracket, e.g. `[ int ]`; without this `expect` sees the space,
        // records an error and leaves `]` unconsumed.
        this.skipWhitespace();
        this.expect(']');
        return lst;
    }

    if (ch === '@') {
        this.advance(1);
        const name = this.parseIdent();
        this.parseMeta(this.currentSchema);

        if (this.peek() === '<') {
            this._dbg(`Inline definition for @${name}`);
            const s = this.parseSchemaBody(name);
            if (s.isAny) s.kind = SchemaKind.RECORD;
            this.namedSchemas.set(name, s);
            return s;
        }
        if (this.namedSchemas.has(name)) return this.namedSchemas.get(name)!;
        return new Schema(SchemaKind.RECORD, { typeName: name });
    }

    if (ch === '<') {
        return this.parseSchemaBody();
    }

    const name = this.parseIdent();
    if (Decoder.PRIMITIVES.has(name)) {
        const s = new Schema(SchemaKind.PRIMITIVE, { typeName: Decoder.PRIMITIVES_MAPPING[name] });
        this.applyMeta(s);
        return s;
    }
    if (this.namedSchemas.has(name)) return this.namedSchemas.get(name)!;
    if (!name) return new Schema(SchemaKind.ANY);

    return new Schema(SchemaKind.RECORD, { typeName: name });
}
341
+
342
+ // =========================================================
343
+ // NODE DISPATCHER
344
+ // =========================================================
345
+
346
/**
 * Parses one node, dispatching on the first significant character.
 *
 * Leading meta is parsed first and pending meta is applied to the
 * resulting node. An unexpected character is reported, skipped, and
 * replaced by a null-valued node.
 *
 * @param _parent Not used by this method.
 */
private parseNode(_parent: Node | null = null): Node {
    this.parseMeta(this.currentNode);

    if (this.eof()) {
        this.addError("Unexpected EOF while expecting a node");
        return this.createNode(null);
    }

    const ch = this.peek();
    let node: Node;

    switch (ch) {
        case '@':
            node = this.parseNodeWithSchemaRef();
            break;
        case '<':
            node = this.parseNodeWithInlineSchema();
            break;
        case '[':
            node = this.parseList();
            break;
        case '(':
            node = this.parsePositionalRecord();
            break;
        case '{':
            node = this.parseNamedRecord();
            break;
        case '"':
            this._dbg("Dispatch: String");
            node = this.parseString();
            break;
        case '-':
            this._dbg("Dispatch: Number");
            node = this.parseNumber();
            break;
        default:
            if (ch !== null && /\d/.test(ch)) {
                this._dbg("Dispatch: Number");
                node = this.parseNumber();
            } else if (ch !== null && /[a-zA-Z_]/.test(ch)) {
                this._dbg("Dispatch: RawString/Ident");
                node = this.parseRawString();
            } else {
                this.addError(`Unexpected character '${ch}'`);
                this.advance(1);
                node = this.createNode(null);
            }
    }

    this.applyMeta(node);
    return node;
}
383
+
384
/**
 * Parses `@Name <node>`: the referenced (or defined) schema is current
 * while the node is parsed and is then assigned to the node.
 */
private parseNodeWithSchemaRef(): Node {
    this._dbg("Start Node with Ref (@)");
    const refSchema = this.parseSchemaAtRef();

    this.pushSchema(refSchema);
    const parsed = this.parseNode();
    this.popSchema();

    parsed.schema = refSchema;
    return parsed;
}
393
+
394
/**
 * Parses `<schema> <node>`: the inline schema is current while the node
 * is parsed and is then assigned to the node.
 */
private parseNodeWithInlineSchema(): Node {
    this._dbg("Start Node with Inline (<)");
    const inlineSchema = this.parseSchemaBody();

    this.pushSchema(inlineSchema);
    const parsed = this.parseNode();
    this.popSchema();

    parsed.schema = inlineSchema;
    return parsed;
}
403
+
404
+ // =========================================================
405
+ // STRUCTURE PARSERS
406
+ // =========================================================
407
+
408
/**
 * Parses a `[ ... ]` list into a node whose `elements` hold the children.
 *
 * The node's schema is turned into a LIST schema if it is not one
 * already. While the element schema is still ANY, it is replaced by the
 * schema of a parsed child. Reaching the end of input before `]` records
 * an "Unexpected EOF: List not closed" error.
 */
private parseList(): Node {
    this._dbg("Start LIST [");
    this.advance(1); // [

    const node = this.createNode();
    node.elements = [];

    if (node.schema.kind !== SchemaKind.LIST) {
        node.schema.kind = SchemaKind.LIST;
        node.schema.typeName = "list";
        node.schema.element = new Schema(SchemaKind.ANY);
    }

    const parentSchema = node.schema;
    let childSchema = new Schema(SchemaKind.ANY);
    if (parentSchema && parentSchema.isList && parentSchema.element) {
        childSchema = parentSchema.element;
    }

    let childNode: Node | null = null;

    while (true) {
        this.parseMeta(node); // meta blocks /.../ apply to the list itself
        this.pushSchema(childSchema);

        if (this.eof()) {
            this.addError("Unexpected EOF: List not closed");
            break;
        }

        if (this.peek() === ']') {
            this.applyMeta(childNode || node);
            this.advance(1);
            break;
        }
        if (this.peek() === ',') {
            this.applyMeta(childNode || node);
            this.advance(1);
            // Balance the push made at the top of this iteration. Without
            // it every comma left one extra entry on the schema stack, so
            // callers later popped a stale element schema instead of their own.
            this.popSchema();
            continue;
        }

        childNode = this.parseNode(node);
        node.elements.push(childNode);

        if (parentSchema.element && parentSchema.element.isAny && childNode.schema) {
            parentSchema.element = childNode.schema;
        }

        this.applyMeta(childNode || node);
        this.popNode();
        this.popSchema();
    }

    // Balances the push of the iteration that ended with `break`.
    this.popSchema();
    this._dbg("End LIST ]");
    return node;
}
464
+
465
/**
 * Parses a `( ... )` positional record.
 *
 * Values are matched to the schema's fields by position. Values beyond
 * the predefined fields are stored under `_<index>` and a matching
 * inferred field is added to the schema.
 *
 * Reaching the end of input before `)` records an
 * "Unexpected EOF: Record not closed" error, mirroring `parseList`.
 * Previously a truncated record was accepted silently, and trailing
 * whitespace before EOF produced an extra null-valued field.
 */
private parsePositionalRecord(): Node {
    this._dbg("Start RECORD (");
    this.advance(1); // (

    const node = this.createNode();
    if (node.schema.kind !== SchemaKind.RECORD) {
        node.schema.kind = SchemaKind.RECORD;
        node.schema.typeName = "any";
    }

    let index = 0;
    // Snapshot: fields inferred below must not shift the positional mapping.
    const predefinedFields = node.schema.fields ? [...node.schema.fields] : [];
    let valNode: Node | null = null;
    let closed = false;

    while (!this.eof()) {
        this.parseMeta(node);

        // parseMeta may have consumed the rest of the input.
        if (this.eof()) break;

        if (this.peek() === ')') {
            this.applyMeta(valNode || node);
            this.advance(1);
            closed = true;
            break;
        }
        if (this.peek() === ',') {
            this.applyMeta(valNode || node);
            this.advance(1);
            continue;
        }

        let fieldSchema = new Schema(SchemaKind.ANY);
        if (index < predefinedFields.length) {
            fieldSchema = predefinedFields[index];
        }

        this.pushSchema(fieldSchema);
        valNode = this.parseNode();

        if (index < predefinedFields.length) {
            const name = predefinedFields[index].name;
            node.fields[name] = valNode;
        } else {
            const name = `_${index}`;
            const inferred = new Schema(valNode.schema.kind, { typeName: valNode.schema.typeName || "any" });
            inferred.name = name;
            node.schema.addField(inferred);
            node.fields[name] = valNode;
        }

        this.applyMeta(valNode || node);
        this.popNode();
        this.popSchema();
        index++;
    }

    if (!closed) {
        this.addError("Unexpected EOF: Record not closed");
    }

    this._dbg("End RECORD )");
    return node;
}
520
+
521
/**
 * Parses a `{ key: value, ... }` named record.
 *
 * Keys are identifiers or quoted strings. A key already present in the
 * schema is parsed against that field; an ANY field is replaced by the
 * value's schema once that is known, and an unknown key adds an inferred
 * field to the schema.
 *
 * Reaching the end of input before `}` records an
 * "Unexpected EOF: Record not closed" error, mirroring `parseList`.
 * Previously a truncated record was either accepted silently or reported
 * as "Expected key in record".
 */
private parseNamedRecord(): Node {
    this._dbg("Start NAMED RECORD {");
    this.advance(1); // {

    const node = this.createNode();
    node.fields = {};

    if (node.schema.kind !== SchemaKind.RECORD) {
        node.schema.kind = SchemaKind.RECORD;
        node.schema.typeName = "any";
    }

    const currentSchema = node.schema;
    let valNode: Node | null = null;
    let closed = false;

    while (!this.eof()) {
        this.parseMeta(node);

        // parseMeta may have consumed the rest of the input.
        if (this.eof()) break;

        if (this.peek() === '}') {
            this.applyMeta(valNode || node);
            this.advance(1);
            closed = true;
            break;
        }
        if (this.peek() === ',') {
            this.applyMeta(valNode || node);
            this.advance(1);
            continue;
        }

        let keyName = this.parseIdent();
        if (!keyName) {
            if (this.peek() === '"') {
                keyName = this.readQuotedString();
            } else {
                this.addError("Expected key in record");
                this.advance(1);
                continue;
            }
        }

        this.skipWhitespace();
        this.expect(':');

        let fieldSchema = new Schema(SchemaKind.ANY);
        if (currentSchema && currentSchema.isRecord) {
            const existing = currentSchema.getField(keyName);
            if (existing) fieldSchema = existing;
        }

        this.pushSchema(fieldSchema);
        valNode = this.parseNode();

        if (currentSchema.isRecord) {
            const existing = currentSchema.getField(keyName);
            if (existing && existing.isAny && !valNode.schema.isAny) {
                // The declared field was untyped: adopt the value's schema.
                valNode.schema.name = keyName;
                currentSchema.replaceField(valNode.schema);
            } else if (!existing) {
                const inferred = new Schema(valNode.schema.kind, { typeName: valNode.schema.typeName || "any" });
                inferred.name = keyName;
                node.schema.addField(inferred);
            }
        }

        node.fields[keyName] = valNode;
        this.applyMeta(valNode || node);
        this.popNode();
        this.popSchema();
    }

    if (!closed) {
        this.addError("Unexpected EOF: Record not closed");
    }

    this._dbg("End NAMED RECORD }");
    return node;
}
593
+
594
+ // =========================================================
595
+ // PREFIX & META PARSING
596
+ // =========================================================
597
+
598
/**
 * Consumes whitespace, block comments, meta blocks and inline modifiers
 * until something else is found.
 *
 * Block comments and inline modifiers are stored in `pendingMeta`; meta
 * blocks are handed to `parseMetaBlock` together with `obj`.
 */
private parseMeta(obj: Node | Schema | null = null): void {
    for (;;) {
        if (this.eof()) return;

        this.skipWhitespace();
        const ch = this.peek();

        if (ch === '/') {
            if (this.peekNext() === '*') {
                // Block comment: /* ... */
                const comment = this.parseCommentBlock();
                this.pendingMeta.comments.push(comment);
            } else {
                // Meta block: / ... /
                this.parseMetaBlock(obj);
            }
            continue;
        }

        if (ch === '$' || ch === '#' || ch === '!') {
            this.parseModifierInline();
            continue;
        }

        return;
    }
}
621
+
622
/**
 * Reads a block comment, which may contain nested block comments.
 *
 * A backslash drops itself and keeps the following character verbatim.
 * Nested delimiters are kept in the returned text; the outermost pair is
 * not. An unclosed comment records an "Unterminated comment" error.
 *
 * @returns The comment text, trimmed.
 */
private parseCommentBlock(): string {
    this._dbg("START block comment");
    this.advance(2); // opening "/*"

    let depth = 1;
    let collected = "";

    while (depth > 0 && !this.eof()) {
        const ch = this.text[this.i];

        if (ch === '\\') {
            this.advance(1);
            if (!this.eof()) {
                collected += this.text[this.i];
            }
            this.advance(1);
            continue;
        }

        const following = this.peekNext();

        if (ch === '/' && following === '*') {
            depth += 1;
            this.advance(2);
            collected += "/*";
            continue;
        }

        if (ch === '*' && following === '/') {
            depth -= 1;
            this.advance(2);
            if (depth > 0) {
                collected += "*/";
            }
            continue;
        }

        collected += ch;
        this.advance(1);
    }

    if (depth > 0) {
        this.addError("Unterminated comment");
    }
    return collected.trim();
}
658
+
659
/**
 * Parses one inline modifier (`$attr`, `#tag` or `!flag`) into
 * `pendingMeta`. Any other character is skipped.
 */
private parseModifierInline(): void {
    switch (this.peek()) {
        case '$':
            this.parseMetaAttribute(this.pendingMeta);
            break;
        case '#':
            this.parseMetaTag(this.pendingMeta);
            break;
        case '!':
            this.parseMetaFlag(this.pendingMeta);
            break;
        default:
            this.advance(1);
    }
}
666
+
667
/**
 * Parses a `/ ... /` meta block containing attributes (`$k=v`), tags
 * (`#t`), flags (`!f`), block comments and legacy implicit attributes
 * (`k=v` without `$`, accepted with a warning).
 *
 * The collected meta is applied to `obj`. When `obj` is null a warning
 * is recorded and the meta is merged into `pendingMeta` instead.
 *
 * Fixes relative to the previous version:
 * - An implicit attribute must start with a letter or `_`. A digit used
 *   to match here although `parseIdent` cannot consume it, so the loop
 *   never advanced and kept recording warnings forever.
 * - A block that reaches the end of input without its closing `/` now
 *   records an "Unterminated meta block" error.
 *
 * @returns The meta parsed from the block.
 */
private parseMetaBlock(obj: Node | Schema | null = null): MetaInfo {
    this.expect('/');
    this._dbg("START meta header /.../");
    const meta = new MetaInfo();
    let closed = false;

    while (!this.eof()) {
        this.skipWhitespace();
        const ch = this.peek();
        // Only whitespace was left: handled as unterminated below.
        if (ch === null) break;
        const nextCh = this.peekNext();

        if (ch === '/' && nextCh === '*') {
            meta.comments.push(this.parseCommentBlock());
            continue;
        }

        if (ch === '/') {
            this.advance(1);
            closed = true;
            break;
        }

        if (ch === '$') { this.parseMetaAttribute(meta); continue; }
        if (ch === '#') { this.parseMetaTag(meta); continue; }
        if (ch === '!') { this.parseMetaFlag(meta); continue; }

        // Implicit attribute (legacy support: key=value without $).
        // Must match what parseIdent accepts as a first character,
        // otherwise nothing is consumed and the loop cannot progress.
        if (/[a-zA-Z_]/.test(ch)) {
            const key = this.parseIdent();
            let val: any = true;

            this.skipWhitespace();
            if (this.peek() === '=') {
                this.advance(1);
                val = this.parsePrimitiveValue();
            }
            this.addWarning(`Implicit attribute '${key}'. Use '$${key}' instead.`);

            meta.attr[key] = val;
            continue;
        }

        this.addError(`Unexpected token in meta block: ${ch}`);
        this.advance(1);
    }

    if (!closed) {
        this.addError("Unterminated meta block");
    }

    if (obj) {
        obj.applyMeta(meta);
    } else {
        this.addWarning(`There is no parent to add the meta block '${meta}'`);
        this.pendingMeta.applyMeta(meta);
    }

    this._dbg("END meta header");
    return meta;
}
721
+
722
/**
 * Parses `$key` or `$key=value` into `meta.attr`.
 * An attribute without a value is stored as `true`.
 */
private parseMetaAttribute(meta: MetaInfo): void {
    this.advance(1); // $
    const key = this.parseIdent();
    this.skipWhitespace();

    let value: any = true;
    if (this.peek() === '=') {
        this.advance(1);
        value = this.parsePrimitiveValue();
    }

    meta.attr[key] = value;
    this._dbg(`Meta Attr: $${key}=${value}`);
}
734
+
735
/** Parses `#tag` and appends the tag name to `meta.tags`. */
private parseMetaTag(meta: MetaInfo): void {
    this.advance(1); // #
    const tagName = this.parseIdent();
    meta.tags.push(tagName);
    this._dbg(`Meta Tag: #${tagName}`);
}
741
+
742
/**
 * Parses `!flag`. Only `!required` is recognised (it sets
 * `meta.required`); any other flag records a warning.
 */
private parseMetaFlag(meta: MetaInfo): void {
    this.advance(1); // !
    const flag = this.parseIdent();

    if (flag !== 'required') {
        this.addWarning(`Unknown flag: !${flag}`);
        return;
    }

    meta.required = true;
    this._dbg("Meta Flag: !required");
}
752
+
753
+ // =========================================================
754
+ // HELPERS & LOW-LEVEL PARSERS
755
+ // =========================================================
756
+
757
/**
 * Skips whitespace, then reads an identifier: a letter or `_` followed
 * by letters, digits or `_`.
 *
 * @returns The identifier, or `""` (with nothing consumed beyond the
 *          whitespace) when the next character cannot start one.
 */
private parseIdent(): string {
    this.skipWhitespace();

    if (this.eof() || !/[a-zA-Z_]/.test(this.text[this.i])) {
        return "";
    }

    const begin = this.i;
    do {
        this.advance(1);
    } while (!this.eof() && /[a-zA-Z0-9_]/.test(this.text[this.i]));

    return this.text.substring(begin, this.i);
}
773
+
774
/** Reads a quoted string and wraps it in a node. */
private parseString(): Node {
    return this.createNode(this.readQuotedString());
}
778
+
779
/** Reads a number and wraps it in a node. */
private parseNumber(): Node {
    return this.createNode(this.readNumber());
}
783
+
784
/**
 * Reads a bare identifier and wraps it in a node. The words `true`,
 * `false` and `null` become the corresponding values; anything else is
 * kept as a string.
 */
private parseRawString(): Node {
    const word = this.parseIdent();
    switch (word) {
        case "true":
            return this.createNode(true);
        case "false":
            return this.createNode(false);
        case "null":
            return this.createNode(null);
        default:
            return this.createNode(word);
    }
}
792
+
793
/**
 * Reads the value that follows `=` in a meta attribute: a quoted string,
 * a number, `true`/`false`/`null`, or a bare identifier kept as a string.
 *
 * Leading whitespace is skipped first. Previously `$key = "v"` (with a
 * space after `=`) fell through to `parseIdent`, stored an empty string
 * and left `"v"` in the input to be parsed as data.
 *
 * @returns The parsed value, or `null` at end of input.
 */
private parsePrimitiveValue(): any {
    this.skipWhitespace();

    const ch = this.peek();
    if (!ch) return null;
    if (ch === '"') return this.readQuotedString();
    if (/\d/.test(ch) || ch === '-') return this.readNumber();

    const raw = this.parseIdent();
    if (raw === "true") return true;
    if (raw === "false") return false;
    if (raw === "null") return null;
    return raw;
}
805
+
806
/**
 * Reads a double-quoted string, including both quotes.
 *
 * Recognised escapes are `\n`, `\t` and `\r`; any other escaped character
 * (including `"` and `\`) stands for itself. A missing opening or closing
 * quote is reported through `expect`.
 *
 * @returns The unescaped string contents.
 */
private readQuotedString(): string {
    this.expect('"');

    let out = "";
    while (!this.eof()) {
        const ch = this.text[this.i];
        if (ch === '"') break;

        if (ch !== '\\') {
            out += ch;
            this.advance(1);
            continue;
        }

        // Escape sequence: drop the backslash, translate the next character.
        this.advance(1);
        if (this.eof()) break;

        const esc = this.text[this.i];
        switch (esc) {
            case 'n':
                out += '\n';
                break;
            case 't':
                out += '\t';
                break;
            case 'r':
                out += '\r';
                break;
            default:
                out += esc;
        }
        this.advance(1);
    }

    this.expect('"');
    return out;
}
832
+
833
/**
 * Reads a number: optional `-`, digits, optional fraction, optional
 * exponent. The consumed text is converted with `parseFloat`.
 *
 * @returns The parsed number, or 0 (after recording an error) when the
 *          consumed text is not a valid number.
 */
private readNumber(): number {
    const begin = this.i;

    const skipDigits = (): void => {
        while (/\d/.test(this.peek() || '')) {
            this.advance(1);
        }
    };

    if (this.peek() === '-') {
        this.advance(1);
    }
    skipDigits();

    if (this.peek() === '.') {
        this.advance(1);
        skipDigits();
    }

    const marker = this.peek();
    if (marker === 'e' || marker === 'E') {
        this.advance(1);
        const sign = this.peek();
        if (sign === '+' || sign === '-') {
            this.advance(1);
        }
        skipDigits();
    }

    const raw = this.text.substring(begin, this.i);
    const parsed = parseFloat(raw);
    if (!isNaN(parsed)) {
        return parsed;
    }

    this.addError(`Invalid number format: ${raw}`);
    return 0;
}
857
+
858
+ // =========================================================
859
+ // STACK & STATE HELPERS
860
+ // =========================================================
861
+
862
/** The schema on top of the schema stack, or null when the stack is empty. */
private get currentSchema(): Schema | null {
    const depth = this.schemaStack.length;
    if (depth === 0) {
        return null;
    }
    return this.schemaStack[depth - 1];
}
865
+
866
/**
 * Creates a schema, applies the pending meta to it and pushes it onto
 * the schema stack. The caller is responsible for popping it.
 */
private createSchema(kind: SchemaKind, typeName: string = ""): Schema {
    const created = new Schema(kind, { typeName });
    this.applyMeta(created);
    this.pushSchema(created);
    return created;
}
872
+
873
/** Pushes `s` onto the schema stack, making it the current schema. */
private pushSchema(s: Schema): void {
    this.schemaStack.push(s);
    const preview = s.toString().substring(0, 30);
    this._dbg(`PUSH SCHEMA ${preview}...`);
}
877
+
878
/**
 * Pops the current schema.
 *
 * For a list schema that has an element schema, the element is passed to
 * the list's `applyMeta` and the element's own meta is then cleared.
 *
 * @returns The popped schema, or null when the stack was empty.
 */
private popSchema(): Schema | null {
    const popped = this.schemaStack.pop() || null;

    if (popped !== null && popped.isList && popped.element) {
        popped.applyMeta(popped.element);
        popped.element.clearMeta();
    }

    const preview = popped ? popped.toString().substring(0, 30) : 'null';
    this._dbg(`POP SCHEMA ${preview}...`);

    return popped;
}
888
+
889
/** The node on top of the node stack, or null when the stack is empty. */
private get currentNode(): Node | null {
    const depth = this.nodeStack.length;
    if (depth === 0) {
        return null;
    }
    return this.nodeStack[depth - 1];
}
892
+
893
/** Pushes `n` onto the node stack, making it the current node. */
private pushNode(n: Node): void {
    this.nodeStack.push(n);
    const preview = n.toString().substring(0, 30);
    this._dbg(`PUSH NODE ${preview}...`);
}
897
+
898
/**
 * Pops the current node.
 *
 * @returns The popped node, or null when the stack was empty.
 */
private popNode(): Node | null {
    const popped = this.nodeStack.pop() || null;
    const preview = popped ? popped.toString().substring(0, 30) : 'null';
    this._dbg(`POP NODE ${preview}...`);
    return popped;
}
903
+
904
/**
 * Creates a node for `value`, picks its schema, applies the pending meta
 * and pushes the node onto the node stack.
 *
 * Schema selection:
 * - With no current schema, a new ANY schema is pushed and used as the
 *   declared schema.
 * - For a boolean, number or string value, a primitive schema is
 *   inferred; the declared schema is kept only when it is not ANY and its
 *   type name is compatible with the inferred one.
 * - For a null value, a declared record or list schema is kept; otherwise
 *   a primitive "null" schema is used.
 * - For any other value the declared schema is kept.
 */
private createNode(value: any = null): Node {
    let declared = this.currentSchema;
    if (!declared) {
        declared = new Schema(SchemaKind.ANY);
        this.pushSchema(declared);
    }

    let chosen: Schema = declared;

    if (value === null) {
        if (!(declared.isRecord || declared.isList)) {
            chosen = new Schema(SchemaKind.PRIMITIVE, { typeName: "null" });
        }
    } else {
        let inferred: Schema | null = null;
        switch (typeof value) {
            case 'boolean':
                inferred = new Schema(SchemaKind.PRIMITIVE, { typeName: "bool" });
                break;
            case 'number':
                inferred = new Schema(SchemaKind.PRIMITIVE, { typeName: "number" });
                break;
            case 'string':
                inferred = new Schema(SchemaKind.PRIMITIVE, { typeName: "string" });
                break;
        }

        if (inferred) {
            const sameType = declared.typeName === inferred.typeName;
            const numericFamily =
                declared.typeName === "number" &&
                (inferred.typeName === "int" || inferred.typeName === "float");
            const keepDeclared = declared.kind !== SchemaKind.ANY && (sameType || numericFamily);

            if (!keepDeclared) {
                chosen = inferred;
            }
        }
    }

    const node = new Node(chosen, { value });
    this.applyMeta(node);
    this.pushNode(node);
    return node;
}
947
+
948
/**
 * Applies the pending meta to `obj`, then replaces `pendingMeta` with a
 * fresh, empty instance.
 */
private applyMeta(obj: Node | Schema): void {
    const pending = this.pendingMeta;
    obj.applyMeta(pending);
    this.pendingMeta = new MetaInfo();
}
952
+
953
/**
 * Moves the cursor forward by up to `n` characters, keeping `line` and
 * `col` in sync. Stops early at the end of the text.
 *
 * `line` and `col` are zero-based (both start at 0 and `_dbg` adds 1 when
 * printing), so a newline resets `col` to 0. It was previously reset to
 * 1, which shifted every reported column after the first line by one.
 *
 * @returns The last character consumed, or `""` when nothing was consumed.
 */
private advance(n: number = 1): string {
    let lastChar = "";
    for (let k = 0; k < n; k++) {
        if (this.i >= this.text.length) break;
        const c = this.text[this.i];
        lastChar = c;
        if (c === '\n') {
            this.line++;
            this.col = 0;
        } else {
            this.col++;
        }
        this.i++;
    }
    return lastChar;
}
969
+
970
/** Advances the cursor past any run of whitespace characters. */
private skipWhitespace(): void {
    while (!this.eof() && /\s/.test(this.text[this.i])) {
        this.advance(1);
    }
}
980
+
981
/** True when the cursor is at or past the end of the text. */
private eof(): boolean {
    return this.i >= this.text.length;
}

/** The character under the cursor, or null at end of text. */
private peek(): string | null {
    if (this.eof()) {
        return null;
    }
    return this.text[this.i];
}

/** The character right after the cursor, or null when there is none. */
private peekNext(): string | null {
    const nextIndex = this.i + 1;
    if (nextIndex < this.text.length) {
        return this.text[nextIndex];
    }
    return null;
}
984
+
985
/**
 * Consumes `ch` if it is the next character.
 *
 * @returns True when `ch` was consumed; false (after recording an error,
 *          with nothing consumed) otherwise.
 */
private expect(ch: string): boolean {
    const actual = this.peek();
    if (actual === ch) {
        this.advance(1);
        return true;
    }

    this.addError(`Expected '${ch}', got '${actual}'`);
    return false;
}
993
+
994
/**
 * Records an error at the current position with the current schema and
 * node. Ignored once `MAX_ERRORS` errors have been recorded.
 */
private addError(msg: string): void {
    const limitReached = this.errors.length >= Decoder.MAX_ERRORS;
    if (limitReached) return;

    this._dbg(`ERROR: ${msg}`);
    const error = new DecodeError(msg, this.i, this.currentSchema, this.currentNode);
    this.errors.push(error);
}
999
+
1000
/** Records a warning at the current position with the current schema and node. */
private addWarning(msg: string): void {
    this._dbg(`WARNING: ${msg}`);
    const warning = new DecodeWarning(msg, this.i, this.currentSchema, this.currentNode);
    this.warnings.push(warning);
}
1004
+
1005
/**
 * Prints one line of the debug trace; does nothing unless `debug` is set.
 *
 * Each line shows the 1-based line:column, a window of up to ten
 * characters either side of the cursor, the node-stack depth, and the
 * message, colored according to keywords found in it.
 */
private _dbg(msg: string): void {
    if (!this.debug) return;

    const depth = this.nodeStack.length;
    const locStr = `${this.line + 1}:${this.col + 1}`;

    // Tree guide: one bar per ancestor level, then a branch marker.
    const treePrefix = depth > 0
        ? Ansi.DIM + "│ ".repeat(depth - 1) + "├─ " + Ansi.RESET
        : "";

    // Window of text around the cursor, with newlines made visible.
    const windowStart = Math.max(0, this.i - 10);
    const windowEnd = Math.min(this.text.length, this.i + 11);

    const rawBefore = this.text
        .substring(windowStart, this.i)
        .padStart(10)
        .replace(/\n/g, "↩︎");

    // At end of text there is no character under the cursor: show a blank.
    const rawCurrent = (this.text[this.i] || " ")
        .replace(/\n/g, "↩︎")
        .replace(/ /g, "·")
        .replace(/\t/g, "→");

    const rawAfter = this.text
        .substring(this.i + 1, windowEnd)
        .padEnd(10)
        .replace(/\n/g, "↩︎");

    const context = `${Ansi.DIM}${rawBefore}${Ansi.RESET}${Ansi.YELLOW}${rawCurrent}${Ansi.RESET}${Ansi.DIM}${rawAfter}${Ansi.RESET}`;

    // First matching rule wins.
    const colorRules: Array<[string[], string]> = [
        [["ERROR"], Ansi.RED],
        [["WARNING"], Ansi.YELLOW],
        [["→"], Ansi.GREEN],
        [["PUSH", "POP"], Ansi.MAGENTA],
        [["START", "END"], Ansi.DIM],
    ];
    let msgColor = Ansi.RESET;
    for (const [keywords, color] of colorRules) {
        if (keywords.some((keyword) => msg.includes(keyword))) {
            msgColor = color;
            break;
        }
    }

    console.log(
        `${Ansi.CYAN}|${locStr.padStart(8)}|${Ansi.RESET}${Ansi.DIM} ${Ansi.RESET}${context}${Ansi.DIM}${Ansi.CYAN}|${Ansi.RESET} ${Ansi.YELLOW}${treePrefix}${Ansi.YELLOW}@${depth}${Ansi.RESET} ${Ansi.DIM}|${Ansi.RESET} ${msgColor}${msg}${Ansi.RESET}`
    );
}
1057
+ }