sommark 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1039 @@
/**
 * Token Types in SomMark.
 * These represent the basic lexical atoms identified by the lexer.
 * Each member's value is its own name. The table is frozen so that the shared
 * constants cannot be modified after creation.
 *
 * @constant {Object}
 * @property {string} OPEN_BRACKET - '[' char.
 * @property {string} CLOSE_BRACKET - ']' char.
 * @property {string} END_KEYWORD - 'end' value.
 * @property {string} IMPORT - 'import' keyword.
 * @property {string} USE_MODULE - '$use-module' keyword.
 * @property {string} IDENTIFIER - Block or inline name (e.g. 'Person', 'import', '$use-module').
 * @property {string} EQUAL - '=' char.
 * @property {string} VALUE - Data values. Encapsulates Quoted Strings ("...") and Prefix Layers (js{}, p{}).
 * @property {string} QUOTE - '"' or "'" delimiter.
 * @property {string} PREFIX_JS - 'js{}' prefix layer.
 * @property {string} PREFIX_P - 'p{}' placeholder layer.
 * @property {string} PREFIX_V - 'v{}' local variable layer.
 * @property {string} TEXT - Plain unformatted text content.
 * @property {string} THIN_ARROW - '->' sequence.
 * @property {string} OPEN_PAREN - '(' char.
 * @property {string} CLOSE_PAREN - ')' char.
 * @property {string} OPEN_AT - '@_' sequence (At-Block start).
 * @property {string} CLOSE_AT - '_@' sequence (At-Header end).
 * @property {string} COLON - ':' char.
 * @property {string} COMMA - ',' char.
 * @property {string} SEMICOLON - ';' char (At-Block separator).
 * @property {string} COMMENT - '#' comments.
 * @property {string} COMMENT_BLOCK - '###' comments.
 * @property {string} ESCAPE - '\' char. Used for literalizing structural chars like '\"' or '\['.
 * @property {string} EXCLAMATION_MARK - '!' char.
 * @property {string} SLOT_KEYWORD - Presumably the 'slot' keyword (TODO confirm against the lexer).
 * @property {string} KEY - Key name, e.g. a quoted header string that is followed by ':' or '='.
 * @property {string} WHITESPACE - A newline, or a run of spaces, tabs and carriage returns.
 * @property {string} STATIC_KEYWORD - 'static' keyword.
 * @property {string} RUNTIME_KEYWORD - 'runtime' keyword.
 * @property {string} LOGIC - Code found between '${' and '}$'.
 * @property {string} FOR_EACH - Presumably the 'for-each' keyword (TODO confirm against the lexer).
 * @property {string} EOF - End of File indicator.
 */
const TOKEN_TYPES = Object.freeze({
  OPEN_BRACKET: "OPEN_BRACKET",
  CLOSE_BRACKET: "CLOSE_BRACKET",
  END_KEYWORD: "END_KEYWORD",
  IMPORT: "IMPORT",
  USE_MODULE: "USE_MODULE",
  IDENTIFIER: "IDENTIFIER",
  EQUAL: "EQUAL",
  VALUE: "VALUE",
  QUOTE: "QUOTE",
  PREFIX_JS: "PREFIX_JS",
  PREFIX_P: "PREFIX_P",
  PREFIX_V: "PREFIX_V",
  TEXT: "TEXT",
  THIN_ARROW: "THIN_ARROW",
  OPEN_PAREN: "OPEN_PAREN",
  CLOSE_PAREN: "CLOSE_PAREN",
  OPEN_AT: "OPEN_AT",
  CLOSE_AT: "CLOSE_AT",
  COLON: "COLON",
  COMMA: "COMMA",
  SEMICOLON: "SEMICOLON",
  COMMENT: "COMMENT",
  COMMENT_BLOCK: "COMMENT_BLOCK",
  ESCAPE: "ESCAPE",
  EXCLAMATION_MARK: "EXCLAMATION_MARK",
  SLOT_KEYWORD: "SLOT_KEYWORD",
  KEY: "KEY",
  WHITESPACE: "WHITESPACE",
  STATIC_KEYWORD: "STATIC_KEYWORD",
  RUNTIME_KEYWORD: "RUNTIME_KEYWORD",
  LOGIC: "LOGIC",
  FOR_EACH: "FOR_EACH",
  EOF: "EOF"
});
68
+
/**
 * Node labels: each one names a kind of construct (block, text, inline, ...)
 * so later stages can tell them apart.
 */
const BLOCK = "Block";
const TEXT = "Text";
const INLINE = "Inline";
const ATBLOCK = "AtBlock";
const COMMENT = "Comment";
const COMMENT_BLOCK = "CommentBlock";
const IMPORT = "Import";
const USE_MODULE = "$use-module";
const SLOT = "Slot";
const STATIC_LOGIC = "StaticLogic";
const RUNTIME_LOGIC = "RuntimeLogic";
const FOR_EACH = "ForEach";

/**
 * Separator labels: names for the punctuation (commas, colons, semicolon)
 * that splits a construct into parts.
 */
const SEMICOLON = "Semicolon";
const BLOCKCOMMA = "Block-comma";
const ATBLOCKCOMMA = "Atblock-comma";
const INLINECOMMA = "Inline-comma";
const BLOCKCOLON = "Block-colon";
const ATBLOCKCOLON = "Atblock-colon";
const INLINECOLON = "Inline-colon";

/**
 * Human-readable names used in error messages to say which part of the
 * source is at fault.
 */
const block_id = "Block Identifier";
const block_value = "Block Value";
const block_key = "Block Key";
const block_end = "Block end";
const inline_id = "Inline Identifier";
const inline_text = "Inline Text";
const at_id = "At Identifier";
const at_value = "At Value";
const atblock_key = "AtBlock Key";
const at_end = "Atblock End";
/** Reserved keyword for closing blocks */
const end_keyword = "end";
const slot_keyword = "slot";
const for_each_keyword = "for-each";

/**
 * Frozen, prototype-less namespace object gathering every label above under
 * its own name. `__proto__: null` must stay in the `key: value` form: that is
 * what sets the prototype to null instead of creating a property.
 */
var labels = /*#__PURE__*/Object.freeze({
  __proto__: null,
  ATBLOCK,
  ATBLOCKCOLON,
  ATBLOCKCOMMA,
  BLOCK,
  BLOCKCOLON,
  BLOCKCOMMA,
  COMMENT,
  COMMENT_BLOCK,
  FOR_EACH,
  IMPORT,
  INLINE,
  INLINECOLON,
  INLINECOMMA,
  RUNTIME_LOGIC,
  SEMICOLON,
  SLOT,
  STATIC_LOGIC,
  TEXT,
  USE_MODULE,
  at_end,
  at_id,
  at_value,
  atblock_key,
  block_end,
  block_id,
  block_key,
  block_value,
  end_keyword,
  for_each_keyword,
  inline_id,
  inline_text,
  slot_keyword
});
151
+
152
+ /**
153
+ * Wraps your text in a color if colors are turned on.
154
+ *
155
+ * @param {string} color - The color to use (red, green, yellow, blue, magenta, or cyan).
156
+ * @param {string} text - The text you want to color.
157
+ * @returns {string} - The colored text, or plain text if colors are off.
158
+ * @throws {Error} - Fails if you forget to provide the text.
159
+ */
160
+ function colorize(color, text) {
161
+ if (!text) throw new Error("argument 'text' is not defined.");
162
+ return text;
163
+ }
164
+
165
+ /**
166
+ * SomMark Errors
167
+ * Handles formatting and throwing errors with beautiful CLI coloring and pointers.
168
+ */
169
+
170
+ // ========================================================================== //
171
+ // Message Formatting //
172
+ // ========================================================================== //
173
+
174
+ /**
175
+ * Processes a message by applying colors and formatting.
176
+ * Supports:
177
+ * - {line} : Adds a horizontal line
178
+ * - {N} : Adds a new line
179
+ * - <$color: Text$> : Adds color (red, yellow, green, blue, magenta, cyan)
180
+ *
181
+ * @param {string|string[]} text - The message or list of message parts to format.
182
+ * @returns {string} - The final formatted and colored string.
183
+ */
184
+ function formatMessage(text) {
185
+ const horizontal_rule = "\n----------------------------------------------------------------------------------------------\n";
186
+ const pattern = /<\$([^:]+):([\s\S]*?)\$>/g;
187
+
188
+ if (Array.isArray(text)) {
189
+ text = text.join("");
190
+ }
191
+
192
+ text = text.replace(pattern, (match, color, content) => {
193
+ return colorize(color, content.trim());
194
+ });
195
+ text = text.replaceAll("{line}", horizontal_rule);
196
+ text = text.replaceAll("{N}", "\n");
197
+
198
+ text = text
199
+ .split("\n")
200
+ .filter(value => value !== "")
201
+ .join("\n")
202
+ .trim();
203
+
204
+ return text;
205
+ }
206
+
207
+ /**
208
+ * Creates a detailed error message showing where the error happened in the code.
209
+ * It adds a line number, a snippet of the code, and a pointer (^) to the exact spot.
210
+ *
211
+ * @param {string} src - The original code being parsed.
212
+ * @param {Object} range - The location of the error (line and character).
213
+ * @param {string|null} filename - The name of the file (optional).
214
+ * @param {string|string[]} message - The error message to show.
215
+ * @param {string} typeName - The type of error (e.g., "Lexer" or "Parser").
216
+ * @returns {string[]} - A list of message parts that make up the final error report.
217
+ */
218
+ function formatErrorWithContext(src, range, filename, message, typeName) {
219
+ if (!src || !range || !range.start) return message;
220
+
221
+ const lines = src.split("\n");
222
+ const lineIndex = range.start.line;
223
+ const lineContent = lines[lineIndex] || "";
224
+ const pointerPadding = " ".repeat(range.start.character);
225
+ const sourceLabel = filename ? ` [${filename}]` : "";
226
+
227
+ const rangeInfo =
228
+ range.start.line === range.end.line
229
+ ? `from column <$yellow:${range.start.character}$> to <$yellow:${range.end.character}$>`
230
+ : `from line <$yellow:${range.start.line + 1}$>, column <$yellow:${range.start.character}$> to line <$yellow:${range.end.line + 1}$>, column <$yellow:${range.end.character}$>`;
231
+
232
+ const formattedMessage = [
233
+ `<$blue:{line}$><$red:Here where error occurred${sourceLabel}:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}{N}`,
234
+ `<$red:${typeName} Error:$> `,
235
+ ...(Array.isArray(message) ? message : [message]),
236
+ `{N}at line <$yellow:${range.start.line + 1}$>, ${rangeInfo}{N}`,
237
+ "<$blue:{line}$>"
238
+ ];
239
+
240
+ return formattedMessage;
241
+ }
242
+
243
+ // ========================================================================== //
244
+ // Error Classes //
245
+ // ========================================================================== //
246
+
/**
 * Common parent of every SomMark error class. It replaces the plain message
 * with a formatted one: a `[name]:` banner line followed by the formatted body.
 */
class CustomError extends Error {
  /**
   * Builds the error and its formatted message.
   *
   * @param {string|string[]} message - The text describing what went wrong.
   * @param {string} name - The name of the error type.
   */
  constructor(message, name) {
    super(message);
    this.name = name;

    // Banner first, then the body, each run through the message formatter.
    const banner = formatMessage(`<$cyan:[${this.name}]$>:`);
    const body = formatMessage(message);
    this.message = banner + "\n" + body;

    // captureStackTrace is not available on every engine, hence the check.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor);
    }
  }
}
264
+
/** Error type named "Parser Error". */
class ParserError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "Parser Error");
  }
}
268
+
/** Error type named "Lexer Error". */
class LexerError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "Lexer Error");
  }
}
272
+
/** Error type named "Transpiler Error". */
class TranspilerError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "Transpiler Error");
  }
}
276
+
/** Error type named "CLI Error". */
class CLIError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "CLI Error");
  }
}
280
+
/** Error type named "Runtime Error". */
class RuntimeError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "Runtime Error");
  }
}
284
+
/** Error type named "SomMark Error". */
class SommarkError extends CustomError {
  /** @param {string|string[]} message - The text describing what went wrong. */
  constructor(message) {
    super(message, "SomMark Error");
  }
}
288
+
289
+ // ========================================================================== //
290
+ // Error Dispatcher (Helper) //
291
+ // ========================================================================== //
292
+
/**
 * A helper that creates an error "dispatcher" for a specific category.
 *
 * The returned function always throws. What it throws is the formatted
 * message STRING of the category's error class (the `.message` of a freshly
 * built error), not the Error object itself; callers that catch it receive a
 * string. That is kept as-is for compatibility.
 *
 * @param {string} type - The category of error: 'parser', 'transpiler',
 *   'lexer', 'cli', 'runtime' or 'sommark'.
 * @returns {Function} - `(errorMessage, context = null) => never`.
 *   `errorMessage` is a string or a non-empty array of message parts.
 *   `context` may carry `{ src, range, filename }`; when both `src` and
 *   `range` are present the message is wrapped with a source snippet and
 *   pointer before being thrown.
 */
function getError(type) {
  const isValidMessage = msg => (Array.isArray(msg) && msg.length > 0) || typeof msg === "string";
  const typeNames = {
    parser: "Parser",
    transpiler: "Transpiler",
    lexer: "Lexer",
    cli: "CLI",
    runtime: "Runtime",
    sommark: "SomMark"
  };
  const ErrorClasses = {
    parser: ParserError,
    transpiler: TranspilerError,
    lexer: LexerError,
    cli: CLIError,
    runtime: RuntimeError,
    sommark: SommarkError
  };

  return (errorMessage, context = null) => {
    // An invalid message (undefined, empty array, ...) previously made the
    // dispatcher return without throwing, so the caller carried on as if no
    // error had occurred. Substitute a placeholder so the failure still surfaces.
    const message = isValidMessage(errorMessage)
      ? errorMessage
      : "Unknown error: no error message was provided.";

    let finalMessage = message;
    if (context && context.src && context.range) {
      finalMessage = formatErrorWithContext(
        context.src,
        context.range,
        context.filename,
        message,
        typeNames[type]
      );
    }

    // Deliberately throws the formatted message string (see the doc comment).
    throw new ErrorClasses[type](finalMessage).message;
  };
}
334
+
335
+ /** Dispatcher built by getError("lexer"): calling it throws the formatted Lexer error message. */
336
+ const lexerError = getError("lexer");
337
+
338
+ /** Dispatcher built by getError("runtime"): calling it throws the formatted Runtime error message (used for Runtime or Module errors). */
339
+ const runtimeError = getError("runtime");
340
+
341
+ /**
342
+ * SomMark Lexer
343
+ *
344
+ * Transforms a raw SomMark source string into a stream of tokens.
345
+ * It uses a state-machine approach to handle complex contexts like At-Block bodies,
346
+ * quoted values, and hierarchical headers.
347
+ *
348
+ * @param {string} src - The raw SomMark source code.
349
+ * @param {string} [filename="anonymous"] - Source filename for error reporting.
350
+ * @returns {Array<Object>} Array of token objects.
351
+ */
352
+ function lexer(src, filename = "anonymous") {
353
+ if (!src || typeof src !== "string") return [];
354
+ const tokens = [];
355
+ let last_non_junk_type = ""; // Tracks the last real token for context guessing
356
+ let i = 0;
357
+ let line = 0, character = 0;
358
+
359
+ // State Variables
360
+ let isInAtBlockBody = false;
361
+ let isInQuote = false;
362
+ let isInHeader = false; // Tracks if we are in a structural header context
363
+ let isInAtBlockHeader = false; // Specific for At-Block headers (@_ ... _@)
364
+ let isInInlineHead = false; // Specific for (key:val) after ->
365
+ let parenDepth = 0; // To track balanced parentheses in inlines
366
+
367
+ /**
368
+ * Adds a token to the stream and updates the scanner's position tracking.
369
+ *
370
+ * @param {string} type - The type of token (from TOKEN_TYPES).
371
+ * @param {string} value - The literal text content of the token.
372
+ */
373
+ function addToken(type, value) {
374
+ const start = { line, character };
375
+
376
+ // Update position
377
+ const parts = value.split("\n");
378
+ if (parts.length > 1) {
379
+ line += parts.length - 1;
380
+ character = parts[parts.length - 1].length;
381
+ } else {
382
+ character += value.length;
383
+ }
384
+
385
+ const end = { line, character };
386
+ tokens.push({
387
+ type,
388
+ value,
389
+ source: filename,
390
+ range: { start, end }
391
+ });
392
+ if (type !== TOKEN_TYPES.WHITESPACE && type !== TOKEN_TYPES.COMMENT) {
393
+ if (type !== TOKEN_TYPES.TEXT || value.trim() !== "") {
394
+ last_non_junk_type = type;
395
+ }
396
+ }
397
+ }
398
+
399
+ /**
400
+ * Looks ahead to find the next structural character, skipping whitespace and comments.
401
+ * Used for context-guessing (e.g., distinguishing KEY from VALUE).
402
+ *
403
+ * @param {number} start - Index to start peeking from.
404
+ * @returns {string|null} The next structural character or null if EOF.
405
+ */
406
+ function peekStructural(start) {
407
+ let j = start;
408
+ while (j < src.length) {
409
+ const c = src[j];
410
+ if (c === " " || c === "\t" || c === "\n" || c === "\r") {
411
+ j++;
412
+ continue;
413
+ }
414
+ if (c === "#") {
415
+ while (j < src.length && src[j] !== "\n") j++;
416
+ continue;
417
+ }
418
+ if (c === "\\") {
419
+ // Escape sequence: jump over the backslash and the escaped char
420
+ j += 2;
421
+ continue;
422
+ }
423
+ return c;
424
+ }
425
+ return null;
426
+ }
427
+
428
+ while (i < src.length) {
429
+ // --- PHASE 1: AT-BLOCK BODY MODE ---
430
+ // In this mode, we consume everything as raw text until we hit the @_ marker.
431
+ if (isInAtBlockBody) {
432
+ if (src[i] === "@" && src[i + 1] === "_") {
433
+ isInAtBlockBody = false;
434
+ } else {
435
+ let body = "";
436
+ while (i < src.length) {
437
+ // Handle escapes in At-Block Body
438
+ if (src[i] === "\\" && i + 1 < src.length) {
439
+ body += src[i + 1];
440
+ i += 2;
441
+ continue;
442
+ }
443
+ // Stop at end marker
444
+ if (src[i] === "@" && src[i + 1] === "_") {
445
+ break;
446
+ }
447
+ body += src[i];
448
+ i++;
449
+ }
450
+ if (body.length > 0) {
451
+ addToken(TOKEN_TYPES.TEXT, body);
452
+ }
453
+ continue;
454
+ }
455
+ }
456
+ const char = src[i];
457
+ const next = src[i + 1];
458
+
459
+ // --- PHASE 2: QUOTE MODE ---
460
+ // Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
461
+ if (isInQuote) {
462
+ let quoteValue = "";
463
+ const quoteChar = tokens[tokens.length - 1].value;
464
+ while (i < src.length) {
465
+ if (src[i] === "\\" && i + 1 < src.length) {
466
+ // Inside quotes, we split escapes if we want to match reliability tests
467
+ if (quoteValue.length > 0) addToken(TOKEN_TYPES.VALUE, quoteValue);
468
+ addToken(TOKEN_TYPES.ESCAPE, "\\" + src[i + 1]);
469
+ quoteValue = "";
470
+ i += 2;
471
+ continue;
472
+ }
473
+
474
+ // Support Prefix Layers inside quotes!
475
+ if ((src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") || (src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
476
+ const isJS = (src[i] === "j");
477
+ const isV = (src[i] === "v");
478
+ if (quoteValue.length > 0) {
479
+ addToken(TOKEN_TYPES.VALUE, quoteValue);
480
+ quoteValue = "";
481
+ }
482
+
483
+ let braceDepth = 1;
484
+ let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
485
+ i += isJS ? 3 : 2;
486
+
487
+ let internalString = null;
488
+ while (i < src.length && braceDepth > 0) {
489
+ const c = src[i];
490
+ const n = src[i + 1];
491
+ if (internalString) {
492
+ if (c === "\\" && (n === internalString || n === "\\")) {
493
+ prefixValue += c + n;
494
+ i += 2;
495
+ continue;
496
+ }
497
+ if (c === internalString) internalString = null;
498
+ } else {
499
+ if (c === "\"" || c === "'") internalString = c;
500
+ else if (c === "{") braceDepth++;
501
+ else if (c === "}") braceDepth--;
502
+ }
503
+ prefixValue += c;
504
+ i++;
505
+ }
506
+ let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
507
+ addToken(tokenType, prefixValue);
508
+ continue;
509
+ }
510
+
511
+ if (src[i] === quoteChar) {
512
+ // Guess role based on next structural character
513
+ let nextStructural = peekStructural(i + 1);
514
+ let tokenType = (isInHeader || isInInlineHead) && (nextStructural === ":" || nextStructural === "=")
515
+ ? TOKEN_TYPES.KEY
516
+ : TOKEN_TYPES.VALUE;
517
+
518
+ if (quoteValue.length > 0) addToken(tokenType, quoteValue);
519
+ addToken(TOKEN_TYPES.QUOTE, quoteChar);
520
+ isInQuote = false;
521
+ i++;
522
+ break;
523
+ }
524
+ quoteValue += src[i];
525
+ i++;
526
+ }
527
+ if (!isInQuote) continue;
528
+ }
529
+
530
+ // --- PHASE 3: STRUCTURAL PARSING ---
531
+ // Handles markers, whitespace, and structural symbols.
532
+
533
+ // WHITESPACE
534
+ if (char === "\n") {
535
+ addToken(TOKEN_TYPES.WHITESPACE, char);
536
+ i++;
537
+ continue;
538
+ }
539
+
540
+ if (char === " " || char === "\t" || char === "\r") {
541
+ let ws = "";
542
+ while (i < src.length && (src[i] === " " || src[i] === "\t" || src[i] === "\r")) {
543
+ ws += src[i];
544
+ i++;
545
+ }
546
+ addToken(TOKEN_TYPES.WHITESPACE, ws);
547
+ continue;
548
+ }
549
+
550
+ // COMMENTS
551
+ if (char === "#") {
552
+ let comm = "";
553
+ // Check for Multiline Comment ### (must have no spaces)
554
+ if (src[i + 1] === "#" && src[i + 2] === "#") {
555
+ comm = "###";
556
+ i += 3;
557
+ while (i < src.length) {
558
+ if (src[i] === "#" && src[i + 1] === "#" && src[i + 2] === "#") {
559
+ comm += "###";
560
+ i += 3;
561
+ break;
562
+ }
563
+ comm += src[i];
564
+ i++;
565
+ }
566
+ addToken(TOKEN_TYPES.COMMENT_BLOCK, comm);
567
+ } else {
568
+ // Single line comment
569
+ while (i < src.length && src[i] !== "\n") {
570
+ comm += src[i];
571
+ i++;
572
+ }
573
+ addToken(TOKEN_TYPES.COMMENT, comm);
574
+ }
575
+ continue;
576
+ }
577
+
578
+ // ESCAPE CHARACTER (Sequence-based)
579
+ if (char === "\\") {
580
+ const seq = i + 1 < src.length ? "\\" + src[i + 1] : "\\";
581
+ addToken(TOKEN_TYPES.ESCAPE, seq);
582
+ i += seq.length;
583
+ continue;
584
+ }
585
+
586
+ // PREFIX LAYERS (js{...} or p{...} or v{...})
587
+ if ((char === "j" && next === "s" && src[i + 2] === "{") || (char === "p" && next === "{") || (char === "v" && next === "{")) {
588
+ const isJS = (char === "j");
589
+ const isP = (char === "p");
590
+ const isV = (char === "v");
591
+
592
+ // Context Check
593
+ const isBlockHeader = isInHeader && !isInAtBlockHeader;
594
+ const isNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody && parenDepth === 0;
595
+
596
+ let allowed = false;
597
+ if (isJS && isBlockHeader) allowed = true;
598
+ if (isP && (isBlockHeader || isNormalText)) allowed = true;
599
+ if (isV && (isBlockHeader || isNormalText)) allowed = true;
600
+
601
+ if (allowed) {
602
+ let braceDepth = 1;
603
+ let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
604
+ i += isJS ? 3 : 2;
605
+
606
+ let inString = null; // Track if we are inside " " or ' '
607
+ while (i < src.length && braceDepth > 0) {
608
+ const c = src[i];
609
+ const n = src[i + 1];
610
+
611
+ if (inString) {
612
+ if (c === "\\" && (n === inString || n === "\\")) {
613
+ prefixValue += c + n;
614
+ i += 2;
615
+ continue;
616
+ }
617
+ if (c === inString) inString = null;
618
+ } else {
619
+ if (c === "\"" || c === "'") inString = c;
620
+ else if (c === "{") braceDepth++;
621
+ else if (c === "}") braceDepth--;
622
+ }
623
+ prefixValue += c;
624
+ i++;
625
+ }
626
+ let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
627
+ addToken(tokenType, prefixValue);
628
+ continue;
629
+ }
630
+ // If not allowed, it will fall through to normal word scanning
631
+ }
632
+
633
+ // MULTI-CHAR MARKERS
634
+ if (char === "@" && next === "_") {
635
+ addToken(TOKEN_TYPES.OPEN_AT, "@_");
636
+ i += 2;
637
+ isInHeader = true; // At-Blocks start with a header part
638
+ isInAtBlockHeader = true;
639
+ continue;
640
+ }
641
+ if (char === "-" && next === ">") {
642
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
643
+ addToken(TOKEN_TYPES.TEXT, "-");
644
+ i++; // Swallowed one char
645
+ } else {
646
+ addToken(TOKEN_TYPES.THIN_ARROW, "->");
647
+ i += 2;
648
+ isInInlineHead = true; // The following ( ) will be structural
649
+ }
650
+ continue;
651
+ }
652
+
653
+ // STATIC KEYWORD
654
+ if (char === "s" && src.slice(i, i + 6) === "static") {
655
+ const afterStatic = src.slice(i + 6);
656
+ const hasSpace = afterStatic.startsWith(" ");
657
+ const hasLogic = hasSpace ? afterStatic.slice(1).startsWith("${") : afterStatic.startsWith("${");
658
+
659
+ const isMainIdentifier = (
660
+ last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
661
+ last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
662
+ (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
663
+ );
664
+
665
+ if ((hasLogic || isInHeader) && !isMainIdentifier) {
666
+ addToken(TOKEN_TYPES.STATIC_KEYWORD, hasSpace ? "static " : "static");
667
+ i += hasSpace ? 7 : 6;
668
+ continue;
669
+ }
670
+ }
671
+
672
+ // RUNTIME KEYWORD
673
+ if (char === "r" && src.slice(i, i + 7) === "runtime") {
674
+ const afterRuntime = src.slice(i + 7);
675
+ const hasSpace = afterRuntime.startsWith(" ");
676
+ const hasLogic = hasSpace ? afterRuntime.slice(1).startsWith("${") : afterRuntime.startsWith("${");
677
+
678
+ const isMainIdentifier = (
679
+ last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
680
+ last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
681
+ (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
682
+ );
683
+
684
+ if ((hasLogic || isInHeader) && !isMainIdentifier) {
685
+ addToken(TOKEN_TYPES.RUNTIME_KEYWORD, hasSpace ? "runtime " : "runtime");
686
+ i += hasSpace ? 8 : 7;
687
+ continue;
688
+ }
689
+ }
690
+
691
+ // LOGIC BLOCKS (${ ... }$)
692
+ if (char === "$" && next === "{" && (last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD)) {
693
+ const startLine = line;
694
+ const startCharacter = character;
695
+ i += 2;
696
+ let logicCode = "";
697
+ let braceDepth = 1;
698
+ let internalString = null;
699
+ let foundClosing = false;
700
+
701
+ while (i < src.length) {
702
+ const c = src[i];
703
+ const n = src[i + 1];
704
+
705
+ // Stop condition: }$ (only if not inside a JS string and at top-level brace depth)
706
+ if (c === "}" && n === "$" && !internalString && braceDepth === 1) {
707
+ i += 2;
708
+ braceDepth = 0;
709
+ foundClosing = true;
710
+ break;
711
+ }
712
+
713
+ if (internalString) {
714
+ if (c === "\\" && (n === internalString || n === "\\")) {
715
+ logicCode += c + n;
716
+ i += 2;
717
+ continue;
718
+ }
719
+ if (c === internalString) internalString = null;
720
+ } else {
721
+ if (c === "/" && n === "/") {
722
+ logicCode += c + n;
723
+ i += 2;
724
+ while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
725
+ logicCode += src[i];
726
+ i++;
727
+ }
728
+ continue;
729
+ }
730
+ if (c === "/" && n === "*") {
731
+ logicCode += c + n;
732
+ i += 2;
733
+ while (i < src.length) {
734
+ if (src[i] === "*" && src[i + 1] === "/") {
735
+ logicCode += "*/";
736
+ i += 2;
737
+ break;
738
+ }
739
+ logicCode += src[i];
740
+ i++;
741
+ }
742
+ continue;
743
+ }
744
+
745
+ if (c === "\"" || c === "'" || c === "`") internalString = c;
746
+ else if (c === "{") braceDepth++;
747
+ else if (c === "}") braceDepth--;
748
+ }
749
+
750
+ logicCode += c;
751
+ i++;
752
+ }
753
+
754
+ if (!foundClosing) {
755
+ lexerError("Unclosed logic block. Expected '}$' to close the block starting with '${'.", {
756
+ src,
757
+ filename,
758
+ range: {
759
+ start: { line: startLine, character: startCharacter },
760
+ end: { line: startLine, character: startCharacter + 2 }
761
+ }
762
+ });
763
+ }
764
+
765
+ addToken(TOKEN_TYPES.LOGIC, logicCode);
766
+ continue;
767
+ }
768
+
769
+ // SINGLE-CHAR MARKERS
770
+ if (char === "[") {
771
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
772
+ addToken(TOKEN_TYPES.TEXT, "[");
773
+ } else {
774
+ addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
775
+ isInHeader = true;
776
+ }
777
+ i++;
778
+ continue;
779
+ }
780
+ if (char === "_" && next === "@") {
781
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
782
+ addToken(TOKEN_TYPES.TEXT, "_@");
783
+ } else {
784
+ const lastRealType = last_non_junk_type;
785
+ addToken(TOKEN_TYPES.CLOSE_AT, "_@");
786
+ // Removed delimiter stack check
787
+ if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
788
+ isInAtBlockBody = false;
789
+ isInHeader = false;
790
+ isInAtBlockHeader = false;
791
+ }
792
+ }
793
+ i += 2;
794
+ continue;
795
+ }
796
+ if (char === "]") {
797
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
798
+ addToken(TOKEN_TYPES.TEXT, "]");
799
+ } else {
800
+ addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
801
+ isInHeader = false;
802
+ }
803
+ i++;
804
+ continue;
805
+ }
806
+ if (char === "(") {
807
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
808
+ addToken(TOKEN_TYPES.TEXT, "(");
809
+ parenDepth++;
810
+ } else {
811
+ addToken(TOKEN_TYPES.OPEN_PAREN, "(");
812
+ parenDepth++;
813
+ }
814
+ i++;
815
+ continue;
816
+ }
817
+ if (char === ")") {
818
+ if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
819
+ addToken(TOKEN_TYPES.TEXT, ")");
820
+ parenDepth--;
821
+ } else if (parenDepth > 0) {
822
+ // This ends the content part if depth drops to 0
823
+ parenDepth--;
824
+ if (parenDepth === 0) {
825
+ addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
826
+ if (isInInlineHead) {
827
+ isInInlineHead = false;
828
+ isInHeader = false;
829
+ }
830
+ } else {
831
+ addToken(TOKEN_TYPES.TEXT, ")");
832
+ }
833
+ } else {
834
+ addToken(TOKEN_TYPES.TEXT, ")");
835
+ }
836
+ i++;
837
+ continue;
838
+ }
839
+ if (char === ":") {
840
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
841
+ addToken(TOKEN_TYPES.TEXT, ":");
842
+ } else {
843
+ const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
844
+ if (allowed.includes(last_non_junk_type)) {
845
+ addToken(TOKEN_TYPES.COLON, ":");
846
+ isInHeader = true;
847
+ } else {
848
+ addToken(TOKEN_TYPES.TEXT, ":");
849
+ }
850
+ }
851
+ i++;
852
+ continue;
853
+ }
854
+ if (char === "=") {
855
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
856
+ addToken(TOKEN_TYPES.TEXT, "=");
857
+ } else {
858
+ const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
859
+ if (allowed.includes(last_non_junk_type)) {
860
+ addToken(TOKEN_TYPES.EQUAL, "=");
861
+ } else {
862
+ addToken(TOKEN_TYPES.TEXT, "=");
863
+ }
864
+ }
865
+ i++;
866
+ continue;
867
+ }
868
+ if (char === ",") {
869
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
870
+ addToken(TOKEN_TYPES.TEXT, ",");
871
+ } else {
872
+ const allowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
873
+ if (allowed.includes(last_non_junk_type)) {
874
+ addToken(TOKEN_TYPES.COMMA, ",");
875
+ } else {
876
+ addToken(TOKEN_TYPES.TEXT, ",");
877
+ }
878
+ }
879
+ i++;
880
+ continue;
881
+ }
882
+ if (char === ";") {
883
+ if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
884
+ addToken(TOKEN_TYPES.TEXT, ";");
885
+ } else {
886
+ const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.VALUE, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.CLOSE_PAREN, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
887
+ if (allowed.includes(last_non_junk_type)) {
888
+ addToken(TOKEN_TYPES.SEMICOLON, ";");
889
+ // ONLY trigger body mode if we were actually in an At-Block header
890
+ if (isInAtBlockHeader) {
891
+ isInHeader = false;
892
+ isInAtBlockHeader = false;
893
+ isInAtBlockBody = true;
894
+ }
895
+ } else {
896
+ addToken(TOKEN_TYPES.TEXT, ";");
897
+ }
898
+ }
899
+ i++;
900
+ continue;
901
+ }
902
+ if (char === "!") {
903
+ if (isInHeader) {
904
+ addToken(TOKEN_TYPES.EXCLAMATION_MARK, "!");
905
+ i++;
906
+ continue;
907
+ }
908
+ }
909
+ if (char === "\"" || char === "'") {
910
+ const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET, TOKEN_TYPES.OPEN_AT];
911
+ const wasValueTrigger = valTriggers.includes(last_non_junk_type);
912
+ addToken(TOKEN_TYPES.QUOTE, char);
913
+ i++;
914
+ // Enable quote mode
915
+ // NOTE: We allow quotes basically anywhere in headers as values/keys
916
+ if (isInHeader || wasValueTrigger) {
917
+ isInQuote = true;
918
+ }
919
+ continue;
920
+ }
921
+
922
+ // --- PHASE 4: WORD / TEXT SCANNING ---
923
+ // This is the "Fallback" mode where we scan for identifiers, keys, or values.
924
+ // It uses lookahead and context variables to guess the role of a word.
925
+ let word = "";
926
+ // Only Blocks ([ ]) allow ':' in their main identifier.
927
+ // At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
928
+ const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
929
+
930
+ let stopChars = "[](){}:=;,@>\"'#\\ \t\n\r!";
931
+ if (isStartOfBlockId || (parenDepth > 0 && !isInInlineHead)) {
932
+ stopChars = stopChars.replace(":", "");
933
+ }
934
+ const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody;
935
+ if (isInNormalText) {
936
+ stopChars = "[]@()>_()\\#\n\r"; // In normal text, stop at markers, comments and newlines
937
+ }
938
+
939
+ while (i < src.length && !stopChars.includes(src[i])) {
940
+ // Stop ONLY if $ is followed by { (Logic block start)
941
+ if (src[i] === "$" && src[i + 1] === "{") break;
942
+
943
+ // Lookahead for At-Block markers (_@ or @_)
944
+ if (src[i] === "_" && src[i + 1] === "@") break;
945
+ if (src[i] === "@" && src[i + 1] === "_") break;
946
+
947
+ // Lookahead for 'static ${' or 'runtime ${' (only if we're not at the very start of the word scanning)
948
+ if (word.length > 0) {
949
+ if (src[i] === "s" && src.slice(i, i + 7) === "static " && src[i + 7] === "$" && src[i + 8] === "{") break;
950
+ if (src[i] === "s" && src.slice(i, i + 6) === "static" && src[i + 6] === "$" && src[i + 7] === "{") break;
951
+ if (src[i] === "r" && src.slice(i, i + 8) === "runtime " && src[i + 8] === "$" && src[i + 9] === "{") break;
952
+ if (src[i] === "r" && src.slice(i, i + 7) === "runtime" && src[i + 7] === "$" && src[i + 8] === "{") break;
953
+ }
954
+
955
+ // Lookahead for -> marker in normal text
956
+ if (!isInHeader && src[i] === "-" && src[i + 1] === ">") break;
957
+
958
+ // Stop if we hit an ALLOWED prefix trigger
959
+ if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
960
+ if (isInHeader || isInNormalText) break;
961
+ }
962
+ if (src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") {
963
+ if (isInHeader) break;
964
+ }
965
+ word += src[i];
966
+ i++;
967
+ }
968
+
969
+ if (word.length > 0) {
970
+ // Guess role based on context
971
+ if (parenDepth > 0 && !isInInlineHead) {
972
+ // Inside Inline Content (raw text)
973
+ addToken(TOKEN_TYPES.TEXT, word);
974
+ } else if (isInHeader || isInInlineHead) {
975
+ // Inside a structural header context
976
+ const isMainIdentifier = (
977
+ last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
978
+ last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
979
+ (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
980
+ );
981
+
982
+ if (isMainIdentifier) {
983
+ if (word === end_keyword) {
984
+ addToken(TOKEN_TYPES.END_KEYWORD, word);
985
+ }
986
+ else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
987
+ else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
988
+ else if (word === "slot") addToken(TOKEN_TYPES.SLOT_KEYWORD, word);
989
+ else if (word === "for-each") addToken(TOKEN_TYPES.FOR_EACH, word);
990
+ else addToken(TOKEN_TYPES.IDENTIFIER, word);
991
+ } else {
992
+ // Use lookahead to distinguish KEY from VALUE
993
+ const p = peekStructural(i);
994
+ if (p === ":") {
995
+ addToken(TOKEN_TYPES.KEY, word);
996
+ } else if (word === "static") {
997
+ addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
998
+ } else if (word === "runtime") {
999
+ addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
1000
+ } else {
1001
+ addToken(TOKEN_TYPES.VALUE, word);
1002
+ }
1003
+ }
1004
+ } else {
1005
+ // Normal text
1006
+ if (word.trim() === "static") {
1007
+ addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
1008
+ } else if (word.trim() === "runtime") {
1009
+ addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
1010
+ } else {
1011
+ addToken(TOKEN_TYPES.TEXT, word);
1012
+ }
1013
+ }
1014
+ } else {
1015
+ // Fallback for any unhandled characters
1016
+ if (i < src.length) {
1017
+ addToken(TOKEN_TYPES.TEXT, src[i]);
1018
+ i++;
1019
+ }
1020
+ }
1021
+ }
1022
+
1023
+ addToken(TOKEN_TYPES.EOF, "");
1024
+ return tokens;
1025
+ }
1026
+
1027
/**
 * Synchronous entry point for tokenization.
 *
 * Validates the `src` argument and then delegates to `lexer`. A missing
 * (`undefined` / `null`) or non-string source is reported through
 * `runtimeError` before the lexer is invoked.
 * NOTE(review): `runtimeError` presumably throws, which would stop execution
 * at the first failed check; confirm against its definition.
 *
 * @param {string} src - Source text to tokenize.
 * @param {string} [filename="anonymous"] - Name passed through to `lexer` together with the source.
 * @returns {*} The value returned by `lexer(src, filename)`.
 */
const lexSync = (src, filename = "anonymous") => {
  const receivedType = typeof src;

  // `== null` matches exactly `undefined` and `null`.
  if (src == null) {
    runtimeError([`{line}<$red:Missing Source:$> <$yellow:The 'src' argument is required for tokenization.$>{line}`]);
  }

  if (receivedType !== "string") {
    runtimeError([`{line}<$red:Invalid Source Type:$> <$yellow:The 'src' argument must be a string, received ${receivedType}.$>{line}`]);
  }

  return lexer(src, filename);
};
1036
+
1037
/**
 * Promise-returning variant of `lexSync`.
 *
 * Forwards both arguments to `lexSync` unchanged. Because this is an `async`
 * function, the result is wrapped in a promise and anything thrown during the
 * call surfaces as a rejection instead of a synchronous exception.
 *
 * @param {string} src - Source text to tokenize.
 * @param {string} [filename="anonymous"] - Name passed through to `lexSync`.
 * @returns {Promise<*>} Promise resolving to the value returned by `lexSync(src, filename)`.
 */
const lex = async (src, filename = "anonymous") => {
  return lexSync(src, filename);
};
1038
+
1039
+ export { TOKEN_TYPES, labels, lex, lexSync };