sommark 4.5.3 → 5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/README.md +315 -179
  2. package/cli/cli.mjs +1 -1
  3. package/cli/commands/color.js +36 -14
  4. package/cli/commands/help.js +3 -0
  5. package/cli/commands/init.js +1 -3
  6. package/cli/constants.js +5 -2
  7. package/constants/html_props.js +0 -5
  8. package/core/errors.js +5 -4
  9. package/core/evaluator.js +1 -2
  10. package/core/formats.js +7 -1
  11. package/core/helpers/config-loader.js +2 -4
  12. package/core/helpers/lib.js +1 -1
  13. package/core/labels.js +2 -15
  14. package/core/lexer.js +197 -313
  15. package/core/modules.js +13 -13
  16. package/core/parser.js +226 -535
  17. package/core/tokenTypes.js +6 -15
  18. package/core/transpiler.js +129 -110
  19. package/core/validator.js +6 -26
  20. package/dist/sommark.browser.js +1781 -2172
  21. package/dist/sommark.browser.lite.js +1779 -2169
  22. package/dist/sommark.lexer.js +392 -544
  23. package/dist/sommark.parser.js +604 -1200
  24. package/formatter/mark.js +34 -0
  25. package/formatter/tag.js +7 -33
  26. package/helpers/utils.js +15 -16
  27. package/index.js +9 -1
  28. package/index.shared.js +26 -16
  29. package/mappers/languages/csv.js +62 -0
  30. package/mappers/languages/html.js +12 -66
  31. package/mappers/languages/json.js +74 -156
  32. package/mappers/languages/jsonc.js +21 -63
  33. package/mappers/languages/markdown.js +159 -276
  34. package/mappers/languages/mdx.js +7 -62
  35. package/mappers/languages/text.js +2 -19
  36. package/mappers/languages/toml.js +231 -0
  37. package/mappers/languages/xml.js +25 -25
  38. package/mappers/languages/yaml.js +323 -0
  39. package/mappers/mapper.js +1 -22
  40. package/mappers/shared/index.js +3 -16
  41. package/package.json +5 -2
package/core/lexer.js CHANGED
@@ -1,7 +1,6 @@
1
1
  import TOKEN_TYPES from "./tokenTypes.js";
2
2
  import peek from "../helpers/peek.js";
3
3
  import { end_keyword } from "./labels.js";
4
- import { lexerError } from "./errors.js";
5
4
 
6
5
  /**
7
6
  * SomMark Lexer
@@ -24,12 +23,12 @@ function lexer(src, filename = "anonymous") {
24
23
  let line = 0, character = 0;
25
24
 
26
25
  // State Variables
27
- let isInAtBlockBody = false;
28
26
  let isInQuote = false;
29
- let isInHeader = false; // Tracks if we are in a structural header context
30
- let isInAtBlockHeader = false; // Specific for At-Block headers (@_ ... _@)
31
- let isInInlineHead = false; // Specific for (key:val) after ->
32
- let parenDepth = 0; // To track balanced parentheses in inlines
27
+ let isInHeader = false; // Tracks if we are in a structural header context
28
+ let isInPVPrefix = false; // Tracks if we are scanning inside a p{} or v{} prefix
29
+ let pendingSmarkRaw = false; // Set when KEY "smark-raw" is seen — waiting for value
30
+ let hasSmarkRaw = false; // Set when smark-raw: true is confirmed in header
31
+ let isRawContent = false; // Set when inside a smark-raw block — content collected as-is, not parsed
33
32
 
34
33
  /**
35
34
  * Adds a token to the stream and updates the scanner's position tracking.
@@ -95,35 +94,63 @@ function lexer(src, filename = "anonymous") {
95
94
  }
96
95
 
97
96
  while (i < src.length) {
98
- // --- PHASE 1: AT-BLOCK BODY MODE ---
99
- // In this mode, we consume everything as raw text until we hit the @_ marker.
100
- if (isInAtBlockBody) {
101
- if (src[i] === "@" && src[i + 1] === "_") {
102
- isInAtBlockBody = false;
103
- } else {
104
- let body = "";
97
+ const char = src[i];
98
+ const next = src[i + 1];
99
+
100
+ // --- RAW CONTENT MODE ---
101
+ // Collect everything as-is until [end] or [end:name]. \[ escapes a literal [.
102
+ if (isRawContent) {
103
+ let raw = "";
104
+ while (i < src.length) {
105
+ if (src[i] === "\\" && src[i + 1] === "[") {
106
+ raw += "[";
107
+ i += 2;
108
+ continue;
109
+ }
110
+ if (src[i] === "[") {
111
+ if (src.startsWith(`[${end_keyword}]`, i) || src.startsWith(`[${end_keyword}:`, i)) break;
112
+ }
113
+ raw += src[i];
114
+ i++;
115
+ }
116
+ if (raw) addToken(TOKEN_TYPES.TEXT, raw);
117
+ isRawContent = false;
118
+ continue;
119
+ }
120
+
121
+ // --- PHASE 1.5: PV PREFIX CONTENT MODE ---
122
+ // Handles structured content inside p{} and v{} prefixes.
123
+ if (isInPVPrefix && !isInQuote) {
124
+ if (char === '"' || char === "'") {
125
+ addToken(TOKEN_TYPES.QUOTE, char);
126
+ i++;
127
+ isInQuote = true;
128
+ continue;
129
+ }
130
+ if (char === '|') {
131
+ addToken(TOKEN_TYPES.PIPELINE, "|");
132
+ i++;
133
+ continue;
134
+ }
135
+ if (char === '}') {
136
+ addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
137
+ isInPVPrefix = false;
138
+ i++;
139
+ continue;
140
+ }
141
+ if (char !== ' ' && char !== '\t' && char !== '\n' && char !== '\r') {
142
+ let word = '';
105
143
  while (i < src.length) {
106
- // Handle escapes in At-Block Body
107
- if (src[i] === "\\" && i + 1 < src.length) {
108
- body += src[i + 1];
109
- i += 2;
110
- continue;
111
- }
112
- // Stop at end marker
113
- if (src[i] === "@" && src[i + 1] === "_") {
114
- break;
115
- }
116
- body += src[i];
144
+ const c = src[i];
145
+ if (c === '}' || c === '|' || c === '"' || c === "'" || c === ' ' || c === '\t' || c === '\n' || c === '\r') break;
146
+ word += c;
117
147
  i++;
118
148
  }
119
- if (body.length > 0) {
120
- addToken(TOKEN_TYPES.TEXT, body);
121
- }
149
+ if (word) addToken(TOKEN_TYPES.KEY, word);
122
150
  continue;
123
151
  }
152
+ // Whitespace: fall through to PHASE 3 whitespace handling
124
153
  }
125
- const char = src[i];
126
- const next = src[i + 1];
127
154
 
128
155
  // --- PHASE 2: QUOTE MODE ---
129
156
  // Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
@@ -141,50 +168,57 @@ function lexer(src, filename = "anonymous") {
141
168
  }
142
169
 
143
170
  // Support Prefix Layers inside quotes!
144
- if ((src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") || (src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
145
- const isJS = (src[i] === "j");
171
+ if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
146
172
  const isV = (src[i] === "v");
147
173
  if (quoteValue.length > 0) {
148
174
  addToken(TOKEN_TYPES.VALUE, quoteValue);
149
175
  quoteValue = "";
150
176
  }
151
177
 
152
- let braceDepth = 1;
153
- let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
154
- i += isJS ? 3 : 2;
155
-
156
- let internalString = null;
157
- while (i < src.length && braceDepth > 0) {
158
- const c = src[i];
159
- const n = src[i + 1];
160
- if (internalString) {
161
- if (c === "\\" && (n === internalString || n === "\\")) {
162
- prefixValue += c + n;
163
- i += 2;
164
- continue;
178
+ {
179
+ // p{} or v{}: keyword + PREFIX_OPEN + unquoted key + optional PIPELINE + fallback + PREFIX_CLOSE
180
+ addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
181
+ addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
182
+ i += 2;
183
+ // Scan unquoted key (cannot use same quote char as outer string)
184
+ let key = "";
185
+ while (i < src.length && src[i] !== "|" && src[i] !== "}" && src[i] !== quoteChar) {
186
+ key += src[i];
187
+ i++;
188
+ }
189
+ if (key.trim()) addToken(TOKEN_TYPES.KEY, key.trim());
190
+ // Optional PIPELINE + fallback
191
+ if (i < src.length && src[i] === "|") {
192
+ addToken(TOKEN_TYPES.PIPELINE, "|");
193
+ i++;
194
+ let fallback = "";
195
+ while (i < src.length && src[i] !== "}" && src[i] !== quoteChar) {
196
+ fallback += src[i];
197
+ i++;
165
198
  }
166
- if (c === internalString) internalString = null;
167
- } else {
168
- if (c === "\"" || c === "'") internalString = c;
169
- else if (c === "{") braceDepth++;
170
- else if (c === "}") braceDepth--;
199
+ if (fallback.trim()) addToken(TOKEN_TYPES.VALUE, fallback.trim());
200
+ }
201
+ // PREFIX_CLOSE
202
+ if (i < src.length && src[i] === "}") {
203
+ addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
204
+ i++;
171
205
  }
172
- prefixValue += c;
173
- i++;
174
206
  }
175
- let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
176
- addToken(tokenType, prefixValue);
177
207
  continue;
178
208
  }
179
209
 
180
210
  if (src[i] === quoteChar) {
181
211
  // Guess role based on next structural character
182
212
  let nextStructural = peekStructural(i + 1);
183
- let tokenType = (isInHeader || isInInlineHead) && (nextStructural === ":" || nextStructural === "=")
213
+ let tokenType = isInHeader && (nextStructural === ":" || nextStructural === "=")
184
214
  ? TOKEN_TYPES.KEY
185
215
  : TOKEN_TYPES.VALUE;
186
216
 
187
217
  if (quoteValue.length > 0) addToken(tokenType, quoteValue);
218
+ if (pendingSmarkRaw && tokenType === TOKEN_TYPES.VALUE && quoteValue === "true") {
219
+ hasSmarkRaw = true;
220
+ pendingSmarkRaw = false;
221
+ }
188
222
  addToken(TOKEN_TYPES.QUOTE, quoteChar);
189
223
  isInQuote = false;
190
224
  i++;
@@ -255,84 +289,37 @@ function lexer(src, filename = "anonymous") {
255
289
  continue;
256
290
  }
257
291
 
258
- // PREFIX LAYERS (js{...} or p{...} or v{...})
259
- if ((char === "j" && next === "s" && src[i + 2] === "{") || (char === "p" && next === "{") || (char === "v" && next === "{")) {
260
- const isJS = (char === "j");
292
+ // PREFIX LAYERS (p{...} or v{...})
293
+ if ((char === "p" && next === "{") || (char === "v" && next === "{")) {
261
294
  const isP = (char === "p");
262
295
  const isV = (char === "v");
263
296
 
264
297
  // Context Check
265
- const isBlockHeader = isInHeader && !isInAtBlockHeader;
266
- const isNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody && parenDepth === 0;
298
+ const isBlockHeader = isInHeader;
299
+ const isNormalText = !isInHeader;
267
300
 
268
301
  let allowed = false;
269
- if (isJS && isBlockHeader) allowed = true;
270
302
  if (isP && (isBlockHeader || isNormalText)) allowed = true;
271
303
  if (isV && (isBlockHeader || isNormalText)) allowed = true;
272
304
 
273
305
  if (allowed) {
274
- let braceDepth = 1;
275
- let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
276
- i += isJS ? 3 : 2;
277
-
278
- let inString = null; // Track if we are inside " " or ' '
279
- while (i < src.length && braceDepth > 0) {
280
- const c = src[i];
281
- const n = src[i + 1];
282
-
283
- if (inString) {
284
- if (c === "\\" && (n === inString || n === "\\")) {
285
- prefixValue += c + n;
286
- i += 2;
287
- continue;
288
- }
289
- if (c === inString) inString = null;
290
- } else {
291
- if (c === "\"" || c === "'") inString = c;
292
- else if (c === "{") braceDepth++;
293
- else if (c === "}") braceDepth--;
294
- }
295
- prefixValue += c;
296
- i++;
297
- }
298
- let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
299
- addToken(tokenType, prefixValue);
306
+ // p{} or v{}: emit keyword + PREFIX_OPEN, enter structured content mode
307
+ addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
308
+ addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
309
+ i += 2; // skip "p{" or "v{"
310
+ isInPVPrefix = true;
300
311
  continue;
301
312
  }
302
313
  // If not allowed, it will fall through to normal word scanning
303
314
  }
304
315
 
305
- // MULTI-CHAR MARKERS
306
- if (char === "@" && next === "_") {
307
- addToken(TOKEN_TYPES.OPEN_AT, "@_");
308
- i += 2;
309
- isInHeader = true; // At-Blocks start with a header part
310
- isInAtBlockHeader = true;
311
- continue;
312
- }
313
- if (char === "-" && next === ">") {
314
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
315
- addToken(TOKEN_TYPES.TEXT, "-");
316
- i++; // Swallowed one char
317
- } else {
318
- addToken(TOKEN_TYPES.THIN_ARROW, "->");
319
- i += 2;
320
- isInInlineHead = true; // The following ( ) will be structural
321
- }
322
- continue;
323
- }
324
-
325
316
  // STATIC KEYWORD
326
317
  if (char === "s" && src.slice(i, i + 6) === "static") {
327
318
  const afterStatic = src.slice(i + 6);
328
319
  const hasSpace = afterStatic.startsWith(" ");
329
320
  const hasLogic = hasSpace ? afterStatic.slice(1).startsWith("${") : afterStatic.startsWith("${");
330
321
 
331
- const isMainIdentifier = (
332
- last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
333
- last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
334
- (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
335
- );
322
+ const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
336
323
 
337
324
  if ((hasLogic || isInHeader) && !isMainIdentifier) {
338
325
  addToken(TOKEN_TYPES.STATIC_KEYWORD, hasSpace ? "static " : "static");
@@ -347,11 +334,7 @@ function lexer(src, filename = "anonymous") {
347
334
  const hasSpace = afterRuntime.startsWith(" ");
348
335
  const hasLogic = hasSpace ? afterRuntime.slice(1).startsWith("${") : afterRuntime.startsWith("${");
349
336
 
350
- const isMainIdentifier = (
351
- last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
352
- last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
353
- (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
354
- );
337
+ const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
355
338
 
356
339
  if ((hasLogic || isInHeader) && !isMainIdentifier) {
357
340
  addToken(TOKEN_TYPES.RUNTIME_KEYWORD, hasSpace ? "runtime " : "runtime");
@@ -360,213 +343,126 @@ function lexer(src, filename = "anonymous") {
360
343
  }
361
344
  }
362
345
 
363
- // LOGIC BLOCKS (${ ... }$)
364
- if (char === "$" && next === "{" && (last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD)) {
365
- const startLine = line;
366
- const startCharacter = character;
367
- i += 2;
368
- let logicCode = "";
369
- let braceDepth = 1;
370
- let internalString = null;
371
- let foundClosing = false;
346
+ // LOGIC BLOCKS (${ ... }$) — explicit: static/runtime ${ }$ shorthand: ${ }$ = static ${ }$
347
+ if (char === "$" && next === "{") {
348
+ {
349
+ const hasExplicitKeyword = last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD;
350
+ if (!hasExplicitKeyword) addToken(TOKEN_TYPES.STATIC_KEYWORD, "static");
351
+ addToken(TOKEN_TYPES.LOGIC_OPEN, "${");
352
+ i += 2;
372
353
 
373
- while (i < src.length) {
374
- const c = src[i];
375
- const n = src[i + 1];
354
+ let logicCode = "";
355
+ let depth = 0;
356
+ let internalString = null;
376
357
 
377
- // Stop condition: }$ (only if not inside a JS string and at top-level brace depth)
378
- if (c === "}" && n === "$" && !internalString && braceDepth === 1) {
379
- i += 2;
380
- braceDepth = 0;
381
- foundClosing = true;
382
- break;
383
- }
358
+ while (i < src.length) {
359
+ const c = src[i];
360
+ const n = src[i + 1];
384
361
 
385
- if (internalString) {
386
- if (c === "\\" && (n === internalString || n === "\\")) {
387
- logicCode += c + n;
388
- i += 2;
389
- continue;
362
+ // Close condition: }$ at depth 0, not followed by { (}${ is a template expression boundary)
363
+ if (c === "}" && n === "$" && !internalString && depth === 0 && src[i + 2] !== "{") {
364
+ break;
390
365
  }
391
- if (c === internalString) internalString = null;
392
- } else {
393
- if (c === "/" && n === "/") {
394
- logicCode += c + n;
395
- i += 2;
396
- while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
397
- logicCode += src[i];
398
- i++;
366
+
367
+ if (internalString) {
368
+ if (c === "\\" && (n === internalString || n === "\\")) {
369
+ logicCode += c + n;
370
+ i += 2;
371
+ continue;
399
372
  }
400
- continue;
401
- }
402
- if (c === "/" && n === "*") {
403
- logicCode += c + n;
404
- i += 2;
405
- while (i < src.length) {
406
- if (src[i] === "*" && src[i + 1] === "/") {
407
- logicCode += "*/";
408
- i += 2;
409
- break;
373
+ if (c === internalString) internalString = null;
374
+ } else {
375
+ if (c === "/" && n === "/") {
376
+ logicCode += c + n;
377
+ i += 2;
378
+ while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
379
+ logicCode += src[i];
380
+ i++;
410
381
  }
411
- logicCode += src[i];
412
- i++;
382
+ continue;
383
+ }
384
+ if (c === "/" && n === "*") {
385
+ logicCode += c + n;
386
+ i += 2;
387
+ while (i < src.length) {
388
+ if (src[i] === "*" && src[i + 1] === "/") {
389
+ logicCode += "*/";
390
+ i += 2;
391
+ break;
392
+ }
393
+ logicCode += src[i];
394
+ i++;
395
+ }
396
+ continue;
413
397
  }
414
- continue;
398
+
399
+ if (c === "\"" || c === "'" || c === "`") internalString = c;
400
+ else if (c === "{") depth++;
401
+ else if (c === "}") depth--;
415
402
  }
416
403
 
417
- if (c === "\"" || c === "'" || c === "`") internalString = c;
418
- else if (c === "{") braceDepth++;
419
- else if (c === "}") braceDepth--;
404
+ logicCode += c;
405
+ i++;
420
406
  }
421
407
 
422
- logicCode += c;
423
- i++;
424
- }
408
+ addToken(TOKEN_TYPES.LOGIC, logicCode);
425
409
 
426
- if (!foundClosing) {
427
- lexerError("Unclosed logic block. Expected '}$' to close the block starting with '${'.", {
428
- src,
429
- filename,
430
- range: {
431
- start: { line: startLine, character: startCharacter },
432
- end: { line: startLine, character: startCharacter + 2 }
433
- }
434
- });
435
- }
410
+ if (i < src.length && src[i] === "}" && src[i + 1] === "$") {
411
+ addToken(TOKEN_TYPES.LOGIC_CLOSE, "}$");
412
+ i += 2;
413
+ }
436
414
 
437
- addToken(TOKEN_TYPES.LOGIC, logicCode);
438
- continue;
415
+ continue;
416
+ }
439
417
  }
440
418
 
441
419
  // SINGLE-CHAR MARKERS
442
420
  if (char === "[") {
443
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
444
- addToken(TOKEN_TYPES.TEXT, "[");
445
- } else {
446
- addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
447
- isInHeader = true;
448
- }
421
+ addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
422
+ isInHeader = true;
423
+ pendingSmarkRaw = false;
424
+ hasSmarkRaw = false;
449
425
  i++;
450
426
  continue;
451
427
  }
452
- if (char === "_" && next === "@") {
453
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
454
- addToken(TOKEN_TYPES.TEXT, "_@");
455
- } else {
456
- const lastRealType = last_non_junk_type;
457
- addToken(TOKEN_TYPES.CLOSE_AT, "_@");
458
- // Removed delimiter stack check
459
- if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
460
- isInAtBlockBody = false;
461
- isInHeader = false;
462
- isInAtBlockHeader = false;
463
- }
464
- }
465
- i += 2;
466
- continue;
467
- }
468
428
  if (char === "]") {
469
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
470
- addToken(TOKEN_TYPES.TEXT, "]");
471
- } else {
472
- addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
473
- isInHeader = false;
474
- }
475
- i++;
476
- continue;
477
- }
478
- if (char === "(") {
479
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
480
- addToken(TOKEN_TYPES.TEXT, "(");
481
- parenDepth++;
482
- } else {
483
- addToken(TOKEN_TYPES.OPEN_PAREN, "(");
484
- parenDepth++;
485
- }
486
- i++;
487
- continue;
488
- }
489
- if (char === ")") {
490
- if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
491
- addToken(TOKEN_TYPES.TEXT, ")");
492
- parenDepth--;
493
- } else if (parenDepth > 0) {
494
- // This ends the content part if depth drops to 0
495
- parenDepth--;
496
- if (parenDepth === 0) {
497
- addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
498
- if (isInInlineHead) {
499
- isInInlineHead = false;
500
- isInHeader = false;
501
- }
502
- } else {
503
- addToken(TOKEN_TYPES.TEXT, ")");
504
- }
505
- } else {
506
- addToken(TOKEN_TYPES.TEXT, ")");
429
+ addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
430
+ isInHeader = false;
431
+ if (hasSmarkRaw) {
432
+ isRawContent = true;
433
+ hasSmarkRaw = false;
507
434
  }
435
+ pendingSmarkRaw = false;
508
436
  i++;
509
437
  continue;
510
438
  }
511
439
  if (char === ":") {
512
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
513
- addToken(TOKEN_TYPES.TEXT, ":");
440
+ const colonAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
441
+ if (colonAllowed.includes(last_non_junk_type)) {
442
+ addToken(TOKEN_TYPES.COLON, ":");
443
+ isInHeader = true;
514
444
  } else {
515
- const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
516
- if (allowed.includes(last_non_junk_type)) {
517
- addToken(TOKEN_TYPES.COLON, ":");
518
- isInHeader = true;
519
- } else {
520
- addToken(TOKEN_TYPES.TEXT, ":");
521
- }
445
+ addToken(TOKEN_TYPES.TEXT, ":");
522
446
  }
523
447
  i++;
524
448
  continue;
525
449
  }
526
450
  if (char === "=") {
527
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
528
- addToken(TOKEN_TYPES.TEXT, "=");
451
+ const eqAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
452
+ if (eqAllowed.includes(last_non_junk_type)) {
453
+ addToken(TOKEN_TYPES.EQUAL, "=");
529
454
  } else {
530
- const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
531
- if (allowed.includes(last_non_junk_type)) {
532
- addToken(TOKEN_TYPES.EQUAL, "=");
533
- } else {
534
- addToken(TOKEN_TYPES.TEXT, "=");
535
- }
455
+ addToken(TOKEN_TYPES.TEXT, "=");
536
456
  }
537
457
  i++;
538
458
  continue;
539
459
  }
540
460
  if (char === ",") {
541
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
542
- addToken(TOKEN_TYPES.TEXT, ",");
461
+ const commaAllowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
462
+ if (commaAllowed.includes(last_non_junk_type)) {
463
+ addToken(TOKEN_TYPES.COMMA, ",");
543
464
  } else {
544
- const allowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
545
- if (allowed.includes(last_non_junk_type)) {
546
- addToken(TOKEN_TYPES.COMMA, ",");
547
- } else {
548
- addToken(TOKEN_TYPES.TEXT, ",");
549
- }
550
- }
551
- i++;
552
- continue;
553
- }
554
- if (char === ";") {
555
- if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
556
- addToken(TOKEN_TYPES.TEXT, ";");
557
- } else {
558
- const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.VALUE, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.CLOSE_PAREN, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
559
- if (allowed.includes(last_non_junk_type)) {
560
- addToken(TOKEN_TYPES.SEMICOLON, ";");
561
- // ONLY trigger body mode if we were actually in an At-Block header
562
- if (isInAtBlockHeader) {
563
- isInHeader = false;
564
- isInAtBlockHeader = false;
565
- isInAtBlockBody = true;
566
- }
567
- } else {
568
- addToken(TOKEN_TYPES.TEXT, ";");
569
- }
465
+ addToken(TOKEN_TYPES.TEXT, ",");
570
466
  }
571
467
  i++;
572
468
  continue;
@@ -579,7 +475,7 @@ function lexer(src, filename = "anonymous") {
579
475
  }
580
476
  }
581
477
  if (char === "\"" || char === "'") {
582
- const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET, TOKEN_TYPES.OPEN_AT];
478
+ const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET];
583
479
  const wasValueTrigger = valTriggers.includes(last_non_junk_type);
584
480
  addToken(TOKEN_TYPES.QUOTE, char);
585
481
  i++;
@@ -595,28 +491,22 @@ function lexer(src, filename = "anonymous") {
595
491
  // This is the "Fallback" mode where we scan for identifiers, keys, or values.
596
492
  // It uses lookahead and context variables to guess the role of a word.
597
493
  let word = "";
598
- // Only Blocks ([ ]) allow ':' in their main identifier.
599
- // At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
600
494
  const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
495
+ const isInNormalText = !isInHeader;
601
496
 
602
- let stopChars = "[](){}:=;,@>\"'#\\ \t\n\r!";
603
- if (isStartOfBlockId || (parenDepth > 0 && !isInInlineHead)) {
497
+ let stopChars = "[]{}:=,\"'#\\ \t\n\r!";
498
+ if (isStartOfBlockId) {
604
499
  stopChars = stopChars.replace(":", "");
605
500
  }
606
- const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody;
607
501
  if (isInNormalText) {
608
- stopChars = "[]@()>_()\\#\n\r"; // In normal text, stop at markers, comments and newlines
502
+ stopChars = "[]\\#\n\r"; // In normal text, stop only at block markers, escapes, comments and newlines
609
503
  }
610
504
 
611
505
  while (i < src.length && !stopChars.includes(src[i])) {
612
506
  // Stop ONLY if $ is followed by { (Logic block start)
613
507
  if (src[i] === "$" && src[i + 1] === "{") break;
614
508
 
615
- // Lookahead for At-Block markers (_@ or @_)
616
- if (src[i] === "_" && src[i + 1] === "@") break;
617
- if (src[i] === "@" && src[i + 1] === "_") break;
618
-
619
- // Lookahead for 'static ${' or 'runtime ${' (only if we're not at the very start of the word scanning)
509
+ // Lookahead for 'static ${' or 'runtime ${' mid-word
620
510
  if (word.length > 0) {
621
511
  if (src[i] === "s" && src.slice(i, i + 7) === "static " && src[i + 7] === "$" && src[i + 8] === "{") break;
622
512
  if (src[i] === "s" && src.slice(i, i + 6) === "static" && src[i + 6] === "$" && src[i + 7] === "{") break;
@@ -624,53 +514,47 @@ function lexer(src, filename = "anonymous") {
624
514
  if (src[i] === "r" && src.slice(i, i + 7) === "runtime" && src[i + 7] === "$" && src[i + 8] === "{") break;
625
515
  }
626
516
 
627
- // Lookahead for -> marker in normal text
628
- if (!isInHeader && src[i] === "-" && src[i + 1] === ">") break;
629
-
630
517
  // Stop if we hit an ALLOWED prefix trigger
631
518
  if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
632
519
  if (isInHeader || isInNormalText) break;
633
520
  }
634
- if (src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") {
635
- if (isInHeader) break;
636
- }
637
521
  word += src[i];
638
522
  i++;
639
523
  }
640
524
 
641
525
  if (word.length > 0) {
642
526
  // Guess role based on context
643
- if (parenDepth > 0 && !isInInlineHead) {
644
- // Inside Inline Content (raw text)
645
- addToken(TOKEN_TYPES.TEXT, word);
646
- } else if (isInHeader || isInInlineHead) {
527
+ if (isInHeader) {
647
528
  // Inside a structural header context
648
- const isMainIdentifier = (
649
- last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
650
- last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
651
- (last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
652
- );
529
+ const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
653
530
 
654
531
  if (isMainIdentifier) {
655
- if (word === end_keyword) {
532
+ if (word === end_keyword || word.startsWith(end_keyword + ":")) {
656
533
  addToken(TOKEN_TYPES.END_KEYWORD, word);
657
534
  }
658
535
  else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
659
536
  else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
660
537
  else if (word === "slot") addToken(TOKEN_TYPES.SLOT_KEYWORD, word);
661
538
  else if (word === "for-each") addToken(TOKEN_TYPES.FOR_EACH, word);
662
- else addToken(TOKEN_TYPES.IDENTIFIER, word);
539
+ else {
540
+ addToken(TOKEN_TYPES.IDENTIFIER, word);
541
+ }
663
542
  } else {
664
543
  // Use lookahead to distinguish KEY from VALUE
665
544
  const p = peekStructural(i);
666
545
  if (p === ":") {
667
546
  addToken(TOKEN_TYPES.KEY, word);
547
+ if (word === "smark-raw") pendingSmarkRaw = true;
668
548
  } else if (word === "static") {
669
549
  addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
670
550
  } else if (word === "runtime") {
671
551
  addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
672
552
  } else {
673
553
  addToken(TOKEN_TYPES.VALUE, word);
554
+ if (pendingSmarkRaw) {
555
+ if (word === "true") hasSmarkRaw = true;
556
+ pendingSmarkRaw = false;
557
+ }
674
558
  }
675
559
  }
676
560
  } else {