sommark 4.5.3 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +314 -178
- package/cli/cli.mjs +1 -1
- package/cli/commands/color.js +36 -14
- package/cli/commands/help.js +3 -0
- package/cli/commands/init.js +0 -2
- package/cli/constants.js +5 -2
- package/core/errors.js +5 -4
- package/core/evaluator.js +1 -2
- package/core/formats.js +7 -1
- package/core/helpers/config-loader.js +1 -3
- package/core/helpers/lib.js +1 -1
- package/core/labels.js +2 -15
- package/core/lexer.js +197 -313
- package/core/modules.js +13 -13
- package/core/parser.js +226 -535
- package/core/tokenTypes.js +6 -15
- package/core/transpiler.js +129 -110
- package/core/validator.js +6 -26
- package/dist/sommark.browser.js +1777 -2163
- package/dist/sommark.browser.lite.js +1775 -2160
- package/dist/sommark.lexer.js +392 -544
- package/dist/sommark.parser.js +604 -1200
- package/formatter/mark.js +34 -0
- package/formatter/tag.js +7 -33
- package/helpers/utils.js +15 -16
- package/index.js +9 -1
- package/index.shared.js +22 -12
- package/mappers/languages/csv.js +62 -0
- package/mappers/languages/html.js +12 -66
- package/mappers/languages/json.js +74 -156
- package/mappers/languages/jsonc.js +21 -63
- package/mappers/languages/markdown.js +159 -276
- package/mappers/languages/mdx.js +7 -62
- package/mappers/languages/text.js +2 -19
- package/mappers/languages/toml.js +231 -0
- package/mappers/languages/xml.js +25 -25
- package/mappers/languages/yaml.js +323 -0
- package/mappers/mapper.js +1 -22
- package/mappers/shared/index.js +3 -16
- package/package.json +5 -2
package/core/lexer.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import TOKEN_TYPES from "./tokenTypes.js";
|
|
2
2
|
import peek from "../helpers/peek.js";
|
|
3
3
|
import { end_keyword } from "./labels.js";
|
|
4
|
-
import { lexerError } from "./errors.js";
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
6
|
* SomMark Lexer
|
|
@@ -24,12 +23,12 @@ function lexer(src, filename = "anonymous") {
|
|
|
24
23
|
let line = 0, character = 0;
|
|
25
24
|
|
|
26
25
|
// State Variables
|
|
27
|
-
let isInAtBlockBody = false;
|
|
28
26
|
let isInQuote = false;
|
|
29
|
-
let isInHeader = false;
|
|
30
|
-
let
|
|
31
|
-
let
|
|
32
|
-
let
|
|
27
|
+
let isInHeader = false; // Tracks if we are in a structural header context
|
|
28
|
+
let isInPVPrefix = false; // Tracks if we are scanning inside a p{} or v{} prefix
|
|
29
|
+
let pendingSmarkRaw = false; // Set when KEY "smark-raw" is seen — waiting for value
|
|
30
|
+
let hasSmarkRaw = false; // Set when smark-raw: true is confirmed in header
|
|
31
|
+
let isRawContent = false; // Set when inside a smark-raw block — content collected as-is, not parsed
|
|
33
32
|
|
|
34
33
|
/**
|
|
35
34
|
* Adds a token to the stream and updates the scanner's position tracking.
|
|
@@ -95,35 +94,63 @@ function lexer(src, filename = "anonymous") {
|
|
|
95
94
|
}
|
|
96
95
|
|
|
97
96
|
while (i < src.length) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
97
|
+
const char = src[i];
|
|
98
|
+
const next = src[i + 1];
|
|
99
|
+
|
|
100
|
+
// --- RAW CONTENT MODE ---
|
|
101
|
+
// Collect everything as-is until [end] or [end:name]. \[ escapes a literal [.
|
|
102
|
+
if (isRawContent) {
|
|
103
|
+
let raw = "";
|
|
104
|
+
while (i < src.length) {
|
|
105
|
+
if (src[i] === "\\" && src[i + 1] === "[") {
|
|
106
|
+
raw += "[";
|
|
107
|
+
i += 2;
|
|
108
|
+
continue;
|
|
109
|
+
}
|
|
110
|
+
if (src[i] === "[") {
|
|
111
|
+
if (src.startsWith(`[${end_keyword}]`, i) || src.startsWith(`[${end_keyword}:`, i)) break;
|
|
112
|
+
}
|
|
113
|
+
raw += src[i];
|
|
114
|
+
i++;
|
|
115
|
+
}
|
|
116
|
+
if (raw) addToken(TOKEN_TYPES.TEXT, raw);
|
|
117
|
+
isRawContent = false;
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// --- PHASE 1.5: PV PREFIX CONTENT MODE ---
|
|
122
|
+
// Handles structured content inside p{} and v{} prefixes.
|
|
123
|
+
if (isInPVPrefix && !isInQuote) {
|
|
124
|
+
if (char === '"' || char === "'") {
|
|
125
|
+
addToken(TOKEN_TYPES.QUOTE, char);
|
|
126
|
+
i++;
|
|
127
|
+
isInQuote = true;
|
|
128
|
+
continue;
|
|
129
|
+
}
|
|
130
|
+
if (char === '|') {
|
|
131
|
+
addToken(TOKEN_TYPES.PIPELINE, "|");
|
|
132
|
+
i++;
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
if (char === '}') {
|
|
136
|
+
addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
|
|
137
|
+
isInPVPrefix = false;
|
|
138
|
+
i++;
|
|
139
|
+
continue;
|
|
140
|
+
}
|
|
141
|
+
if (char !== ' ' && char !== '\t' && char !== '\n' && char !== '\r') {
|
|
142
|
+
let word = '';
|
|
105
143
|
while (i < src.length) {
|
|
106
|
-
|
|
107
|
-
if (
|
|
108
|
-
|
|
109
|
-
i += 2;
|
|
110
|
-
continue;
|
|
111
|
-
}
|
|
112
|
-
// Stop at end marker
|
|
113
|
-
if (src[i] === "@" && src[i + 1] === "_") {
|
|
114
|
-
break;
|
|
115
|
-
}
|
|
116
|
-
body += src[i];
|
|
144
|
+
const c = src[i];
|
|
145
|
+
if (c === '}' || c === '|' || c === '"' || c === "'" || c === ' ' || c === '\t' || c === '\n' || c === '\r') break;
|
|
146
|
+
word += c;
|
|
117
147
|
i++;
|
|
118
148
|
}
|
|
119
|
-
if (
|
|
120
|
-
addToken(TOKEN_TYPES.TEXT, body);
|
|
121
|
-
}
|
|
149
|
+
if (word) addToken(TOKEN_TYPES.KEY, word);
|
|
122
150
|
continue;
|
|
123
151
|
}
|
|
152
|
+
// Whitespace: fall through to PHASE 3 whitespace handling
|
|
124
153
|
}
|
|
125
|
-
const char = src[i];
|
|
126
|
-
const next = src[i + 1];
|
|
127
154
|
|
|
128
155
|
// --- PHASE 2: QUOTE MODE ---
|
|
129
156
|
// Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
|
|
@@ -141,50 +168,57 @@ function lexer(src, filename = "anonymous") {
|
|
|
141
168
|
}
|
|
142
169
|
|
|
143
170
|
// Support Prefix Layers inside quotes!
|
|
144
|
-
if ((src[i] === "
|
|
145
|
-
const isJS = (src[i] === "j");
|
|
171
|
+
if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
146
172
|
const isV = (src[i] === "v");
|
|
147
173
|
if (quoteValue.length > 0) {
|
|
148
174
|
addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
149
175
|
quoteValue = "";
|
|
150
176
|
}
|
|
151
177
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
178
|
+
{
|
|
179
|
+
// p{} or v{}: keyword + PREFIX_OPEN + unquoted key + optional PIPELINE + fallback + PREFIX_CLOSE
|
|
180
|
+
addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
|
|
181
|
+
addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
|
|
182
|
+
i += 2;
|
|
183
|
+
// Scan unquoted key (cannot use same quote char as outer string)
|
|
184
|
+
let key = "";
|
|
185
|
+
while (i < src.length && src[i] !== "|" && src[i] !== "}" && src[i] !== quoteChar) {
|
|
186
|
+
key += src[i];
|
|
187
|
+
i++;
|
|
188
|
+
}
|
|
189
|
+
if (key.trim()) addToken(TOKEN_TYPES.KEY, key.trim());
|
|
190
|
+
// Optional PIPELINE + fallback
|
|
191
|
+
if (i < src.length && src[i] === "|") {
|
|
192
|
+
addToken(TOKEN_TYPES.PIPELINE, "|");
|
|
193
|
+
i++;
|
|
194
|
+
let fallback = "";
|
|
195
|
+
while (i < src.length && src[i] !== "}" && src[i] !== quoteChar) {
|
|
196
|
+
fallback += src[i];
|
|
197
|
+
i++;
|
|
165
198
|
}
|
|
166
|
-
if (
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
199
|
+
if (fallback.trim()) addToken(TOKEN_TYPES.VALUE, fallback.trim());
|
|
200
|
+
}
|
|
201
|
+
// PREFIX_CLOSE
|
|
202
|
+
if (i < src.length && src[i] === "}") {
|
|
203
|
+
addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
|
|
204
|
+
i++;
|
|
171
205
|
}
|
|
172
|
-
prefixValue += c;
|
|
173
|
-
i++;
|
|
174
206
|
}
|
|
175
|
-
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
176
|
-
addToken(tokenType, prefixValue);
|
|
177
207
|
continue;
|
|
178
208
|
}
|
|
179
209
|
|
|
180
210
|
if (src[i] === quoteChar) {
|
|
181
211
|
// Guess role based on next structural character
|
|
182
212
|
let nextStructural = peekStructural(i + 1);
|
|
183
|
-
let tokenType =
|
|
213
|
+
let tokenType = isInHeader && (nextStructural === ":" || nextStructural === "=")
|
|
184
214
|
? TOKEN_TYPES.KEY
|
|
185
215
|
: TOKEN_TYPES.VALUE;
|
|
186
216
|
|
|
187
217
|
if (quoteValue.length > 0) addToken(tokenType, quoteValue);
|
|
218
|
+
if (pendingSmarkRaw && tokenType === TOKEN_TYPES.VALUE && quoteValue === "true") {
|
|
219
|
+
hasSmarkRaw = true;
|
|
220
|
+
pendingSmarkRaw = false;
|
|
221
|
+
}
|
|
188
222
|
addToken(TOKEN_TYPES.QUOTE, quoteChar);
|
|
189
223
|
isInQuote = false;
|
|
190
224
|
i++;
|
|
@@ -255,84 +289,37 @@ function lexer(src, filename = "anonymous") {
|
|
|
255
289
|
continue;
|
|
256
290
|
}
|
|
257
291
|
|
|
258
|
-
// PREFIX LAYERS (
|
|
259
|
-
if ((char === "
|
|
260
|
-
const isJS = (char === "j");
|
|
292
|
+
// PREFIX LAYERS (p{...} or v{...})
|
|
293
|
+
if ((char === "p" && next === "{") || (char === "v" && next === "{")) {
|
|
261
294
|
const isP = (char === "p");
|
|
262
295
|
const isV = (char === "v");
|
|
263
296
|
|
|
264
297
|
// Context Check
|
|
265
|
-
const isBlockHeader = isInHeader
|
|
266
|
-
const isNormalText = !isInHeader
|
|
298
|
+
const isBlockHeader = isInHeader;
|
|
299
|
+
const isNormalText = !isInHeader;
|
|
267
300
|
|
|
268
301
|
let allowed = false;
|
|
269
|
-
if (isJS && isBlockHeader) allowed = true;
|
|
270
302
|
if (isP && (isBlockHeader || isNormalText)) allowed = true;
|
|
271
303
|
if (isV && (isBlockHeader || isNormalText)) allowed = true;
|
|
272
304
|
|
|
273
305
|
if (allowed) {
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
while (i < src.length && braceDepth > 0) {
|
|
280
|
-
const c = src[i];
|
|
281
|
-
const n = src[i + 1];
|
|
282
|
-
|
|
283
|
-
if (inString) {
|
|
284
|
-
if (c === "\\" && (n === inString || n === "\\")) {
|
|
285
|
-
prefixValue += c + n;
|
|
286
|
-
i += 2;
|
|
287
|
-
continue;
|
|
288
|
-
}
|
|
289
|
-
if (c === inString) inString = null;
|
|
290
|
-
} else {
|
|
291
|
-
if (c === "\"" || c === "'") inString = c;
|
|
292
|
-
else if (c === "{") braceDepth++;
|
|
293
|
-
else if (c === "}") braceDepth--;
|
|
294
|
-
}
|
|
295
|
-
prefixValue += c;
|
|
296
|
-
i++;
|
|
297
|
-
}
|
|
298
|
-
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
299
|
-
addToken(tokenType, prefixValue);
|
|
306
|
+
// p{} or v{}: emit keyword + PREFIX_OPEN, enter structured content mode
|
|
307
|
+
addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
|
|
308
|
+
addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
|
|
309
|
+
i += 2; // skip "p{" or "v{"
|
|
310
|
+
isInPVPrefix = true;
|
|
300
311
|
continue;
|
|
301
312
|
}
|
|
302
313
|
// If not allowed, it will fall through to normal word scanning
|
|
303
314
|
}
|
|
304
315
|
|
|
305
|
-
// MULTI-CHAR MARKERS
|
|
306
|
-
if (char === "@" && next === "_") {
|
|
307
|
-
addToken(TOKEN_TYPES.OPEN_AT, "@_");
|
|
308
|
-
i += 2;
|
|
309
|
-
isInHeader = true; // At-Blocks start with a header part
|
|
310
|
-
isInAtBlockHeader = true;
|
|
311
|
-
continue;
|
|
312
|
-
}
|
|
313
|
-
if (char === "-" && next === ">") {
|
|
314
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
315
|
-
addToken(TOKEN_TYPES.TEXT, "-");
|
|
316
|
-
i++; // Swallowed one char
|
|
317
|
-
} else {
|
|
318
|
-
addToken(TOKEN_TYPES.THIN_ARROW, "->");
|
|
319
|
-
i += 2;
|
|
320
|
-
isInInlineHead = true; // The following ( ) will be structural
|
|
321
|
-
}
|
|
322
|
-
continue;
|
|
323
|
-
}
|
|
324
|
-
|
|
325
316
|
// STATIC KEYWORD
|
|
326
317
|
if (char === "s" && src.slice(i, i + 6) === "static") {
|
|
327
318
|
const afterStatic = src.slice(i + 6);
|
|
328
319
|
const hasSpace = afterStatic.startsWith(" ");
|
|
329
320
|
const hasLogic = hasSpace ? afterStatic.slice(1).startsWith("${") : afterStatic.startsWith("${");
|
|
330
321
|
|
|
331
|
-
const isMainIdentifier =
|
|
332
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
333
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
334
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
335
|
-
);
|
|
322
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
336
323
|
|
|
337
324
|
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
338
325
|
addToken(TOKEN_TYPES.STATIC_KEYWORD, hasSpace ? "static " : "static");
|
|
@@ -347,11 +334,7 @@ function lexer(src, filename = "anonymous") {
|
|
|
347
334
|
const hasSpace = afterRuntime.startsWith(" ");
|
|
348
335
|
const hasLogic = hasSpace ? afterRuntime.slice(1).startsWith("${") : afterRuntime.startsWith("${");
|
|
349
336
|
|
|
350
|
-
const isMainIdentifier =
|
|
351
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
352
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
353
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
354
|
-
);
|
|
337
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
355
338
|
|
|
356
339
|
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
357
340
|
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, hasSpace ? "runtime " : "runtime");
|
|
@@ -360,213 +343,126 @@ function lexer(src, filename = "anonymous") {
|
|
|
360
343
|
}
|
|
361
344
|
}
|
|
362
345
|
|
|
363
|
-
// LOGIC BLOCKS (${ ... }$)
|
|
364
|
-
if (char === "$" && next === "{"
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
let internalString = null;
|
|
371
|
-
let foundClosing = false;
|
|
346
|
+
// LOGIC BLOCKS (${ ... }$) — explicit: static/runtime ${ }$ shorthand: ${ }$ = static ${ }$
|
|
347
|
+
if (char === "$" && next === "{") {
|
|
348
|
+
{
|
|
349
|
+
const hasExplicitKeyword = last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD;
|
|
350
|
+
if (!hasExplicitKeyword) addToken(TOKEN_TYPES.STATIC_KEYWORD, "static");
|
|
351
|
+
addToken(TOKEN_TYPES.LOGIC_OPEN, "${");
|
|
352
|
+
i += 2;
|
|
372
353
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
354
|
+
let logicCode = "";
|
|
355
|
+
let depth = 0;
|
|
356
|
+
let internalString = null;
|
|
376
357
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
i
|
|
380
|
-
braceDepth = 0;
|
|
381
|
-
foundClosing = true;
|
|
382
|
-
break;
|
|
383
|
-
}
|
|
358
|
+
while (i < src.length) {
|
|
359
|
+
const c = src[i];
|
|
360
|
+
const n = src[i + 1];
|
|
384
361
|
|
|
385
|
-
|
|
386
|
-
if (c === "
|
|
387
|
-
|
|
388
|
-
i += 2;
|
|
389
|
-
continue;
|
|
362
|
+
// Close condition: }$ at depth 0, not followed by { (}${ is a template expression boundary)
|
|
363
|
+
if (c === "}" && n === "$" && !internalString && depth === 0 && src[i + 2] !== "{") {
|
|
364
|
+
break;
|
|
390
365
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
logicCode += src[i];
|
|
398
|
-
i++;
|
|
366
|
+
|
|
367
|
+
if (internalString) {
|
|
368
|
+
if (c === "\\" && (n === internalString || n === "\\")) {
|
|
369
|
+
logicCode += c + n;
|
|
370
|
+
i += 2;
|
|
371
|
+
continue;
|
|
399
372
|
}
|
|
400
|
-
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
i += 2;
|
|
409
|
-
break;
|
|
373
|
+
if (c === internalString) internalString = null;
|
|
374
|
+
} else {
|
|
375
|
+
if (c === "/" && n === "/") {
|
|
376
|
+
logicCode += c + n;
|
|
377
|
+
i += 2;
|
|
378
|
+
while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
|
|
379
|
+
logicCode += src[i];
|
|
380
|
+
i++;
|
|
410
381
|
}
|
|
411
|
-
|
|
412
|
-
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
if (c === "/" && n === "*") {
|
|
385
|
+
logicCode += c + n;
|
|
386
|
+
i += 2;
|
|
387
|
+
while (i < src.length) {
|
|
388
|
+
if (src[i] === "*" && src[i + 1] === "/") {
|
|
389
|
+
logicCode += "*/";
|
|
390
|
+
i += 2;
|
|
391
|
+
break;
|
|
392
|
+
}
|
|
393
|
+
logicCode += src[i];
|
|
394
|
+
i++;
|
|
395
|
+
}
|
|
396
|
+
continue;
|
|
413
397
|
}
|
|
414
|
-
|
|
398
|
+
|
|
399
|
+
if (c === "\"" || c === "'" || c === "`") internalString = c;
|
|
400
|
+
else if (c === "{") depth++;
|
|
401
|
+
else if (c === "}") depth--;
|
|
415
402
|
}
|
|
416
403
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
else if (c === "}") braceDepth--;
|
|
404
|
+
logicCode += c;
|
|
405
|
+
i++;
|
|
420
406
|
}
|
|
421
407
|
|
|
422
|
-
logicCode
|
|
423
|
-
i++;
|
|
424
|
-
}
|
|
408
|
+
addToken(TOKEN_TYPES.LOGIC, logicCode);
|
|
425
409
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
range: {
|
|
431
|
-
start: { line: startLine, character: startCharacter },
|
|
432
|
-
end: { line: startLine, character: startCharacter + 2 }
|
|
433
|
-
}
|
|
434
|
-
});
|
|
435
|
-
}
|
|
410
|
+
if (i < src.length && src[i] === "}" && src[i + 1] === "$") {
|
|
411
|
+
addToken(TOKEN_TYPES.LOGIC_CLOSE, "}$");
|
|
412
|
+
i += 2;
|
|
413
|
+
}
|
|
436
414
|
|
|
437
|
-
|
|
438
|
-
|
|
415
|
+
continue;
|
|
416
|
+
}
|
|
439
417
|
}
|
|
440
418
|
|
|
441
419
|
// SINGLE-CHAR MARKERS
|
|
442
420
|
if (char === "[") {
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
isInHeader = true;
|
|
448
|
-
}
|
|
421
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
|
|
422
|
+
isInHeader = true;
|
|
423
|
+
pendingSmarkRaw = false;
|
|
424
|
+
hasSmarkRaw = false;
|
|
449
425
|
i++;
|
|
450
426
|
continue;
|
|
451
427
|
}
|
|
452
|
-
if (char === "_" && next === "@") {
|
|
453
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
454
|
-
addToken(TOKEN_TYPES.TEXT, "_@");
|
|
455
|
-
} else {
|
|
456
|
-
const lastRealType = last_non_junk_type;
|
|
457
|
-
addToken(TOKEN_TYPES.CLOSE_AT, "_@");
|
|
458
|
-
// Removed delimiter stack check
|
|
459
|
-
if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
|
|
460
|
-
isInAtBlockBody = false;
|
|
461
|
-
isInHeader = false;
|
|
462
|
-
isInAtBlockHeader = false;
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
i += 2;
|
|
466
|
-
continue;
|
|
467
|
-
}
|
|
468
428
|
if (char === "]") {
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
}
|
|
475
|
-
i++;
|
|
476
|
-
continue;
|
|
477
|
-
}
|
|
478
|
-
if (char === "(") {
|
|
479
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
480
|
-
addToken(TOKEN_TYPES.TEXT, "(");
|
|
481
|
-
parenDepth++;
|
|
482
|
-
} else {
|
|
483
|
-
addToken(TOKEN_TYPES.OPEN_PAREN, "(");
|
|
484
|
-
parenDepth++;
|
|
485
|
-
}
|
|
486
|
-
i++;
|
|
487
|
-
continue;
|
|
488
|
-
}
|
|
489
|
-
if (char === ")") {
|
|
490
|
-
if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
|
|
491
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
492
|
-
parenDepth--;
|
|
493
|
-
} else if (parenDepth > 0) {
|
|
494
|
-
// This ends the content part if depth drops to 0
|
|
495
|
-
parenDepth--;
|
|
496
|
-
if (parenDepth === 0) {
|
|
497
|
-
addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
|
|
498
|
-
if (isInInlineHead) {
|
|
499
|
-
isInInlineHead = false;
|
|
500
|
-
isInHeader = false;
|
|
501
|
-
}
|
|
502
|
-
} else {
|
|
503
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
504
|
-
}
|
|
505
|
-
} else {
|
|
506
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
429
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
|
|
430
|
+
isInHeader = false;
|
|
431
|
+
if (hasSmarkRaw) {
|
|
432
|
+
isRawContent = true;
|
|
433
|
+
hasSmarkRaw = false;
|
|
507
434
|
}
|
|
435
|
+
pendingSmarkRaw = false;
|
|
508
436
|
i++;
|
|
509
437
|
continue;
|
|
510
438
|
}
|
|
511
439
|
if (char === ":") {
|
|
512
|
-
|
|
513
|
-
|
|
440
|
+
const colonAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
441
|
+
if (colonAllowed.includes(last_non_junk_type)) {
|
|
442
|
+
addToken(TOKEN_TYPES.COLON, ":");
|
|
443
|
+
isInHeader = true;
|
|
514
444
|
} else {
|
|
515
|
-
|
|
516
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
517
|
-
addToken(TOKEN_TYPES.COLON, ":");
|
|
518
|
-
isInHeader = true;
|
|
519
|
-
} else {
|
|
520
|
-
addToken(TOKEN_TYPES.TEXT, ":");
|
|
521
|
-
}
|
|
445
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
522
446
|
}
|
|
523
447
|
i++;
|
|
524
448
|
continue;
|
|
525
449
|
}
|
|
526
450
|
if (char === "=") {
|
|
527
|
-
|
|
528
|
-
|
|
451
|
+
const eqAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
452
|
+
if (eqAllowed.includes(last_non_junk_type)) {
|
|
453
|
+
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
529
454
|
} else {
|
|
530
|
-
|
|
531
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
532
|
-
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
533
|
-
} else {
|
|
534
|
-
addToken(TOKEN_TYPES.TEXT, "=");
|
|
535
|
-
}
|
|
455
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
536
456
|
}
|
|
537
457
|
i++;
|
|
538
458
|
continue;
|
|
539
459
|
}
|
|
540
460
|
if (char === ",") {
|
|
541
|
-
|
|
542
|
-
|
|
461
|
+
const commaAllowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
462
|
+
if (commaAllowed.includes(last_non_junk_type)) {
|
|
463
|
+
addToken(TOKEN_TYPES.COMMA, ",");
|
|
543
464
|
} else {
|
|
544
|
-
|
|
545
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
546
|
-
addToken(TOKEN_TYPES.COMMA, ",");
|
|
547
|
-
} else {
|
|
548
|
-
addToken(TOKEN_TYPES.TEXT, ",");
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
i++;
|
|
552
|
-
continue;
|
|
553
|
-
}
|
|
554
|
-
if (char === ";") {
|
|
555
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
556
|
-
addToken(TOKEN_TYPES.TEXT, ";");
|
|
557
|
-
} else {
|
|
558
|
-
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.VALUE, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.CLOSE_PAREN, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
559
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
560
|
-
addToken(TOKEN_TYPES.SEMICOLON, ";");
|
|
561
|
-
// ONLY trigger body mode if we were actually in an At-Block header
|
|
562
|
-
if (isInAtBlockHeader) {
|
|
563
|
-
isInHeader = false;
|
|
564
|
-
isInAtBlockHeader = false;
|
|
565
|
-
isInAtBlockBody = true;
|
|
566
|
-
}
|
|
567
|
-
} else {
|
|
568
|
-
addToken(TOKEN_TYPES.TEXT, ";");
|
|
569
|
-
}
|
|
465
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
570
466
|
}
|
|
571
467
|
i++;
|
|
572
468
|
continue;
|
|
@@ -579,7 +475,7 @@ function lexer(src, filename = "anonymous") {
|
|
|
579
475
|
}
|
|
580
476
|
}
|
|
581
477
|
if (char === "\"" || char === "'") {
|
|
582
|
-
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET
|
|
478
|
+
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET];
|
|
583
479
|
const wasValueTrigger = valTriggers.includes(last_non_junk_type);
|
|
584
480
|
addToken(TOKEN_TYPES.QUOTE, char);
|
|
585
481
|
i++;
|
|
@@ -595,28 +491,22 @@ function lexer(src, filename = "anonymous") {
|
|
|
595
491
|
// This is the "Fallback" mode where we scan for identifiers, keys, or values.
|
|
596
492
|
// It uses lookahead and context variables to guess the role of a word.
|
|
597
493
|
let word = "";
|
|
598
|
-
// Only Blocks ([ ]) allow ':' in their main identifier.
|
|
599
|
-
// At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
|
|
600
494
|
const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
|
|
495
|
+
const isInNormalText = !isInHeader;
|
|
601
496
|
|
|
602
|
-
let stopChars = "[]
|
|
603
|
-
if (isStartOfBlockId
|
|
497
|
+
let stopChars = "[]{}:=,\"'#\\ \t\n\r!";
|
|
498
|
+
if (isStartOfBlockId) {
|
|
604
499
|
stopChars = stopChars.replace(":", "");
|
|
605
500
|
}
|
|
606
|
-
const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody;
|
|
607
501
|
if (isInNormalText) {
|
|
608
|
-
stopChars = "[]
|
|
502
|
+
stopChars = "[]\\#\n\r"; // In normal text, stop only at block markers, escapes, comments and newlines
|
|
609
503
|
}
|
|
610
504
|
|
|
611
505
|
while (i < src.length && !stopChars.includes(src[i])) {
|
|
612
506
|
// Stop ONLY if $ is followed by { (Logic block start)
|
|
613
507
|
if (src[i] === "$" && src[i + 1] === "{") break;
|
|
614
508
|
|
|
615
|
-
// Lookahead for
|
|
616
|
-
if (src[i] === "_" && src[i + 1] === "@") break;
|
|
617
|
-
if (src[i] === "@" && src[i + 1] === "_") break;
|
|
618
|
-
|
|
619
|
-
// Lookahead for 'static ${' or 'runtime ${' (only if we're not at the very start of the word scanning)
|
|
509
|
+
// Lookahead for 'static ${' or 'runtime ${' mid-word
|
|
620
510
|
if (word.length > 0) {
|
|
621
511
|
if (src[i] === "s" && src.slice(i, i + 7) === "static " && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
622
512
|
if (src[i] === "s" && src.slice(i, i + 6) === "static" && src[i + 6] === "$" && src[i + 7] === "{") break;
|
|
@@ -624,53 +514,47 @@ function lexer(src, filename = "anonymous") {
|
|
|
624
514
|
if (src[i] === "r" && src.slice(i, i + 7) === "runtime" && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
625
515
|
}
|
|
626
516
|
|
|
627
|
-
// Lookahead for -> marker in normal text
|
|
628
|
-
if (!isInHeader && src[i] === "-" && src[i + 1] === ">") break;
|
|
629
|
-
|
|
630
517
|
// Stop if we hit an ALLOWED prefix trigger
|
|
631
518
|
if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
632
519
|
if (isInHeader || isInNormalText) break;
|
|
633
520
|
}
|
|
634
|
-
if (src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") {
|
|
635
|
-
if (isInHeader) break;
|
|
636
|
-
}
|
|
637
521
|
word += src[i];
|
|
638
522
|
i++;
|
|
639
523
|
}
|
|
640
524
|
|
|
641
525
|
if (word.length > 0) {
|
|
642
526
|
// Guess role based on context
|
|
643
|
-
if (
|
|
644
|
-
// Inside Inline Content (raw text)
|
|
645
|
-
addToken(TOKEN_TYPES.TEXT, word);
|
|
646
|
-
} else if (isInHeader || isInInlineHead) {
|
|
527
|
+
if (isInHeader) {
|
|
647
528
|
// Inside a structural header context
|
|
648
|
-
const isMainIdentifier =
|
|
649
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
650
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
651
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
652
|
-
);
|
|
529
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
653
530
|
|
|
654
531
|
if (isMainIdentifier) {
|
|
655
|
-
if (word === end_keyword) {
|
|
532
|
+
if (word === end_keyword || word.startsWith(end_keyword + ":")) {
|
|
656
533
|
addToken(TOKEN_TYPES.END_KEYWORD, word);
|
|
657
534
|
}
|
|
658
535
|
else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
|
|
659
536
|
else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
|
|
660
537
|
else if (word === "slot") addToken(TOKEN_TYPES.SLOT_KEYWORD, word);
|
|
661
538
|
else if (word === "for-each") addToken(TOKEN_TYPES.FOR_EACH, word);
|
|
662
|
-
else
|
|
539
|
+
else {
|
|
540
|
+
addToken(TOKEN_TYPES.IDENTIFIER, word);
|
|
541
|
+
}
|
|
663
542
|
} else {
|
|
664
543
|
// Use lookahead to distinguish KEY from VALUE
|
|
665
544
|
const p = peekStructural(i);
|
|
666
545
|
if (p === ":") {
|
|
667
546
|
addToken(TOKEN_TYPES.KEY, word);
|
|
547
|
+
if (word === "smark-raw") pendingSmarkRaw = true;
|
|
668
548
|
} else if (word === "static") {
|
|
669
549
|
addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
|
|
670
550
|
} else if (word === "runtime") {
|
|
671
551
|
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
|
|
672
552
|
} else {
|
|
673
553
|
addToken(TOKEN_TYPES.VALUE, word);
|
|
554
|
+
if (pendingSmarkRaw) {
|
|
555
|
+
if (word === "true") hasSmarkRaw = true;
|
|
556
|
+
pendingSmarkRaw = false;
|
|
557
|
+
}
|
|
674
558
|
}
|
|
675
559
|
}
|
|
676
560
|
} else {
|