sommark 4.5.3 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +314 -178
- package/cli/cli.mjs +1 -1
- package/cli/commands/color.js +36 -14
- package/cli/commands/help.js +3 -0
- package/cli/commands/init.js +0 -2
- package/cli/constants.js +5 -2
- package/core/errors.js +5 -4
- package/core/evaluator.js +1 -2
- package/core/formats.js +7 -1
- package/core/helpers/config-loader.js +1 -3
- package/core/helpers/lib.js +1 -1
- package/core/labels.js +2 -15
- package/core/lexer.js +197 -313
- package/core/modules.js +13 -13
- package/core/parser.js +226 -535
- package/core/tokenTypes.js +6 -15
- package/core/transpiler.js +129 -110
- package/core/validator.js +6 -26
- package/dist/sommark.browser.js +1777 -2163
- package/dist/sommark.browser.lite.js +1775 -2160
- package/dist/sommark.lexer.js +392 -544
- package/dist/sommark.parser.js +604 -1200
- package/formatter/mark.js +34 -0
- package/formatter/tag.js +7 -33
- package/helpers/utils.js +15 -16
- package/index.js +9 -1
- package/index.shared.js +22 -12
- package/mappers/languages/csv.js +62 -0
- package/mappers/languages/html.js +12 -66
- package/mappers/languages/json.js +74 -156
- package/mappers/languages/jsonc.js +21 -63
- package/mappers/languages/markdown.js +159 -276
- package/mappers/languages/mdx.js +7 -62
- package/mappers/languages/text.js +2 -19
- package/mappers/languages/toml.js +231 -0
- package/mappers/languages/xml.js +25 -25
- package/mappers/languages/yaml.js +323 -0
- package/mappers/mapper.js +1 -22
- package/mappers/shared/index.js +3 -16
- package/package.json +5 -2
package/dist/sommark.lexer.js
CHANGED
|
@@ -8,16 +8,10 @@
|
|
|
8
8
|
* @property {string} END_KEYWORD - 'end' value.
|
|
9
9
|
* @property {string} IDENTIFIER - Block or inline name (e.g. 'Person', 'import', '$use-module').
|
|
10
10
|
* @property {string} EQUAL - '=' char.
|
|
11
|
-
* @property {string} VALUE - Data values. Encapsulates Quoted Strings ("...") and Prefix Layers (
|
|
11
|
+
* @property {string} VALUE - Data values. Encapsulates Quoted Strings ("...") and Prefix Layers (p{}, v{}).
|
|
12
12
|
* @property {string} TEXT - Plain unformatted text content.
|
|
13
|
-
* @property {string} THIN_ARROW - '->' sequence.
|
|
14
|
-
* @property {string} OPEN_PAREN - '(' char.
|
|
15
|
-
* @property {string} CLOSE_PAREN - ')' char.
|
|
16
|
-
* @property {string} OPEN_AT - '@_' sequence (At-Block start).
|
|
17
|
-
* @property {string} CLOSE_AT - '_@' sequence (At-Header end).
|
|
18
13
|
* @property {string} COLON - ':' char.
|
|
19
14
|
* @property {string} COMMA - ',' char.
|
|
20
|
-
* @property {string} SEMICOLON - ';' char (At-Block separator).
|
|
21
15
|
* @property {string} COMMENT - '#' comments.
|
|
22
16
|
* @property {string} COMMENT_BLOCK - '###' comments.
|
|
23
17
|
* @property {string} ESCAPE - '\' char. Used for literalizing structural chars like '\"' or '\['.
|
|
@@ -25,7 +19,6 @@
|
|
|
25
19
|
* @property {string} EXCLAMATION_MARK - '!' char.
|
|
26
20
|
* @property {string} IMPORT - 'import' keyword.
|
|
27
21
|
* @property {string} USE_MODULE - '$use-module' keyword.
|
|
28
|
-
* @property {string} PREFIX_JS - 'js{}' prefix layer.
|
|
29
22
|
* @property {string} PREFIX_P - 'p{}' placeholder layer.
|
|
30
23
|
* @property {string} PREFIX_V - 'v{}' local variable layer.
|
|
31
24
|
* @property {string} EOF - End of File indicator.
|
|
@@ -40,18 +33,11 @@ const TOKEN_TYPES = {
|
|
|
40
33
|
EQUAL: "EQUAL",
|
|
41
34
|
VALUE: "VALUE",
|
|
42
35
|
QUOTE: "QUOTE",
|
|
43
|
-
PREFIX_JS: "PREFIX_JS",
|
|
44
36
|
PREFIX_P: "PREFIX_P",
|
|
45
37
|
PREFIX_V: "PREFIX_V",
|
|
46
38
|
TEXT: "TEXT",
|
|
47
|
-
THIN_ARROW: "THIN_ARROW",
|
|
48
|
-
OPEN_PAREN: "OPEN_PAREN",
|
|
49
|
-
CLOSE_PAREN: "CLOSE_PAREN",
|
|
50
|
-
OPEN_AT: "OPEN_AT",
|
|
51
|
-
CLOSE_AT: "CLOSE_AT",
|
|
52
39
|
COLON: "COLON",
|
|
53
40
|
COMMA: "COMMA",
|
|
54
|
-
SEMICOLON: "SEMICOLON",
|
|
55
41
|
COMMENT: "COMMENT",
|
|
56
42
|
COMMENT_BLOCK: "COMMENT_BLOCK",
|
|
57
43
|
ESCAPE: "ESCAPE",
|
|
@@ -61,8 +47,13 @@ const TOKEN_TYPES = {
|
|
|
61
47
|
WHITESPACE: "WHITESPACE",
|
|
62
48
|
STATIC_KEYWORD: "STATIC_KEYWORD",
|
|
63
49
|
RUNTIME_KEYWORD: "RUNTIME_KEYWORD",
|
|
50
|
+
LOGIC_OPEN: "LOGIC_OPEN",
|
|
64
51
|
LOGIC: "LOGIC",
|
|
52
|
+
LOGIC_CLOSE: "LOGIC_CLOSE",
|
|
65
53
|
FOR_EACH: "FOR_EACH",
|
|
54
|
+
PREFIX_OPEN: "PREFIX_OPEN",
|
|
55
|
+
PREFIX_CLOSE: "PREFIX_CLOSE",
|
|
56
|
+
PIPELINE: "PIPELINE",
|
|
66
57
|
EOF: "EOF"
|
|
67
58
|
};
|
|
68
59
|
|
|
@@ -72,8 +63,6 @@ const TOKEN_TYPES = {
|
|
|
72
63
|
*/
|
|
73
64
|
const BLOCK = "Block",
|
|
74
65
|
TEXT = "Text",
|
|
75
|
-
INLINE = "Inline",
|
|
76
|
-
ATBLOCK = "AtBlock",
|
|
77
66
|
COMMENT = "Comment",
|
|
78
67
|
COMMENT_BLOCK = "CommentBlock",
|
|
79
68
|
IMPORT = "Import",
|
|
@@ -86,13 +75,8 @@ const BLOCK = "Block",
|
|
|
86
75
|
/**
|
|
87
76
|
* Names for symbols used to separate parts of the code (like commas and colons).
|
|
88
77
|
*/
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
ATBLOCKCOMMA = "Atblock-comma",
|
|
92
|
-
INLINECOMMA = "Inline-comma",
|
|
93
|
-
BLOCKCOLON = "Block-colon",
|
|
94
|
-
ATBLOCKCOLON = "Atblock-colon",
|
|
95
|
-
INLINECOLON = "Inline-colon";
|
|
78
|
+
const BLOCKCOMMA = "Block-comma",
|
|
79
|
+
BLOCKCOLON = "Block-colon";
|
|
96
80
|
|
|
97
81
|
/**
|
|
98
82
|
* These names are used in error messages to tell you exactly which part
|
|
@@ -102,12 +86,6 @@ const block_id = "Block Identifier",
|
|
|
102
86
|
block_value = "Block Value",
|
|
103
87
|
block_key = "Block Key",
|
|
104
88
|
block_end = "Block end",
|
|
105
|
-
inline_id = "Inline Identifier",
|
|
106
|
-
inline_text = "Inline Text",
|
|
107
|
-
at_id = "At Identifier",
|
|
108
|
-
at_value = "At Value",
|
|
109
|
-
atblock_key = "AtBlock Key",
|
|
110
|
-
at_end = "Atblock End",
|
|
111
89
|
/** Reserved keyword for closing blocks */
|
|
112
90
|
end_keyword = "end",
|
|
113
91
|
slot_keyword = "slot",
|
|
@@ -115,9 +93,6 @@ const block_id = "Block Identifier",
|
|
|
115
93
|
|
|
116
94
|
var labels = /*#__PURE__*/Object.freeze({
|
|
117
95
|
__proto__: null,
|
|
118
|
-
ATBLOCK: ATBLOCK,
|
|
119
|
-
ATBLOCKCOLON: ATBLOCKCOLON,
|
|
120
|
-
ATBLOCKCOMMA: ATBLOCKCOMMA,
|
|
121
96
|
BLOCK: BLOCK,
|
|
122
97
|
BLOCKCOLON: BLOCKCOLON,
|
|
123
98
|
BLOCKCOMMA: BLOCKCOMMA,
|
|
@@ -125,219 +100,20 @@ var labels = /*#__PURE__*/Object.freeze({
|
|
|
125
100
|
COMMENT_BLOCK: COMMENT_BLOCK,
|
|
126
101
|
FOR_EACH: FOR_EACH,
|
|
127
102
|
IMPORT: IMPORT,
|
|
128
|
-
INLINE: INLINE,
|
|
129
|
-
INLINECOLON: INLINECOLON,
|
|
130
|
-
INLINECOMMA: INLINECOMMA,
|
|
131
103
|
RUNTIME_LOGIC: RUNTIME_LOGIC,
|
|
132
|
-
SEMICOLON: SEMICOLON,
|
|
133
104
|
SLOT: SLOT,
|
|
134
105
|
STATIC_LOGIC: STATIC_LOGIC,
|
|
135
106
|
TEXT: TEXT,
|
|
136
107
|
USE_MODULE: USE_MODULE,
|
|
137
|
-
at_end: at_end,
|
|
138
|
-
at_id: at_id,
|
|
139
|
-
at_value: at_value,
|
|
140
|
-
atblock_key: atblock_key,
|
|
141
108
|
block_end: block_end,
|
|
142
109
|
block_id: block_id,
|
|
143
110
|
block_key: block_key,
|
|
144
111
|
block_value: block_value,
|
|
145
112
|
end_keyword: end_keyword,
|
|
146
113
|
for_each_keyword: for_each_keyword,
|
|
147
|
-
inline_id: inline_id,
|
|
148
|
-
inline_text: inline_text,
|
|
149
114
|
slot_keyword: slot_keyword
|
|
150
115
|
});
|
|
151
116
|
|
|
152
|
-
/**
|
|
153
|
-
* Wraps your text in a color if colors are turned on.
|
|
154
|
-
*
|
|
155
|
-
* @param {string} color - The color to use (red, green, yellow, blue, magenta, or cyan).
|
|
156
|
-
* @param {string} text - The text you want to color.
|
|
157
|
-
* @returns {string} - The colored text, or plain text if colors are off.
|
|
158
|
-
* @throws {Error} - Fails if you forget to provide the text.
|
|
159
|
-
*/
|
|
160
|
-
function colorize(color, text) {
|
|
161
|
-
if (!text) throw new Error("argument 'text' is not defined.");
|
|
162
|
-
return text;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
/**
|
|
166
|
-
* SomMark Errors
|
|
167
|
-
* Handles formatting and throwing errors with beautiful CLI coloring and pointers.
|
|
168
|
-
*/
|
|
169
|
-
|
|
170
|
-
// ========================================================================== //
|
|
171
|
-
// Message Formatting //
|
|
172
|
-
// ========================================================================== //
|
|
173
|
-
|
|
174
|
-
/**
|
|
175
|
-
* Processes a message by applying colors and formatting.
|
|
176
|
-
* Supports:
|
|
177
|
-
* - {line} : Adds a horizontal line
|
|
178
|
-
* - {N} : Adds a new line
|
|
179
|
-
* - <$color: Text$> : Adds color (red, yellow, green, blue, magenta, cyan)
|
|
180
|
-
*
|
|
181
|
-
* @param {string|string[]} text - The message or list of message parts to format.
|
|
182
|
-
* @returns {string} - The final formatted and colored string.
|
|
183
|
-
*/
|
|
184
|
-
function formatMessage(text) {
|
|
185
|
-
const horizontal_rule = "\n----------------------------------------------------------------------------------------------\n";
|
|
186
|
-
const pattern = /<\$([^:]+):([\s\S]*?)\$>/g;
|
|
187
|
-
|
|
188
|
-
if (Array.isArray(text)) {
|
|
189
|
-
text = text.join("");
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
text = text.replace(pattern, (match, color, content) => {
|
|
193
|
-
return colorize(color, content.trim());
|
|
194
|
-
});
|
|
195
|
-
text = text.replaceAll("{line}", horizontal_rule);
|
|
196
|
-
text = text.replaceAll("{N}", "\n");
|
|
197
|
-
|
|
198
|
-
text = text
|
|
199
|
-
.split("\n")
|
|
200
|
-
.filter(value => value !== "")
|
|
201
|
-
.join("\n")
|
|
202
|
-
.trim();
|
|
203
|
-
|
|
204
|
-
return text;
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
/**
|
|
208
|
-
* Creates a detailed error message showing where the error happened in the code.
|
|
209
|
-
* It adds a line number, a snippet of the code, and a pointer (^) to the exact spot.
|
|
210
|
-
*
|
|
211
|
-
* @param {string} src - The original code being parsed.
|
|
212
|
-
* @param {Object} range - The location of the error (line and character).
|
|
213
|
-
* @param {string|null} filename - The name of the file (optional).
|
|
214
|
-
* @param {string|string[]} message - The error message to show.
|
|
215
|
-
* @param {string} typeName - The type of error (e.g., "Lexer" or "Parser").
|
|
216
|
-
* @returns {string[]} - A list of message parts that make up the final error report.
|
|
217
|
-
*/
|
|
218
|
-
function formatErrorWithContext(src, range, filename, message, typeName) {
|
|
219
|
-
if (!src || !range || !range.start) return message;
|
|
220
|
-
|
|
221
|
-
const lines = src.split("\n");
|
|
222
|
-
const lineIndex = range.start.line;
|
|
223
|
-
const lineContent = lines[lineIndex] || "";
|
|
224
|
-
const pointerPadding = " ".repeat(range.start.character);
|
|
225
|
-
const sourceLabel = filename ? ` [${filename}]` : "";
|
|
226
|
-
|
|
227
|
-
const rangeInfo =
|
|
228
|
-
range.start.line === range.end.line
|
|
229
|
-
? `from column <$yellow:${range.start.character}$> to <$yellow:${range.end.character}$>`
|
|
230
|
-
: `from line <$yellow:${range.start.line + 1}$>, column <$yellow:${range.start.character}$> to line <$yellow:${range.end.line + 1}$>, column <$yellow:${range.end.character}$>`;
|
|
231
|
-
|
|
232
|
-
const formattedMessage = [
|
|
233
|
-
`<$blue:{line}$><$red:Here where error occurred${sourceLabel}:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}{N}`,
|
|
234
|
-
`<$red:${typeName} Error:$> `,
|
|
235
|
-
...(Array.isArray(message) ? message : [message]),
|
|
236
|
-
`{N}at line <$yellow:${range.start.line + 1}$>, ${rangeInfo}{N}`,
|
|
237
|
-
"<$blue:{line}$>"
|
|
238
|
-
];
|
|
239
|
-
|
|
240
|
-
return formattedMessage;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
// ========================================================================== //
|
|
244
|
-
// Error Classes //
|
|
245
|
-
// ========================================================================== //
|
|
246
|
-
|
|
247
|
-
/** Base class for all SomMark errors that automatically formats messages for the terminal. */
|
|
248
|
-
class CustomError extends Error {
|
|
249
|
-
/**
|
|
250
|
-
* Creates a new error.
|
|
251
|
-
*
|
|
252
|
-
* @param {string|string[]} message - The text describing what went wrong.
|
|
253
|
-
* @param {string} name - The name of the error type.
|
|
254
|
-
*/
|
|
255
|
-
constructor(message, name) {
|
|
256
|
-
super(message);
|
|
257
|
-
this.name = name;
|
|
258
|
-
this.message = formatMessage(`<$cyan:[${this.name}]$>:`) + "\n" + formatMessage(message);
|
|
259
|
-
if (Error.captureStackTrace) {
|
|
260
|
-
Error.captureStackTrace(this, this.constructor);
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
class ParserError extends CustomError {
|
|
266
|
-
constructor(message) { super(message, "Parser Error"); }
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
class LexerError extends CustomError {
|
|
270
|
-
constructor(message) { super(message, "Lexer Error"); }
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
class TranspilerError extends CustomError {
|
|
274
|
-
constructor(message) { super(message, "Transpiler Error"); }
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
class CLIError extends CustomError {
|
|
278
|
-
constructor(message) { super(message, "CLI Error"); }
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
class RuntimeError extends CustomError {
|
|
282
|
-
constructor(message) { super(message, "Runtime Error"); }
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
class SommarkError extends CustomError {
|
|
286
|
-
constructor(message) { super(message, "SomMark Error"); }
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
// ========================================================================== //
|
|
290
|
-
// Error Dispatcher (Helper) //
|
|
291
|
-
// ========================================================================== //
|
|
292
|
-
|
|
293
|
-
/**
|
|
294
|
-
* A helper that creates an error "dispatcher" for a specific category.
|
|
295
|
-
*
|
|
296
|
-
* @param {string} type - The category of error (e.g., 'lexer', 'parser').
|
|
297
|
-
* @returns {Function} - A function that throws the formatted error.
|
|
298
|
-
*/
|
|
299
|
-
function getError(type) {
|
|
300
|
-
const validate_msg = msg => (Array.isArray(msg) && msg.length > 0) || typeof msg === "string";
|
|
301
|
-
const typeNames = {
|
|
302
|
-
parser: "Parser",
|
|
303
|
-
transpiler: "Transpiler",
|
|
304
|
-
lexer: "Lexer",
|
|
305
|
-
cli: "CLI",
|
|
306
|
-
runtime: "Runtime",
|
|
307
|
-
sommark: "SomMark"
|
|
308
|
-
};
|
|
309
|
-
const ErrorClasses = {
|
|
310
|
-
parser: ParserError,
|
|
311
|
-
transpiler: TranspilerError,
|
|
312
|
-
lexer: LexerError,
|
|
313
|
-
cli: CLIError,
|
|
314
|
-
runtime: RuntimeError,
|
|
315
|
-
sommark: SommarkError
|
|
316
|
-
};
|
|
317
|
-
|
|
318
|
-
return (errorMessage, context = null) => {
|
|
319
|
-
if (validate_msg(errorMessage)) {
|
|
320
|
-
let finalMessage = errorMessage;
|
|
321
|
-
if (context && context.src && context.range) {
|
|
322
|
-
finalMessage = formatErrorWithContext(
|
|
323
|
-
context.src,
|
|
324
|
-
context.range,
|
|
325
|
-
context.filename,
|
|
326
|
-
errorMessage,
|
|
327
|
-
typeNames[type]
|
|
328
|
-
);
|
|
329
|
-
}
|
|
330
|
-
throw new ErrorClasses[type](finalMessage).message;
|
|
331
|
-
}
|
|
332
|
-
};
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
/** Helper to throw Lexer errors. */
|
|
336
|
-
const lexerError = getError("lexer");
|
|
337
|
-
|
|
338
|
-
/** Helper to throw Runtime or Module errors. */
|
|
339
|
-
const runtimeError = getError("runtime");
|
|
340
|
-
|
|
341
117
|
/**
|
|
342
118
|
* SomMark Lexer
|
|
343
119
|
*
|
|
@@ -357,12 +133,12 @@ function lexer(src, filename = "anonymous") {
|
|
|
357
133
|
let line = 0, character = 0;
|
|
358
134
|
|
|
359
135
|
// State Variables
|
|
360
|
-
let isInAtBlockBody = false;
|
|
361
136
|
let isInQuote = false;
|
|
362
|
-
let isInHeader = false;
|
|
363
|
-
let
|
|
364
|
-
let
|
|
365
|
-
let
|
|
137
|
+
let isInHeader = false; // Tracks if we are in a structural header context
|
|
138
|
+
let isInPVPrefix = false; // Tracks if we are scanning inside a p{} or v{} prefix
|
|
139
|
+
let pendingSmarkRaw = false; // Set when KEY "smark-raw" is seen — waiting for value
|
|
140
|
+
let hasSmarkRaw = false; // Set when smark-raw: true is confirmed in header
|
|
141
|
+
let isRawContent = false; // Set when inside a smark-raw block — content collected as-is, not parsed
|
|
366
142
|
|
|
367
143
|
/**
|
|
368
144
|
* Adds a token to the stream and updates the scanner's position tracking.
|
|
@@ -426,35 +202,63 @@ function lexer(src, filename = "anonymous") {
|
|
|
426
202
|
}
|
|
427
203
|
|
|
428
204
|
while (i < src.length) {
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
205
|
+
const char = src[i];
|
|
206
|
+
const next = src[i + 1];
|
|
207
|
+
|
|
208
|
+
// --- RAW CONTENT MODE ---
|
|
209
|
+
// Collect everything as-is until [end] or [end:name]. \[ escapes a literal [.
|
|
210
|
+
if (isRawContent) {
|
|
211
|
+
let raw = "";
|
|
212
|
+
while (i < src.length) {
|
|
213
|
+
if (src[i] === "\\" && src[i + 1] === "[") {
|
|
214
|
+
raw += "[";
|
|
215
|
+
i += 2;
|
|
216
|
+
continue;
|
|
217
|
+
}
|
|
218
|
+
if (src[i] === "[") {
|
|
219
|
+
if (src.startsWith(`[${end_keyword}]`, i) || src.startsWith(`[${end_keyword}:`, i)) break;
|
|
220
|
+
}
|
|
221
|
+
raw += src[i];
|
|
222
|
+
i++;
|
|
223
|
+
}
|
|
224
|
+
if (raw) addToken(TOKEN_TYPES.TEXT, raw);
|
|
225
|
+
isRawContent = false;
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// --- PHASE 1.5: PV PREFIX CONTENT MODE ---
|
|
230
|
+
// Handles structured content inside p{} and v{} prefixes.
|
|
231
|
+
if (isInPVPrefix && !isInQuote) {
|
|
232
|
+
if (char === '"' || char === "'") {
|
|
233
|
+
addToken(TOKEN_TYPES.QUOTE, char);
|
|
234
|
+
i++;
|
|
235
|
+
isInQuote = true;
|
|
236
|
+
continue;
|
|
237
|
+
}
|
|
238
|
+
if (char === '|') {
|
|
239
|
+
addToken(TOKEN_TYPES.PIPELINE, "|");
|
|
240
|
+
i++;
|
|
241
|
+
continue;
|
|
242
|
+
}
|
|
243
|
+
if (char === '}') {
|
|
244
|
+
addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
|
|
245
|
+
isInPVPrefix = false;
|
|
246
|
+
i++;
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
if (char !== ' ' && char !== '\t' && char !== '\n' && char !== '\r') {
|
|
250
|
+
let word = '';
|
|
436
251
|
while (i < src.length) {
|
|
437
|
-
|
|
438
|
-
if (
|
|
439
|
-
|
|
440
|
-
i += 2;
|
|
441
|
-
continue;
|
|
442
|
-
}
|
|
443
|
-
// Stop at end marker
|
|
444
|
-
if (src[i] === "@" && src[i + 1] === "_") {
|
|
445
|
-
break;
|
|
446
|
-
}
|
|
447
|
-
body += src[i];
|
|
252
|
+
const c = src[i];
|
|
253
|
+
if (c === '}' || c === '|' || c === '"' || c === "'" || c === ' ' || c === '\t' || c === '\n' || c === '\r') break;
|
|
254
|
+
word += c;
|
|
448
255
|
i++;
|
|
449
256
|
}
|
|
450
|
-
if (
|
|
451
|
-
addToken(TOKEN_TYPES.TEXT, body);
|
|
452
|
-
}
|
|
257
|
+
if (word) addToken(TOKEN_TYPES.KEY, word);
|
|
453
258
|
continue;
|
|
454
259
|
}
|
|
260
|
+
// Whitespace: fall through to PHASE 3 whitespace handling
|
|
455
261
|
}
|
|
456
|
-
const char = src[i];
|
|
457
|
-
const next = src[i + 1];
|
|
458
262
|
|
|
459
263
|
// --- PHASE 2: QUOTE MODE ---
|
|
460
264
|
// Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
|
|
@@ -472,50 +276,57 @@ function lexer(src, filename = "anonymous") {
|
|
|
472
276
|
}
|
|
473
277
|
|
|
474
278
|
// Support Prefix Layers inside quotes!
|
|
475
|
-
if ((src[i] === "
|
|
476
|
-
const isJS = (src[i] === "j");
|
|
279
|
+
if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
477
280
|
const isV = (src[i] === "v");
|
|
478
281
|
if (quoteValue.length > 0) {
|
|
479
282
|
addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
480
283
|
quoteValue = "";
|
|
481
284
|
}
|
|
482
285
|
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
286
|
+
{
|
|
287
|
+
// p{} or v{}: keyword + PREFIX_OPEN + unquoted key + optional PIPELINE + fallback + PREFIX_CLOSE
|
|
288
|
+
addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
|
|
289
|
+
addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
|
|
290
|
+
i += 2;
|
|
291
|
+
// Scan unquoted key (cannot use same quote char as outer string)
|
|
292
|
+
let key = "";
|
|
293
|
+
while (i < src.length && src[i] !== "|" && src[i] !== "}" && src[i] !== quoteChar) {
|
|
294
|
+
key += src[i];
|
|
295
|
+
i++;
|
|
296
|
+
}
|
|
297
|
+
if (key.trim()) addToken(TOKEN_TYPES.KEY, key.trim());
|
|
298
|
+
// Optional PIPELINE + fallback
|
|
299
|
+
if (i < src.length && src[i] === "|") {
|
|
300
|
+
addToken(TOKEN_TYPES.PIPELINE, "|");
|
|
301
|
+
i++;
|
|
302
|
+
let fallback = "";
|
|
303
|
+
while (i < src.length && src[i] !== "}" && src[i] !== quoteChar) {
|
|
304
|
+
fallback += src[i];
|
|
305
|
+
i++;
|
|
496
306
|
}
|
|
497
|
-
if (
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
307
|
+
if (fallback.trim()) addToken(TOKEN_TYPES.VALUE, fallback.trim());
|
|
308
|
+
}
|
|
309
|
+
// PREFIX_CLOSE
|
|
310
|
+
if (i < src.length && src[i] === "}") {
|
|
311
|
+
addToken(TOKEN_TYPES.PREFIX_CLOSE, "}");
|
|
312
|
+
i++;
|
|
502
313
|
}
|
|
503
|
-
prefixValue += c;
|
|
504
|
-
i++;
|
|
505
314
|
}
|
|
506
|
-
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
507
|
-
addToken(tokenType, prefixValue);
|
|
508
315
|
continue;
|
|
509
316
|
}
|
|
510
317
|
|
|
511
318
|
if (src[i] === quoteChar) {
|
|
512
319
|
// Guess role based on next structural character
|
|
513
320
|
let nextStructural = peekStructural(i + 1);
|
|
514
|
-
let tokenType =
|
|
321
|
+
let tokenType = isInHeader && (nextStructural === ":" || nextStructural === "=")
|
|
515
322
|
? TOKEN_TYPES.KEY
|
|
516
323
|
: TOKEN_TYPES.VALUE;
|
|
517
324
|
|
|
518
325
|
if (quoteValue.length > 0) addToken(tokenType, quoteValue);
|
|
326
|
+
if (pendingSmarkRaw && tokenType === TOKEN_TYPES.VALUE && quoteValue === "true") {
|
|
327
|
+
hasSmarkRaw = true;
|
|
328
|
+
pendingSmarkRaw = false;
|
|
329
|
+
}
|
|
519
330
|
addToken(TOKEN_TYPES.QUOTE, quoteChar);
|
|
520
331
|
isInQuote = false;
|
|
521
332
|
i++;
|
|
@@ -583,84 +394,37 @@ function lexer(src, filename = "anonymous") {
|
|
|
583
394
|
continue;
|
|
584
395
|
}
|
|
585
396
|
|
|
586
|
-
// PREFIX LAYERS (
|
|
587
|
-
if ((char === "
|
|
588
|
-
const isJS = (char === "j");
|
|
397
|
+
// PREFIX LAYERS (p{...} or v{...})
|
|
398
|
+
if ((char === "p" && next === "{") || (char === "v" && next === "{")) {
|
|
589
399
|
const isP = (char === "p");
|
|
590
400
|
const isV = (char === "v");
|
|
591
401
|
|
|
592
402
|
// Context Check
|
|
593
|
-
const isBlockHeader = isInHeader
|
|
594
|
-
const isNormalText = !isInHeader
|
|
403
|
+
const isBlockHeader = isInHeader;
|
|
404
|
+
const isNormalText = !isInHeader;
|
|
595
405
|
|
|
596
406
|
let allowed = false;
|
|
597
|
-
if (isJS && isBlockHeader) allowed = true;
|
|
598
407
|
if (isP && (isBlockHeader || isNormalText)) allowed = true;
|
|
599
408
|
if (isV && (isBlockHeader || isNormalText)) allowed = true;
|
|
600
409
|
|
|
601
410
|
if (allowed) {
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
while (i < src.length && braceDepth > 0) {
|
|
608
|
-
const c = src[i];
|
|
609
|
-
const n = src[i + 1];
|
|
610
|
-
|
|
611
|
-
if (inString) {
|
|
612
|
-
if (c === "\\" && (n === inString || n === "\\")) {
|
|
613
|
-
prefixValue += c + n;
|
|
614
|
-
i += 2;
|
|
615
|
-
continue;
|
|
616
|
-
}
|
|
617
|
-
if (c === inString) inString = null;
|
|
618
|
-
} else {
|
|
619
|
-
if (c === "\"" || c === "'") inString = c;
|
|
620
|
-
else if (c === "{") braceDepth++;
|
|
621
|
-
else if (c === "}") braceDepth--;
|
|
622
|
-
}
|
|
623
|
-
prefixValue += c;
|
|
624
|
-
i++;
|
|
625
|
-
}
|
|
626
|
-
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
627
|
-
addToken(tokenType, prefixValue);
|
|
411
|
+
// p{} or v{}: emit keyword + PREFIX_OPEN, enter structured content mode
|
|
412
|
+
addToken(isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P, isV ? "v" : "p");
|
|
413
|
+
addToken(TOKEN_TYPES.PREFIX_OPEN, "{");
|
|
414
|
+
i += 2; // skip "p{" or "v{"
|
|
415
|
+
isInPVPrefix = true;
|
|
628
416
|
continue;
|
|
629
417
|
}
|
|
630
418
|
// If not allowed, it will fall through to normal word scanning
|
|
631
419
|
}
|
|
632
420
|
|
|
633
|
-
// MULTI-CHAR MARKERS
|
|
634
|
-
if (char === "@" && next === "_") {
|
|
635
|
-
addToken(TOKEN_TYPES.OPEN_AT, "@_");
|
|
636
|
-
i += 2;
|
|
637
|
-
isInHeader = true; // At-Blocks start with a header part
|
|
638
|
-
isInAtBlockHeader = true;
|
|
639
|
-
continue;
|
|
640
|
-
}
|
|
641
|
-
if (char === "-" && next === ">") {
|
|
642
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
643
|
-
addToken(TOKEN_TYPES.TEXT, "-");
|
|
644
|
-
i++; // Swallowed one char
|
|
645
|
-
} else {
|
|
646
|
-
addToken(TOKEN_TYPES.THIN_ARROW, "->");
|
|
647
|
-
i += 2;
|
|
648
|
-
isInInlineHead = true; // The following ( ) will be structural
|
|
649
|
-
}
|
|
650
|
-
continue;
|
|
651
|
-
}
|
|
652
|
-
|
|
653
421
|
// STATIC KEYWORD
|
|
654
422
|
if (char === "s" && src.slice(i, i + 6) === "static") {
|
|
655
423
|
const afterStatic = src.slice(i + 6);
|
|
656
424
|
const hasSpace = afterStatic.startsWith(" ");
|
|
657
425
|
const hasLogic = hasSpace ? afterStatic.slice(1).startsWith("${") : afterStatic.startsWith("${");
|
|
658
426
|
|
|
659
|
-
const isMainIdentifier =
|
|
660
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
661
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
662
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
663
|
-
);
|
|
427
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
664
428
|
|
|
665
429
|
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
666
430
|
addToken(TOKEN_TYPES.STATIC_KEYWORD, hasSpace ? "static " : "static");
|
|
@@ -675,11 +439,7 @@ function lexer(src, filename = "anonymous") {
|
|
|
675
439
|
const hasSpace = afterRuntime.startsWith(" ");
|
|
676
440
|
const hasLogic = hasSpace ? afterRuntime.slice(1).startsWith("${") : afterRuntime.startsWith("${");
|
|
677
441
|
|
|
678
|
-
const isMainIdentifier =
|
|
679
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
680
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
681
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
682
|
-
);
|
|
442
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
683
443
|
|
|
684
444
|
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
685
445
|
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, hasSpace ? "runtime " : "runtime");
|
|
@@ -688,213 +448,126 @@ function lexer(src, filename = "anonymous") {
|
|
|
688
448
|
}
|
|
689
449
|
}
|
|
690
450
|
|
|
691
|
-
// LOGIC BLOCKS (${ ... }$)
|
|
692
|
-
if (char === "$" && next === "{"
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
let internalString = null;
|
|
699
|
-
let foundClosing = false;
|
|
451
|
+
// LOGIC BLOCKS (${ ... }$) — explicit: static/runtime ${ }$ shorthand: ${ }$ = static ${ }$
|
|
452
|
+
if (char === "$" && next === "{") {
|
|
453
|
+
{
|
|
454
|
+
const hasExplicitKeyword = last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD;
|
|
455
|
+
if (!hasExplicitKeyword) addToken(TOKEN_TYPES.STATIC_KEYWORD, "static");
|
|
456
|
+
addToken(TOKEN_TYPES.LOGIC_OPEN, "${");
|
|
457
|
+
i += 2;
|
|
700
458
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
459
|
+
let logicCode = "";
|
|
460
|
+
let depth = 0;
|
|
461
|
+
let internalString = null;
|
|
704
462
|
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
i
|
|
708
|
-
braceDepth = 0;
|
|
709
|
-
foundClosing = true;
|
|
710
|
-
break;
|
|
711
|
-
}
|
|
463
|
+
while (i < src.length) {
|
|
464
|
+
const c = src[i];
|
|
465
|
+
const n = src[i + 1];
|
|
712
466
|
|
|
713
|
-
|
|
714
|
-
if (c === "
|
|
715
|
-
|
|
716
|
-
i += 2;
|
|
717
|
-
continue;
|
|
467
|
+
// Close condition: }$ at depth 0, not followed by { (}${ is a template expression boundary)
|
|
468
|
+
if (c === "}" && n === "$" && !internalString && depth === 0 && src[i + 2] !== "{") {
|
|
469
|
+
break;
|
|
718
470
|
}
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
logicCode += src[i];
|
|
726
|
-
i++;
|
|
471
|
+
|
|
472
|
+
if (internalString) {
|
|
473
|
+
if (c === "\\" && (n === internalString || n === "\\")) {
|
|
474
|
+
logicCode += c + n;
|
|
475
|
+
i += 2;
|
|
476
|
+
continue;
|
|
727
477
|
}
|
|
728
|
-
|
|
729
|
-
}
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
i += 2;
|
|
737
|
-
break;
|
|
478
|
+
if (c === internalString) internalString = null;
|
|
479
|
+
} else {
|
|
480
|
+
if (c === "/" && n === "/") {
|
|
481
|
+
logicCode += c + n;
|
|
482
|
+
i += 2;
|
|
483
|
+
while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
|
|
484
|
+
logicCode += src[i];
|
|
485
|
+
i++;
|
|
738
486
|
}
|
|
739
|
-
|
|
740
|
-
i++;
|
|
487
|
+
continue;
|
|
741
488
|
}
|
|
742
|
-
|
|
489
|
+
if (c === "/" && n === "*") {
|
|
490
|
+
logicCode += c + n;
|
|
491
|
+
i += 2;
|
|
492
|
+
while (i < src.length) {
|
|
493
|
+
if (src[i] === "*" && src[i + 1] === "/") {
|
|
494
|
+
logicCode += "*/";
|
|
495
|
+
i += 2;
|
|
496
|
+
break;
|
|
497
|
+
}
|
|
498
|
+
logicCode += src[i];
|
|
499
|
+
i++;
|
|
500
|
+
}
|
|
501
|
+
continue;
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
if (c === "\"" || c === "'" || c === "`") internalString = c;
|
|
505
|
+
else if (c === "{") depth++;
|
|
506
|
+
else if (c === "}") depth--;
|
|
743
507
|
}
|
|
744
508
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
else if (c === "}") braceDepth--;
|
|
509
|
+
logicCode += c;
|
|
510
|
+
i++;
|
|
748
511
|
}
|
|
749
512
|
|
|
750
|
-
logicCode
|
|
751
|
-
i++;
|
|
752
|
-
}
|
|
513
|
+
addToken(TOKEN_TYPES.LOGIC, logicCode);
|
|
753
514
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
range: {
|
|
759
|
-
start: { line: startLine, character: startCharacter },
|
|
760
|
-
end: { line: startLine, character: startCharacter + 2 }
|
|
761
|
-
}
|
|
762
|
-
});
|
|
763
|
-
}
|
|
515
|
+
if (i < src.length && src[i] === "}" && src[i + 1] === "$") {
|
|
516
|
+
addToken(TOKEN_TYPES.LOGIC_CLOSE, "}$");
|
|
517
|
+
i += 2;
|
|
518
|
+
}
|
|
764
519
|
|
|
765
|
-
|
|
766
|
-
|
|
520
|
+
continue;
|
|
521
|
+
}
|
|
767
522
|
}
|
|
768
523
|
|
|
769
524
|
// SINGLE-CHAR MARKERS
|
|
770
525
|
if (char === "[") {
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
isInHeader = true;
|
|
776
|
-
}
|
|
526
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
|
|
527
|
+
isInHeader = true;
|
|
528
|
+
pendingSmarkRaw = false;
|
|
529
|
+
hasSmarkRaw = false;
|
|
777
530
|
i++;
|
|
778
531
|
continue;
|
|
779
532
|
}
|
|
780
|
-
if (char === "_" && next === "@") {
|
|
781
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
782
|
-
addToken(TOKEN_TYPES.TEXT, "_@");
|
|
783
|
-
} else {
|
|
784
|
-
const lastRealType = last_non_junk_type;
|
|
785
|
-
addToken(TOKEN_TYPES.CLOSE_AT, "_@");
|
|
786
|
-
// Removed delimiter stack check
|
|
787
|
-
if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
|
|
788
|
-
isInAtBlockBody = false;
|
|
789
|
-
isInHeader = false;
|
|
790
|
-
isInAtBlockHeader = false;
|
|
791
|
-
}
|
|
792
|
-
}
|
|
793
|
-
i += 2;
|
|
794
|
-
continue;
|
|
795
|
-
}
|
|
796
533
|
if (char === "]") {
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
}
|
|
803
|
-
i++;
|
|
804
|
-
continue;
|
|
805
|
-
}
|
|
806
|
-
if (char === "(") {
|
|
807
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
808
|
-
addToken(TOKEN_TYPES.TEXT, "(");
|
|
809
|
-
parenDepth++;
|
|
810
|
-
} else {
|
|
811
|
-
addToken(TOKEN_TYPES.OPEN_PAREN, "(");
|
|
812
|
-
parenDepth++;
|
|
813
|
-
}
|
|
814
|
-
i++;
|
|
815
|
-
continue;
|
|
816
|
-
}
|
|
817
|
-
if (char === ")") {
|
|
818
|
-
if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
|
|
819
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
820
|
-
parenDepth--;
|
|
821
|
-
} else if (parenDepth > 0) {
|
|
822
|
-
// This ends the content part if depth drops to 0
|
|
823
|
-
parenDepth--;
|
|
824
|
-
if (parenDepth === 0) {
|
|
825
|
-
addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
|
|
826
|
-
if (isInInlineHead) {
|
|
827
|
-
isInInlineHead = false;
|
|
828
|
-
isInHeader = false;
|
|
829
|
-
}
|
|
830
|
-
} else {
|
|
831
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
832
|
-
}
|
|
833
|
-
} else {
|
|
834
|
-
addToken(TOKEN_TYPES.TEXT, ")");
|
|
534
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
|
|
535
|
+
isInHeader = false;
|
|
536
|
+
if (hasSmarkRaw) {
|
|
537
|
+
isRawContent = true;
|
|
538
|
+
hasSmarkRaw = false;
|
|
835
539
|
}
|
|
540
|
+
pendingSmarkRaw = false;
|
|
836
541
|
i++;
|
|
837
542
|
continue;
|
|
838
543
|
}
|
|
839
544
|
if (char === ":") {
|
|
840
|
-
|
|
841
|
-
|
|
545
|
+
const colonAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
546
|
+
if (colonAllowed.includes(last_non_junk_type)) {
|
|
547
|
+
addToken(TOKEN_TYPES.COLON, ":");
|
|
548
|
+
isInHeader = true;
|
|
842
549
|
} else {
|
|
843
|
-
|
|
844
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
845
|
-
addToken(TOKEN_TYPES.COLON, ":");
|
|
846
|
-
isInHeader = true;
|
|
847
|
-
} else {
|
|
848
|
-
addToken(TOKEN_TYPES.TEXT, ":");
|
|
849
|
-
}
|
|
550
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
850
551
|
}
|
|
851
552
|
i++;
|
|
852
553
|
continue;
|
|
853
554
|
}
|
|
854
555
|
if (char === "=") {
|
|
855
|
-
|
|
856
|
-
|
|
556
|
+
const eqAllowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
557
|
+
if (eqAllowed.includes(last_non_junk_type)) {
|
|
558
|
+
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
857
559
|
} else {
|
|
858
|
-
|
|
859
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
860
|
-
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
861
|
-
} else {
|
|
862
|
-
addToken(TOKEN_TYPES.TEXT, "=");
|
|
863
|
-
}
|
|
560
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
864
561
|
}
|
|
865
562
|
i++;
|
|
866
563
|
continue;
|
|
867
564
|
}
|
|
868
565
|
if (char === ",") {
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
const allowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
873
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
874
|
-
addToken(TOKEN_TYPES.COMMA, ",");
|
|
875
|
-
} else {
|
|
876
|
-
addToken(TOKEN_TYPES.TEXT, ",");
|
|
877
|
-
}
|
|
878
|
-
}
|
|
879
|
-
i++;
|
|
880
|
-
continue;
|
|
881
|
-
}
|
|
882
|
-
if (char === ";") {
|
|
883
|
-
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
884
|
-
addToken(TOKEN_TYPES.TEXT, ";");
|
|
566
|
+
const commaAllowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.PREFIX_CLOSE, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.LOGIC_CLOSE, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
567
|
+
if (commaAllowed.includes(last_non_junk_type)) {
|
|
568
|
+
addToken(TOKEN_TYPES.COMMA, ",");
|
|
885
569
|
} else {
|
|
886
|
-
|
|
887
|
-
if (allowed.includes(last_non_junk_type)) {
|
|
888
|
-
addToken(TOKEN_TYPES.SEMICOLON, ";");
|
|
889
|
-
// ONLY trigger body mode if we were actually in an At-Block header
|
|
890
|
-
if (isInAtBlockHeader) {
|
|
891
|
-
isInHeader = false;
|
|
892
|
-
isInAtBlockHeader = false;
|
|
893
|
-
isInAtBlockBody = true;
|
|
894
|
-
}
|
|
895
|
-
} else {
|
|
896
|
-
addToken(TOKEN_TYPES.TEXT, ";");
|
|
897
|
-
}
|
|
570
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
898
571
|
}
|
|
899
572
|
i++;
|
|
900
573
|
continue;
|
|
@@ -907,7 +580,7 @@ function lexer(src, filename = "anonymous") {
|
|
|
907
580
|
}
|
|
908
581
|
}
|
|
909
582
|
if (char === "\"" || char === "'") {
|
|
910
|
-
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET
|
|
583
|
+
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET];
|
|
911
584
|
const wasValueTrigger = valTriggers.includes(last_non_junk_type);
|
|
912
585
|
addToken(TOKEN_TYPES.QUOTE, char);
|
|
913
586
|
i++;
|
|
@@ -923,28 +596,22 @@ function lexer(src, filename = "anonymous") {
|
|
|
923
596
|
// This is the "Fallback" mode where we scan for identifiers, keys, or values.
|
|
924
597
|
// It uses lookahead and context variables to guess the role of a word.
|
|
925
598
|
let word = "";
|
|
926
|
-
// Only Blocks ([ ]) allow ':' in their main identifier.
|
|
927
|
-
// At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
|
|
928
599
|
const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
|
|
600
|
+
const isInNormalText = !isInHeader;
|
|
929
601
|
|
|
930
|
-
let stopChars = "[]
|
|
931
|
-
if (isStartOfBlockId
|
|
602
|
+
let stopChars = "[]{}:=,\"'#\\ \t\n\r!";
|
|
603
|
+
if (isStartOfBlockId) {
|
|
932
604
|
stopChars = stopChars.replace(":", "");
|
|
933
605
|
}
|
|
934
|
-
const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody;
|
|
935
606
|
if (isInNormalText) {
|
|
936
|
-
stopChars = "[]
|
|
607
|
+
stopChars = "[]\\#\n\r"; // In normal text, stop only at block markers, escapes, comments and newlines
|
|
937
608
|
}
|
|
938
609
|
|
|
939
610
|
while (i < src.length && !stopChars.includes(src[i])) {
|
|
940
611
|
// Stop ONLY if $ is followed by { (Logic block start)
|
|
941
612
|
if (src[i] === "$" && src[i + 1] === "{") break;
|
|
942
613
|
|
|
943
|
-
// Lookahead for
|
|
944
|
-
if (src[i] === "_" && src[i + 1] === "@") break;
|
|
945
|
-
if (src[i] === "@" && src[i + 1] === "_") break;
|
|
946
|
-
|
|
947
|
-
// Lookahead for 'static ${' or 'runtime ${' (only if we're not at the very start of the word scanning)
|
|
614
|
+
// Lookahead for 'static ${' or 'runtime ${' mid-word
|
|
948
615
|
if (word.length > 0) {
|
|
949
616
|
if (src[i] === "s" && src.slice(i, i + 7) === "static " && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
950
617
|
if (src[i] === "s" && src.slice(i, i + 6) === "static" && src[i + 6] === "$" && src[i + 7] === "{") break;
|
|
@@ -952,53 +619,47 @@ function lexer(src, filename = "anonymous") {
|
|
|
952
619
|
if (src[i] === "r" && src.slice(i, i + 7) === "runtime" && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
953
620
|
}
|
|
954
621
|
|
|
955
|
-
// Lookahead for -> marker in normal text
|
|
956
|
-
if (!isInHeader && src[i] === "-" && src[i + 1] === ">") break;
|
|
957
|
-
|
|
958
622
|
// Stop if we hit an ALLOWED prefix trigger
|
|
959
623
|
if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
960
624
|
if (isInHeader || isInNormalText) break;
|
|
961
625
|
}
|
|
962
|
-
if (src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") {
|
|
963
|
-
if (isInHeader) break;
|
|
964
|
-
}
|
|
965
626
|
word += src[i];
|
|
966
627
|
i++;
|
|
967
628
|
}
|
|
968
629
|
|
|
969
630
|
if (word.length > 0) {
|
|
970
631
|
// Guess role based on context
|
|
971
|
-
if (
|
|
972
|
-
// Inside Inline Content (raw text)
|
|
973
|
-
addToken(TOKEN_TYPES.TEXT, word);
|
|
974
|
-
} else if (isInHeader || isInInlineHead) {
|
|
632
|
+
if (isInHeader) {
|
|
975
633
|
// Inside a structural header context
|
|
976
|
-
const isMainIdentifier =
|
|
977
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
978
|
-
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
979
|
-
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
980
|
-
);
|
|
634
|
+
const isMainIdentifier = last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET;
|
|
981
635
|
|
|
982
636
|
if (isMainIdentifier) {
|
|
983
|
-
if (word === end_keyword) {
|
|
637
|
+
if (word === end_keyword || word.startsWith(end_keyword + ":")) {
|
|
984
638
|
addToken(TOKEN_TYPES.END_KEYWORD, word);
|
|
985
639
|
}
|
|
986
640
|
else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
|
|
987
641
|
else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
|
|
988
642
|
else if (word === "slot") addToken(TOKEN_TYPES.SLOT_KEYWORD, word);
|
|
989
643
|
else if (word === "for-each") addToken(TOKEN_TYPES.FOR_EACH, word);
|
|
990
|
-
else
|
|
644
|
+
else {
|
|
645
|
+
addToken(TOKEN_TYPES.IDENTIFIER, word);
|
|
646
|
+
}
|
|
991
647
|
} else {
|
|
992
648
|
// Use lookahead to distinguish KEY from VALUE
|
|
993
649
|
const p = peekStructural(i);
|
|
994
650
|
if (p === ":") {
|
|
995
651
|
addToken(TOKEN_TYPES.KEY, word);
|
|
652
|
+
if (word === "smark-raw") pendingSmarkRaw = true;
|
|
996
653
|
} else if (word === "static") {
|
|
997
654
|
addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
|
|
998
655
|
} else if (word === "runtime") {
|
|
999
656
|
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
|
|
1000
657
|
} else {
|
|
1001
658
|
addToken(TOKEN_TYPES.VALUE, word);
|
|
659
|
+
if (pendingSmarkRaw) {
|
|
660
|
+
if (word === "true") hasSmarkRaw = true;
|
|
661
|
+
pendingSmarkRaw = false;
|
|
662
|
+
}
|
|
1002
663
|
}
|
|
1003
664
|
}
|
|
1004
665
|
} else {
|
|
@@ -1024,6 +685,193 @@ function lexer(src, filename = "anonymous") {
|
|
|
1024
685
|
return tokens;
|
|
1025
686
|
}
|
|
1026
687
|
|
|
688
|
+
/**
 * Returns the text belonging to a color tag.
 *
 * This implementation applies no styling: `color` is not used and the text is
 * handed back unchanged.
 *
 * @param {string} color - The color name (red, green, yellow, blue, magenta, or cyan). Not used here.
 * @param {string} text - The text to return. An empty string is a valid value.
 * @returns {string} - The text, unchanged.
 * @throws {Error} - If `text` is `undefined` or `null`.
 */
function colorize(color, text) {
  // Only a missing argument is an error. A plain falsy check would also reject
  // "" and make an empty color tag crash message formatting.
  if (text === undefined || text === null) {
    throw new Error("argument 'text' is not defined.");
  }
  return text;
}
|
|
700
|
+
|
|
701
|
+
/**
|
|
702
|
+
* SomMark Errors
|
|
703
|
+
* Handles formatting and throwing errors with beautiful CLI coloring and pointers.
|
|
704
|
+
*/
|
|
705
|
+
|
|
706
|
+
// ========================================================================== //
|
|
707
|
+
// Message Formatting //
|
|
708
|
+
// ========================================================================== //
|
|
709
|
+
|
|
710
|
+
/**
 * Expands the message markup into a plain string.
 *
 * Supported markup:
 * - {line}          : a horizontal rule (90 dashes) on its own line
 * - {N}             : a line break
 * - <$color: Text$> : Text, trimmed and passed through colorize(color, Text)
 *
 * Empty lines are removed from the result and the whole string is trimmed.
 *
 * @param {string|string[]} text - The message, or a list of message parts that are concatenated.
 *   `null`/`undefined` is treated as an empty message; other values are converted with String().
 * @returns {string} - The final formatted string.
 */
function formatMessage(text) {
  const horizontal_rule = "\n" + colorize("blue", "-".repeat(90)) + "\n";
  const pattern = /<\$([^:]+):([\s\S]*?)\$>/g;

  let output;
  if (Array.isArray(text)) {
    output = text.join("");
  } else if (text === undefined || text === null) {
    output = "";
  } else {
    output = String(text);
  }

  // Apply {line} before color tags so the rule is never nested inside a color wrapper.
  output = output.replaceAll("{line}", horizontal_rule);
  output = output.replace(pattern, (match, color, content) => {
    const inner = content.trim();
    // An empty tag (e.g. an interpolated value that turned out empty) renders as
    // nothing instead of being handed to colorize, which may reject empty text.
    return inner === "" ? "" : colorize(color, inner);
  });
  output = output.replaceAll("{N}", "\n");

  return output
    .split("\n")
    .filter(line => line !== "")
    .join("\n")
    .trim();
}
|
|
743
|
+
|
|
744
|
+
/**
 * Builds the parts of an error report that quotes the offending source line.
 *
 * The report contains the source line at `range.start.line`, a `^` pointer
 * padded to `range.start.character`, the error type, the message, and a
 * human-readable location (line numbers are shown 1-based). The parts still
 * contain message markup ({line}, {N}, <$color:...$>).
 *
 * NOTE(review): the quoted source line is inserted verbatim, so markup-like
 * text inside it would be expanded by a later formatting pass — confirm
 * whether that matters for callers.
 *
 * @param {string} src - The original code being parsed.
 * @param {Object} range - The error location: `start` (and optionally `end`) with 0-based `line` and `character`.
 *   A missing `end` is treated as equal to `start`.
 * @param {string|null} filename - The name of the file (optional).
 * @param {string|string[]} message - The error message to show.
 * @param {string} typeName - The type of error (e.g., "Lexer" or "Parser").
 * @returns {string|string[]} - The list of report parts, or `message` unchanged
 *   when `src`, `range` or `range.start` is missing.
 */
function formatErrorWithContext(src, range, filename, message, typeName) {
  if (!src || !range || !range.start) return message;

  const start = range.start;
  // Ranges without an end describe a single point; do not crash on them.
  const end = range.end || start;

  // Accept both LF and CRLF sources so the quoted line carries no stray "\r".
  const lines = src.split(/\r?\n/);
  const lineContent = lines[start.line] || "";

  // String.prototype.repeat throws on negative or infinite counts; clamp instead.
  const rawColumn = Number(start.character);
  const column = Number.isFinite(rawColumn) && rawColumn > 0 ? Math.floor(rawColumn) : 0;
  const pointerPadding = " ".repeat(column);
  const sourceLabel = filename ? ` [${filename}]` : "";

  const rangeInfo =
    start.line === end.line
      ? `from column <$yellow:${start.character}$> to <$yellow:${end.character}$>`
      : `from line <$yellow:${start.line + 1}$>, column <$yellow:${start.character}$> to line <$yellow:${end.line + 1}$>, column <$yellow:${end.character}$>`;

  return [
    `{line}<$red:Here where error occurred${sourceLabel}:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}`,
    `<$red:${typeName} Error:$> `,
    ...(Array.isArray(message) ? message : [message]),
    `{N}at line <$yellow:${start.line + 1}$>, ${rangeInfo}{N}`,
    `{line}`
  ];
}
|
|
779
|
+
|
|
780
|
+
// ========================================================================== //
|
|
781
|
+
// Error Classes //
|
|
782
|
+
// ========================================================================== //
|
|
783
|
+
|
|
784
|
+
/**
 * Common base for the SomMark error classes.
 * Its `message` is the formatted error name on one line followed by the formatted message text.
 */
class CustomError extends Error {
  /**
   * @param {string|string[]} message - Description of the failure (may contain message markup).
   * @param {string} name - Label stored as the error's `name` and shown in the header line.
   */
  constructor(message, name) {
    super(message);
    this.name = name;

    // Header first, then body: both go through the same markup formatter.
    const header = formatMessage(`<$cyan:[${name}]$>:`);
    const body = formatMessage(message);
    this.message = header + "\n" + body;

    // captureStackTrace is not available on every engine.
    if (!Error.captureStackTrace) return;
    Error.captureStackTrace(this, this.constructor);
  }
}
|
|
801
|
+
|
|
802
|
+
/** Error labelled "Parser Error". */
class ParserError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "Parser Error";
    super(message, label);
  }
}
|
|
805
|
+
|
|
806
|
+
/** Error labelled "Lexer Error". */
class LexerError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "Lexer Error";
    super(message, label);
  }
}
|
|
809
|
+
|
|
810
|
+
/** Error labelled "Transpiler Error". */
class TranspilerError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "Transpiler Error";
    super(message, label);
  }
}
|
|
813
|
+
|
|
814
|
+
/** Error labelled "CLI Error". */
class CLIError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "CLI Error";
    super(message, label);
  }
}
|
|
817
|
+
|
|
818
|
+
/** Error labelled "Runtime Error". */
class RuntimeError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "Runtime Error";
    super(message, label);
  }
}
|
|
821
|
+
|
|
822
|
+
/** Error labelled "SomMark Error". */
class SommarkError extends CustomError {
  /** @param {string|string[]} message - Description of the failure. */
  constructor(message) {
    const label = "SomMark Error";
    super(message, label);
  }
}
|
|
825
|
+
|
|
826
|
+
// ========================================================================== //
|
|
827
|
+
// Error Dispatcher (Helper) //
|
|
828
|
+
// ========================================================================== //
|
|
829
|
+
|
|
830
|
+
/**
 * Builds a thrower bound to one error category.
 *
 * The returned function accepts a message (a string, or a non-empty array of
 * message parts) and an optional context object. When the context carries both
 * `src` and `range`, the message is first expanded with
 * formatErrorWithContext(src, range, filename, message, typeLabel).
 * It then throws the formatted `message` string of the category's error class
 * (the string itself, not the Error object).
 * Any other message value — for example an empty array — makes the call a no-op.
 *
 * @param {string} type - The category of error: 'parser', 'transpiler', 'lexer', 'cli', 'runtime' or 'sommark'.
 * @returns {Function} - `(errorMessage, context = null) => void`; throws for usable messages.
 */
function getError(type) {
  const isUsableMessage = candidate =>
    (Array.isArray(candidate) && candidate.length > 0) || typeof candidate === "string";

  // Human-readable label per category, used in the contextual report.
  const labelByType = {
    parser: "Parser",
    transpiler: "Transpiler",
    lexer: "Lexer",
    cli: "CLI",
    runtime: "Runtime",
    sommark: "SomMark"
  };

  // Error class per category.
  const classByType = {
    parser: ParserError,
    transpiler: TranspilerError,
    lexer: LexerError,
    cli: CLIError,
    runtime: RuntimeError,
    sommark: SommarkError
  };

  return (errorMessage, context = null) => {
    if (!isUsableMessage(errorMessage)) return;

    const hasLocation = context && context.src && context.range;
    const report = hasLocation
      ? formatErrorWithContext(context.src, context.range, context.filename, errorMessage, labelByType[type])
      : errorMessage;

    const ErrorClass = classByType[type];
    throw new ErrorClass(report).message;
  };
}
|
|
871
|
+
|
|
872
|
+
/** Helper to throw Runtime or Module errors. */
|
|
873
|
+
const runtimeError = getError("runtime");
|
|
874
|
+
|
|
1027
875
|
const lexSync = (src, filename = "anonymous") => {
|
|
1028
876
|
if (src === undefined || src === null) {
|
|
1029
877
|
runtimeError([`{line}<$red:Missing Source:$> <$yellow:The 'src' argument is required for tokenization.$>{line}`]);
|