sommark 4.3.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/cli.mjs +9 -0
- package/cli/commands/bundle.js +144 -0
- package/cli/commands/help.js +4 -0
- package/cli/constants.js +1 -1
- package/core/evaluator.stub.js +44 -0
- package/core/helpers/lib.js +1 -1
- package/core/transpiler.js +94 -9
- package/dist/sommark.browser.js +98 -11
- package/dist/sommark.browser.lite.js +13526 -0
- package/dist/sommark.lexer.js +1039 -0
- package/dist/sommark.parser.js +2521 -0
- package/index.shared.js +3 -1
- package/package.json +7 -4
|
@@ -0,0 +1,1039 @@
|
|
|
1
|
+
/**
 * Token Types in SomMark.
 * These represent the basic lexical atoms identified by the lexer.
 * Every key maps to a string that is identical to the key itself.
 *
 * @constant {Object}
 * @property {string} OPEN_BRACKET - '[' char.
 * @property {string} CLOSE_BRACKET - ']' char.
 * @property {string} END_KEYWORD - 'end' value.
 * @property {string} IDENTIFIER - Block or inline name (e.g. 'Person', 'import', '$use-module').
 * @property {string} EQUAL - '=' char.
 * @property {string} VALUE - Data values. Encapsulates Quoted Strings ("...") and Prefix Layers (js{}, p{}).
 * @property {string} TEXT - Plain unformatted text content.
 * @property {string} THIN_ARROW - '->' sequence.
 * @property {string} OPEN_PAREN - '(' char.
 * @property {string} CLOSE_PAREN - ')' char.
 * @property {string} OPEN_AT - '@_' sequence (At-Block start).
 * @property {string} CLOSE_AT - '_@' sequence (At-Header end).
 * @property {string} COLON - ':' char.
 * @property {string} COMMA - ',' char.
 * @property {string} SEMICOLON - ';' char (At-Block separator).
 * @property {string} COMMENT - '#' comments.
 * @property {string} COMMENT_BLOCK - '###' comments.
 * @property {string} ESCAPE - '\' char. Used for literalizing structural chars like '\"' or '\['.
 * @property {string} QUOTE - '"' delimiter.
 * @property {string} EXCLAMATION_MARK - '!' char.
 * @property {string} IMPORT - 'import' keyword.
 * @property {string} USE_MODULE - '$use-module' keyword.
 * @property {string} PREFIX_JS - 'js{}' prefix layer.
 * @property {string} PREFIX_P - 'p{}' placeholder layer.
 * @property {string} PREFIX_V - 'v{}' local variable layer.
 * @property {string} SLOT_KEYWORD - Slot keyword token (presumably the 'slot' keyword; TODO confirm against the lexer).
 * @property {string} KEY - Quoted content that is followed by ':' or '=' inside a header or inline head.
 * @property {string} WHITESPACE - A single newline, or a run of spaces, tabs and carriage returns.
 * @property {string} STATIC_KEYWORD - 'static' keyword (the token value keeps one trailing space when present).
 * @property {string} RUNTIME_KEYWORD - 'runtime' keyword (the token value keeps one trailing space when present).
 * @property {string} LOGIC - Code found between '${' and '}$' after a static/runtime keyword.
 * @property {string} FOR_EACH - For-each token (presumably the 'for-each' keyword; TODO confirm against the lexer).
 * @property {string} EOF - End of File indicator.
 */
const TOKEN_TYPES = {
	OPEN_BRACKET: "OPEN_BRACKET",
	CLOSE_BRACKET: "CLOSE_BRACKET",
	END_KEYWORD: "END_KEYWORD",
	IMPORT: "IMPORT",
	USE_MODULE: "USE_MODULE",
	IDENTIFIER: "IDENTIFIER",
	EQUAL: "EQUAL",
	VALUE: "VALUE",
	QUOTE: "QUOTE",
	PREFIX_JS: "PREFIX_JS",
	PREFIX_P: "PREFIX_P",
	PREFIX_V: "PREFIX_V",
	TEXT: "TEXT",
	THIN_ARROW: "THIN_ARROW",
	OPEN_PAREN: "OPEN_PAREN",
	CLOSE_PAREN: "CLOSE_PAREN",
	OPEN_AT: "OPEN_AT",
	CLOSE_AT: "CLOSE_AT",
	COLON: "COLON",
	COMMA: "COMMA",
	SEMICOLON: "SEMICOLON",
	COMMENT: "COMMENT",
	COMMENT_BLOCK: "COMMENT_BLOCK",
	ESCAPE: "ESCAPE",
	EXCLAMATION_MARK: "EXCLAMATION_MARK",
	SLOT_KEYWORD: "SLOT_KEYWORD",
	KEY: "KEY",
	WHITESPACE: "WHITESPACE",
	STATIC_KEYWORD: "STATIC_KEYWORD",
	RUNTIME_KEYWORD: "RUNTIME_KEYWORD",
	LOGIC: "LOGIC",
	FOR_EACH: "FOR_EACH",
	EOF: "EOF"
};
|
|
68
|
+
|
|
69
|
+
/**
 * Node-kind labels.
 * Each constant names one kind of construct (block, text, inline, ...) so the
 * rest of the system can tell them apart.
 */
const BLOCK = "Block";
const TEXT = "Text";
const INLINE = "Inline";
const ATBLOCK = "AtBlock";
const COMMENT = "Comment";
const COMMENT_BLOCK = "CommentBlock";
const IMPORT = "Import";
const USE_MODULE = "$use-module";
const SLOT = "Slot";
const STATIC_LOGIC = "StaticLogic";
const RUNTIME_LOGIC = "RuntimeLogic";
const FOR_EACH = "ForEach";
|
|
85
|
+
|
|
86
|
+
/**
 * Separator labels.
 * Names for the punctuation that splits parts of the code (semicolons, commas
 * and colons), one per context in which the symbol can appear.
 */
const SEMICOLON = "Semicolon";
const BLOCKCOMMA = "Block-comma";
const ATBLOCKCOMMA = "Atblock-comma";
const INLINECOMMA = "Inline-comma";
const BLOCKCOLON = "Block-colon";
const ATBLOCKCOLON = "Atblock-colon";
const INLINECOLON = "Inline-colon";
|
|
96
|
+
|
|
97
|
+
/**
 * Human-readable names used inside error messages, so a report can say exactly
 * which part of the code is wrong.
 */
const block_id = "Block Identifier";
const block_value = "Block Value";
const block_key = "Block Key";
const block_end = "Block end";
const inline_id = "Inline Identifier";
const inline_text = "Inline Text";
const at_id = "At Identifier";
const at_value = "At Value";
const atblock_key = "AtBlock Key";
const at_end = "Atblock End";

/** Reserved keyword for closing blocks. */
const end_keyword = "end";
const slot_keyword = "slot";
const for_each_keyword = "for-each";
|
|
115
|
+
|
|
116
|
+
// Frozen, prototype-less namespace object that gathers every label constant
// under one name. `__proto__: null` is written in full on purpose: in an object
// literal it sets the prototype instead of creating an own property.
var labels = /*#__PURE__*/Object.freeze({
	__proto__: null,
	ATBLOCK,
	ATBLOCKCOLON,
	ATBLOCKCOMMA,
	BLOCK,
	BLOCKCOLON,
	BLOCKCOMMA,
	COMMENT,
	COMMENT_BLOCK,
	FOR_EACH,
	IMPORT,
	INLINE,
	INLINECOLON,
	INLINECOMMA,
	RUNTIME_LOGIC,
	SEMICOLON,
	SLOT,
	STATIC_LOGIC,
	TEXT,
	USE_MODULE,
	at_end,
	at_id,
	at_value,
	atblock_key,
	block_end,
	block_id,
	block_key,
	block_value,
	end_keyword,
	for_each_keyword,
	inline_id,
	inline_text,
	slot_keyword
});
|
|
151
|
+
|
|
152
|
+
/**
 * Returns the text of a color tag.
 *
 * In this build the function is a pass-through: `color` is accepted so call
 * sites keep the same shape, but it is not applied and `text` comes back
 * unchanged.
 *
 * @param {string} color - The requested color name (red, green, yellow, blue, magenta, or cyan). Unused here.
 * @param {string} text - The text to emit. An empty string is allowed.
 * @returns {string} - `text`, unchanged.
 * @throws {Error} - If `text` is `null` or `undefined`.
 */
function colorize(color, text) {
	// formatMessage passes trimmed tag content, which is "" for a tag such as
	// "<$red:$>". Only a truly missing argument is an error; rejecting every
	// falsy value would make error formatting itself throw.
	if (text === undefined || text === null) {
		throw new Error("argument 'text' is not defined.");
	}
	return text;
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* SomMark Errors
|
|
167
|
+
* Handles formatting and throwing errors with beautiful CLI coloring and pointers.
|
|
168
|
+
*/
|
|
169
|
+
|
|
170
|
+
// ========================================================================== //
|
|
171
|
+
// Message Formatting //
|
|
172
|
+
// ========================================================================== //
|
|
173
|
+
|
|
174
|
+
/**
 * Expands the small markup language used in SomMark messages.
 *
 * Recognised markers:
 *   - {line}          : a horizontal rule on its own line
 *   - {N}             : a line break
 *   - <$color: Text$> : `Text` (trimmed) passed through `colorize` with `color`
 *
 * After expansion, empty lines are dropped and the result is trimmed.
 *
 * @param {string|string[]} text - The message, or message parts that are concatenated without a separator.
 * @returns {string} - The expanded message.
 */
function formatMessage(text) {
	const RULE = "\n----------------------------------------------------------------------------------------------\n";
	const colorTag = /<\$([^:]+):([\s\S]*?)\$>/g;

	const raw = Array.isArray(text) ? text.join("") : text;

	// Color tags are resolved first, so a "{line}" or "{N}" that sits inside a
	// tag is still expanded by the two replacements that follow.
	const expanded = raw
		.replace(colorTag, (_whole, color, content) => colorize(color, content.trim()))
		.replaceAll("{line}", RULE)
		.replaceAll("{N}", "\n");

	const keptLines = [];
	for (const row of expanded.split("\n")) {
		if (row !== "") keptLines.push(row);
	}

	return keptLines.join("\n").trim();
}
|
|
206
|
+
|
|
207
|
+
/**
 * Builds a detailed error report showing where the error happened in the code.
 * The report contains the offending source line, a pointer (^) under the start
 * column, the message itself and a description of the affected range.
 *
 * The parts still contain message markup ({N}, {line}, <$color: ...$>); they are
 * meant to be expanded later by `formatMessage`.
 *
 * @param {string} src - The original code being parsed.
 * @param {Object} range - The location of the error. `range.start` is `{ line, character }` (both zero-based);
 *   `range.end` has the same shape and is optional — when missing, the range is treated as the single point `start`.
 * @param {string|null} filename - The name of the file (optional).
 * @param {string|string[]} message - The error message to show.
 * @param {string} typeName - The type of error (e.g., "Lexer" or "Parser").
 * @returns {string|string[]} - The list of report parts, or `message` unchanged when `src`, `range`
 *   or `range.start` is missing.
 */
function formatErrorWithContext(src, range, filename, message, typeName) {
	if (!src || !range || !range.start) return message;

	const { start } = range;
	// The guard above only guarantees `start`; fall back to it so a range without
	// `end` yields a report instead of a TypeError.
	const end = range.end ?? start;

	const lines = src.split("\n");
	const lineContent = lines[start.line] || "";
	// `repeat` throws a RangeError on negative counts, so clamp at zero.
	const pointerPadding = " ".repeat(Math.max(0, start.character));
	const sourceLabel = filename ? ` [${filename}]` : "";

	const rangeInfo =
		start.line === end.line
			? `from column <$yellow:${start.character}$> to <$yellow:${end.character}$>`
			: `from line <$yellow:${start.line + 1}$>, column <$yellow:${start.character}$> to line <$yellow:${end.line + 1}$>, column <$yellow:${end.character}$>`;

	return [
		`<$blue:{line}$><$red:Here where error occurred${sourceLabel}:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}{N}`,
		`<$red:${typeName} Error:$> `,
		...(Array.isArray(message) ? message : [message]),
		`{N}at line <$yellow:${start.line + 1}$>, ${rangeInfo}{N}`,
		"<$blue:{line}$>"
	];
}
|
|
242
|
+
|
|
243
|
+
// ========================================================================== //
|
|
244
|
+
// Error Classes //
|
|
245
|
+
// ========================================================================== //
|
|
246
|
+
|
|
247
|
+
/**
 * Base class for all SomMark errors.
 * The final `message` is a "[name]:" header line followed by the message body,
 * both run through `formatMessage`.
 */
class CustomError extends Error {
	/**
	 * Creates a new error.
	 *
	 * @param {string|string[]} message - The text describing what went wrong.
	 * @param {string} name - The name of the error type.
	 */
	constructor(message, name) {
		super(message);
		this.name = name;

		const header = formatMessage(`<$cyan:[${this.name}]$>:`);
		const body = formatMessage(message);
		this.message = header + "\n" + body;

		// Not every engine provides captureStackTrace; use it only when available.
		if (Error.captureStackTrace) {
			Error.captureStackTrace(this, this.constructor);
		}
	}
}
|
|
264
|
+
|
|
265
|
+
/** Error reported under the name "Parser Error". */
class ParserError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "Parser Error");
	}
}
|
|
268
|
+
|
|
269
|
+
/** Error reported under the name "Lexer Error". */
class LexerError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "Lexer Error");
	}
}
|
|
272
|
+
|
|
273
|
+
/** Error reported under the name "Transpiler Error". */
class TranspilerError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "Transpiler Error");
	}
}
|
|
276
|
+
|
|
277
|
+
/** Error reported under the name "CLI Error". */
class CLIError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "CLI Error");
	}
}
|
|
280
|
+
|
|
281
|
+
/** Error reported under the name "Runtime Error". */
class RuntimeError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "Runtime Error");
	}
}
|
|
284
|
+
|
|
285
|
+
/** Error reported under the name "SomMark Error". */
class SommarkError extends CustomError {
	/** @param {string|string[]} message - The text describing what went wrong. */
	constructor(message) {
		super(message, "SomMark Error");
	}
}
|
|
288
|
+
|
|
289
|
+
// ========================================================================== //
|
|
290
|
+
// Error Dispatcher (Helper) //
|
|
291
|
+
// ========================================================================== //
|
|
292
|
+
|
|
293
|
+
/**
 * Creates an error "dispatcher" for one error category.
 *
 * The returned function takes `(errorMessage, context = null)`:
 *   - `errorMessage` must be a string or a non-empty array; for anything else
 *     the dispatcher returns without throwing.
 *   - when `context` carries both `src` and `range`, the message is first
 *     expanded by `formatErrorWithContext` (using `context.filename` as well).
 *   - the value thrown is the formatted `message` STRING of the category's
 *     error class, not the error instance itself.
 *
 * @param {string} type - The category of error: 'parser', 'transpiler', 'lexer', 'cli', 'runtime' or 'sommark'.
 * @returns {Function} - A function that throws the formatted error message.
 */
function getError(type) {
	const displayNames = {
		parser: "Parser",
		transpiler: "Transpiler",
		lexer: "Lexer",
		cli: "CLI",
		runtime: "Runtime",
		sommark: "SomMark"
	};
	const errorClasses = {
		parser: ParserError,
		transpiler: TranspilerError,
		lexer: LexerError,
		cli: CLIError,
		runtime: RuntimeError,
		sommark: SommarkError
	};
	const isUsableMessage = candidate =>
		typeof candidate === "string" || (Array.isArray(candidate) && candidate.length > 0);

	return (errorMessage, context = null) => {
		if (!isUsableMessage(errorMessage)) return;

		const hasLocation = Boolean(context && context.src && context.range);
		const finalMessage = hasLocation
			? formatErrorWithContext(context.src, context.range, context.filename, errorMessage, displayNames[type])
			: errorMessage;

		// Build the error only to get its formatted message; the string is what gets thrown.
		const ErrorClass = errorClasses[type];
		const error = new ErrorClass(finalMessage);
		throw error.message;
	};
}
|
|
334
|
+
|
|
335
|
+
/**
 * Helper to throw Lexer errors.
 * Dispatcher built by getError("lexer"); call it as lexerError(message, context).
 * It throws the formatted message string, and does nothing when `message` is
 * neither a string nor a non-empty array.
 */
const lexerError = getError("lexer");

/**
 * Helper to throw Runtime or Module errors.
 * Dispatcher built by getError("runtime"); same calling convention as lexerError.
 */
const runtimeError = getError("runtime");
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* SomMark Lexer
|
|
343
|
+
*
|
|
344
|
+
* Transforms a raw SomMark source string into a stream of tokens.
|
|
345
|
+
* It uses a state-machine approach to handle complex contexts like At-Block bodies,
|
|
346
|
+
* quoted values, and hierarchical headers.
|
|
347
|
+
*
|
|
348
|
+
* @param {string} src - The raw SomMark source code.
|
|
349
|
+
* @param {string} [filename="anonymous"] - Source filename for error reporting.
|
|
350
|
+
* @returns {Array<Object>} Array of token objects.
|
|
351
|
+
*/
|
|
352
|
+
function lexer(src, filename = "anonymous") {
|
|
353
|
+
if (!src || typeof src !== "string") return [];
|
|
354
|
+
const tokens = [];
|
|
355
|
+
let last_non_junk_type = ""; // Tracks the last real token for context guessing
|
|
356
|
+
let i = 0;
|
|
357
|
+
let line = 0, character = 0;
|
|
358
|
+
|
|
359
|
+
// State Variables
|
|
360
|
+
let isInAtBlockBody = false;
|
|
361
|
+
let isInQuote = false;
|
|
362
|
+
let isInHeader = false; // Tracks if we are in a structural header context
|
|
363
|
+
let isInAtBlockHeader = false; // Specific for At-Block headers (@_ ... _@)
|
|
364
|
+
let isInInlineHead = false; // Specific for (key:val) after ->
|
|
365
|
+
let parenDepth = 0; // To track balanced parentheses in inlines
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* Adds a token to the stream and updates the scanner's position tracking.
|
|
369
|
+
*
|
|
370
|
+
* @param {string} type - The type of token (from TOKEN_TYPES).
|
|
371
|
+
* @param {string} value - The literal text content of the token.
|
|
372
|
+
*/
|
|
373
|
+
function addToken(type, value) {
|
|
374
|
+
const start = { line, character };
|
|
375
|
+
|
|
376
|
+
// Update position
|
|
377
|
+
const parts = value.split("\n");
|
|
378
|
+
if (parts.length > 1) {
|
|
379
|
+
line += parts.length - 1;
|
|
380
|
+
character = parts[parts.length - 1].length;
|
|
381
|
+
} else {
|
|
382
|
+
character += value.length;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
const end = { line, character };
|
|
386
|
+
tokens.push({
|
|
387
|
+
type,
|
|
388
|
+
value,
|
|
389
|
+
source: filename,
|
|
390
|
+
range: { start, end }
|
|
391
|
+
});
|
|
392
|
+
if (type !== TOKEN_TYPES.WHITESPACE && type !== TOKEN_TYPES.COMMENT) {
|
|
393
|
+
if (type !== TOKEN_TYPES.TEXT || value.trim() !== "") {
|
|
394
|
+
last_non_junk_type = type;
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
/**
|
|
400
|
+
* Looks ahead to find the next structural character, skipping whitespace and comments.
|
|
401
|
+
* Used for context-guessing (e.g., distinguishing KEY from VALUE).
|
|
402
|
+
*
|
|
403
|
+
* @param {number} start - Index to start peeking from.
|
|
404
|
+
* @returns {string|null} The next structural character or null if EOF.
|
|
405
|
+
*/
|
|
406
|
+
function peekStructural(start) {
|
|
407
|
+
let j = start;
|
|
408
|
+
while (j < src.length) {
|
|
409
|
+
const c = src[j];
|
|
410
|
+
if (c === " " || c === "\t" || c === "\n" || c === "\r") {
|
|
411
|
+
j++;
|
|
412
|
+
continue;
|
|
413
|
+
}
|
|
414
|
+
if (c === "#") {
|
|
415
|
+
while (j < src.length && src[j] !== "\n") j++;
|
|
416
|
+
continue;
|
|
417
|
+
}
|
|
418
|
+
if (c === "\\") {
|
|
419
|
+
// Escape sequence: jump over the backslash and the escaped char
|
|
420
|
+
j += 2;
|
|
421
|
+
continue;
|
|
422
|
+
}
|
|
423
|
+
return c;
|
|
424
|
+
}
|
|
425
|
+
return null;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
while (i < src.length) {
|
|
429
|
+
// --- PHASE 1: AT-BLOCK BODY MODE ---
|
|
430
|
+
// In this mode, we consume everything as raw text until we hit the @_ marker.
|
|
431
|
+
if (isInAtBlockBody) {
|
|
432
|
+
if (src[i] === "@" && src[i + 1] === "_") {
|
|
433
|
+
isInAtBlockBody = false;
|
|
434
|
+
} else {
|
|
435
|
+
let body = "";
|
|
436
|
+
while (i < src.length) {
|
|
437
|
+
// Handle escapes in At-Block Body
|
|
438
|
+
if (src[i] === "\\" && i + 1 < src.length) {
|
|
439
|
+
body += src[i + 1];
|
|
440
|
+
i += 2;
|
|
441
|
+
continue;
|
|
442
|
+
}
|
|
443
|
+
// Stop at end marker
|
|
444
|
+
if (src[i] === "@" && src[i + 1] === "_") {
|
|
445
|
+
break;
|
|
446
|
+
}
|
|
447
|
+
body += src[i];
|
|
448
|
+
i++;
|
|
449
|
+
}
|
|
450
|
+
if (body.length > 0) {
|
|
451
|
+
addToken(TOKEN_TYPES.TEXT, body);
|
|
452
|
+
}
|
|
453
|
+
continue;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
const char = src[i];
|
|
457
|
+
const next = src[i + 1];
|
|
458
|
+
|
|
459
|
+
// --- PHASE 2: QUOTE MODE ---
|
|
460
|
+
// Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
|
|
461
|
+
if (isInQuote) {
|
|
462
|
+
let quoteValue = "";
|
|
463
|
+
const quoteChar = tokens[tokens.length - 1].value;
|
|
464
|
+
while (i < src.length) {
|
|
465
|
+
if (src[i] === "\\" && i + 1 < src.length) {
|
|
466
|
+
// Inside quotes, we split escapes if we want to match reliability tests
|
|
467
|
+
if (quoteValue.length > 0) addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
468
|
+
addToken(TOKEN_TYPES.ESCAPE, "\\" + src[i + 1]);
|
|
469
|
+
quoteValue = "";
|
|
470
|
+
i += 2;
|
|
471
|
+
continue;
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
// Support Prefix Layers inside quotes!
|
|
475
|
+
if ((src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") || (src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
476
|
+
const isJS = (src[i] === "j");
|
|
477
|
+
const isV = (src[i] === "v");
|
|
478
|
+
if (quoteValue.length > 0) {
|
|
479
|
+
addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
480
|
+
quoteValue = "";
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
let braceDepth = 1;
|
|
484
|
+
let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
|
|
485
|
+
i += isJS ? 3 : 2;
|
|
486
|
+
|
|
487
|
+
let internalString = null;
|
|
488
|
+
while (i < src.length && braceDepth > 0) {
|
|
489
|
+
const c = src[i];
|
|
490
|
+
const n = src[i + 1];
|
|
491
|
+
if (internalString) {
|
|
492
|
+
if (c === "\\" && (n === internalString || n === "\\")) {
|
|
493
|
+
prefixValue += c + n;
|
|
494
|
+
i += 2;
|
|
495
|
+
continue;
|
|
496
|
+
}
|
|
497
|
+
if (c === internalString) internalString = null;
|
|
498
|
+
} else {
|
|
499
|
+
if (c === "\"" || c === "'") internalString = c;
|
|
500
|
+
else if (c === "{") braceDepth++;
|
|
501
|
+
else if (c === "}") braceDepth--;
|
|
502
|
+
}
|
|
503
|
+
prefixValue += c;
|
|
504
|
+
i++;
|
|
505
|
+
}
|
|
506
|
+
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
507
|
+
addToken(tokenType, prefixValue);
|
|
508
|
+
continue;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
if (src[i] === quoteChar) {
|
|
512
|
+
// Guess role based on next structural character
|
|
513
|
+
let nextStructural = peekStructural(i + 1);
|
|
514
|
+
let tokenType = (isInHeader || isInInlineHead) && (nextStructural === ":" || nextStructural === "=")
|
|
515
|
+
? TOKEN_TYPES.KEY
|
|
516
|
+
: TOKEN_TYPES.VALUE;
|
|
517
|
+
|
|
518
|
+
if (quoteValue.length > 0) addToken(tokenType, quoteValue);
|
|
519
|
+
addToken(TOKEN_TYPES.QUOTE, quoteChar);
|
|
520
|
+
isInQuote = false;
|
|
521
|
+
i++;
|
|
522
|
+
break;
|
|
523
|
+
}
|
|
524
|
+
quoteValue += src[i];
|
|
525
|
+
i++;
|
|
526
|
+
}
|
|
527
|
+
if (!isInQuote) continue;
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
// --- PHASE 3: STRUCTURAL PARSING ---
|
|
531
|
+
// Handles markers, whitespace, and structural symbols.
|
|
532
|
+
|
|
533
|
+
// WHITESPACE
|
|
534
|
+
if (char === "\n") {
|
|
535
|
+
addToken(TOKEN_TYPES.WHITESPACE, char);
|
|
536
|
+
i++;
|
|
537
|
+
continue;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
if (char === " " || char === "\t" || char === "\r") {
|
|
541
|
+
let ws = "";
|
|
542
|
+
while (i < src.length && (src[i] === " " || src[i] === "\t" || src[i] === "\r")) {
|
|
543
|
+
ws += src[i];
|
|
544
|
+
i++;
|
|
545
|
+
}
|
|
546
|
+
addToken(TOKEN_TYPES.WHITESPACE, ws);
|
|
547
|
+
continue;
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
// COMMENTS
|
|
551
|
+
if (char === "#") {
|
|
552
|
+
let comm = "";
|
|
553
|
+
// Check for Multiline Comment ### (must have no spaces)
|
|
554
|
+
if (src[i + 1] === "#" && src[i + 2] === "#") {
|
|
555
|
+
comm = "###";
|
|
556
|
+
i += 3;
|
|
557
|
+
while (i < src.length) {
|
|
558
|
+
if (src[i] === "#" && src[i + 1] === "#" && src[i + 2] === "#") {
|
|
559
|
+
comm += "###";
|
|
560
|
+
i += 3;
|
|
561
|
+
break;
|
|
562
|
+
}
|
|
563
|
+
comm += src[i];
|
|
564
|
+
i++;
|
|
565
|
+
}
|
|
566
|
+
addToken(TOKEN_TYPES.COMMENT_BLOCK, comm);
|
|
567
|
+
} else {
|
|
568
|
+
// Single line comment
|
|
569
|
+
while (i < src.length && src[i] !== "\n") {
|
|
570
|
+
comm += src[i];
|
|
571
|
+
i++;
|
|
572
|
+
}
|
|
573
|
+
addToken(TOKEN_TYPES.COMMENT, comm);
|
|
574
|
+
}
|
|
575
|
+
continue;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
// ESCAPE CHARACTER (Sequence-based)
|
|
579
|
+
if (char === "\\") {
|
|
580
|
+
const seq = i + 1 < src.length ? "\\" + src[i + 1] : "\\";
|
|
581
|
+
addToken(TOKEN_TYPES.ESCAPE, seq);
|
|
582
|
+
i += seq.length;
|
|
583
|
+
continue;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// PREFIX LAYERS (js{...} or p{...} or v{...})
|
|
587
|
+
if ((char === "j" && next === "s" && src[i + 2] === "{") || (char === "p" && next === "{") || (char === "v" && next === "{")) {
|
|
588
|
+
const isJS = (char === "j");
|
|
589
|
+
const isP = (char === "p");
|
|
590
|
+
const isV = (char === "v");
|
|
591
|
+
|
|
592
|
+
// Context Check
|
|
593
|
+
const isBlockHeader = isInHeader && !isInAtBlockHeader;
|
|
594
|
+
const isNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody && parenDepth === 0;
|
|
595
|
+
|
|
596
|
+
let allowed = false;
|
|
597
|
+
if (isJS && isBlockHeader) allowed = true;
|
|
598
|
+
if (isP && (isBlockHeader || isNormalText)) allowed = true;
|
|
599
|
+
if (isV && (isBlockHeader || isNormalText)) allowed = true;
|
|
600
|
+
|
|
601
|
+
if (allowed) {
|
|
602
|
+
let braceDepth = 1;
|
|
603
|
+
let prefixValue = isJS ? "js{" : (isV ? "v{" : "p{");
|
|
604
|
+
i += isJS ? 3 : 2;
|
|
605
|
+
|
|
606
|
+
let inString = null; // Track if we are inside " " or ' '
|
|
607
|
+
while (i < src.length && braceDepth > 0) {
|
|
608
|
+
const c = src[i];
|
|
609
|
+
const n = src[i + 1];
|
|
610
|
+
|
|
611
|
+
if (inString) {
|
|
612
|
+
if (c === "\\" && (n === inString || n === "\\")) {
|
|
613
|
+
prefixValue += c + n;
|
|
614
|
+
i += 2;
|
|
615
|
+
continue;
|
|
616
|
+
}
|
|
617
|
+
if (c === inString) inString = null;
|
|
618
|
+
} else {
|
|
619
|
+
if (c === "\"" || c === "'") inString = c;
|
|
620
|
+
else if (c === "{") braceDepth++;
|
|
621
|
+
else if (c === "}") braceDepth--;
|
|
622
|
+
}
|
|
623
|
+
prefixValue += c;
|
|
624
|
+
i++;
|
|
625
|
+
}
|
|
626
|
+
let tokenType = isJS ? TOKEN_TYPES.PREFIX_JS : (isV ? TOKEN_TYPES.PREFIX_V : TOKEN_TYPES.PREFIX_P);
|
|
627
|
+
addToken(tokenType, prefixValue);
|
|
628
|
+
continue;
|
|
629
|
+
}
|
|
630
|
+
// If not allowed, it will fall through to normal word scanning
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// MULTI-CHAR MARKERS
|
|
634
|
+
if (char === "@" && next === "_") {
|
|
635
|
+
addToken(TOKEN_TYPES.OPEN_AT, "@_");
|
|
636
|
+
i += 2;
|
|
637
|
+
isInHeader = true; // At-Blocks start with a header part
|
|
638
|
+
isInAtBlockHeader = true;
|
|
639
|
+
continue;
|
|
640
|
+
}
|
|
641
|
+
if (char === "-" && next === ">") {
|
|
642
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
643
|
+
addToken(TOKEN_TYPES.TEXT, "-");
|
|
644
|
+
i++; // Swallowed one char
|
|
645
|
+
} else {
|
|
646
|
+
addToken(TOKEN_TYPES.THIN_ARROW, "->");
|
|
647
|
+
i += 2;
|
|
648
|
+
isInInlineHead = true; // The following ( ) will be structural
|
|
649
|
+
}
|
|
650
|
+
continue;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// STATIC KEYWORD
|
|
654
|
+
if (char === "s" && src.slice(i, i + 6) === "static") {
|
|
655
|
+
const afterStatic = src.slice(i + 6);
|
|
656
|
+
const hasSpace = afterStatic.startsWith(" ");
|
|
657
|
+
const hasLogic = hasSpace ? afterStatic.slice(1).startsWith("${") : afterStatic.startsWith("${");
|
|
658
|
+
|
|
659
|
+
const isMainIdentifier = (
|
|
660
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
661
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
662
|
+
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
663
|
+
);
|
|
664
|
+
|
|
665
|
+
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
666
|
+
addToken(TOKEN_TYPES.STATIC_KEYWORD, hasSpace ? "static " : "static");
|
|
667
|
+
i += hasSpace ? 7 : 6;
|
|
668
|
+
continue;
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
// RUNTIME KEYWORD
|
|
673
|
+
if (char === "r" && src.slice(i, i + 7) === "runtime") {
|
|
674
|
+
const afterRuntime = src.slice(i + 7);
|
|
675
|
+
const hasSpace = afterRuntime.startsWith(" ");
|
|
676
|
+
const hasLogic = hasSpace ? afterRuntime.slice(1).startsWith("${") : afterRuntime.startsWith("${");
|
|
677
|
+
|
|
678
|
+
const isMainIdentifier = (
|
|
679
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
680
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
681
|
+
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
682
|
+
);
|
|
683
|
+
|
|
684
|
+
if ((hasLogic || isInHeader) && !isMainIdentifier) {
|
|
685
|
+
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, hasSpace ? "runtime " : "runtime");
|
|
686
|
+
i += hasSpace ? 8 : 7;
|
|
687
|
+
continue;
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
// LOGIC BLOCKS (${ ... }$)
|
|
692
|
+
if (char === "$" && next === "{" && (last_non_junk_type === TOKEN_TYPES.STATIC_KEYWORD || last_non_junk_type === TOKEN_TYPES.RUNTIME_KEYWORD)) {
|
|
693
|
+
const startLine = line;
|
|
694
|
+
const startCharacter = character;
|
|
695
|
+
i += 2;
|
|
696
|
+
let logicCode = "";
|
|
697
|
+
let braceDepth = 1;
|
|
698
|
+
let internalString = null;
|
|
699
|
+
let foundClosing = false;
|
|
700
|
+
|
|
701
|
+
while (i < src.length) {
|
|
702
|
+
const c = src[i];
|
|
703
|
+
const n = src[i + 1];
|
|
704
|
+
|
|
705
|
+
// Stop condition: }$ (only if not inside a JS string and at top-level brace depth)
|
|
706
|
+
if (c === "}" && n === "$" && !internalString && braceDepth === 1) {
|
|
707
|
+
i += 2;
|
|
708
|
+
braceDepth = 0;
|
|
709
|
+
foundClosing = true;
|
|
710
|
+
break;
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
if (internalString) {
|
|
714
|
+
if (c === "\\" && (n === internalString || n === "\\")) {
|
|
715
|
+
logicCode += c + n;
|
|
716
|
+
i += 2;
|
|
717
|
+
continue;
|
|
718
|
+
}
|
|
719
|
+
if (c === internalString) internalString = null;
|
|
720
|
+
} else {
|
|
721
|
+
if (c === "/" && n === "/") {
|
|
722
|
+
logicCode += c + n;
|
|
723
|
+
i += 2;
|
|
724
|
+
while (i < src.length && src[i] !== "\n" && src[i] !== "\r") {
|
|
725
|
+
logicCode += src[i];
|
|
726
|
+
i++;
|
|
727
|
+
}
|
|
728
|
+
continue;
|
|
729
|
+
}
|
|
730
|
+
if (c === "/" && n === "*") {
|
|
731
|
+
logicCode += c + n;
|
|
732
|
+
i += 2;
|
|
733
|
+
while (i < src.length) {
|
|
734
|
+
if (src[i] === "*" && src[i + 1] === "/") {
|
|
735
|
+
logicCode += "*/";
|
|
736
|
+
i += 2;
|
|
737
|
+
break;
|
|
738
|
+
}
|
|
739
|
+
logicCode += src[i];
|
|
740
|
+
i++;
|
|
741
|
+
}
|
|
742
|
+
continue;
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
if (c === "\"" || c === "'" || c === "`") internalString = c;
|
|
746
|
+
else if (c === "{") braceDepth++;
|
|
747
|
+
else if (c === "}") braceDepth--;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
logicCode += c;
|
|
751
|
+
i++;
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
if (!foundClosing) {
|
|
755
|
+
lexerError("Unclosed logic block. Expected '}$' to close the block starting with '${'.", {
|
|
756
|
+
src,
|
|
757
|
+
filename,
|
|
758
|
+
range: {
|
|
759
|
+
start: { line: startLine, character: startCharacter },
|
|
760
|
+
end: { line: startLine, character: startCharacter + 2 }
|
|
761
|
+
}
|
|
762
|
+
});
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
addToken(TOKEN_TYPES.LOGIC, logicCode);
|
|
766
|
+
continue;
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
// SINGLE-CHAR MARKERS
|
|
770
|
+
if (char === "[") {
|
|
771
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
772
|
+
addToken(TOKEN_TYPES.TEXT, "[");
|
|
773
|
+
} else {
|
|
774
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
|
|
775
|
+
isInHeader = true;
|
|
776
|
+
}
|
|
777
|
+
i++;
|
|
778
|
+
continue;
|
|
779
|
+
}
|
|
780
|
+
if (char === "_" && next === "@") {
|
|
781
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
782
|
+
addToken(TOKEN_TYPES.TEXT, "_@");
|
|
783
|
+
} else {
|
|
784
|
+
const lastRealType = last_non_junk_type;
|
|
785
|
+
addToken(TOKEN_TYPES.CLOSE_AT, "_@");
|
|
786
|
+
// Removed delimiter stack check
|
|
787
|
+
if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
|
|
788
|
+
isInAtBlockBody = false;
|
|
789
|
+
isInHeader = false;
|
|
790
|
+
isInAtBlockHeader = false;
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
i += 2;
|
|
794
|
+
continue;
|
|
795
|
+
}
|
|
796
|
+
if (char === "]") {
|
|
797
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
798
|
+
addToken(TOKEN_TYPES.TEXT, "]");
|
|
799
|
+
} else {
|
|
800
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
|
|
801
|
+
isInHeader = false;
|
|
802
|
+
}
|
|
803
|
+
i++;
|
|
804
|
+
continue;
|
|
805
|
+
}
|
|
806
|
+
if (char === "(") {
|
|
807
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
808
|
+
addToken(TOKEN_TYPES.TEXT, "(");
|
|
809
|
+
parenDepth++;
|
|
810
|
+
} else {
|
|
811
|
+
addToken(TOKEN_TYPES.OPEN_PAREN, "(");
|
|
812
|
+
parenDepth++;
|
|
813
|
+
}
|
|
814
|
+
i++;
|
|
815
|
+
continue;
|
|
816
|
+
}
|
|
817
|
+
if (char === ")") {
|
|
818
|
+
if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
|
|
819
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
820
|
+
parenDepth--;
|
|
821
|
+
} else if (parenDepth > 0) {
|
|
822
|
+
// This ends the content part if depth drops to 0
|
|
823
|
+
parenDepth--;
|
|
824
|
+
if (parenDepth === 0) {
|
|
825
|
+
addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
|
|
826
|
+
if (isInInlineHead) {
|
|
827
|
+
isInInlineHead = false;
|
|
828
|
+
isInHeader = false;
|
|
829
|
+
}
|
|
830
|
+
} else {
|
|
831
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
832
|
+
}
|
|
833
|
+
} else {
|
|
834
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
835
|
+
}
|
|
836
|
+
i++;
|
|
837
|
+
continue;
|
|
838
|
+
}
|
|
839
|
+
if (char === ":") {
|
|
840
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
841
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
842
|
+
} else {
|
|
843
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
844
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
845
|
+
addToken(TOKEN_TYPES.COLON, ":");
|
|
846
|
+
isInHeader = true;
|
|
847
|
+
} else {
|
|
848
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
i++;
|
|
852
|
+
continue;
|
|
853
|
+
}
|
|
854
|
+
if (char === "=") {
|
|
855
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
856
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
857
|
+
} else {
|
|
858
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
859
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
860
|
+
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
861
|
+
} else {
|
|
862
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
i++;
|
|
866
|
+
continue;
|
|
867
|
+
}
|
|
868
|
+
if (char === ",") {
|
|
869
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
870
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
871
|
+
} else {
|
|
872
|
+
const allowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
873
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
874
|
+
addToken(TOKEN_TYPES.COMMA, ",");
|
|
875
|
+
} else {
|
|
876
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
i++;
|
|
880
|
+
continue;
|
|
881
|
+
}
|
|
882
|
+
if (char === ";") {
|
|
883
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
884
|
+
addToken(TOKEN_TYPES.TEXT, ";");
|
|
885
|
+
} else {
|
|
886
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.VALUE, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.CLOSE_PAREN, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_V, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT, TOKEN_TYPES.LOGIC, TOKEN_TYPES.STATIC_KEYWORD, TOKEN_TYPES.RUNTIME_KEYWORD, TOKEN_TYPES.FOR_EACH];
|
|
887
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
888
|
+
addToken(TOKEN_TYPES.SEMICOLON, ";");
|
|
889
|
+
// ONLY trigger body mode if we were actually in an At-Block header
|
|
890
|
+
if (isInAtBlockHeader) {
|
|
891
|
+
isInHeader = false;
|
|
892
|
+
isInAtBlockHeader = false;
|
|
893
|
+
isInAtBlockBody = true;
|
|
894
|
+
}
|
|
895
|
+
} else {
|
|
896
|
+
addToken(TOKEN_TYPES.TEXT, ";");
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
i++;
|
|
900
|
+
continue;
|
|
901
|
+
}
|
|
902
|
+
if (char === "!") {
|
|
903
|
+
if (isInHeader) {
|
|
904
|
+
addToken(TOKEN_TYPES.EXCLAMATION_MARK, "!");
|
|
905
|
+
i++;
|
|
906
|
+
continue;
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
if (char === "\"" || char === "'") {
|
|
910
|
+
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET, TOKEN_TYPES.OPEN_AT];
|
|
911
|
+
const wasValueTrigger = valTriggers.includes(last_non_junk_type);
|
|
912
|
+
addToken(TOKEN_TYPES.QUOTE, char);
|
|
913
|
+
i++;
|
|
914
|
+
// Enable quote mode
|
|
915
|
+
// NOTE: We allow quotes basically anywhere in headers as values/keys
|
|
916
|
+
if (isInHeader || wasValueTrigger) {
|
|
917
|
+
isInQuote = true;
|
|
918
|
+
}
|
|
919
|
+
continue;
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
// --- PHASE 4: WORD / TEXT SCANNING ---
|
|
923
|
+
// This is the "Fallback" mode where we scan for identifiers, keys, or values.
|
|
924
|
+
// It uses lookahead and context variables to guess the role of a word.
|
|
925
|
+
let word = "";
|
|
926
|
+
// Only Blocks ([ ]) allow ':' in their main identifier.
|
|
927
|
+
// At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
|
|
928
|
+
const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
|
|
929
|
+
|
|
930
|
+
let stopChars = "[](){}:=;,@>\"'#\\ \t\n\r!";
|
|
931
|
+
if (isStartOfBlockId || (parenDepth > 0 && !isInInlineHead)) {
|
|
932
|
+
stopChars = stopChars.replace(":", "");
|
|
933
|
+
}
|
|
934
|
+
const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody;
|
|
935
|
+
if (isInNormalText) {
|
|
936
|
+
stopChars = "[]@()>_()\\#\n\r"; // In normal text, stop at markers, comments and newlines
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
while (i < src.length && !stopChars.includes(src[i])) {
|
|
940
|
+
// Stop ONLY if $ is followed by { (Logic block start)
|
|
941
|
+
if (src[i] === "$" && src[i + 1] === "{") break;
|
|
942
|
+
|
|
943
|
+
// Lookahead for At-Block markers (_@ or @_)
|
|
944
|
+
if (src[i] === "_" && src[i + 1] === "@") break;
|
|
945
|
+
if (src[i] === "@" && src[i + 1] === "_") break;
|
|
946
|
+
|
|
947
|
+
// Lookahead for 'static ${' or 'runtime ${' (only if we're not at the very start of the word scanning)
|
|
948
|
+
if (word.length > 0) {
|
|
949
|
+
if (src[i] === "s" && src.slice(i, i + 7) === "static " && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
950
|
+
if (src[i] === "s" && src.slice(i, i + 6) === "static" && src[i + 6] === "$" && src[i + 7] === "{") break;
|
|
951
|
+
if (src[i] === "r" && src.slice(i, i + 8) === "runtime " && src[i + 8] === "$" && src[i + 9] === "{") break;
|
|
952
|
+
if (src[i] === "r" && src.slice(i, i + 7) === "runtime" && src[i + 7] === "$" && src[i + 8] === "{") break;
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
// Lookahead for -> marker in normal text
|
|
956
|
+
if (!isInHeader && src[i] === "-" && src[i + 1] === ">") break;
|
|
957
|
+
|
|
958
|
+
// Stop if we hit an ALLOWED prefix trigger
|
|
959
|
+
if ((src[i] === "p" && src[i + 1] === "{") || (src[i] === "v" && src[i + 1] === "{")) {
|
|
960
|
+
if (isInHeader || isInNormalText) break;
|
|
961
|
+
}
|
|
962
|
+
if (src[i] === "j" && src[i + 1] === "s" && src[i + 2] === "{") {
|
|
963
|
+
if (isInHeader) break;
|
|
964
|
+
}
|
|
965
|
+
word += src[i];
|
|
966
|
+
i++;
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
if (word.length > 0) {
|
|
970
|
+
// Guess role based on context
|
|
971
|
+
if (parenDepth > 0 && !isInInlineHead) {
|
|
972
|
+
// Inside Inline Content (raw text)
|
|
973
|
+
addToken(TOKEN_TYPES.TEXT, word);
|
|
974
|
+
} else if (isInHeader || isInInlineHead) {
|
|
975
|
+
// Inside a structural header context
|
|
976
|
+
const isMainIdentifier = (
|
|
977
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
978
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
979
|
+
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
980
|
+
);
|
|
981
|
+
|
|
982
|
+
if (isMainIdentifier) {
|
|
983
|
+
if (word === end_keyword) {
|
|
984
|
+
addToken(TOKEN_TYPES.END_KEYWORD, word);
|
|
985
|
+
}
|
|
986
|
+
else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
|
|
987
|
+
else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
|
|
988
|
+
else if (word === "slot") addToken(TOKEN_TYPES.SLOT_KEYWORD, word);
|
|
989
|
+
else if (word === "for-each") addToken(TOKEN_TYPES.FOR_EACH, word);
|
|
990
|
+
else addToken(TOKEN_TYPES.IDENTIFIER, word);
|
|
991
|
+
} else {
|
|
992
|
+
// Use lookahead to distinguish KEY from VALUE
|
|
993
|
+
const p = peekStructural(i);
|
|
994
|
+
if (p === ":") {
|
|
995
|
+
addToken(TOKEN_TYPES.KEY, word);
|
|
996
|
+
} else if (word === "static") {
|
|
997
|
+
addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
|
|
998
|
+
} else if (word === "runtime") {
|
|
999
|
+
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
|
|
1000
|
+
} else {
|
|
1001
|
+
addToken(TOKEN_TYPES.VALUE, word);
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
} else {
|
|
1005
|
+
// Normal text
|
|
1006
|
+
if (word.trim() === "static") {
|
|
1007
|
+
addToken(TOKEN_TYPES.STATIC_KEYWORD, word);
|
|
1008
|
+
} else if (word.trim() === "runtime") {
|
|
1009
|
+
addToken(TOKEN_TYPES.RUNTIME_KEYWORD, word);
|
|
1010
|
+
} else {
|
|
1011
|
+
addToken(TOKEN_TYPES.TEXT, word);
|
|
1012
|
+
}
|
|
1013
|
+
}
|
|
1014
|
+
} else {
|
|
1015
|
+
// Fallback for any unhandled characters
|
|
1016
|
+
if (i < src.length) {
|
|
1017
|
+
addToken(TOKEN_TYPES.TEXT, src[i]);
|
|
1018
|
+
i++;
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
addToken(TOKEN_TYPES.EOF, "");
|
|
1024
|
+
return tokens;
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
/**
 * Synchronous tokenization entry point.
 *
 * Checks the `src` argument before handing it to `lexer`:
 *  - `undefined` / `null` is reported through `runtimeError` as a missing source;
 *  - any non-string value is reported through `runtimeError` as an invalid type.
 *
 * NOTE(review): both checks run one after the other, so if `runtimeError`
 * returns instead of throwing, a `null` source triggers both reports and
 * `lexer` is still called — confirm `runtimeError` always throws.
 *
 * @param {string} src - Source text to tokenize.
 * @param {string} [filename="anonymous"] - Name forwarded to `lexer` along with the source.
 * @returns {*} Whatever `lexer(src, filename)` returns.
 */
const lexSync = (src, filename = "anonymous") => {
	const sourceIsAbsent = src === null || src === undefined;
	if (sourceIsAbsent) {
		runtimeError([
			`{line}<$red:Missing Source:$> <$yellow:The 'src' argument is required for tokenization.$>{line}`
		]);
	}

	const sourceIsString = typeof src === "string";
	if (!sourceIsString) {
		runtimeError([
			`{line}<$red:Invalid Source Type:$> <$yellow:The 'src' argument must be a string, received ${typeof src}.$>{line}`
		]);
	}

	return lexer(src, filename);
};
|
|
1036
|
+
|
|
1037
|
+
/**
 * Promise-returning variant of `lexSync`.
 *
 * Being an `async` function, it wraps the result of `lexSync` in a promise;
 * anything `lexSync` throws surfaces as a rejection of that promise.
 *
 * @param {string} src - Source text to tokenize.
 * @param {string} [filename="anonymous"] - Name forwarded to `lexSync`.
 * @returns {Promise<*>} Resolves with the value returned by `lexSync(src, filename)`.
 */
const lex = async (src, filename = "anonymous") => {
	return lexSync(src, filename);
};
|
|
1038
|
+
|
|
1039
|
+
export { TOKEN_TYPES, labels, lex, lexSync };
|