sommark 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/cli/commands/build.js +2 -1
- package/cli/commands/init.js +2 -6
- package/cli/commands/list.js +17 -12
- package/cli/commands/print.js +7 -2
- package/cli/helpers/transpile.js +2 -1
- package/core/errors.js +22 -9
- package/core/labels.js +3 -0
- package/core/lexer.js +206 -567
- package/core/parser.js +139 -39
- package/core/pluginManager.js +33 -23
- package/core/plugins/comment-remover.js +3 -3
- package/core/plugins/module-system.js +163 -124
- package/core/plugins/raw-content-plugin.js +15 -9
- package/core/plugins/rules-validation-plugin.js +2 -2
- package/core/plugins/sommark-format.js +92 -72
- package/core/transpiler.js +70 -8
- package/coverage_test.js +21 -0
- package/helpers/utils.js +27 -0
- package/index.js +25 -16
- package/mappers/languages/html.js +5 -10
- package/package.json +1 -1
- package/v3-todo.smark +68 -70
- package/core/plugins/quote-escaper.js +0 -37
- package/format.js +0 -23
- package/unformatted.smark +0 -90
package/core/lexer.js
CHANGED
|
@@ -1,614 +1,253 @@
|
|
|
1
1
|
import TOKEN_TYPES from "./tokenTypes.js";
|
|
2
2
|
import peek from "../helpers/peek.js";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
at_id_2,
|
|
14
|
-
at_end,
|
|
15
|
-
end_keyword,
|
|
16
|
-
BLOCKCOMMA,
|
|
17
|
-
ATBLOCKCOMMA,
|
|
18
|
-
INLINECOMMA,
|
|
19
|
-
BLOCKCOLON,
|
|
20
|
-
ATBLOCKCOLON,
|
|
21
|
-
INLINECOLON
|
|
22
|
-
} from "./labels.js";
|
|
23
|
-
import { lexerError, sommarkError } from "./errors.js";
|
|
3
|
+
import { end_keyword } from "./labels.js";
|
|
4
|
+
import { lexerError } from "./errors.js";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* SomMark Lexer
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ========================================================================== //
|
|
11
|
+
// Helper Functions //
|
|
12
|
+
// ========================================================================== //
|
|
24
13
|
|
|
25
14
|
const atBlockEndRegex = new RegExp(`^@_\\s*${end_keyword}\\s*_@`);
|
|
15
|
+
|
|
16
|
+
// Checks if we reached the end of an At-Block
|
|
26
17
|
function isAtBlockEnd(input, index) {
|
|
27
18
|
const slice = typeof input === "string" ? input.slice(index, index + 100) : input.slice(index, index + 100).join("");
|
|
28
19
|
return atBlockEndRegex.test(slice);
|
|
29
20
|
}
|
|
30
21
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
22
|
+
// Collects characters inside a quoted string
|
|
23
|
+
function concatQuote(input, index) {
|
|
24
|
+
let text = "\"";
|
|
25
|
+
for (let i = index + 1; i < input.length; i++) {
|
|
26
|
+
const char = input[i];
|
|
27
|
+
if (char === "\\" && peek(input, i, 1) === "\"") {
|
|
28
|
+
text += "\\\"";
|
|
29
|
+
i++;
|
|
30
|
+
continue;
|
|
31
|
+
}
|
|
32
|
+
text += char;
|
|
33
|
+
if (char === "\"") return text;
|
|
34
34
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const updateColumn = (end = 0, textLength) => {
|
|
39
|
-
const start = end + 1;
|
|
40
|
-
const newEnd = start + textLength - 1;
|
|
41
|
-
return { start, end: newEnd };
|
|
42
|
-
};
|
|
35
|
+
lexerError(["[Lexer Error]: Unclosed quote"]);
|
|
36
|
+
return text;
|
|
37
|
+
}
|
|
43
38
|
|
|
44
|
-
|
|
39
|
+
// Collects plain text until a special character is found
|
|
40
|
+
function concatText(input, index, isInHeader, isInAtBlockBody, isLiberalValue = false) {
|
|
45
41
|
let text = "";
|
|
46
|
-
if (index >= input.length)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
break;
|
|
70
|
-
} else if (extraConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
|
|
42
|
+
if (index >= input.length) return text;
|
|
43
|
+
for (let i = index; i < input.length; i++) {
|
|
44
|
+
const char = input[i];
|
|
45
|
+
const stopConditions = [
|
|
46
|
+
["[", !isInAtBlockBody],
|
|
47
|
+
["(", !isInAtBlockBody],
|
|
48
|
+
["#", !isInAtBlockBody && !isLiberalValue],
|
|
49
|
+
["=", isInHeader && !isInAtBlockBody],
|
|
50
|
+
["\"", isInHeader],
|
|
51
|
+
["]", isInHeader],
|
|
52
|
+
[")", isInHeader],
|
|
53
|
+
["-", peek(input, i, 1) === ">" && (isInHeader || true)],
|
|
54
|
+
["@", peek(input, i, 1) === "_" && (!isInAtBlockBody || isAtBlockEnd(input, i))],
|
|
55
|
+
["_", peek(input, i, 1) === "@" && isInHeader],
|
|
56
|
+
["\\", true],
|
|
57
|
+
[":", isInHeader && !isInAtBlockBody],
|
|
58
|
+
[";", isInHeader],
|
|
59
|
+
[",", isInHeader]
|
|
60
|
+
];
|
|
61
|
+
let shouldStop = false;
|
|
62
|
+
for (const [stopChar, conditionMet] of stopConditions) {
|
|
63
|
+
if (conditionMet && input.substring(i, i + stopChar.length) === stopChar) {
|
|
64
|
+
shouldStop = true;
|
|
71
65
|
break;
|
|
72
66
|
}
|
|
73
|
-
text += char;
|
|
74
67
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
sommarkError([
|
|
78
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
79
|
-
"'input' must be an array and have to be not empty, 'index' must be a number value, and 'scope_state' ",
|
|
80
|
-
"must be a boolean.$>{line}."
|
|
81
|
-
]);
|
|
68
|
+
if (shouldStop) break;
|
|
69
|
+
text += char;
|
|
82
70
|
}
|
|
71
|
+
return text;
|
|
83
72
|
}
|
|
84
73
|
|
|
74
|
+
// Handles backslash escapes in the text
|
|
85
75
|
function concatEscape(input, index) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
"\n",
|
|
94
|
-
"\r",
|
|
95
|
-
"\v",
|
|
96
|
-
"\f",
|
|
97
|
-
//+++++++//
|
|
98
|
-
"\u00A0",
|
|
99
|
-
"\u1680",
|
|
100
|
-
"\u2000",
|
|
101
|
-
"\u2001",
|
|
102
|
-
"\u2002",
|
|
103
|
-
"\u2003",
|
|
104
|
-
"\u2004",
|
|
105
|
-
"\u2005",
|
|
106
|
-
"\u2006",
|
|
107
|
-
"\u2007",
|
|
108
|
-
"\u2008",
|
|
109
|
-
"\u2009",
|
|
110
|
-
"\u200A",
|
|
111
|
-
"\u202F",
|
|
112
|
-
"\u205F",
|
|
113
|
-
"\u3000"
|
|
114
|
-
];
|
|
115
|
-
let WHITESPACE_SET = new Set(WHITESPACES);
|
|
116
|
-
if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
|
|
117
|
-
const nextChar = peek(input, index, 1);
|
|
118
|
-
if (input[index] === "\\" && nextChar !== null) {
|
|
119
|
-
str += "\\" + nextChar;
|
|
120
|
-
} else {
|
|
121
|
-
lexerError([
|
|
122
|
-
"{line}<$red:Invalid escape sequence$>{N}",
|
|
123
|
-
"<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
|
|
124
|
-
nextChar === null ? "<$yellow:Found end of file after escape character$>" : "<$yellow:Missing character after escape character$>",
|
|
125
|
-
"{line}"
|
|
126
|
-
]);
|
|
127
|
-
}
|
|
128
|
-
if (WHITESPACE_SET.has(str[1])) {
|
|
129
|
-
const matchedCharacter = Array.from(WHITESPACE_SET).find(ch => ch === str[1]);
|
|
130
|
-
lexerError([
|
|
131
|
-
"{line}<$red:Invalid escape sequence$>{N}",
|
|
132
|
-
"<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
|
|
133
|
-
`<$yellow:Found$> <$blue:${JSON.stringify(matchedCharacter)}$> <$yellow:after escape character$>{N}`,
|
|
134
|
-
"{line}"
|
|
135
|
-
]);
|
|
136
|
-
}
|
|
137
|
-
return str;
|
|
138
|
-
} else {
|
|
139
|
-
sommarkError([
|
|
140
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
141
|
-
"'input' must be an array and have to be not empty, and 'index' must be a number value.$>{line}"
|
|
142
|
-
]);
|
|
143
|
-
}
|
|
76
|
+
if (index >= input.length) return "";
|
|
77
|
+
const nextChar = peek(input, index, 1);
|
|
78
|
+
const WHITESPACES = [" ", "\t", "\n", "\r", "\v", "\f"];
|
|
79
|
+
if (WHITESPACES.includes(nextChar)) lexerError(["[Lexer Error]: Invalid escape sequence (escaped whitespace)"]);
|
|
80
|
+
if (input[index] === "\\" && nextChar !== null) return "\\" + nextChar;
|
|
81
|
+
lexerError(["[Lexer Error]: Invalid escape sequence"]);
|
|
82
|
+
return "";
|
|
144
83
|
}
|
|
145
84
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if (index >= input.length) {
|
|
150
|
-
return str;
|
|
151
|
-
}
|
|
152
|
-
if (Array.isArray(stop_at_char) && stop_at_char.length > 0) {
|
|
153
|
-
for (let i = index; i < input.length; i++) {
|
|
154
|
-
const char = input[i];
|
|
155
|
-
if (stop_at_char.includes(char)) {
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
str += char;
|
|
159
|
-
}
|
|
160
|
-
} else {
|
|
161
|
-
sommarkError([
|
|
162
|
-
"{line}<$red:Invalid Type:$> <$yellow:Argument 'stop_at_char' must be an array and have to be not empty array$>{line}"
|
|
163
|
-
]);
|
|
164
|
-
}
|
|
165
|
-
return str;
|
|
166
|
-
} else {
|
|
167
|
-
sommarkError([
|
|
168
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
169
|
-
"'input' must be an array and have to be not empty, 'index' must be a number value$>{line}"
|
|
170
|
-
]);
|
|
171
|
-
}
|
|
172
|
-
}
|
|
85
|
+
// ========================================================================== //
|
|
86
|
+
// Main Lexer Function //
|
|
87
|
+
// ========================================================================== //
|
|
173
88
|
|
|
174
89
|
function lexer(src) {
|
|
175
|
-
if (src
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
let character = 0;
|
|
180
|
-
let depth_stack = [];
|
|
181
|
-
let context = "",
|
|
182
|
-
temp_str = "",
|
|
183
|
-
previous_value = "";
|
|
90
|
+
if (!src || typeof src !== "string") return [];
|
|
91
|
+
const tokens = [];
|
|
92
|
+
let isInHeader = false, isInAtBlockBody = false;
|
|
93
|
+
let line = 0, character = 0, depth_stack = [];
|
|
184
94
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
95
|
+
// ========================================================================== //
|
|
96
|
+
// Token Creation Helpers //
|
|
97
|
+
// ========================================================================== //
|
|
98
|
+
|
|
99
|
+
function addToken(type, value, rawValue) {
|
|
100
|
+
if (typeof rawValue === "string" && typeof value === "string" && rawValue !== value) {
|
|
101
|
+
const offset = rawValue.indexOf(value);
|
|
102
|
+
if (offset !== -1) {
|
|
103
|
+
advance(rawValue.slice(0, offset));
|
|
104
|
+
const startPos = { line, character }; advance(value);
|
|
105
|
+
const endPos = { line, character };
|
|
106
|
+
tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
|
|
107
|
+
advance(rawValue.slice(offset + value.length));
|
|
108
|
+
return;
|
|
192
109
|
}
|
|
193
110
|
}
|
|
111
|
+
const startPos = { line, character }; advance(rawValue || value);
|
|
112
|
+
const endPos = { line, character };
|
|
113
|
+
tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
|
|
114
|
+
}
|
|
194
115
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
204
|
-
character += value.length;
|
|
205
|
-
}
|
|
206
|
-
const endPos = { line, character };
|
|
207
|
-
tokens.push({
|
|
208
|
-
type,
|
|
209
|
-
value,
|
|
210
|
-
range: { start: startPos, end: endPos },
|
|
211
|
-
depth: depth_stack.length
|
|
212
|
-
});
|
|
116
|
+
function advance(text) {
|
|
117
|
+
const newlines = (text.match(/\n/g) || []).length;
|
|
118
|
+
if (newlines > 0) { line += newlines; character = text.split("\n").pop().length; }
|
|
119
|
+
else character += text.length;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function validateIdentifier(id, charPos) {
|
|
123
|
+
if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
|
|
124
|
+
lexerError([`[Lexer Error]: Invalid Identifier: '${id.trim()}' at line ${line + 1}, col ${charPos || character}`]);
|
|
213
125
|
}
|
|
126
|
+
}
|
|
214
127
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
128
|
+
// ========================================================================== //
|
|
129
|
+
// Main Tokenization Loop //
|
|
130
|
+
// ========================================================================== //
|
|
131
|
+
|
|
132
|
+
for (let i = 0; i < src.length; i++) {
|
|
133
|
+
const char = src[i];
|
|
134
|
+
const next = peek(src, i, 1);
|
|
135
|
+
|
|
136
|
+
// ========================================================================== //
|
|
137
|
+
// Look back at previous tokens to determine current context //
|
|
138
|
+
// ========================================================================== //
|
|
139
|
+
let prev_type = "", prev_prev_type = "", count = 0;
|
|
140
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
141
|
+
const t = tokens[j];
|
|
142
|
+
if (t.type !== TOKEN_TYPES.TEXT && t.type !== TOKEN_TYPES.COMMENT) {
|
|
143
|
+
if (count === 0) prev_type = t.type;
|
|
144
|
+
else if (count === 1) prev_prev_type = t.type;
|
|
145
|
+
count++; if (count >= 2) break;
|
|
224
146
|
}
|
|
225
147
|
}
|
|
226
148
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
previous_value = block_end;
|
|
244
|
-
} else {
|
|
245
|
-
previous_value = current_char;
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
// ========================================================================== //
|
|
249
|
-
// Token: Equal Sign //
|
|
250
|
-
// ========================================================================== //
|
|
251
|
-
else if (current_char === "=" && !scope_state) {
|
|
252
|
-
addToken(TOKEN_TYPES.EQUAL, current_char);
|
|
253
|
-
previous_value = current_char;
|
|
254
|
-
}
|
|
255
|
-
// ========================================================================== //
|
|
256
|
-
// Token: Close Bracket //
|
|
257
|
-
// ========================================================================== //
|
|
258
|
-
else if (current_char === "]" && !scope_state) {
|
|
259
|
-
addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
|
|
260
|
-
if (previous_value === end_keyword) {
|
|
261
|
-
depth_stack.pop();
|
|
262
|
-
}
|
|
263
|
-
previous_value = current_char;
|
|
264
|
-
}
|
|
265
|
-
// ========================================================================== //
|
|
266
|
-
// Token: Open Parenthesis '(' //
|
|
267
|
-
// ========================================================================== //
|
|
268
|
-
else if (current_char === "(" && !scope_state) {
|
|
269
|
-
addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
|
|
270
|
-
if (previous_value !== "->") {
|
|
271
|
-
previous_value = current_char;
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
// ========================================================================== //
|
|
275
|
-
// Token: Thin Arrow '->' //
|
|
276
|
-
// ========================================================================== //
|
|
277
|
-
else if (current_char === "-" && peek(src, i, 1) === ">") {
|
|
278
|
-
temp_str = current_char + peek(src, i, 1);
|
|
279
|
-
i += temp_str.length - 1;
|
|
280
|
-
addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
|
|
281
|
-
previous_value = temp_str;
|
|
282
|
-
}
|
|
283
|
-
// ========================================================================== //
|
|
284
|
-
// Token: Close Parenthesis ')' //
|
|
285
|
-
// ========================================================================== //
|
|
286
|
-
else if (current_char === ")" && !scope_state) {
|
|
287
|
-
addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
|
|
288
|
-
previous_value = current_char;
|
|
289
|
-
}
|
|
290
|
-
// ========================================================================== //
|
|
291
|
-
// Token: Open At '@_' //
|
|
292
|
-
// ========================================================================== //
|
|
293
|
-
else if (
|
|
294
|
-
current_char === "@" &&
|
|
295
|
-
peek(src, i, 1) === "_" &&
|
|
296
|
-
(!scope_state || isAtBlockEnd(src, i))
|
|
297
|
-
) {
|
|
298
|
-
temp_str = current_char + peek(src, i, 1);
|
|
299
|
-
i += temp_str.length - 1;
|
|
300
|
-
addToken(TOKEN_TYPES.OPEN_AT, temp_str);
|
|
301
|
-
// is next token end keyword?
|
|
302
|
-
if (isAtBlockEnd(src, i - 1)) {
|
|
303
|
-
previous_value = at_end;
|
|
304
|
-
} else {
|
|
305
|
-
previous_value = temp_str;
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
// ========================================================================== //
|
|
309
|
-
// Token: Close At '_@' //
|
|
310
|
-
// ========================================================================== //
|
|
311
|
-
else if (current_char === "_" && peek(src, i, 1) === "@") {
|
|
312
|
-
temp_str = current_char + peek(src, i, 1);
|
|
313
|
-
i += temp_str.length - 1;
|
|
314
|
-
addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
|
|
315
|
-
switch (previous_value) {
|
|
316
|
-
case at_id:
|
|
317
|
-
previous_value = temp_str + "+";
|
|
318
|
-
break;
|
|
319
|
-
default:
|
|
320
|
-
previous_value = temp_str;
|
|
321
|
-
break;
|
|
322
|
-
}
|
|
323
|
-
}
|
|
324
|
-
// ========================================================================== //
|
|
325
|
-
// Token: Colon ':' //
|
|
326
|
-
// ========================================================================== //
|
|
327
|
-
else if (
|
|
328
|
-
current_char === ":" &&
|
|
329
|
-
(previous_value === "_@+" ||
|
|
330
|
-
previous_value === BLOCKCOMMA ||
|
|
331
|
-
previous_value === block_id_2 ||
|
|
332
|
-
previous_value === inline_id_2 ||
|
|
333
|
-
previous_value === at_id_2 ||
|
|
334
|
-
previous_value === at_value ||
|
|
335
|
-
previous_value === BLOCKCOLON ||
|
|
336
|
-
previous_value === ATBLOCKCOLON ||
|
|
337
|
-
previous_value === INLINECOLON) &&
|
|
338
|
-
!scope_state
|
|
339
|
-
) {
|
|
340
|
-
addToken(TOKEN_TYPES.COLON, current_char);
|
|
341
|
-
switch (previous_value) {
|
|
342
|
-
case block_id_2:
|
|
343
|
-
previous_value = BLOCKCOLON;
|
|
344
|
-
break;
|
|
345
|
-
case "_@+":
|
|
346
|
-
previous_value = ATBLOCKCOLON;
|
|
347
|
-
break;
|
|
348
|
-
case at_id_2:
|
|
349
|
-
previous_value = ATBLOCKCOLON;
|
|
350
|
-
break;
|
|
351
|
-
case inline_id_2:
|
|
352
|
-
previous_value = INLINECOLON;
|
|
353
|
-
break;
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
// ========================================================================== //
|
|
357
|
-
// Token: Comma ',' //
|
|
358
|
-
// ========================================================================== //
|
|
359
|
-
else if (
|
|
360
|
-
current_char === "," &&
|
|
361
|
-
(previous_value === block_value ||
|
|
362
|
-
previous_value === at_value ||
|
|
363
|
-
previous_value === inline_value ||
|
|
364
|
-
previous_value === BLOCKCOMMA ||
|
|
365
|
-
previous_value === ATBLOCKCOMMA ||
|
|
366
|
-
previous_value === INLINECOMMA)
|
|
367
|
-
) {
|
|
368
|
-
addToken(TOKEN_TYPES.COMMA, current_char);
|
|
369
|
-
switch (previous_value) {
|
|
370
|
-
case "=":
|
|
371
|
-
previous_value = BLOCKCOMMA;
|
|
372
|
-
break;
|
|
373
|
-
case block_value:
|
|
374
|
-
previous_value = BLOCKCOMMA;
|
|
375
|
-
break;
|
|
376
|
-
case at_value:
|
|
377
|
-
previous_value = ATBLOCKCOMMA;
|
|
378
|
-
break;
|
|
379
|
-
case inline_value:
|
|
380
|
-
previous_value = INLINECOMMA;
|
|
381
|
-
break;
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
// ========================================================================== //
|
|
385
|
-
// Token: Semi-colon ';' //
|
|
386
|
-
// ========================================================================== //
|
|
387
|
-
else if (
|
|
388
|
-
(current_char === ";" && previous_value === at_value) ||
|
|
389
|
-
(current_char === ";" && previous_value === "_@+") || // New: Allow semicolon directly after identifier
|
|
390
|
-
(current_char === ";" && previous_value === ";") ||
|
|
391
|
-
(current_char === ";" && previous_value === ATBLOCKCOMMA)
|
|
392
|
-
) {
|
|
393
|
-
addToken(TOKEN_TYPES.SEMICOLON, current_char);
|
|
394
|
-
scope_state = true;
|
|
395
|
-
previous_value = current_char;
|
|
396
|
-
}
|
|
397
|
-
// ========================================================================== //
|
|
398
|
-
// Token: Escape Character '\' //
|
|
399
|
-
// ========================================================================== //
|
|
400
|
-
else if (current_char === "\\") {
|
|
401
|
-
temp_str = concatEscape(src, i);
|
|
402
|
-
i += temp_str.length - 1;
|
|
403
|
-
temp_str = temp_str.trim();
|
|
404
|
-
if (temp_str && temp_str.length > 0) {
|
|
405
|
-
addToken(TOKEN_TYPES.ESCAPE, temp_str);
|
|
149
|
+
// ========================================================================== //
|
|
150
|
+
// Check for structural characters ([ ], ( ), @_, _@) //
|
|
151
|
+
// ========================================================================== //
|
|
152
|
+
|
|
153
|
+
if (char === "[" && !isInAtBlockBody) {
|
|
154
|
+
let idPeek = ""; for (let j = i + 1; j < src.length && !/[=\]:#]/.test(src[j]); j++) idPeek += src[j];
|
|
155
|
+
if (idPeek.trim() !== end_keyword) depth_stack.push("B");
|
|
156
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, char); isInHeader = true;
|
|
157
|
+
} else if (char === "]" && isInHeader) {
|
|
158
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, char); isInHeader = false;
|
|
159
|
+
// Reliable depth pop on [end]
|
|
160
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
161
|
+
const t = tokens[j];
|
|
162
|
+
if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
|
|
163
|
+
if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
|
|
164
|
+
break;
|
|
406
165
|
}
|
|
407
166
|
}
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
167
|
+
} else if (char === "(" && !isInAtBlockBody) {
|
|
168
|
+
addToken(TOKEN_TYPES.OPEN_PAREN, char); isInHeader = true;
|
|
169
|
+
} else if (char === ")" && isInHeader) {
|
|
170
|
+
addToken(TOKEN_TYPES.CLOSE_PAREN, char); isInHeader = false;
|
|
171
|
+
} else if (char === "@" && next === "_" && (!isInAtBlockBody || isAtBlockEnd(src, i))) {
|
|
172
|
+
let idPeek = ""; for (let j = i + 2; j < src.length && !/[_@:#]/.test(src[j]); j++) idPeek += src[j];
|
|
173
|
+
if (idPeek.trim() !== end_keyword) depth_stack.push("A");
|
|
174
|
+
addToken(TOKEN_TYPES.OPEN_AT, "@_"); i++; isInHeader = true;
|
|
175
|
+
} else if (char === "_" && next === "@" && (isInHeader || isInAtBlockBody)) {
|
|
176
|
+
addToken(TOKEN_TYPES.CLOSE_AT, "_@"); i++;
|
|
177
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
178
|
+
const t = tokens[j];
|
|
179
|
+
if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
|
|
180
|
+
if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
|
|
181
|
+
break;
|
|
415
182
|
}
|
|
416
183
|
}
|
|
184
|
+
isInHeader = true; isInAtBlockBody = false;
|
|
185
|
+
} else if (char === ";" && isInHeader) {
|
|
186
|
+
addToken(TOKEN_TYPES.SEMICOLON, char); isInHeader = false; isInAtBlockBody = true;
|
|
187
|
+
} else if (char === "=" && isInHeader && !isInAtBlockBody) {
|
|
188
|
+
addToken(TOKEN_TYPES.EQUAL, char);
|
|
189
|
+
} else if (char === ":" && isInHeader && !isInAtBlockBody && (prev_type === TOKEN_TYPES.IDENTIFIER || prev_type === TOKEN_TYPES.CLOSE_AT)) {
|
|
190
|
+
addToken(TOKEN_TYPES.COLON, char);
|
|
191
|
+
} else if (char === "," && isInHeader) {
|
|
192
|
+
addToken(TOKEN_TYPES.COMMA, char);
|
|
193
|
+
} else if (char === "-" && next === ">" && (isInHeader || prev_type === TOKEN_TYPES.CLOSE_PAREN)) {
|
|
194
|
+
addToken(TOKEN_TYPES.THIN_ARROW, "->"); i++;
|
|
195
|
+
} else if (char === "\"" && isInHeader) {
|
|
196
|
+
const quote = concatQuote(src, i); addToken(TOKEN_TYPES.VALUE, quote); i += quote.length - 1;
|
|
197
|
+
} else if (char === "\\") {
|
|
198
|
+
const esc = concatEscape(src, i); addToken(TOKEN_TYPES.ESCAPE, esc); i += esc.length - 1;
|
|
199
|
+
} else if (char === "#" && !isInAtBlockBody) {
|
|
200
|
+
let comm = ""; for (; i < src.length && src[i] !== "\n"; i++) comm += src[i];
|
|
201
|
+
addToken(TOKEN_TYPES.COMMENT, comm, comm); i--;
|
|
202
|
+
} else if (char === "\n" && !isInAtBlockBody) {
|
|
203
|
+
advance(char);
|
|
204
|
+
} else {
|
|
417
205
|
// ========================================================================== //
|
|
418
|
-
//
|
|
419
|
-
// ========================================================================== //
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
if (
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
// Update Previous Value
|
|
435
|
-
previous_value = block_id;
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
// ========================================================================== //
|
|
439
|
-
// Token: Block Value //
|
|
440
|
-
// ========================================================================== //
|
|
441
|
-
else if (
|
|
442
|
-
(previous_value === "=" ||
|
|
443
|
-
previous_value === BLOCKCOMMA ||
|
|
444
|
-
previous_value === BLOCKCOLON ||
|
|
445
|
-
previous_value === block_value) &&
|
|
446
|
-
!scope_state
|
|
447
|
-
) {
|
|
448
|
-
temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
|
|
449
|
-
i += temp_str.length - 1;
|
|
450
|
-
const nextToken = peek(src, i, 1);
|
|
451
|
-
if (temp_str.trim()) {
|
|
452
|
-
// Add token
|
|
453
|
-
switch (nextToken) {
|
|
454
|
-
case ":":
|
|
455
|
-
const trimmedKey = temp_str.trim();
|
|
456
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
457
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
458
|
-
previous_value = block_id_2;
|
|
459
|
-
break;
|
|
460
|
-
default:
|
|
461
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
462
|
-
previous_value = block_value;
|
|
463
|
-
break;
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
// ========================================================================== //
|
|
468
|
-
// Token: Inline Identifier //
|
|
469
|
-
// ========================================================================== //
|
|
470
|
-
else if (previous_value === "->" && !scope_state) {
|
|
471
|
-
temp_str = concatChar(src, i, ["(", ")", ":"]);
|
|
472
|
-
i += temp_str.length - 1;
|
|
473
|
-
const nextToken = peek(src, i, 1);
|
|
474
|
-
if (temp_str.trim()) {
|
|
475
|
-
// Add Token
|
|
476
|
-
switch (nextToken) {
|
|
477
|
-
case ":":
|
|
478
|
-
const trimmedKey = temp_str.trim();
|
|
479
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
480
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
481
|
-
previous_value = inline_id_2;
|
|
482
|
-
break;
|
|
483
|
-
default:
|
|
484
|
-
const trimmedId = temp_str.trim();
|
|
485
|
-
validateIdentifier(trimmedId, "Inline Identifier");
|
|
486
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedId);
|
|
487
|
-
previous_value = inline_id;
|
|
488
|
-
break;
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
// ========================================================================== //
|
|
493
|
-
// Token: Inline Value //
|
|
494
|
-
// ========================================================================== //
|
|
495
|
-
else if (
|
|
496
|
-
(previous_value === "(" ||
|
|
497
|
-
previous_value === INLINECOLON ||
|
|
498
|
-
previous_value === INLINECOMMA ||
|
|
499
|
-
previous_value === inline_value) &&
|
|
500
|
-
!scope_state
|
|
501
|
-
) {
|
|
502
|
-
temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
|
|
503
|
-
i += temp_str.length - 1;
|
|
504
|
-
if (temp_str.trim()) {
|
|
505
|
-
// Add Token
|
|
506
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
507
|
-
// Update Previous Value
|
|
508
|
-
previous_value = inline_value;
|
|
509
|
-
}
|
|
510
|
-
}
|
|
511
|
-
// ========================================================================== //
|
|
512
|
-
// Token: At Identifier //
|
|
513
|
-
// ========================================================================== //
|
|
514
|
-
else if (previous_value === "@_") {
|
|
515
|
-
temp_str = concatChar(src, i, ["_", ":"]);
|
|
516
|
-
i += temp_str.length - 1;
|
|
517
|
-
if (temp_str.trim()) {
|
|
518
|
-
const trimmedStr = temp_str.trim();
|
|
519
|
-
if (trimmedStr !== end_keyword) {
|
|
520
|
-
validateIdentifier(trimmedStr, "At-Block Identifier");
|
|
206
|
+
// Capture plain text or Identifier values //
|
|
207
|
+
// ========================================================================== //
|
|
208
|
+
const isValueContext = (prev_type === TOKEN_TYPES.COLON || prev_type === TOKEN_TYPES.EQUAL);
|
|
209
|
+
const context = concatText(src, i, isInHeader, isInAtBlockBody, isValueContext);
|
|
210
|
+
if (context.length > 0) {
|
|
211
|
+
if (isInHeader) {
|
|
212
|
+
const trimmed = context.trim();
|
|
213
|
+
if ((prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT) && trimmed === end_keyword) {
|
|
214
|
+
addToken(TOKEN_TYPES.END_KEYWORD, trimmed, context);
|
|
215
|
+
} else if (trimmed.length > 0) {
|
|
216
|
+
let isNextColon = false;
|
|
217
|
+
for (let j = i + context.length; j < src.length; j++) {
|
|
218
|
+
const c = src[j];
|
|
219
|
+
if (c === " " || c === "\t" || c === "\n") continue;
|
|
220
|
+
if (c === ":") isNextColon = true;
|
|
221
|
+
break;
|
|
521
222
|
}
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
i += temp_str.length - 1;
|
|
533
|
-
const nextToken = peek(src, i, 1);
|
|
534
|
-
if (temp_str.trim()) {
|
|
535
|
-
switch (nextToken) {
|
|
536
|
-
case ":":
|
|
537
|
-
const trimmedKey = temp_str.trim();
|
|
538
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
539
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
540
|
-
previous_value = at_id_2;
|
|
541
|
-
break;
|
|
542
|
-
default:
|
|
543
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
544
|
-
previous_value = at_value;
|
|
545
|
-
break;
|
|
223
|
+
|
|
224
|
+
const isBlockStart = (prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT);
|
|
225
|
+
const isMapperHead = (prev_type === TOKEN_TYPES.OPEN_PAREN && prev_prev_type === TOKEN_TYPES.THIN_ARROW);
|
|
226
|
+
const isMandatoryId = (isNextColon || prev_type === TOKEN_TYPES.THIN_ARROW);
|
|
227
|
+
|
|
228
|
+
if (isBlockStart || isMapperHead || isMandatoryId) {
|
|
229
|
+
validateIdentifier(trimmed, character + context.indexOf(trimmed));
|
|
230
|
+
addToken(TOKEN_TYPES.IDENTIFIER, trimmed, context);
|
|
231
|
+
} else {
|
|
232
|
+
addToken(TOKEN_TYPES.VALUE, trimmed, context);
|
|
546
233
|
}
|
|
234
|
+
} else {
|
|
235
|
+
advance(context);
|
|
547
236
|
}
|
|
237
|
+
} else {
|
|
238
|
+
addToken(TOKEN_TYPES.TEXT, context);
|
|
548
239
|
}
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
|
|
553
|
-
temp_str = concatChar(src, i, ["]", "_"]);
|
|
554
|
-
i += temp_str.length - 1;
|
|
555
|
-
if (temp_str.trim()) {
|
|
556
|
-
addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
|
|
557
|
-
// Update Previous Value
|
|
558
|
-
previous_value = end_keyword;
|
|
559
|
-
scope_state = false;
|
|
560
|
-
}
|
|
561
|
-
}
|
|
562
|
-
// ========================================================================== //
|
|
563
|
-
// Token: Comment //
|
|
564
|
-
// ========================================================================== //
|
|
565
|
-
else if (current_char === "#") {
|
|
566
|
-
temp_str = concatChar(src, i, ["\n"]);
|
|
567
|
-
if (temp_str.trim()) {
|
|
568
|
-
i += temp_str.length - 1;
|
|
569
|
-
addToken(TOKEN_TYPES.COMMENT, temp_str);
|
|
570
|
-
}
|
|
571
|
-
}
|
|
572
|
-
// ========================================================================== //
|
|
573
|
-
// Token: Text //
|
|
574
|
-
// ========================================================================== //
|
|
575
|
-
else {
|
|
576
|
-
if (previous_value === "_@+") {
|
|
577
|
-
// Strictly wait for semicolon or arguments on the same line.
|
|
578
|
-
// No more heuristic lookahead.
|
|
579
|
-
}
|
|
580
|
-
context = concatText(src, i, scope_state, [
|
|
581
|
-
[":", previous_value === inline_id_2],
|
|
582
|
-
[",", previous_value === block_value || previous_value === at_value || previous_value === inline_value],
|
|
583
|
-
[":", (previous_value === "_@+" && !scope_state) || previous_value === at_value],
|
|
584
|
-
[";", previous_value === at_value],
|
|
585
|
-
[")", previous_value === inline_value]
|
|
586
|
-
]);
|
|
587
|
-
i += context.length - 1;
|
|
588
|
-
if (context.trim()) {
|
|
589
|
-
addToken(TOKEN_TYPES.TEXT, context);
|
|
590
|
-
}
|
|
591
|
-
}
|
|
240
|
+
i += context.length - 1;
|
|
241
|
+
} else {
|
|
242
|
+
addToken(TOKEN_TYPES.TEXT, char);
|
|
592
243
|
}
|
|
593
|
-
context = "";
|
|
594
|
-
temp_str = "";
|
|
595
244
|
}
|
|
596
|
-
|
|
597
|
-
// Ensure EOF token
|
|
598
|
-
const eofPos = { line, character };
|
|
599
|
-
tokens.push({
|
|
600
|
-
type: TOKEN_TYPES.EOF,
|
|
601
|
-
value: "",
|
|
602
|
-
range: { start: eofPos, end: eofPos },
|
|
603
|
-
depth: depth_stack.length
|
|
604
|
-
});
|
|
605
|
-
|
|
606
|
-
return tokens;
|
|
607
|
-
} else {
|
|
608
|
-
lexerError([
|
|
609
|
-
`{line}<$red:Invalid SomMark syntax:$> ${src === "" ? "<$yellow: Got empty string '' $>" : `<$yellow:Expected source input to be a string, got$> <$blue: '${typeof src}'$>`}{line}`
|
|
610
|
-
]);
|
|
611
245
|
}
|
|
246
|
+
// ========================================================================== //
|
|
247
|
+
// Finalize with End-of-File token //
|
|
248
|
+
// ========================================================================== //
|
|
249
|
+
addToken(TOKEN_TYPES.EOF, "");
|
|
250
|
+
return tokens;
|
|
612
251
|
}
|
|
613
252
|
|
|
614
253
|
export default lexer;
|