sommark 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/cli/commands/build.js +2 -1
- package/cli/commands/init.js +2 -6
- package/cli/commands/list.js +17 -12
- package/cli/commands/print.js +7 -2
- package/cli/helpers/transpile.js +2 -1
- package/core/errors.js +22 -9
- package/core/labels.js +3 -0
- package/core/lexer.js +207 -590
- package/core/parser.js +201 -65
- package/core/pluginManager.js +33 -23
- package/core/plugins/comment-remover.js +3 -3
- package/core/plugins/module-system.js +163 -124
- package/core/plugins/raw-content-plugin.js +15 -9
- package/core/plugins/rules-validation-plugin.js +2 -2
- package/core/plugins/sommark-format.js +92 -72
- package/core/tokenTypes.js +2 -1
- package/core/transpiler.js +70 -8
- package/coverage_test.js +21 -0
- package/helpers/utils.js +27 -0
- package/index.js +25 -16
- package/mappers/languages/html.js +5 -10
- package/package.json +1 -1
- package/v3-todo.smark +68 -70
- package/core/plugins/quote-escaper.js +0 -37
- package/format.js +0 -23
- package/unformatted.smark +0 -90
package/core/lexer.js
CHANGED
|
@@ -1,636 +1,253 @@
|
|
|
1
1
|
import TOKEN_TYPES from "./tokenTypes.js";
|
|
2
2
|
import peek from "../helpers/peek.js";
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
at_id_2,
|
|
14
|
-
at_end,
|
|
15
|
-
end_keyword,
|
|
16
|
-
BLOCKCOMMA,
|
|
17
|
-
ATBLOCKCOMMA,
|
|
18
|
-
INLINECOMMA,
|
|
19
|
-
BLOCKCOLON,
|
|
20
|
-
ATBLOCKCOLON,
|
|
21
|
-
INLINECOLON
|
|
22
|
-
} from "./labels.js";
|
|
23
|
-
import { lexerError, sommarkError } from "./errors.js";
|
|
3
|
+
import { end_keyword } from "./labels.js";
|
|
4
|
+
import { lexerError } from "./errors.js";
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* SomMark Lexer
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ========================================================================== //
|
|
11
|
+
// Helper Functions //
|
|
12
|
+
// ========================================================================== //
|
|
24
13
|
|
|
25
14
|
const atBlockEndRegex = new RegExp(`^@_\\s*${end_keyword}\\s*_@`);
|
|
15
|
+
|
|
16
|
+
// Checks if we reached the end of an At-Block
|
|
26
17
|
function isAtBlockEnd(input, index) {
|
|
27
18
|
const slice = typeof input === "string" ? input.slice(index, index + 100) : input.slice(index, index + 100).join("");
|
|
28
19
|
return atBlockEndRegex.test(slice);
|
|
29
20
|
}
|
|
30
21
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
22
|
+
// Collects characters inside a quoted string
|
|
23
|
+
function concatQuote(input, index) {
|
|
24
|
+
let text = "\"";
|
|
25
|
+
for (let i = index + 1; i < input.length; i++) {
|
|
26
|
+
const char = input[i];
|
|
27
|
+
if (char === "\\" && peek(input, i, 1) === "\"") {
|
|
28
|
+
text += "\\\"";
|
|
29
|
+
i++;
|
|
30
|
+
continue;
|
|
31
|
+
}
|
|
32
|
+
text += char;
|
|
33
|
+
if (char === "\"") return text;
|
|
34
34
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const updateColumn = (end = 0, textLength) => {
|
|
39
|
-
const start = end + 1;
|
|
40
|
-
const newEnd = start + textLength - 1;
|
|
41
|
-
return { start, end: newEnd };
|
|
42
|
-
};
|
|
35
|
+
lexerError(["[Lexer Error]: Unclosed quote"]);
|
|
36
|
+
return text;
|
|
37
|
+
}
|
|
43
38
|
|
|
44
|
-
|
|
39
|
+
// Collects plain text until a special character is found
|
|
40
|
+
function concatText(input, index, isInHeader, isInAtBlockBody, isLiberalValue = false) {
|
|
45
41
|
let text = "";
|
|
46
|
-
if (index >= input.length)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
break;
|
|
70
|
-
} else if (extraConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
|
|
42
|
+
if (index >= input.length) return text;
|
|
43
|
+
for (let i = index; i < input.length; i++) {
|
|
44
|
+
const char = input[i];
|
|
45
|
+
const stopConditions = [
|
|
46
|
+
["[", !isInAtBlockBody],
|
|
47
|
+
["(", !isInAtBlockBody],
|
|
48
|
+
["#", !isInAtBlockBody && !isLiberalValue],
|
|
49
|
+
["=", isInHeader && !isInAtBlockBody],
|
|
50
|
+
["\"", isInHeader],
|
|
51
|
+
["]", isInHeader],
|
|
52
|
+
[")", isInHeader],
|
|
53
|
+
["-", peek(input, i, 1) === ">" && (isInHeader || true)],
|
|
54
|
+
["@", peek(input, i, 1) === "_" && (!isInAtBlockBody || isAtBlockEnd(input, i))],
|
|
55
|
+
["_", peek(input, i, 1) === "@" && isInHeader],
|
|
56
|
+
["\\", true],
|
|
57
|
+
[":", isInHeader && !isInAtBlockBody],
|
|
58
|
+
[";", isInHeader],
|
|
59
|
+
[",", isInHeader]
|
|
60
|
+
];
|
|
61
|
+
let shouldStop = false;
|
|
62
|
+
for (const [stopChar, conditionMet] of stopConditions) {
|
|
63
|
+
if (conditionMet && input.substring(i, i + stopChar.length) === stopChar) {
|
|
64
|
+
shouldStop = true;
|
|
71
65
|
break;
|
|
72
66
|
}
|
|
73
|
-
text += char;
|
|
74
67
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
sommarkError([
|
|
78
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
79
|
-
"'input' must be an array and have to be not empty, 'index' must be a number value, and 'scope_state' ",
|
|
80
|
-
"must be a boolean.$>{line}."
|
|
81
|
-
]);
|
|
68
|
+
if (shouldStop) break;
|
|
69
|
+
text += char;
|
|
82
70
|
}
|
|
71
|
+
return text;
|
|
83
72
|
}
|
|
84
73
|
|
|
74
|
+
// Handles backslash escapes in the text
|
|
85
75
|
function concatEscape(input, index) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
"\n",
|
|
94
|
-
"\r",
|
|
95
|
-
"\v",
|
|
96
|
-
"\f",
|
|
97
|
-
//+++++++//
|
|
98
|
-
"\u00A0",
|
|
99
|
-
"\u1680",
|
|
100
|
-
"\u2000",
|
|
101
|
-
"\u2001",
|
|
102
|
-
"\u2002",
|
|
103
|
-
"\u2003",
|
|
104
|
-
"\u2004",
|
|
105
|
-
"\u2005",
|
|
106
|
-
"\u2006",
|
|
107
|
-
"\u2007",
|
|
108
|
-
"\u2008",
|
|
109
|
-
"\u2009",
|
|
110
|
-
"\u200A",
|
|
111
|
-
"\u202F",
|
|
112
|
-
"\u205F",
|
|
113
|
-
"\u3000"
|
|
114
|
-
];
|
|
115
|
-
let WHITESPACE_SET = new Set(WHITESPACES);
|
|
116
|
-
if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
|
|
117
|
-
const nextChar = peek(input, index, 1);
|
|
118
|
-
if (input[index] === "\\" && nextChar !== null) {
|
|
119
|
-
str += "\\" + nextChar;
|
|
120
|
-
} else {
|
|
121
|
-
lexerError([
|
|
122
|
-
"{line}<$red:Invalid escape sequence$>{N}",
|
|
123
|
-
"<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
|
|
124
|
-
nextChar === null ? "<$yellow:Found end of file after escape character$>" : "<$yellow:Missing character after escape character$>",
|
|
125
|
-
"{line}"
|
|
126
|
-
]);
|
|
127
|
-
}
|
|
128
|
-
if (WHITESPACE_SET.has(str[1])) {
|
|
129
|
-
const matchedCharacter = Array.from(WHITESPACE_SET).find(ch => ch === str[1]);
|
|
130
|
-
lexerError([
|
|
131
|
-
"{line}<$red:Invalid escape sequence$>{N}",
|
|
132
|
-
"<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
|
|
133
|
-
`<$yellow:Found$> <$blue:${JSON.stringify(matchedCharacter)}$> <$yellow:after escape character$>{N}`,
|
|
134
|
-
"{line}"
|
|
135
|
-
]);
|
|
136
|
-
}
|
|
137
|
-
return str;
|
|
138
|
-
} else {
|
|
139
|
-
sommarkError([
|
|
140
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
141
|
-
"'input' must be an array and have to be not empty, and 'index' must be a number value.$>{line}"
|
|
142
|
-
]);
|
|
143
|
-
}
|
|
76
|
+
if (index >= input.length) return "";
|
|
77
|
+
const nextChar = peek(input, index, 1);
|
|
78
|
+
const WHITESPACES = [" ", "\t", "\n", "\r", "\v", "\f"];
|
|
79
|
+
if (WHITESPACES.includes(nextChar)) lexerError(["[Lexer Error]: Invalid escape sequence (escaped whitespace)"]);
|
|
80
|
+
if (input[index] === "\\" && nextChar !== null) return "\\" + nextChar;
|
|
81
|
+
lexerError(["[Lexer Error]: Invalid escape sequence"]);
|
|
82
|
+
return "";
|
|
144
83
|
}
|
|
145
84
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if (index >= input.length) {
|
|
150
|
-
return str;
|
|
151
|
-
}
|
|
152
|
-
if (Array.isArray(stop_at_char) && stop_at_char.length > 0) {
|
|
153
|
-
for (let i = index; i < input.length; i++) {
|
|
154
|
-
const char = input[i];
|
|
155
|
-
if (stop_at_char.includes(char)) {
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
str += char;
|
|
159
|
-
}
|
|
160
|
-
} else {
|
|
161
|
-
sommarkError([
|
|
162
|
-
"{line}<$red:Invalid Type:$> <$yellow:Argument 'stop_at_char' must be an array and have to be not empty array$>{line}"
|
|
163
|
-
]);
|
|
164
|
-
}
|
|
165
|
-
return str;
|
|
166
|
-
} else {
|
|
167
|
-
sommarkError([
|
|
168
|
-
"{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
|
|
169
|
-
"'input' must be an array and have to be not empty, 'index' must be a number value$>{line}"
|
|
170
|
-
]);
|
|
171
|
-
}
|
|
172
|
-
}
|
|
85
|
+
// ========================================================================== //
|
|
86
|
+
// Main Lexer Function //
|
|
87
|
+
// ========================================================================== //
|
|
173
88
|
|
|
174
89
|
function lexer(src) {
|
|
175
|
-
if (src
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
temp_str = "",
|
|
184
|
-
previous_value = "";
|
|
90
|
+
if (!src || typeof src !== "string") return [];
|
|
91
|
+
const tokens = [];
|
|
92
|
+
let isInHeader = false, isInAtBlockBody = false;
|
|
93
|
+
let line = 0, character = 0, depth_stack = [];
|
|
94
|
+
|
|
95
|
+
// ========================================================================== //
|
|
96
|
+
// Token Creation Helpers //
|
|
97
|
+
// ========================================================================== //
|
|
185
98
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
99
|
+
function addToken(type, value, rawValue) {
|
|
100
|
+
if (typeof rawValue === "string" && typeof value === "string" && rawValue !== value) {
|
|
101
|
+
const offset = rawValue.indexOf(value);
|
|
102
|
+
if (offset !== -1) {
|
|
103
|
+
advance(rawValue.slice(0, offset));
|
|
104
|
+
const startPos = { line, character }; advance(value);
|
|
105
|
+
const endPos = { line, character };
|
|
106
|
+
tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
|
|
107
|
+
advance(rawValue.slice(offset + value.length));
|
|
108
|
+
return;
|
|
193
109
|
}
|
|
194
110
|
}
|
|
111
|
+
const startPos = { line, character }; advance(rawValue || value);
|
|
112
|
+
const endPos = { line, character };
|
|
113
|
+
tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
|
|
114
|
+
}
|
|
195
115
|
|
|
196
|
-
|
|
197
|
-
|
|
116
|
+
function advance(text) {
|
|
117
|
+
const newlines = (text.match(/\n/g) || []).length;
|
|
118
|
+
if (newlines > 0) { line += newlines; character = text.split("\n").pop().length; }
|
|
119
|
+
else character += text.length;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
function validateIdentifier(id, charPos) {
|
|
123
|
+
if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
|
|
124
|
+
lexerError([`[Lexer Error]: Invalid Identifier: '${id.trim()}' at line ${line + 1}, col ${charPos || character}`]);
|
|
198
125
|
}
|
|
126
|
+
}
|
|
199
127
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
const lines = text.split("\n");
|
|
204
|
-
const lastLineLength = lines[lines.length - 1].length;
|
|
205
|
-
start = end + 1;
|
|
206
|
-
end = lastLineLength;
|
|
207
|
-
line += newlines;
|
|
208
|
-
} else {
|
|
209
|
-
const cols = updateColumn(end, text.length);
|
|
210
|
-
start = cols.start;
|
|
211
|
-
end = cols.end;
|
|
212
|
-
}
|
|
213
|
-
};
|
|
128
|
+
// ========================================================================== //
|
|
129
|
+
// Main Tokenization Loop //
|
|
130
|
+
// ========================================================================== //
|
|
214
131
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
}
|
|
230
|
-
addToken(TOKEN_TYPES.OPEN_BRACKET, current_char);
|
|
231
|
-
// is next token end keyword?
|
|
232
|
-
if (temp_str.trim() === end_keyword) {
|
|
233
|
-
previous_value = block_end;
|
|
234
|
-
} else {
|
|
235
|
-
previous_value = current_char;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
// ========================================================================== //
|
|
239
|
-
// Token: Equal Sign //
|
|
240
|
-
// ========================================================================== //
|
|
241
|
-
else if (current_char === "=" && !scope_state) {
|
|
242
|
-
// Update Metadata
|
|
243
|
-
updateMetadata(current_char);
|
|
244
|
-
addToken(TOKEN_TYPES.EQUAL, current_char);
|
|
245
|
-
previous_value = current_char;
|
|
246
|
-
}
|
|
247
|
-
// ========================================================================== //
|
|
248
|
-
// Token: Close Bracket //
|
|
249
|
-
// ========================================================================== //
|
|
250
|
-
else if (current_char === "]" && !scope_state) {
|
|
251
|
-
// Update Metadata
|
|
252
|
-
updateMetadata(current_char);
|
|
253
|
-
addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
|
|
254
|
-
if (previous_value === end_keyword) {
|
|
255
|
-
depth_stack.pop();
|
|
256
|
-
}
|
|
257
|
-
previous_value = current_char;
|
|
258
|
-
}
|
|
259
|
-
// ========================================================================== //
|
|
260
|
-
// Token: Open Parenthesis '(' //
|
|
261
|
-
// ========================================================================== //
|
|
262
|
-
else if (current_char === "(" && !scope_state) {
|
|
263
|
-
// Update Metadata
|
|
264
|
-
updateMetadata(current_char);
|
|
265
|
-
addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
|
|
266
|
-
if (previous_value !== "->") {
|
|
267
|
-
previous_value = current_char;
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
// ========================================================================== //
|
|
271
|
-
// Token: Thin Arrow '->' //
|
|
272
|
-
// ========================================================================== //
|
|
273
|
-
else if (current_char === "-" && peek(src, i, 1) === ">") {
|
|
274
|
-
temp_str = current_char + peek(src, i, 1);
|
|
275
|
-
i += temp_str.length - 1;
|
|
276
|
-
// Update Metadata
|
|
277
|
-
updateMetadata(temp_str);
|
|
278
|
-
addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
|
|
279
|
-
previous_value = temp_str;
|
|
280
|
-
}
|
|
281
|
-
// ========================================================================== //
|
|
282
|
-
// Token: Close Parenthesis ')' //
|
|
283
|
-
// ========================================================================== //
|
|
284
|
-
else if (current_char === ")" && !scope_state) {
|
|
285
|
-
// Update Metadata
|
|
286
|
-
updateMetadata(current_char);
|
|
287
|
-
addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
|
|
288
|
-
previous_value = current_char;
|
|
289
|
-
}
|
|
290
|
-
// ========================================================================== //
|
|
291
|
-
// Token: Open At '@_' //
|
|
292
|
-
// ========================================================================== //
|
|
293
|
-
else if (
|
|
294
|
-
current_char === "@" &&
|
|
295
|
-
peek(src, i, 1) === "_" &&
|
|
296
|
-
(!scope_state || isAtBlockEnd(src, i))
|
|
297
|
-
) {
|
|
298
|
-
temp_str = current_char + peek(src, i, 1);
|
|
299
|
-
i += temp_str.length - 1;
|
|
300
|
-
// Update Metadata
|
|
301
|
-
updateMetadata(temp_str);
|
|
302
|
-
addToken(TOKEN_TYPES.OPEN_AT, temp_str);
|
|
303
|
-
// is next token end keyword?
|
|
304
|
-
if (isAtBlockEnd(src, i - 1)) {
|
|
305
|
-
previous_value = at_end;
|
|
306
|
-
} else {
|
|
307
|
-
previous_value = temp_str;
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
// ========================================================================== //
|
|
311
|
-
// Token: Close At '_@' //
|
|
312
|
-
// ========================================================================== //
|
|
313
|
-
else if (current_char === "_" && peek(src, i, 1) === "@") {
|
|
314
|
-
temp_str = current_char + peek(src, i, 1);
|
|
315
|
-
i += temp_str.length - 1;
|
|
316
|
-
// Update Metadata
|
|
317
|
-
updateMetadata(temp_str);
|
|
318
|
-
addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
|
|
319
|
-
switch (previous_value) {
|
|
320
|
-
case at_id:
|
|
321
|
-
previous_value = temp_str + "+";
|
|
322
|
-
break;
|
|
323
|
-
default:
|
|
324
|
-
previous_value = temp_str;
|
|
325
|
-
break;
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
// ========================================================================== //
|
|
329
|
-
// Token: Colon ':' //
|
|
330
|
-
// ========================================================================== //
|
|
331
|
-
else if (
|
|
332
|
-
current_char === ":" &&
|
|
333
|
-
(previous_value === "_@+" ||
|
|
334
|
-
previous_value === BLOCKCOMMA ||
|
|
335
|
-
previous_value === block_id_2 ||
|
|
336
|
-
previous_value === inline_id_2 ||
|
|
337
|
-
previous_value === at_id_2 ||
|
|
338
|
-
previous_value === at_value ||
|
|
339
|
-
previous_value === BLOCKCOLON ||
|
|
340
|
-
previous_value === ATBLOCKCOLON ||
|
|
341
|
-
previous_value === INLINECOLON) &&
|
|
342
|
-
!scope_state
|
|
343
|
-
) {
|
|
344
|
-
// Update Metadata
|
|
345
|
-
updateMetadata(current_char);
|
|
346
|
-
addToken(TOKEN_TYPES.COLON, current_char);
|
|
347
|
-
switch (previous_value) {
|
|
348
|
-
case block_id_2:
|
|
349
|
-
previous_value = BLOCKCOLON;
|
|
350
|
-
break;
|
|
351
|
-
case "_@+":
|
|
352
|
-
previous_value = ATBLOCKCOLON;
|
|
353
|
-
break;
|
|
354
|
-
case at_id_2:
|
|
355
|
-
previous_value = ATBLOCKCOLON;
|
|
356
|
-
break;
|
|
357
|
-
case inline_id_2:
|
|
358
|
-
previous_value = INLINECOLON;
|
|
359
|
-
break;
|
|
360
|
-
}
|
|
361
|
-
}
|
|
362
|
-
// ========================================================================== //
|
|
363
|
-
// Token: Comma ',' //
|
|
364
|
-
// ========================================================================== //
|
|
365
|
-
else if (
|
|
366
|
-
current_char === "," &&
|
|
367
|
-
(previous_value === block_value ||
|
|
368
|
-
previous_value === at_value ||
|
|
369
|
-
previous_value === inline_value ||
|
|
370
|
-
previous_value === BLOCKCOMMA ||
|
|
371
|
-
previous_value === ATBLOCKCOMMA ||
|
|
372
|
-
previous_value === INLINECOMMA)
|
|
373
|
-
) {
|
|
374
|
-
// Update Metadata
|
|
375
|
-
updateMetadata(current_char);
|
|
376
|
-
addToken(TOKEN_TYPES.COMMA, current_char);
|
|
377
|
-
switch (previous_value) {
|
|
378
|
-
case "=":
|
|
379
|
-
previous_value = BLOCKCOMMA;
|
|
380
|
-
break;
|
|
381
|
-
case block_value:
|
|
382
|
-
previous_value = BLOCKCOMMA;
|
|
383
|
-
break;
|
|
384
|
-
case at_value:
|
|
385
|
-
previous_value = ATBLOCKCOMMA;
|
|
386
|
-
break;
|
|
387
|
-
case inline_value:
|
|
388
|
-
previous_value = INLINECOMMA;
|
|
389
|
-
break;
|
|
390
|
-
}
|
|
132
|
+
for (let i = 0; i < src.length; i++) {
|
|
133
|
+
const char = src[i];
|
|
134
|
+
const next = peek(src, i, 1);
|
|
135
|
+
|
|
136
|
+
// ========================================================================== //
|
|
137
|
+
// Look back at previous tokens to determine current context //
|
|
138
|
+
// ========================================================================== //
|
|
139
|
+
let prev_type = "", prev_prev_type = "", count = 0;
|
|
140
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
141
|
+
const t = tokens[j];
|
|
142
|
+
if (t.type !== TOKEN_TYPES.TEXT && t.type !== TOKEN_TYPES.COMMENT) {
|
|
143
|
+
if (count === 0) prev_type = t.type;
|
|
144
|
+
else if (count === 1) prev_prev_type = t.type;
|
|
145
|
+
count++; if (count >= 2) break;
|
|
391
146
|
}
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
)
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
else if (current_char === "\\") {
|
|
411
|
-
temp_str = concatEscape(src, i);
|
|
412
|
-
i += temp_str.length - 1;
|
|
413
|
-
updateMetadata(temp_str);
|
|
414
|
-
temp_str = temp_str.trim();
|
|
415
|
-
if (temp_str && temp_str.length > 0) {
|
|
416
|
-
// Add Token
|
|
417
|
-
addToken(TOKEN_TYPES.ESCAPE, temp_str);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// ========================================================================== //
|
|
150
|
+
// Check for structural characters ([ ], ( ), @_, _@) //
|
|
151
|
+
// ========================================================================== //
|
|
152
|
+
|
|
153
|
+
if (char === "[" && !isInAtBlockBody) {
|
|
154
|
+
let idPeek = ""; for (let j = i + 1; j < src.length && !/[=\]:#]/.test(src[j]); j++) idPeek += src[j];
|
|
155
|
+
if (idPeek.trim() !== end_keyword) depth_stack.push("B");
|
|
156
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, char); isInHeader = true;
|
|
157
|
+
} else if (char === "]" && isInHeader) {
|
|
158
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, char); isInHeader = false;
|
|
159
|
+
// Reliable depth pop on [end]
|
|
160
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
161
|
+
const t = tokens[j];
|
|
162
|
+
if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
|
|
163
|
+
if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
|
|
164
|
+
break;
|
|
418
165
|
}
|
|
419
166
|
}
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
167
|
+
} else if (char === "(" && !isInAtBlockBody) {
|
|
168
|
+
addToken(TOKEN_TYPES.OPEN_PAREN, char); isInHeader = true;
|
|
169
|
+
} else if (char === ")" && isInHeader) {
|
|
170
|
+
addToken(TOKEN_TYPES.CLOSE_PAREN, char); isInHeader = false;
|
|
171
|
+
} else if (char === "@" && next === "_" && (!isInAtBlockBody || isAtBlockEnd(src, i))) {
|
|
172
|
+
let idPeek = ""; for (let j = i + 2; j < src.length && !/[_@:#]/.test(src[j]); j++) idPeek += src[j];
|
|
173
|
+
if (idPeek.trim() !== end_keyword) depth_stack.push("A");
|
|
174
|
+
addToken(TOKEN_TYPES.OPEN_AT, "@_"); i++; isInHeader = true;
|
|
175
|
+
} else if (char === "_" && next === "@" && (isInHeader || isInAtBlockBody)) {
|
|
176
|
+
addToken(TOKEN_TYPES.CLOSE_AT, "_@"); i++;
|
|
177
|
+
for (let j = tokens.length - 1; j >= 0; j--) {
|
|
178
|
+
const t = tokens[j];
|
|
179
|
+
if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
|
|
180
|
+
if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
|
|
181
|
+
break;
|
|
429
182
|
}
|
|
430
183
|
}
|
|
184
|
+
isInHeader = true; isInAtBlockBody = false;
|
|
185
|
+
} else if (char === ";" && isInHeader) {
|
|
186
|
+
addToken(TOKEN_TYPES.SEMICOLON, char); isInHeader = false; isInAtBlockBody = true;
|
|
187
|
+
} else if (char === "=" && isInHeader && !isInAtBlockBody) {
|
|
188
|
+
addToken(TOKEN_TYPES.EQUAL, char);
|
|
189
|
+
} else if (char === ":" && isInHeader && !isInAtBlockBody && (prev_type === TOKEN_TYPES.IDENTIFIER || prev_type === TOKEN_TYPES.CLOSE_AT)) {
|
|
190
|
+
addToken(TOKEN_TYPES.COLON, char);
|
|
191
|
+
} else if (char === "," && isInHeader) {
|
|
192
|
+
addToken(TOKEN_TYPES.COMMA, char);
|
|
193
|
+
} else if (char === "-" && next === ">" && (isInHeader || prev_type === TOKEN_TYPES.CLOSE_PAREN)) {
|
|
194
|
+
addToken(TOKEN_TYPES.THIN_ARROW, "->"); i++;
|
|
195
|
+
} else if (char === "\"" && isInHeader) {
|
|
196
|
+
const quote = concatQuote(src, i); addToken(TOKEN_TYPES.VALUE, quote); i += quote.length - 1;
|
|
197
|
+
} else if (char === "\\") {
|
|
198
|
+
const esc = concatEscape(src, i); addToken(TOKEN_TYPES.ESCAPE, esc); i += esc.length - 1;
|
|
199
|
+
} else if (char === "#" && !isInAtBlockBody) {
|
|
200
|
+
let comm = ""; for (; i < src.length && src[i] !== "\n"; i++) comm += src[i];
|
|
201
|
+
addToken(TOKEN_TYPES.COMMENT, comm, comm); i--;
|
|
202
|
+
} else if (char === "\n" && !isInAtBlockBody) {
|
|
203
|
+
advance(char);
|
|
204
|
+
} else {
|
|
431
205
|
// ========================================================================== //
|
|
432
|
-
//
|
|
433
|
-
// ========================================================================== //
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
// Add Token
|
|
449
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedStr);
|
|
450
|
-
// Update Previous Value
|
|
451
|
-
previous_value = block_id;
|
|
452
|
-
}
|
|
453
|
-
}
|
|
454
|
-
// ========================================================================== //
|
|
455
|
-
// Token: Block Value //
|
|
456
|
-
// ========================================================================== //
|
|
457
|
-
else if (
|
|
458
|
-
(previous_value === "=" ||
|
|
459
|
-
previous_value === BLOCKCOMMA ||
|
|
460
|
-
previous_value === BLOCKCOLON ||
|
|
461
|
-
previous_value === block_value) &&
|
|
462
|
-
!scope_state
|
|
463
|
-
) {
|
|
464
|
-
temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
|
|
465
|
-
i += temp_str.length - 1;
|
|
466
|
-
const nextToken = peek(src, i, 1);
|
|
467
|
-
// Update Metadata
|
|
468
|
-
updateMetadata(temp_str);
|
|
469
|
-
if (temp_str.trim()) {
|
|
470
|
-
// Add token
|
|
471
|
-
switch (nextToken) {
|
|
472
|
-
case ":":
|
|
473
|
-
const trimmedKey = temp_str.trim();
|
|
474
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
475
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
476
|
-
previous_value = block_id_2;
|
|
477
|
-
break;
|
|
478
|
-
default:
|
|
479
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
480
|
-
previous_value = block_value;
|
|
481
|
-
break;
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
|
-
// ========================================================================== //
|
|
486
|
-
// Token: Inline Identifier //
|
|
487
|
-
// ========================================================================== //
|
|
488
|
-
else if (previous_value === "->" && !scope_state) {
|
|
489
|
-
temp_str = concatChar(src, i, ["(", ")", ":"]);
|
|
490
|
-
i += temp_str.length - 1;
|
|
491
|
-
const nextToken = peek(src, i, 1);
|
|
492
|
-
// Update Metadata
|
|
493
|
-
updateMetadata(temp_str);
|
|
494
|
-
if (temp_str.trim()) {
|
|
495
|
-
// Add Token
|
|
496
|
-
switch (nextToken) {
|
|
497
|
-
case ":":
|
|
498
|
-
const trimmedKey = temp_str.trim();
|
|
499
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
500
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
501
|
-
previous_value = inline_id_2;
|
|
502
|
-
break;
|
|
503
|
-
default:
|
|
504
|
-
const trimmedId = temp_str.trim();
|
|
505
|
-
validateIdentifier(trimmedId, "Inline Identifier");
|
|
506
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedId);
|
|
507
|
-
previous_value = inline_id;
|
|
508
|
-
break;
|
|
509
|
-
}
|
|
510
|
-
}
|
|
511
|
-
}
|
|
512
|
-
// ========================================================================== //
|
|
513
|
-
// Token: Inline Value //
|
|
514
|
-
// ========================================================================== //
|
|
515
|
-
else if (
|
|
516
|
-
(previous_value === "(" ||
|
|
517
|
-
previous_value === INLINECOLON ||
|
|
518
|
-
previous_value === INLINECOMMA ||
|
|
519
|
-
previous_value === inline_value) &&
|
|
520
|
-
!scope_state
|
|
521
|
-
) {
|
|
522
|
-
temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
|
|
523
|
-
i += temp_str.length - 1;
|
|
524
|
-
// Update Metadata
|
|
525
|
-
updateMetadata(temp_str);
|
|
526
|
-
if (temp_str.trim()) {
|
|
527
|
-
// Add Token
|
|
528
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
529
|
-
// Update Previous Value
|
|
530
|
-
previous_value = inline_value;
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
// ========================================================================== //
|
|
534
|
-
// Token: At Identifier //
|
|
535
|
-
// ========================================================================== //
|
|
536
|
-
else if (previous_value === "@_") {
|
|
537
|
-
temp_str = concatChar(src, i, ["_", ":"]);
|
|
538
|
-
i += temp_str.length - 1;
|
|
539
|
-
// Update Metadata
|
|
540
|
-
updateMetadata(temp_str);
|
|
541
|
-
if (temp_str.trim()) {
|
|
542
|
-
const trimmedStr = temp_str.trim();
|
|
543
|
-
if (trimmedStr !== end_keyword) {
|
|
544
|
-
validateIdentifier(trimmedStr, "At-Block Identifier");
|
|
206
|
+
// Capture plain text or Identifier values //
|
|
207
|
+
// ========================================================================== //
|
|
208
|
+
const isValueContext = (prev_type === TOKEN_TYPES.COLON || prev_type === TOKEN_TYPES.EQUAL);
|
|
209
|
+
const context = concatText(src, i, isInHeader, isInAtBlockBody, isValueContext);
|
|
210
|
+
if (context.length > 0) {
|
|
211
|
+
if (isInHeader) {
|
|
212
|
+
const trimmed = context.trim();
|
|
213
|
+
if ((prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT) && trimmed === end_keyword) {
|
|
214
|
+
addToken(TOKEN_TYPES.END_KEYWORD, trimmed, context);
|
|
215
|
+
} else if (trimmed.length > 0) {
|
|
216
|
+
let isNextColon = false;
|
|
217
|
+
for (let j = i + context.length; j < src.length; j++) {
|
|
218
|
+
const c = src[j];
|
|
219
|
+
if (c === " " || c === "\t" || c === "\n") continue;
|
|
220
|
+
if (c === ":") isNextColon = true;
|
|
221
|
+
break;
|
|
545
222
|
}
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
i += temp_str.length - 1;
|
|
557
|
-
const nextToken = peek(src, i, 1);
|
|
558
|
-
// Update Metadata
|
|
559
|
-
updateMetadata(temp_str);
|
|
560
|
-
if (temp_str.trim()) {
|
|
561
|
-
switch (nextToken) {
|
|
562
|
-
case ":":
|
|
563
|
-
const trimmedKey = temp_str.trim();
|
|
564
|
-
validateIdentifier(trimmedKey, "Argument Key");
|
|
565
|
-
addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
|
|
566
|
-
previous_value = at_id_2;
|
|
567
|
-
break;
|
|
568
|
-
default:
|
|
569
|
-
addToken(TOKEN_TYPES.VALUE, temp_str);
|
|
570
|
-
previous_value = at_value;
|
|
571
|
-
break;
|
|
223
|
+
|
|
224
|
+
const isBlockStart = (prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT);
|
|
225
|
+
const isMapperHead = (prev_type === TOKEN_TYPES.OPEN_PAREN && prev_prev_type === TOKEN_TYPES.THIN_ARROW);
|
|
226
|
+
const isMandatoryId = (isNextColon || prev_type === TOKEN_TYPES.THIN_ARROW);
|
|
227
|
+
|
|
228
|
+
if (isBlockStart || isMapperHead || isMandatoryId) {
|
|
229
|
+
validateIdentifier(trimmed, character + context.indexOf(trimmed));
|
|
230
|
+
addToken(TOKEN_TYPES.IDENTIFIER, trimmed, context);
|
|
231
|
+
} else {
|
|
232
|
+
addToken(TOKEN_TYPES.VALUE, trimmed, context);
|
|
572
233
|
}
|
|
234
|
+
} else {
|
|
235
|
+
advance(context);
|
|
573
236
|
}
|
|
237
|
+
} else {
|
|
238
|
+
addToken(TOKEN_TYPES.TEXT, context);
|
|
574
239
|
}
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
|
|
579
|
-
temp_str = concatChar(src, i, ["]", "_"]);
|
|
580
|
-
i += temp_str.length - 1;
|
|
581
|
-
// Update Metadata
|
|
582
|
-
updateMetadata(temp_str);
|
|
583
|
-
if (temp_str.trim()) {
|
|
584
|
-
addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
|
|
585
|
-
// Update Previous Value
|
|
586
|
-
previous_value = end_keyword;
|
|
587
|
-
scope_state = false;
|
|
588
|
-
}
|
|
589
|
-
}
|
|
590
|
-
// ========================================================================== //
|
|
591
|
-
// Token: Comment //
|
|
592
|
-
// ========================================================================== //
|
|
593
|
-
else if (current_char === "#") {
|
|
594
|
-
temp_str = concatChar(src, i, ["\n"]);
|
|
595
|
-
// Update Metadata
|
|
596
|
-
updateMetadata(temp_str);
|
|
597
|
-
if (temp_str.trim()) {
|
|
598
|
-
i += temp_str.length - 1;
|
|
599
|
-
addToken(TOKEN_TYPES.COMMENT, temp_str);
|
|
600
|
-
}
|
|
601
|
-
}
|
|
602
|
-
// ========================================================================== //
|
|
603
|
-
// Token: Text //
|
|
604
|
-
// ========================================================================== //
|
|
605
|
-
else {
|
|
606
|
-
if (previous_value === "_@+") {
|
|
607
|
-
// Strictly wait for semicolon or arguments on the same line.
|
|
608
|
-
// No more heuristic lookahead.
|
|
609
|
-
}
|
|
610
|
-
context = concatText(src, i, scope_state, [
|
|
611
|
-
[":", previous_value === inline_id_2],
|
|
612
|
-
[",", previous_value === block_value || previous_value === at_value || previous_value === inline_value],
|
|
613
|
-
[":", (previous_value === "_@+" && !scope_state) || previous_value === at_value],
|
|
614
|
-
[";", previous_value === at_value],
|
|
615
|
-
[")", previous_value === inline_value]
|
|
616
|
-
]);
|
|
617
|
-
i += context.length - 1;
|
|
618
|
-
// Update Metadata
|
|
619
|
-
updateMetadata(context);
|
|
620
|
-
if (context.trim()) {
|
|
621
|
-
addToken(TOKEN_TYPES.TEXT, context);
|
|
622
|
-
}
|
|
623
|
-
}
|
|
240
|
+
i += context.length - 1;
|
|
241
|
+
} else {
|
|
242
|
+
addToken(TOKEN_TYPES.TEXT, char);
|
|
624
243
|
}
|
|
625
|
-
context = "";
|
|
626
|
-
temp_str = "";
|
|
627
244
|
}
|
|
628
|
-
return tokens;
|
|
629
|
-
} else {
|
|
630
|
-
lexerError([
|
|
631
|
-
`{line}<$red:Invalid SomMark syntax:$> ${src === "" ? "<$yellow: Got empty string '' $>" : `<$yellow:Expected source input to be a string, got$> <$blue: '${typeof src}'$>`}{line}`
|
|
632
|
-
]);
|
|
633
245
|
}
|
|
246
|
+
// ========================================================================== //
|
|
247
|
+
// Finalize with End-of-File token //
|
|
248
|
+
// ========================================================================== //
|
|
249
|
+
addToken(TOKEN_TYPES.EOF, "");
|
|
250
|
+
return tokens;
|
|
634
251
|
}
|
|
635
252
|
|
|
636
253
|
export default lexer;
|