sommark 3.3.3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -82
- package/assets/logo.json +28 -0
- package/assets/smark.logo.png +0 -0
- package/assets/smark.logo.svg +21 -0
- package/cli/cli.mjs +8 -16
- package/cli/commands/build.js +24 -4
- package/cli/commands/color.js +22 -26
- package/cli/commands/help.js +10 -10
- package/cli/commands/init.js +19 -42
- package/cli/commands/print.js +20 -12
- package/cli/commands/show.js +4 -0
- package/cli/commands/version.js +6 -0
- package/cli/constants.js +9 -5
- package/cli/helpers/config.js +11 -0
- package/cli/helpers/file.js +17 -6
- package/cli/helpers/transpile.js +7 -8
- package/core/errors.js +49 -25
- package/core/formats.js +7 -3
- package/core/formatter.js +215 -0
- package/core/helpers/config-loader.js +37 -56
- package/core/labels.js +21 -9
- package/core/lexer.js +491 -212
- package/core/modules.js +164 -0
- package/core/parser.js +516 -389
- package/core/tokenTypes.js +36 -1
- package/core/transpiler.js +237 -151
- package/core/validator.js +79 -0
- package/formatter/mark.js +203 -43
- package/formatter/tag.js +202 -32
- package/grammar.ebnf +57 -50
- package/helpers/colorize.js +26 -13
- package/helpers/escapeHTML.js +13 -6
- package/helpers/kebabize.js +6 -0
- package/helpers/peek.js +9 -0
- package/helpers/removeChar.js +26 -13
- package/helpers/safeDataParser.js +114 -0
- package/helpers/utils.js +140 -158
- package/index.js +198 -188
- package/mappers/languages/html.js +105 -213
- package/mappers/languages/json.js +122 -171
- package/mappers/languages/markdown.js +355 -108
- package/mappers/languages/mdx.js +76 -114
- package/mappers/languages/xml.js +114 -0
- package/mappers/mapper.js +152 -123
- package/mappers/shared/index.js +22 -0
- package/package.json +26 -6
- package/SOMMARK-SPEC.md +0 -481
- package/cli/commands/list.js +0 -124
- package/constants/html_tags.js +0 -146
- package/core/pluginManager.js +0 -149
- package/core/plugins/comment-remover.js +0 -47
- package/core/plugins/module-system.js +0 -176
- package/core/plugins/raw-content-plugin.js +0 -78
- package/core/plugins/rules-validation-plugin.js +0 -231
- package/core/plugins/sommark-format.js +0 -244
- package/coverage_test.js +0 -21
- package/debug.js +0 -15
- package/helpers/camelize.js +0 -2
- package/helpers/defaultTheme.js +0 -3
- package/test_format_fix.js +0 -42
- package/v3-todo.smark +0 -73
package/core/lexer.js
CHANGED
|
@@ -5,247 +5,526 @@ import { lexerError } from "./errors.js";
|
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* SomMark Lexer
|
|
8
|
+
*
|
|
9
|
+
* Transforms a raw SomMark source string into a stream of tokens.
|
|
10
|
+
* It uses a state-machine approach to handle complex contexts like At-Block bodies,
|
|
11
|
+
* quoted values, and hierarchical headers.
|
|
12
|
+
*
|
|
13
|
+
* @param {string} src - The raw SomMark source code.
|
|
14
|
+
* @param {string} [filename="anonymous"] - Source filename for error reporting.
|
|
15
|
+
* @returns {Array<Object>} Array of token objects.
|
|
8
16
|
*/
|
|
17
|
+
function lexer(src, filename = "anonymous") {
|
|
18
|
+
if (!src || typeof src !== "string") return [];
|
|
19
|
+
const tokens = [];
|
|
9
20
|
|
|
10
|
-
|
|
11
|
-
//
|
|
12
|
-
|
|
21
|
+
let prev_type = "";
|
|
22
|
+
let last_non_junk_type = ""; // Tracks the last real token for context guessing
|
|
23
|
+
let i = 0;
|
|
24
|
+
let line = 0, character = 0;
|
|
13
25
|
|
|
14
|
-
|
|
26
|
+
// State Variables
|
|
27
|
+
let isInAtBlockBody = false;
|
|
28
|
+
let isInQuote = false;
|
|
29
|
+
let isInHeader = false; // Tracks if we are in a structural header context
|
|
30
|
+
let isInInlineHead = false; // Specific for (key:val) after ->
|
|
31
|
+
let parenDepth = 0; // To track balanced parentheses in inlines
|
|
32
|
+
let delimiterStack = []; // To track block nesting for body mode
|
|
15
33
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
34
|
+
/**
|
|
35
|
+
* Adds a token to the stream and updates the scanner's position tracking.
|
|
36
|
+
*
|
|
37
|
+
* @param {string} type - The type of token (from TOKEN_TYPES).
|
|
38
|
+
* @param {string} value - The literal text content of the token.
|
|
39
|
+
*/
|
|
40
|
+
function addToken(type, value) {
|
|
41
|
+
const start = { line, character };
|
|
21
42
|
|
|
22
|
-
//
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
43
|
+
// Update position
|
|
44
|
+
const parts = value.split("\n");
|
|
45
|
+
if (parts.length > 1) {
|
|
46
|
+
line += parts.length - 1;
|
|
47
|
+
character = parts[parts.length - 1].length;
|
|
48
|
+
} else {
|
|
49
|
+
character += value.length;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const end = { line, character };
|
|
53
|
+
tokens.push({
|
|
54
|
+
type,
|
|
55
|
+
value,
|
|
56
|
+
source: filename,
|
|
57
|
+
range: { start, end },
|
|
58
|
+
depth: delimiterStack.length
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
prev_type = type;
|
|
62
|
+
if (type !== TOKEN_TYPES.WHITESPACE && type !== TOKEN_TYPES.COMMENT) {
|
|
63
|
+
if (type !== TOKEN_TYPES.TEXT || value.trim() !== "") {
|
|
64
|
+
last_non_junk_type = type;
|
|
65
|
+
}
|
|
31
66
|
}
|
|
32
|
-
text += char;
|
|
33
|
-
if (char === "\"") return text;
|
|
34
67
|
}
|
|
35
|
-
lexerError(["[Lexer Error]: Unclosed quote"]);
|
|
36
|
-
return text;
|
|
37
|
-
}
|
|
38
68
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
if (shouldStop) break;
|
|
69
|
-
text += char;
|
|
69
|
+
/**
|
|
70
|
+
* Looks ahead to find the next structural character, skipping whitespace and comments.
|
|
71
|
+
* Used for context-guessing (e.g., distinguishing KEY from VALUE).
|
|
72
|
+
*
|
|
73
|
+
* @param {number} start - Index to start peeking from.
|
|
74
|
+
* @returns {string|null} The next structural character or null if EOF.
|
|
75
|
+
*/
|
|
76
|
+
function peekStructural(start) {
|
|
77
|
+
let j = start;
|
|
78
|
+
while (j < src.length) {
|
|
79
|
+
const c = src[j];
|
|
80
|
+
if (c === " " || c === "\t" || c === "\n" || c === "\r") {
|
|
81
|
+
j++;
|
|
82
|
+
continue;
|
|
83
|
+
}
|
|
84
|
+
if (c === "#") {
|
|
85
|
+
while (j < src.length && src[j] !== "\n") j++;
|
|
86
|
+
continue;
|
|
87
|
+
}
|
|
88
|
+
if (c === "\\") {
|
|
89
|
+
// Escape sequence: jump over the backslash and the escaped char
|
|
90
|
+
j += 2;
|
|
91
|
+
continue;
|
|
92
|
+
}
|
|
93
|
+
return c;
|
|
94
|
+
}
|
|
95
|
+
return null;
|
|
70
96
|
}
|
|
71
|
-
return text;
|
|
72
|
-
}
|
|
73
97
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
98
|
+
while (i < src.length) {
|
|
99
|
+
// --- PHASE 1: AT-BLOCK BODY MODE ---
|
|
100
|
+
// In this mode, we consume everything as raw text until we hit the @_ marker.
|
|
101
|
+
if (isInAtBlockBody) {
|
|
102
|
+
if (src[i] === "@" && src[i + 1] === "_") {
|
|
103
|
+
isInAtBlockBody = false;
|
|
104
|
+
} else {
|
|
105
|
+
let body = "";
|
|
106
|
+
while (i < src.length) {
|
|
107
|
+
// Handle escapes in At-Block Body
|
|
108
|
+
if (src[i] === "\\" && i + 1 < src.length) {
|
|
109
|
+
body += src[i + 1];
|
|
110
|
+
i += 2;
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
// Stop at end marker
|
|
114
|
+
if (src[i] === "@" && src[i + 1] === "_") {
|
|
115
|
+
break;
|
|
116
|
+
}
|
|
117
|
+
body += src[i];
|
|
118
|
+
i++;
|
|
119
|
+
}
|
|
120
|
+
if (body.length > 0) {
|
|
121
|
+
addToken(TOKEN_TYPES.TEXT, body);
|
|
122
|
+
}
|
|
123
|
+
continue;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
const char = src[i];
|
|
127
|
+
const next = src[i + 1];
|
|
84
128
|
|
|
85
|
-
//
|
|
86
|
-
//
|
|
87
|
-
|
|
129
|
+
// --- PHASE 2: QUOTE MODE ---
|
|
130
|
+
// Handles balanced strings and allows prefix layers (js{}, p{}) inside them.
|
|
131
|
+
if (isInQuote) {
|
|
132
|
+
let quoteValue = "";
|
|
133
|
+
const quoteChar = tokens[tokens.length - 1].value;
|
|
134
|
+
while (i < src.length) {
|
|
135
|
+
if (src[i] === "\\" && i + 1 < src.length) {
|
|
136
|
+
// Inside quotes, we split escapes if we want to match reliability tests
|
|
137
|
+
if (quoteValue.length > 0) addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
138
|
+
addToken(TOKEN_TYPES.ESCAPE, "\\" + src[i + 1]);
|
|
139
|
+
quoteValue = "";
|
|
140
|
+
i += 2;
|
|
141
|
+
continue;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Support Prefix Layers inside quotes!
|
|
145
|
+
if ((src[i] === "j" && src[i+1] === "s" && src[i+2] === "{") || (src[i] === "p" && src[i+1] === "{")) {
|
|
146
|
+
const isJS = (src[i] === "j");
|
|
147
|
+
if (quoteValue.length > 0) {
|
|
148
|
+
addToken(TOKEN_TYPES.VALUE, quoteValue);
|
|
149
|
+
quoteValue = "";
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
let braceDepth = 1;
|
|
153
|
+
let prefixValue = isJS ? "js{" : "p{";
|
|
154
|
+
i += isJS ? 3 : 2;
|
|
88
155
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
const startPos = { line, character }; advance(rawValue || value);
|
|
112
|
-
const endPos = { line, character };
|
|
113
|
-
tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
|
|
114
|
-
}
|
|
156
|
+
let internalString = null;
|
|
157
|
+
while (i < src.length && braceDepth > 0) {
|
|
158
|
+
const c = src[i];
|
|
159
|
+
const n = src[i + 1];
|
|
160
|
+
if (internalString) {
|
|
161
|
+
if (c === "\\" && (n === internalString || n === "\\")) {
|
|
162
|
+
prefixValue += c + n;
|
|
163
|
+
i += 2;
|
|
164
|
+
continue;
|
|
165
|
+
}
|
|
166
|
+
if (c === internalString) internalString = null;
|
|
167
|
+
} else {
|
|
168
|
+
if (c === "\"" || c === "'") internalString = c;
|
|
169
|
+
else if (c === "{") braceDepth++;
|
|
170
|
+
else if (c === "}") braceDepth--;
|
|
171
|
+
}
|
|
172
|
+
prefixValue += c;
|
|
173
|
+
i++;
|
|
174
|
+
}
|
|
175
|
+
addToken(isJS ? TOKEN_TYPES.PREFIX_JS : TOKEN_TYPES.PREFIX_P, prefixValue);
|
|
176
|
+
continue;
|
|
177
|
+
}
|
|
115
178
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
179
|
+
if (src[i] === quoteChar) {
|
|
180
|
+
// Guess role based on next structural character
|
|
181
|
+
let nextStructural = peekStructural(i + 1);
|
|
182
|
+
let tokenType = (isInHeader || isInInlineHead) && (nextStructural === ":" || nextStructural === "=")
|
|
183
|
+
? TOKEN_TYPES.KEY
|
|
184
|
+
: TOKEN_TYPES.VALUE;
|
|
121
185
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
186
|
+
if (quoteValue.length > 0) addToken(tokenType, quoteValue);
|
|
187
|
+
addToken(TOKEN_TYPES.QUOTE, quoteChar);
|
|
188
|
+
isInQuote = false;
|
|
189
|
+
i++;
|
|
190
|
+
break;
|
|
191
|
+
}
|
|
192
|
+
quoteValue += src[i];
|
|
193
|
+
i++;
|
|
194
|
+
}
|
|
195
|
+
if (!isInQuote) continue;
|
|
125
196
|
}
|
|
126
|
-
}
|
|
127
197
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
// ========================================================================== //
|
|
131
|
-
|
|
132
|
-
for (let i = 0; i < src.length; i++) {
|
|
133
|
-
const char = src[i];
|
|
134
|
-
const next = peek(src, i, 1);
|
|
198
|
+
// --- PHASE 3: STRUCTURAL PARSING ---
|
|
199
|
+
// Handles markers, whitespace, and structural symbols.
|
|
135
200
|
|
|
136
|
-
//
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
201
|
+
// WHITESPACE
|
|
202
|
+
if (char === "\n") {
|
|
203
|
+
addToken(TOKEN_TYPES.WHITESPACE, char);
|
|
204
|
+
i++;
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if (char === " " || char === "\t" || char === "\r") {
|
|
209
|
+
let ws = "";
|
|
210
|
+
while (i < src.length && (src[i] === " " || src[i] === "\t" || src[i] === "\r")) {
|
|
211
|
+
ws += src[i];
|
|
212
|
+
i++;
|
|
213
|
+
}
|
|
214
|
+
addToken(TOKEN_TYPES.WHITESPACE, ws);
|
|
215
|
+
continue;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// COMMENTS
|
|
219
|
+
if (char === "#") {
|
|
220
|
+
let comm = "";
|
|
221
|
+
while (i < src.length && src[i] !== "\n") {
|
|
222
|
+
comm += src[i];
|
|
223
|
+
i++;
|
|
224
|
+
}
|
|
225
|
+
addToken(TOKEN_TYPES.COMMENT, comm);
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// ESCAPE CHARACTER (Sequence-based)
|
|
230
|
+
if (char === "\\") {
|
|
231
|
+
const seq = i + 1 < src.length ? "\\" + src[i + 1] : "\\";
|
|
232
|
+
addToken(TOKEN_TYPES.ESCAPE, seq);
|
|
233
|
+
i += seq.length;
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// PREFIX LAYERS (js{...} or p{...})
|
|
238
|
+
if ((char === "j" && next === "s" && src[i+2] === "{") || (char === "p" && next === "{")) {
|
|
239
|
+
const isJS = (char === "j");
|
|
240
|
+
const isP = (char === "p");
|
|
241
|
+
|
|
242
|
+
// Context Check
|
|
243
|
+
const top = (delimiterStack.length > 0) ? delimiterStack[delimiterStack.length - 1] : null;
|
|
244
|
+
const isInBlockHeader = isInHeader && top === "[";
|
|
245
|
+
const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody && parenDepth === 0;
|
|
246
|
+
|
|
247
|
+
let allowed = false;
|
|
248
|
+
if (isJS && isInBlockHeader) allowed = true;
|
|
249
|
+
if (isP && (isInBlockHeader || isInNormalText)) allowed = true;
|
|
250
|
+
|
|
251
|
+
if (allowed) {
|
|
252
|
+
let braceDepth = 1;
|
|
253
|
+
let prefixValue = isJS ? "js{" : "p{";
|
|
254
|
+
i += isJS ? 3 : 2;
|
|
255
|
+
|
|
256
|
+
let inString = null; // Track if we are inside " " or ' '
|
|
257
|
+
while (i < src.length && braceDepth > 0) {
|
|
258
|
+
const c = src[i];
|
|
259
|
+
const n = src[i + 1];
|
|
260
|
+
|
|
261
|
+
if (inString) {
|
|
262
|
+
if (c === "\\" && (n === inString || n === "\\")) {
|
|
263
|
+
prefixValue += c + n;
|
|
264
|
+
i += 2;
|
|
265
|
+
continue;
|
|
266
|
+
}
|
|
267
|
+
if (c === inString) inString = null;
|
|
268
|
+
} else {
|
|
269
|
+
if (c === "\"" || c === "'") inString = c;
|
|
270
|
+
else if (c === "{") braceDepth++;
|
|
271
|
+
else if (c === "}") braceDepth--;
|
|
272
|
+
}
|
|
273
|
+
prefixValue += c;
|
|
274
|
+
i++;
|
|
165
275
|
}
|
|
276
|
+
addToken(isJS ? TOKEN_TYPES.PREFIX_JS : TOKEN_TYPES.PREFIX_P, prefixValue);
|
|
277
|
+
continue;
|
|
166
278
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
279
|
+
// If not allowed, it will fall through to normal word scanning
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// MULTI-CHAR MARKERS
|
|
283
|
+
if (char === "@" && next === "_") {
|
|
284
|
+
addToken(TOKEN_TYPES.OPEN_AT, "@_");
|
|
285
|
+
i += 2;
|
|
286
|
+
if (!isInAtBlockBody) delimiterStack.push("@");
|
|
287
|
+
isInHeader = true; // At-Blocks start with a header part
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
290
|
+
if (char === "-" && next === ">") {
|
|
291
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
292
|
+
addToken(TOKEN_TYPES.TEXT, "-");
|
|
293
|
+
i++; // Swallowed one char
|
|
294
|
+
} else {
|
|
295
|
+
addToken(TOKEN_TYPES.THIN_ARROW, "->");
|
|
296
|
+
i += 2;
|
|
297
|
+
isInInlineHead = true; // The following ( ) will be structural
|
|
298
|
+
}
|
|
299
|
+
continue;
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// SINGLE-CHAR MARKERS
|
|
303
|
+
if (char === "[") {
|
|
304
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
305
|
+
addToken(TOKEN_TYPES.TEXT, "[");
|
|
306
|
+
} else {
|
|
307
|
+
addToken(TOKEN_TYPES.OPEN_BRACKET, "[");
|
|
308
|
+
delimiterStack.push("[");
|
|
309
|
+
isInHeader = true;
|
|
310
|
+
}
|
|
311
|
+
i++;
|
|
312
|
+
continue;
|
|
313
|
+
}
|
|
314
|
+
if (char === "_" && next === "@") {
|
|
315
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
316
|
+
addToken(TOKEN_TYPES.TEXT, "_@");
|
|
317
|
+
} else {
|
|
318
|
+
const lastRealType = last_non_junk_type;
|
|
319
|
+
addToken(TOKEN_TYPES.CLOSE_AT, "_@");
|
|
320
|
+
const top = delimiterStack[delimiterStack.length - 1];
|
|
321
|
+
if (top === "@") {
|
|
322
|
+
if (lastRealType === TOKEN_TYPES.END_KEYWORD) {
|
|
323
|
+
delimiterStack.pop();
|
|
324
|
+
isInAtBlockBody = false;
|
|
325
|
+
isInHeader = false;
|
|
326
|
+
}
|
|
182
327
|
}
|
|
183
328
|
}
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
329
|
+
i += 2;
|
|
330
|
+
continue;
|
|
331
|
+
}
|
|
332
|
+
if (char === "]") {
|
|
333
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
334
|
+
addToken(TOKEN_TYPES.TEXT, "]");
|
|
335
|
+
} else {
|
|
336
|
+
addToken(TOKEN_TYPES.CLOSE_BRACKET, "]");
|
|
337
|
+
isInHeader = false;
|
|
338
|
+
}
|
|
339
|
+
i++;
|
|
340
|
+
continue;
|
|
341
|
+
}
|
|
342
|
+
if (char === "(") {
|
|
343
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
344
|
+
addToken(TOKEN_TYPES.TEXT, "(");
|
|
345
|
+
parenDepth++;
|
|
346
|
+
} else {
|
|
347
|
+
addToken(TOKEN_TYPES.OPEN_PAREN, "(");
|
|
348
|
+
parenDepth++;
|
|
349
|
+
}
|
|
350
|
+
i++;
|
|
351
|
+
continue;
|
|
352
|
+
}
|
|
353
|
+
if (char === ")") {
|
|
354
|
+
if (isInAtBlockBody || (parenDepth > 1 && !isInInlineHead)) {
|
|
355
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
356
|
+
parenDepth--;
|
|
357
|
+
} else if (parenDepth > 0) {
|
|
358
|
+
// This ends the content part if depth drops to 0
|
|
359
|
+
parenDepth--;
|
|
360
|
+
if (parenDepth === 0) {
|
|
361
|
+
addToken(TOKEN_TYPES.CLOSE_PAREN, ")");
|
|
362
|
+
if (isInInlineHead) isInInlineHead = false;
|
|
363
|
+
} else {
|
|
364
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
365
|
+
}
|
|
366
|
+
} else {
|
|
367
|
+
addToken(TOKEN_TYPES.TEXT, ")");
|
|
368
|
+
}
|
|
369
|
+
i++;
|
|
370
|
+
continue;
|
|
371
|
+
}
|
|
372
|
+
if (char === ":") {
|
|
373
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
374
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
375
|
+
} else {
|
|
376
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.VALUE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT];
|
|
377
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
378
|
+
addToken(TOKEN_TYPES.COLON, ":");
|
|
379
|
+
isInHeader = true;
|
|
380
|
+
} else {
|
|
381
|
+
addToken(TOKEN_TYPES.TEXT, ":");
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
i++;
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
387
|
+
if (char === "=") {
|
|
388
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
389
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
390
|
+
} else {
|
|
391
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.KEY, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT];
|
|
392
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
393
|
+
addToken(TOKEN_TYPES.EQUAL, "=");
|
|
394
|
+
} else {
|
|
395
|
+
addToken(TOKEN_TYPES.TEXT, "=");
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
i++;
|
|
399
|
+
continue;
|
|
400
|
+
}
|
|
401
|
+
if (char === ",") {
|
|
402
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
403
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
404
|
+
} else {
|
|
405
|
+
const allowed = [TOKEN_TYPES.VALUE, TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.QUOTE, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT];
|
|
406
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
407
|
+
addToken(TOKEN_TYPES.COMMA, ",");
|
|
408
|
+
} else {
|
|
409
|
+
addToken(TOKEN_TYPES.TEXT, ",");
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
i++;
|
|
413
|
+
continue;
|
|
414
|
+
}
|
|
415
|
+
if (char === ";") {
|
|
416
|
+
if (isInAtBlockBody || (parenDepth > 0 && !isInInlineHead)) {
|
|
417
|
+
addToken(TOKEN_TYPES.TEXT, ";");
|
|
418
|
+
} else {
|
|
419
|
+
const allowed = [TOKEN_TYPES.IDENTIFIER, TOKEN_TYPES.VALUE, TOKEN_TYPES.CLOSE_AT, TOKEN_TYPES.CLOSE_PAREN, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.QUOTE, TOKEN_TYPES.PREFIX_JS, TOKEN_TYPES.PREFIX_P, TOKEN_TYPES.IMPORT, TOKEN_TYPES.USE_MODULE, TOKEN_TYPES.END_KEYWORD, TOKEN_TYPES.TEXT];
|
|
420
|
+
if (allowed.includes(last_non_junk_type)) {
|
|
421
|
+
addToken(TOKEN_TYPES.SEMICOLON, ";");
|
|
422
|
+
isInHeader = false; // Semicolon ends the At-Block header
|
|
423
|
+
// Trigger body mode for At-Blocks
|
|
424
|
+
if (delimiterStack.length > 0) {
|
|
425
|
+
const top = delimiterStack[delimiterStack.length - 1];
|
|
426
|
+
if (top === "@") {
|
|
427
|
+
isInAtBlockBody = true;
|
|
233
428
|
}
|
|
234
|
-
} else {
|
|
235
|
-
advance(context);
|
|
236
429
|
}
|
|
237
430
|
} else {
|
|
238
|
-
addToken(TOKEN_TYPES.TEXT,
|
|
431
|
+
addToken(TOKEN_TYPES.TEXT, ";");
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
i++;
|
|
435
|
+
continue;
|
|
436
|
+
}
|
|
437
|
+
if (char === "\"" || char === "'") {
|
|
438
|
+
const valTriggers = [TOKEN_TYPES.COLON, TOKEN_TYPES.EQUAL, TOKEN_TYPES.COMMA, TOKEN_TYPES.ESCAPE, TOKEN_TYPES.OPEN_BRACKET, TOKEN_TYPES.OPEN_AT];
|
|
439
|
+
const wasValueTrigger = valTriggers.includes(last_non_junk_type);
|
|
440
|
+
addToken(TOKEN_TYPES.QUOTE, char);
|
|
441
|
+
i++;
|
|
442
|
+
// Enable quote mode
|
|
443
|
+
// NOTE: We allow quotes basically anywhere in headers as values/keys
|
|
444
|
+
if (isInHeader || wasValueTrigger) {
|
|
445
|
+
isInQuote = true;
|
|
446
|
+
}
|
|
447
|
+
continue;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// --- PHASE 4: WORD / TEXT SCANNING ---
|
|
451
|
+
// This is the "Fallback" mode where we scan for identifiers, keys, or values.
|
|
452
|
+
// It uses lookahead and context variables to guess the role of a word.
|
|
453
|
+
let word = "";
|
|
454
|
+
// Only Blocks ([ ]) allow ':' in their main identifier.
|
|
455
|
+
// At-Blocks (@_) and Inlines (->( )) do NOT allow ':' in the ID.
|
|
456
|
+
const isStartOfBlockId = (last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET);
|
|
457
|
+
|
|
458
|
+
let stopChars = "[](){}:=;,@_>\"'#\\ \t\n\r";
|
|
459
|
+
if (isStartOfBlockId || (parenDepth > 0 && !isInInlineHead)) {
|
|
460
|
+
stopChars = stopChars.replace(":", "");
|
|
461
|
+
}
|
|
462
|
+
if (!isInHeader && !isInInlineHead) {
|
|
463
|
+
stopChars = "[]@_()\\#\n\r"; // In normal text, stop at markers, comments and newlines
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
while (i < src.length && !stopChars.includes(src[i])) {
|
|
467
|
+
// Lookahead for -> marker in normal text
|
|
468
|
+
if (!isInHeader && src[i] === "-" && src[i+1] === ">") break;
|
|
469
|
+
|
|
470
|
+
// Stop if we hit an ALLOWED prefix trigger
|
|
471
|
+
if ((src[i] === "p" && src[i+1] === "{")) {
|
|
472
|
+
const top = (delimiterStack.length > 0) ? delimiterStack[delimiterStack.length - 1] : null;
|
|
473
|
+
const isInBlockHeader = isInHeader && top === "[";
|
|
474
|
+
const isInNormalText = !isInHeader && !isInInlineHead && !isInAtBlockBody && parenDepth === 0;
|
|
475
|
+
if (isInBlockHeader || isInNormalText) break;
|
|
476
|
+
}
|
|
477
|
+
if (src[i] === "j" && src[i+1] === "s" && src[i+2] === "{") {
|
|
478
|
+
const top = (delimiterStack.length > 0) ? delimiterStack[delimiterStack.length - 1] : null;
|
|
479
|
+
if (isInHeader && top === "[") break;
|
|
480
|
+
}
|
|
481
|
+
word += src[i];
|
|
482
|
+
i++;
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
if (word.length > 0) {
|
|
486
|
+
// Guess role based on context
|
|
487
|
+
if (parenDepth > 0 && !isInInlineHead) {
|
|
488
|
+
// Inside Inline Content (raw text)
|
|
489
|
+
addToken(TOKEN_TYPES.TEXT, word);
|
|
490
|
+
} else if (isInHeader || isInInlineHead) {
|
|
491
|
+
// Inside a structural header context
|
|
492
|
+
const isMainIdentifier = (
|
|
493
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_BRACKET ||
|
|
494
|
+
last_non_junk_type === TOKEN_TYPES.OPEN_AT ||
|
|
495
|
+
(last_non_junk_type === TOKEN_TYPES.OPEN_PAREN && isInInlineHead)
|
|
496
|
+
);
|
|
497
|
+
|
|
498
|
+
if (isMainIdentifier) {
|
|
499
|
+
if (word === end_keyword) {
|
|
500
|
+
addToken(TOKEN_TYPES.END_KEYWORD, word);
|
|
501
|
+
if (delimiterStack[delimiterStack.length - 1] === "[") delimiterStack.pop();
|
|
502
|
+
}
|
|
503
|
+
else if (word === "import") addToken(TOKEN_TYPES.IMPORT, word);
|
|
504
|
+
else if (word === "$use-module") addToken(TOKEN_TYPES.USE_MODULE, word);
|
|
505
|
+
else addToken(TOKEN_TYPES.IDENTIFIER, word);
|
|
506
|
+
} else {
|
|
507
|
+
// Use lookahead to distinguish KEY from VALUE
|
|
508
|
+
const p = peekStructural(i);
|
|
509
|
+
if (p === ":") {
|
|
510
|
+
addToken(TOKEN_TYPES.KEY, word);
|
|
511
|
+
} else {
|
|
512
|
+
addToken(TOKEN_TYPES.VALUE, word);
|
|
513
|
+
}
|
|
239
514
|
}
|
|
240
|
-
i += context.length - 1;
|
|
241
515
|
} else {
|
|
242
|
-
|
|
516
|
+
// Normal text
|
|
517
|
+
addToken(TOKEN_TYPES.TEXT, word);
|
|
518
|
+
}
|
|
519
|
+
} else {
|
|
520
|
+
// Fallback for any unhandled characters
|
|
521
|
+
if (i < src.length) {
|
|
522
|
+
addToken(TOKEN_TYPES.TEXT, src[i]);
|
|
523
|
+
i++;
|
|
243
524
|
}
|
|
244
525
|
}
|
|
245
526
|
}
|
|
246
|
-
|
|
247
|
-
// Finalize with End-of-File token //
|
|
248
|
-
// ========================================================================== //
|
|
527
|
+
|
|
249
528
|
addToken(TOKEN_TYPES.EOF, "");
|
|
250
529
|
return tokens;
|
|
251
530
|
}
|