sommark 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/lexer.js CHANGED
@@ -1,614 +1,253 @@
1
1
  import TOKEN_TYPES from "./tokenTypes.js";
2
2
  import peek from "../helpers/peek.js";
3
- import {
4
- block_value,
5
- block_id,
6
- block_id_2,
7
- block_end,
8
- inline_id,
9
- inline_value,
10
- inline_id_2,
11
- at_id,
12
- at_value,
13
- at_id_2,
14
- at_end,
15
- end_keyword,
16
- BLOCKCOMMA,
17
- ATBLOCKCOMMA,
18
- INLINECOMMA,
19
- BLOCKCOLON,
20
- ATBLOCKCOLON,
21
- INLINECOLON
22
- } from "./labels.js";
23
- import { lexerError, sommarkError } from "./errors.js";
3
+ import { end_keyword } from "./labels.js";
4
+ import { lexerError } from "./errors.js";
5
+
6
+ /**
7
+ * SomMark Lexer
8
+ */
9
+
10
+ // ========================================================================== //
11
+ // Helper Functions //
12
+ // ========================================================================== //
24
13
 
25
14
  const atBlockEndRegex = new RegExp(`^@_\\s*${end_keyword}\\s*_@`);
15
+
16
+ // Checks if we reached the end of an At-Block
26
17
  function isAtBlockEnd(input, index) {
27
18
  const slice = typeof input === "string" ? input.slice(index, index + 100) : input.slice(index, index + 100).join("");
28
19
  return atBlockEndRegex.test(slice);
29
20
  }
30
21
 
31
- const updateNewLine = text => {
32
- if (text && typeof text === "string") {
33
- return text.split("").filter(value => value === "\n").length;
22
+ // Collects characters inside a quoted string
23
+ function concatQuote(input, index) {
24
+ let text = "\"";
25
+ for (let i = index + 1; i < input.length; i++) {
26
+ const char = input[i];
27
+ if (char === "\\" && peek(input, i, 1) === "\"") {
28
+ text += "\\\"";
29
+ i++;
30
+ continue;
31
+ }
32
+ text += char;
33
+ if (char === "\"") return text;
34
34
  }
35
- return;
36
- };
37
-
38
- const updateColumn = (end = 0, textLength) => {
39
- const start = end + 1;
40
- const newEnd = start + textLength - 1;
41
- return { start, end: newEnd };
42
- };
35
+ lexerError(["[Lexer Error]: Unclosed quote"]);
36
+ return text;
37
+ }
43
38
 
44
- function concatText(input, index, scope_state, extraConditions = []) {
39
+ // Collects plain text until a special character is found
40
+ function concatText(input, index, isInHeader, isInAtBlockBody, isLiberalValue = false) {
45
41
  let text = "";
46
- if (index >= input.length) {
47
- return text;
48
- }
49
- if (
50
- (Array.isArray(input) || typeof input === "string") &&
51
- input.length > 0 &&
52
- typeof index === "number" &&
53
- typeof scope_state === "boolean"
54
- ) {
55
- for (let i = index; i < input.length; i++) {
56
- const char = input[i];
57
- const defaultConditions = [
58
- ["[", !scope_state],
59
- ["=", !scope_state],
60
- ["]", !scope_state],
61
- ["(", !scope_state],
62
- ["-", peek(input, i, 1) === ">" && !scope_state],
63
- ["@", peek(input, i, 1) === "_" && (!scope_state || isAtBlockEnd(input, i))],
64
- ["_", peek(input, i, 1) === "@" && !scope_state],
65
- ["#", !scope_state],
66
- ["\\", true]
67
- ];
68
- if (defaultConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
69
- break;
70
- } else if (extraConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
42
+ if (index >= input.length) return text;
43
+ for (let i = index; i < input.length; i++) {
44
+ const char = input[i];
45
+ const stopConditions = [
46
+ ["[", !isInAtBlockBody],
47
+ ["(", !isInAtBlockBody],
48
+ ["#", !isInAtBlockBody && !isLiberalValue],
49
+ ["=", isInHeader && !isInAtBlockBody],
50
+ ["\"", isInHeader],
51
+ ["]", isInHeader],
52
+ [")", isInHeader],
53
+ ["-", peek(input, i, 1) === ">" && (isInHeader || true)],
54
+ ["@", peek(input, i, 1) === "_" && (!isInAtBlockBody || isAtBlockEnd(input, i))],
55
+ ["_", peek(input, i, 1) === "@" && isInHeader],
56
+ ["\\", true],
57
+ [":", isInHeader && !isInAtBlockBody],
58
+ [";", isInHeader],
59
+ [",", isInHeader]
60
+ ];
61
+ let shouldStop = false;
62
+ for (const [stopChar, conditionMet] of stopConditions) {
63
+ if (conditionMet && input.substring(i, i + stopChar.length) === stopChar) {
64
+ shouldStop = true;
71
65
  break;
72
66
  }
73
- text += char;
74
67
  }
75
- return text;
76
- } else {
77
- sommarkError([
78
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
79
- "'input' must be an array and have to be not empty, 'index' must be a number value, and 'scope_state' ",
80
- "must be a boolean.$>{line}."
81
- ]);
68
+ if (shouldStop) break;
69
+ text += char;
82
70
  }
71
+ return text;
83
72
  }
84
73
 
74
+ // Handles backslash escapes in the text
85
75
  function concatEscape(input, index) {
86
- let str = "";
87
- if (index >= input.length) {
88
- return str;
89
- }
90
- const WHITESPACES = [
91
- " ",
92
- "\t",
93
- "\n",
94
- "\r",
95
- "\v",
96
- "\f",
97
- //+++++++//
98
- "\u00A0",
99
- "\u1680",
100
- "\u2000",
101
- "\u2001",
102
- "\u2002",
103
- "\u2003",
104
- "\u2004",
105
- "\u2005",
106
- "\u2006",
107
- "\u2007",
108
- "\u2008",
109
- "\u2009",
110
- "\u200A",
111
- "\u202F",
112
- "\u205F",
113
- "\u3000"
114
- ];
115
- let WHITESPACE_SET = new Set(WHITESPACES);
116
- if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
117
- const nextChar = peek(input, index, 1);
118
- if (input[index] === "\\" && nextChar !== null) {
119
- str += "\\" + nextChar;
120
- } else {
121
- lexerError([
122
- "{line}<$red:Invalid escape sequence$>{N}",
123
- "<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
124
- nextChar === null ? "<$yellow:Found end of file after escape character$>" : "<$yellow:Missing character after escape character$>",
125
- "{line}"
126
- ]);
127
- }
128
- if (WHITESPACE_SET.has(str[1])) {
129
- const matchedCharacter = Array.from(WHITESPACE_SET).find(ch => ch === str[1]);
130
- lexerError([
131
- "{line}<$red:Invalid escape sequence$>{N}",
132
- "<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
133
- `<$yellow:Found$> <$blue:${JSON.stringify(matchedCharacter)}$> <$yellow:after escape character$>{N}`,
134
- "{line}"
135
- ]);
136
- }
137
- return str;
138
- } else {
139
- sommarkError([
140
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
141
- "'input' must be an array and have to be not empty, and 'index' must be a number value.$>{line}"
142
- ]);
143
- }
76
+ if (index >= input.length) return "";
77
+ const nextChar = peek(input, index, 1);
78
+ const WHITESPACES = [" ", "\t", "\n", "\r", "\v", "\f"];
79
+ if (WHITESPACES.includes(nextChar)) lexerError(["[Lexer Error]: Invalid escape sequence (escaped whitespace)"]);
80
+ if (input[index] === "\\" && nextChar !== null) return "\\" + nextChar;
81
+ lexerError(["[Lexer Error]: Invalid escape sequence"]);
82
+ return "";
144
83
  }
145
84
 
146
- function concatChar(input, index, stop_at_char) {
147
- if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
148
- let str = "";
149
- if (index >= input.length) {
150
- return str;
151
- }
152
- if (Array.isArray(stop_at_char) && stop_at_char.length > 0) {
153
- for (let i = index; i < input.length; i++) {
154
- const char = input[i];
155
- if (stop_at_char.includes(char)) {
156
- break;
157
- }
158
- str += char;
159
- }
160
- } else {
161
- sommarkError([
162
- "{line}<$red:Invalid Type:$> <$yellow:Argument 'stop_at_char' must be an array and have to be not empty array$>{line}"
163
- ]);
164
- }
165
- return str;
166
- } else {
167
- sommarkError([
168
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
169
- "'input' must be an array and have to be not empty, 'index' must be a number value$>{line}"
170
- ]);
171
- }
172
- }
85
+ // ========================================================================== //
86
+ // Main Lexer Function //
87
+ // ========================================================================== //
173
88
 
174
89
  function lexer(src) {
175
- if (src && typeof src === "string") {
176
- const tokens = [];
177
- let scope_state = false;
178
- let line = 0;
179
- let character = 0;
180
- let depth_stack = [];
181
- let context = "",
182
- temp_str = "",
183
- previous_value = "";
90
+ if (!src || typeof src !== "string") return [];
91
+ const tokens = [];
92
+ let isInHeader = false, isInAtBlockBody = false;
93
+ let line = 0, character = 0, depth_stack = [];
184
94
 
185
- function validateIdentifier(id, type = "Identifier") {
186
- if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
187
- lexerError([
188
- `{line}<$red:Invalid ${type}:$>{N}`,
189
- `<$yellow:Identifiers can only contain letters, numbers, underscores (_), dollar signs, and hyphens (-). Got$> <$blue:'${id.trim()}'$>{N}`,
190
- "{line}"
191
- ]);
95
+ // ========================================================================== //
96
+ // Token Creation Helpers //
97
+ // ========================================================================== //
98
+
99
+ function addToken(type, value, rawValue) {
100
+ if (typeof rawValue === "string" && typeof value === "string" && rawValue !== value) {
101
+ const offset = rawValue.indexOf(value);
102
+ if (offset !== -1) {
103
+ advance(rawValue.slice(0, offset));
104
+ const startPos = { line, character }; advance(value);
105
+ const endPos = { line, character };
106
+ tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
107
+ advance(rawValue.slice(offset + value.length));
108
+ return;
192
109
  }
193
110
  }
111
+ const startPos = { line, character }; advance(rawValue || value);
112
+ const endPos = { line, character };
113
+ tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
114
+ }
194
115
 
195
- function addToken(type, value) {
196
- const startPos = { line, character };
197
- // Update position based on value length and newlines
198
- const newlines = (value.match(/\n/g) || []).length;
199
- if (newlines > 0) {
200
- line += newlines;
201
- const parts = value.split("\n");
202
- character = parts[parts.length - 1].length;
203
- } else {
204
- character += value.length;
205
- }
206
- const endPos = { line, character };
207
- tokens.push({
208
- type,
209
- value,
210
- range: { start: startPos, end: endPos },
211
- depth: depth_stack.length
212
- });
116
+ function advance(text) {
117
+ const newlines = (text.match(/\n/g) || []).length;
118
+ if (newlines > 0) { line += newlines; character = text.split("\n").pop().length; }
119
+ else character += text.length;
120
+ }
121
+
122
+ function validateIdentifier(id, charPos) {
123
+ if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
124
+ lexerError([`[Lexer Error]: Invalid Identifier: '${id.trim()}' at line ${line + 1}, col ${charPos || character}`]);
213
125
  }
126
+ }
214
127
 
215
- // Helper to advance position without adding a token (e.g., for whitespace/newlines that don't emit tokens)
216
- function advance(text) {
217
- const newlines = (text.match(/\n/g) || []).length;
218
- if (newlines > 0) {
219
- line += newlines;
220
- const parts = text.split("\n");
221
- character = parts[parts.length - 1].length;
222
- } else {
223
- character += text.length;
128
+ // ========================================================================== //
129
+ // Main Tokenization Loop //
130
+ // ========================================================================== //
131
+
132
+ for (let i = 0; i < src.length; i++) {
133
+ const char = src[i];
134
+ const next = peek(src, i, 1);
135
+
136
+ // ========================================================================== //
137
+ // Look back at previous tokens to determine current context //
138
+ // ========================================================================== //
139
+ let prev_type = "", prev_prev_type = "", count = 0;
140
+ for (let j = tokens.length - 1; j >= 0; j--) {
141
+ const t = tokens[j];
142
+ if (t.type !== TOKEN_TYPES.TEXT && t.type !== TOKEN_TYPES.COMMENT) {
143
+ if (count === 0) prev_type = t.type;
144
+ else if (count === 1) prev_prev_type = t.type;
145
+ count++; if (count >= 2) break;
224
146
  }
225
147
  }
226
148
 
227
- for (let i = 0; i < src.length; i++) {
228
- let current_char = src[i];
229
- // ========================================================================== //
230
- // Token: Open Bracket //
231
- // ========================================================================== //
232
- if (current_char === "[" && !scope_state && previous_value !== "(") {
233
- // i + 1 -> skip current character
234
- temp_str = concatChar(src, i + 1, ["]"]);
235
- if (temp_str && temp_str.length > 0) {
236
- if (temp_str.trim() !== end_keyword) {
237
- depth_stack.push("Block");
238
- }
239
- }
240
- addToken(TOKEN_TYPES.OPEN_BRACKET, current_char);
241
- // is next token end keyword?
242
- if (temp_str.trim() === end_keyword) {
243
- previous_value = block_end;
244
- } else {
245
- previous_value = current_char;
246
- }
247
- }
248
- // ========================================================================== //
249
- // Token: Equal Sign //
250
- // ========================================================================== //
251
- else if (current_char === "=" && !scope_state) {
252
- addToken(TOKEN_TYPES.EQUAL, current_char);
253
- previous_value = current_char;
254
- }
255
- // ========================================================================== //
256
- // Token: Close Bracket //
257
- // ========================================================================== //
258
- else if (current_char === "]" && !scope_state) {
259
- addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
260
- if (previous_value === end_keyword) {
261
- depth_stack.pop();
262
- }
263
- previous_value = current_char;
264
- }
265
- // ========================================================================== //
266
- // Token: Open Parenthesis '(' //
267
- // ========================================================================== //
268
- else if (current_char === "(" && !scope_state) {
269
- addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
270
- if (previous_value !== "->") {
271
- previous_value = current_char;
272
- }
273
- }
274
- // ========================================================================== //
275
- // Token: Thin Arrow '->' //
276
- // ========================================================================== //
277
- else if (current_char === "-" && peek(src, i, 1) === ">") {
278
- temp_str = current_char + peek(src, i, 1);
279
- i += temp_str.length - 1;
280
- addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
281
- previous_value = temp_str;
282
- }
283
- // ========================================================================== //
284
- // Token: Close Parenthesis ')' //
285
- // ========================================================================== //
286
- else if (current_char === ")" && !scope_state) {
287
- addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
288
- previous_value = current_char;
289
- }
290
- // ========================================================================== //
291
- // Token: Open At '@_' //
292
- // ========================================================================== //
293
- else if (
294
- current_char === "@" &&
295
- peek(src, i, 1) === "_" &&
296
- (!scope_state || isAtBlockEnd(src, i))
297
- ) {
298
- temp_str = current_char + peek(src, i, 1);
299
- i += temp_str.length - 1;
300
- addToken(TOKEN_TYPES.OPEN_AT, temp_str);
301
- // is next token end keyword?
302
- if (isAtBlockEnd(src, i - 1)) {
303
- previous_value = at_end;
304
- } else {
305
- previous_value = temp_str;
306
- }
307
- }
308
- // ========================================================================== //
309
- // Token: Close At '_@' //
310
- // ========================================================================== //
311
- else if (current_char === "_" && peek(src, i, 1) === "@") {
312
- temp_str = current_char + peek(src, i, 1);
313
- i += temp_str.length - 1;
314
- addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
315
- switch (previous_value) {
316
- case at_id:
317
- previous_value = temp_str + "+";
318
- break;
319
- default:
320
- previous_value = temp_str;
321
- break;
322
- }
323
- }
324
- // ========================================================================== //
325
- // Token: Colon ':' //
326
- // ========================================================================== //
327
- else if (
328
- current_char === ":" &&
329
- (previous_value === "_@+" ||
330
- previous_value === BLOCKCOMMA ||
331
- previous_value === block_id_2 ||
332
- previous_value === inline_id_2 ||
333
- previous_value === at_id_2 ||
334
- previous_value === at_value ||
335
- previous_value === BLOCKCOLON ||
336
- previous_value === ATBLOCKCOLON ||
337
- previous_value === INLINECOLON) &&
338
- !scope_state
339
- ) {
340
- addToken(TOKEN_TYPES.COLON, current_char);
341
- switch (previous_value) {
342
- case block_id_2:
343
- previous_value = BLOCKCOLON;
344
- break;
345
- case "_@+":
346
- previous_value = ATBLOCKCOLON;
347
- break;
348
- case at_id_2:
349
- previous_value = ATBLOCKCOLON;
350
- break;
351
- case inline_id_2:
352
- previous_value = INLINECOLON;
353
- break;
354
- }
355
- }
356
- // ========================================================================== //
357
- // Token: Comma ',' //
358
- // ========================================================================== //
359
- else if (
360
- current_char === "," &&
361
- (previous_value === block_value ||
362
- previous_value === at_value ||
363
- previous_value === inline_value ||
364
- previous_value === BLOCKCOMMA ||
365
- previous_value === ATBLOCKCOMMA ||
366
- previous_value === INLINECOMMA)
367
- ) {
368
- addToken(TOKEN_TYPES.COMMA, current_char);
369
- switch (previous_value) {
370
- case "=":
371
- previous_value = BLOCKCOMMA;
372
- break;
373
- case block_value:
374
- previous_value = BLOCKCOMMA;
375
- break;
376
- case at_value:
377
- previous_value = ATBLOCKCOMMA;
378
- break;
379
- case inline_value:
380
- previous_value = INLINECOMMA;
381
- break;
382
- }
383
- }
384
- // ========================================================================== //
385
- // Token: Semi-colon ';' //
386
- // ========================================================================== //
387
- else if (
388
- (current_char === ";" && previous_value === at_value) ||
389
- (current_char === ";" && previous_value === "_@+") || // New: Allow semicolon directly after identifier
390
- (current_char === ";" && previous_value === ";") ||
391
- (current_char === ";" && previous_value === ATBLOCKCOMMA)
392
- ) {
393
- addToken(TOKEN_TYPES.SEMICOLON, current_char);
394
- scope_state = true;
395
- previous_value = current_char;
396
- }
397
- // ========================================================================== //
398
- // Token: Escape Character '\' //
399
- // ========================================================================== //
400
- else if (current_char === "\\") {
401
- temp_str = concatEscape(src, i);
402
- i += temp_str.length - 1;
403
- temp_str = temp_str.trim();
404
- if (temp_str && temp_str.length > 0) {
405
- addToken(TOKEN_TYPES.ESCAPE, temp_str);
149
+ // ========================================================================== //
150
+ // Check for structural characters ([ ], ( ), @_, _@) //
151
+ // ========================================================================== //
152
+
153
+ if (char === "[" && !isInAtBlockBody) {
154
+ let idPeek = ""; for (let j = i + 1; j < src.length && !/[=\]:#]/.test(src[j]); j++) idPeek += src[j];
155
+ if (idPeek.trim() !== end_keyword) depth_stack.push("B");
156
+ addToken(TOKEN_TYPES.OPEN_BRACKET, char); isInHeader = true;
157
+ } else if (char === "]" && isInHeader) {
158
+ addToken(TOKEN_TYPES.CLOSE_BRACKET, char); isInHeader = false;
159
+ // Reliable depth pop on [end]
160
+ for (let j = tokens.length - 1; j >= 0; j--) {
161
+ const t = tokens[j];
162
+ if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
163
+ if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
164
+ break;
406
165
  }
407
166
  }
408
- // ========================================================================== //
409
- // Count Newlines and Whitespace (No Tokens) //
410
- // ========================================================================== //
411
- else if (current_char === "\n") {
412
- if (!scope_state) {
413
- advance(current_char);
414
- continue;
167
+ } else if (char === "(" && !isInAtBlockBody) {
168
+ addToken(TOKEN_TYPES.OPEN_PAREN, char); isInHeader = true;
169
+ } else if (char === ")" && isInHeader) {
170
+ addToken(TOKEN_TYPES.CLOSE_PAREN, char); isInHeader = false;
171
+ } else if (char === "@" && next === "_" && (!isInAtBlockBody || isAtBlockEnd(src, i))) {
172
+ let idPeek = ""; for (let j = i + 2; j < src.length && !/[_@:#]/.test(src[j]); j++) idPeek += src[j];
173
+ if (idPeek.trim() !== end_keyword) depth_stack.push("A");
174
+ addToken(TOKEN_TYPES.OPEN_AT, "@_"); i++; isInHeader = true;
175
+ } else if (char === "_" && next === "@" && (isInHeader || isInAtBlockBody)) {
176
+ addToken(TOKEN_TYPES.CLOSE_AT, "_@"); i++;
177
+ for (let j = tokens.length - 1; j >= 0; j--) {
178
+ const t = tokens[j];
179
+ if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
180
+ if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
181
+ break;
415
182
  }
416
183
  }
184
+ isInHeader = true; isInAtBlockBody = false;
185
+ } else if (char === ";" && isInHeader) {
186
+ addToken(TOKEN_TYPES.SEMICOLON, char); isInHeader = false; isInAtBlockBody = true;
187
+ } else if (char === "=" && isInHeader && !isInAtBlockBody) {
188
+ addToken(TOKEN_TYPES.EQUAL, char);
189
+ } else if (char === ":" && isInHeader && !isInAtBlockBody && (prev_type === TOKEN_TYPES.IDENTIFIER || prev_type === TOKEN_TYPES.CLOSE_AT)) {
190
+ addToken(TOKEN_TYPES.COLON, char);
191
+ } else if (char === "," && isInHeader) {
192
+ addToken(TOKEN_TYPES.COMMA, char);
193
+ } else if (char === "-" && next === ">" && (isInHeader || prev_type === TOKEN_TYPES.CLOSE_PAREN)) {
194
+ addToken(TOKEN_TYPES.THIN_ARROW, "->"); i++;
195
+ } else if (char === "\"" && isInHeader) {
196
+ const quote = concatQuote(src, i); addToken(TOKEN_TYPES.VALUE, quote); i += quote.length - 1;
197
+ } else if (char === "\\") {
198
+ const esc = concatEscape(src, i); addToken(TOKEN_TYPES.ESCAPE, esc); i += esc.length - 1;
199
+ } else if (char === "#" && !isInAtBlockBody) {
200
+ let comm = ""; for (; i < src.length && src[i] !== "\n"; i++) comm += src[i];
201
+ addToken(TOKEN_TYPES.COMMENT, comm, comm); i--;
202
+ } else if (char === "\n" && !isInAtBlockBody) {
203
+ advance(char);
204
+ } else {
417
205
  // ========================================================================== //
418
- // +++++++++++++++++ //
419
- // ========================================================================== //
420
- else {
421
- // ========================================================================== //
422
- // Token: Block Identifier //
423
- // ========================================================================== //
424
- if (previous_value === "[" && !scope_state) {
425
- temp_str = concatChar(src, i, ["=", "]"]);
426
- i += temp_str.length - 1;
427
- if (temp_str.trim()) {
428
- const trimmedStr = temp_str.trim();
429
- if (trimmedStr !== end_keyword) {
430
- validateIdentifier(trimmedStr, "Block Identifier");
431
- }
432
- // Add Token
433
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedStr);
434
- // Update Previous Value
435
- previous_value = block_id;
436
- }
437
- }
438
- // ========================================================================== //
439
- // Token: Block Value //
440
- // ========================================================================== //
441
- else if (
442
- (previous_value === "=" ||
443
- previous_value === BLOCKCOMMA ||
444
- previous_value === BLOCKCOLON ||
445
- previous_value === block_value) &&
446
- !scope_state
447
- ) {
448
- temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
449
- i += temp_str.length - 1;
450
- const nextToken = peek(src, i, 1);
451
- if (temp_str.trim()) {
452
- // Add token
453
- switch (nextToken) {
454
- case ":":
455
- const trimmedKey = temp_str.trim();
456
- validateIdentifier(trimmedKey, "Argument Key");
457
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
458
- previous_value = block_id_2;
459
- break;
460
- default:
461
- addToken(TOKEN_TYPES.VALUE, temp_str);
462
- previous_value = block_value;
463
- break;
464
- }
465
- }
466
- }
467
- // ========================================================================== //
468
- // Token: Inline Identifier //
469
- // ========================================================================== //
470
- else if (previous_value === "->" && !scope_state) {
471
- temp_str = concatChar(src, i, ["(", ")", ":"]);
472
- i += temp_str.length - 1;
473
- const nextToken = peek(src, i, 1);
474
- if (temp_str.trim()) {
475
- // Add Token
476
- switch (nextToken) {
477
- case ":":
478
- const trimmedKey = temp_str.trim();
479
- validateIdentifier(trimmedKey, "Argument Key");
480
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
481
- previous_value = inline_id_2;
482
- break;
483
- default:
484
- const trimmedId = temp_str.trim();
485
- validateIdentifier(trimmedId, "Inline Identifier");
486
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedId);
487
- previous_value = inline_id;
488
- break;
489
- }
490
- }
491
- }
492
- // ========================================================================== //
493
- // Token: Inline Value //
494
- // ========================================================================== //
495
- else if (
496
- (previous_value === "(" ||
497
- previous_value === INLINECOLON ||
498
- previous_value === INLINECOMMA ||
499
- previous_value === inline_value) &&
500
- !scope_state
501
- ) {
502
- temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
503
- i += temp_str.length - 1;
504
- if (temp_str.trim()) {
505
- // Add Token
506
- addToken(TOKEN_TYPES.VALUE, temp_str);
507
- // Update Previous Value
508
- previous_value = inline_value;
509
- }
510
- }
511
- // ========================================================================== //
512
- // Token: At Identifier //
513
- // ========================================================================== //
514
- else if (previous_value === "@_") {
515
- temp_str = concatChar(src, i, ["_", ":"]);
516
- i += temp_str.length - 1;
517
- if (temp_str.trim()) {
518
- const trimmedStr = temp_str.trim();
519
- if (trimmedStr !== end_keyword) {
520
- validateIdentifier(trimmedStr, "At-Block Identifier");
206
+ // Capture plain text or Identifier values //
207
+ // ========================================================================== //
208
+ const isValueContext = (prev_type === TOKEN_TYPES.COLON || prev_type === TOKEN_TYPES.EQUAL);
209
+ const context = concatText(src, i, isInHeader, isInAtBlockBody, isValueContext);
210
+ if (context.length > 0) {
211
+ if (isInHeader) {
212
+ const trimmed = context.trim();
213
+ if ((prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT) && trimmed === end_keyword) {
214
+ addToken(TOKEN_TYPES.END_KEYWORD, trimmed, context);
215
+ } else if (trimmed.length > 0) {
216
+ let isNextColon = false;
217
+ for (let j = i + context.length; j < src.length; j++) {
218
+ const c = src[j];
219
+ if (c === " " || c === "\t" || c === "\n") continue;
220
+ if (c === ":") isNextColon = true;
221
+ break;
521
222
  }
522
- // Add Token
523
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedStr);
524
- previous_value = at_id;
525
- }
526
- }
527
- // ========================================================================== //
528
- // Token: At Value //
529
- // ========================================================================== //
530
- else if (previous_value === ATBLOCKCOLON || previous_value === ATBLOCKCOMMA || previous_value === at_value) {
531
- temp_str = concatChar(src, i, [";", "\\", ",", ":"]);
532
- i += temp_str.length - 1;
533
- const nextToken = peek(src, i, 1);
534
- if (temp_str.trim()) {
535
- switch (nextToken) {
536
- case ":":
537
- const trimmedKey = temp_str.trim();
538
- validateIdentifier(trimmedKey, "Argument Key");
539
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
540
- previous_value = at_id_2;
541
- break;
542
- default:
543
- addToken(TOKEN_TYPES.VALUE, temp_str);
544
- previous_value = at_value;
545
- break;
223
+
224
+ const isBlockStart = (prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT);
225
+ const isMapperHead = (prev_type === TOKEN_TYPES.OPEN_PAREN && prev_prev_type === TOKEN_TYPES.THIN_ARROW);
226
+ const isMandatoryId = (isNextColon || prev_type === TOKEN_TYPES.THIN_ARROW);
227
+
228
+ if (isBlockStart || isMapperHead || isMandatoryId) {
229
+ validateIdentifier(trimmed, character + context.indexOf(trimmed));
230
+ addToken(TOKEN_TYPES.IDENTIFIER, trimmed, context);
231
+ } else {
232
+ addToken(TOKEN_TYPES.VALUE, trimmed, context);
546
233
  }
234
+ } else {
235
+ advance(context);
547
236
  }
237
+ } else {
238
+ addToken(TOKEN_TYPES.TEXT, context);
548
239
  }
549
- // ========================================================================== //
550
- // Token:End Keyword //
551
- // ========================================================================== //
552
- else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
553
- temp_str = concatChar(src, i, ["]", "_"]);
554
- i += temp_str.length - 1;
555
- if (temp_str.trim()) {
556
- addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
557
- // Update Previous Value
558
- previous_value = end_keyword;
559
- scope_state = false;
560
- }
561
- }
562
- // ========================================================================== //
563
- // Token: Comment //
564
- // ========================================================================== //
565
- else if (current_char === "#") {
566
- temp_str = concatChar(src, i, ["\n"]);
567
- if (temp_str.trim()) {
568
- i += temp_str.length - 1;
569
- addToken(TOKEN_TYPES.COMMENT, temp_str);
570
- }
571
- }
572
- // ========================================================================== //
573
- // Token: Text //
574
- // ========================================================================== //
575
- else {
576
- if (previous_value === "_@+") {
577
- // Strictly wait for semicolon or arguments on the same line.
578
- // No more heuristic lookahead.
579
- }
580
- context = concatText(src, i, scope_state, [
581
- [":", previous_value === inline_id_2],
582
- [",", previous_value === block_value || previous_value === at_value || previous_value === inline_value],
583
- [":", (previous_value === "_@+" && !scope_state) || previous_value === at_value],
584
- [";", previous_value === at_value],
585
- [")", previous_value === inline_value]
586
- ]);
587
- i += context.length - 1;
588
- if (context.trim()) {
589
- addToken(TOKEN_TYPES.TEXT, context);
590
- }
591
- }
240
+ i += context.length - 1;
241
+ } else {
242
+ addToken(TOKEN_TYPES.TEXT, char);
592
243
  }
593
- context = "";
594
- temp_str = "";
595
244
  }
596
-
597
- // Ensure EOF token
598
- const eofPos = { line, character };
599
- tokens.push({
600
- type: TOKEN_TYPES.EOF,
601
- value: "",
602
- range: { start: eofPos, end: eofPos },
603
- depth: depth_stack.length
604
- });
605
-
606
- return tokens;
607
- } else {
608
- lexerError([
609
- `{line}<$red:Invalid SomMark syntax:$> ${src === "" ? "<$yellow: Got empty string '' $>" : `<$yellow:Expected source input to be a string, got$> <$blue: '${typeof src}'$>`}{line}`
610
- ]);
611
245
  }
246
+ // ========================================================================== //
247
+ // Finalize with End-of-File token //
248
+ // ========================================================================== //
249
+ addToken(TOKEN_TYPES.EOF, "");
250
+ return tokens;
612
251
  }
613
252
 
614
253
  export default lexer;