sommark 3.1.0 → 3.3.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/core/lexer.js CHANGED
@@ -1,636 +1,253 @@
1
1
  import TOKEN_TYPES from "./tokenTypes.js";
2
2
  import peek from "../helpers/peek.js";
3
- import {
4
- block_value,
5
- block_id,
6
- block_id_2,
7
- block_end,
8
- inline_id,
9
- inline_value,
10
- inline_id_2,
11
- at_id,
12
- at_value,
13
- at_id_2,
14
- at_end,
15
- end_keyword,
16
- BLOCKCOMMA,
17
- ATBLOCKCOMMA,
18
- INLINECOMMA,
19
- BLOCKCOLON,
20
- ATBLOCKCOLON,
21
- INLINECOLON
22
- } from "./labels.js";
23
- import { lexerError, sommarkError } from "./errors.js";
3
+ import { end_keyword } from "./labels.js";
4
+ import { lexerError } from "./errors.js";
5
+
6
+ /**
7
+ * SomMark Lexer
8
+ */
9
+
10
+ // ========================================================================== //
11
+ // Helper Functions //
12
+ // ========================================================================== //
24
13
 
25
14
  const atBlockEndRegex = new RegExp(`^@_\\s*${end_keyword}\\s*_@`);
15
+
16
+ // Checks if we reached the end of an At-Block
26
17
  function isAtBlockEnd(input, index) {
27
18
  const slice = typeof input === "string" ? input.slice(index, index + 100) : input.slice(index, index + 100).join("");
28
19
  return atBlockEndRegex.test(slice);
29
20
  }
30
21
 
31
- const updateNewLine = text => {
32
- if (text && typeof text === "string") {
33
- return text.split("").filter(value => value === "\n").length;
22
+ // Collects characters inside a quoted string
23
+ function concatQuote(input, index) {
24
+ let text = "\"";
25
+ for (let i = index + 1; i < input.length; i++) {
26
+ const char = input[i];
27
+ if (char === "\\" && peek(input, i, 1) === "\"") {
28
+ text += "\\\"";
29
+ i++;
30
+ continue;
31
+ }
32
+ text += char;
33
+ if (char === "\"") return text;
34
34
  }
35
- return;
36
- };
37
-
38
- const updateColumn = (end = 0, textLength) => {
39
- const start = end + 1;
40
- const newEnd = start + textLength - 1;
41
- return { start, end: newEnd };
42
- };
35
+ lexerError(["[Lexer Error]: Unclosed quote"]);
36
+ return text;
37
+ }
43
38
 
44
- function concatText(input, index, scope_state, extraConditions = []) {
39
+ // Collects plain text until a special character is found
40
+ function concatText(input, index, isInHeader, isInAtBlockBody, isLiberalValue = false) {
45
41
  let text = "";
46
- if (index >= input.length) {
47
- return text;
48
- }
49
- if (
50
- (Array.isArray(input) || typeof input === "string") &&
51
- input.length > 0 &&
52
- typeof index === "number" &&
53
- typeof scope_state === "boolean"
54
- ) {
55
- for (let i = index; i < input.length; i++) {
56
- const char = input[i];
57
- const defaultConditions = [
58
- ["[", !scope_state],
59
- ["=", !scope_state],
60
- ["]", !scope_state],
61
- ["(", !scope_state],
62
- ["-", peek(input, i, 1) === ">" && !scope_state],
63
- ["@", peek(input, i, 1) === "_" && (!scope_state || isAtBlockEnd(input, i))],
64
- ["_", peek(input, i, 1) === "@" && !scope_state],
65
- ["#", !scope_state],
66
- ["\\", true]
67
- ];
68
- if (defaultConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
69
- break;
70
- } else if (extraConditions.some(([ch, condition]) => (!ch || ch === char) && condition)) {
42
+ if (index >= input.length) return text;
43
+ for (let i = index; i < input.length; i++) {
44
+ const char = input[i];
45
+ const stopConditions = [
46
+ ["[", !isInAtBlockBody],
47
+ ["(", !isInAtBlockBody],
48
+ ["#", !isInAtBlockBody && !isLiberalValue],
49
+ ["=", isInHeader && !isInAtBlockBody],
50
+ ["\"", isInHeader],
51
+ ["]", isInHeader],
52
+ [")", isInHeader],
53
+ ["-", peek(input, i, 1) === ">" && (isInHeader || true)],
54
+ ["@", peek(input, i, 1) === "_" && (!isInAtBlockBody || isAtBlockEnd(input, i))],
55
+ ["_", peek(input, i, 1) === "@" && isInHeader],
56
+ ["\\", true],
57
+ [":", isInHeader && !isInAtBlockBody],
58
+ [";", isInHeader],
59
+ [",", isInHeader]
60
+ ];
61
+ let shouldStop = false;
62
+ for (const [stopChar, conditionMet] of stopConditions) {
63
+ if (conditionMet && input.substring(i, i + stopChar.length) === stopChar) {
64
+ shouldStop = true;
71
65
  break;
72
66
  }
73
- text += char;
74
67
  }
75
- return text;
76
- } else {
77
- sommarkError([
78
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
79
- "'input' must be an array and have to be not empty, 'index' must be a number value, and 'scope_state' ",
80
- "must be a boolean.$>{line}."
81
- ]);
68
+ if (shouldStop) break;
69
+ text += char;
82
70
  }
71
+ return text;
83
72
  }
84
73
 
74
+ // Handles backslash escapes in the text
85
75
  function concatEscape(input, index) {
86
- let str = "";
87
- if (index >= input.length) {
88
- return str;
89
- }
90
- const WHITESPACES = [
91
- " ",
92
- "\t",
93
- "\n",
94
- "\r",
95
- "\v",
96
- "\f",
97
- //+++++++//
98
- "\u00A0",
99
- "\u1680",
100
- "\u2000",
101
- "\u2001",
102
- "\u2002",
103
- "\u2003",
104
- "\u2004",
105
- "\u2005",
106
- "\u2006",
107
- "\u2007",
108
- "\u2008",
109
- "\u2009",
110
- "\u200A",
111
- "\u202F",
112
- "\u205F",
113
- "\u3000"
114
- ];
115
- let WHITESPACE_SET = new Set(WHITESPACES);
116
- if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
117
- const nextChar = peek(input, index, 1);
118
- if (input[index] === "\\" && nextChar !== null) {
119
- str += "\\" + nextChar;
120
- } else {
121
- lexerError([
122
- "{line}<$red:Invalid escape sequence$>{N}",
123
- "<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
124
- nextChar === null ? "<$yellow:Found end of file after escape character$>" : "<$yellow:Missing character after escape character$>",
125
- "{line}"
126
- ]);
127
- }
128
- if (WHITESPACE_SET.has(str[1])) {
129
- const matchedCharacter = Array.from(WHITESPACE_SET).find(ch => ch === str[1]);
130
- lexerError([
131
- "{line}<$red:Invalid escape sequence$>{N}",
132
- "<$yellow:Escape character '\\' must be followed immediately by a character.$>{N}",
133
- `<$yellow:Found$> <$blue:${JSON.stringify(matchedCharacter)}$> <$yellow:after escape character$>{N}`,
134
- "{line}"
135
- ]);
136
- }
137
- return str;
138
- } else {
139
- sommarkError([
140
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
141
- "'input' must be an array and have to be not empty, and 'index' must be a number value.$>{line}"
142
- ]);
143
- }
76
+ if (index >= input.length) return "";
77
+ const nextChar = peek(input, index, 1);
78
+ const WHITESPACES = [" ", "\t", "\n", "\r", "\v", "\f"];
79
+ if (WHITESPACES.includes(nextChar)) lexerError(["[Lexer Error]: Invalid escape sequence (escaped whitespace)"]);
80
+ if (input[index] === "\\" && nextChar !== null) return "\\" + nextChar;
81
+ lexerError(["[Lexer Error]: Invalid escape sequence"]);
82
+ return "";
144
83
  }
145
84
 
146
- function concatChar(input, index, stop_at_char) {
147
- if ((Array.isArray(input) || typeof input === "string") && input.length > 0 && typeof index === "number") {
148
- let str = "";
149
- if (index >= input.length) {
150
- return str;
151
- }
152
- if (Array.isArray(stop_at_char) && stop_at_char.length > 0) {
153
- for (let i = index; i < input.length; i++) {
154
- const char = input[i];
155
- if (stop_at_char.includes(char)) {
156
- break;
157
- }
158
- str += char;
159
- }
160
- } else {
161
- sommarkError([
162
- "{line}<$red:Invalid Type:$> <$yellow:Argument 'stop_at_char' must be an array and have to be not empty array$>{line}"
163
- ]);
164
- }
165
- return str;
166
- } else {
167
- sommarkError([
168
- "{line}<$red:Invalid Arguments:$> <$yellow:Assign arguments to their correct types, ",
169
- "'input' must be an array and have to be not empty, 'index' must be a number value$>{line}"
170
- ]);
171
- }
172
- }
85
+ // ========================================================================== //
86
+ // Main Lexer Function //
87
+ // ========================================================================== //
173
88
 
174
89
  function lexer(src) {
175
- if (src && typeof src === "string") {
176
- const tokens = [];
177
- let scope_state = false;
178
- let line = 1;
179
- let start = 1;
180
- let end = 0;
181
- let depth_stack = [];
182
- let context = "",
183
- temp_str = "",
184
- previous_value = "";
90
+ if (!src || typeof src !== "string") return [];
91
+ const tokens = [];
92
+ let isInHeader = false, isInAtBlockBody = false;
93
+ let line = 0, character = 0, depth_stack = [];
94
+
95
+ // ========================================================================== //
96
+ // Token Creation Helpers //
97
+ // ========================================================================== //
185
98
 
186
- function validateIdentifier(id, type = "Identifier") {
187
- if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
188
- lexerError([
189
- `{line}<$red:Invalid ${type}:$>{N}`,
190
- `<$yellow:Identifiers can only contain letters, numbers, underscores (_), dollar signs, and hyphens (-). Got$> <$blue:'${id.trim()}'$>{N}`,
191
- "{line}"
192
- ]);
99
+ function addToken(type, value, rawValue) {
100
+ if (typeof rawValue === "string" && typeof value === "string" && rawValue !== value) {
101
+ const offset = rawValue.indexOf(value);
102
+ if (offset !== -1) {
103
+ advance(rawValue.slice(0, offset));
104
+ const startPos = { line, character }; advance(value);
105
+ const endPos = { line, character };
106
+ tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
107
+ advance(rawValue.slice(offset + value.length));
108
+ return;
193
109
  }
194
110
  }
111
+ const startPos = { line, character }; advance(rawValue || value);
112
+ const endPos = { line, character };
113
+ tokens.push({ type, value, range: { start: startPos, end: endPos }, depth: depth_stack.length });
114
+ }
195
115
 
196
- function addToken(type, value) {
197
- tokens.push({ type, value, line, start, end, depth: depth_stack.length });
116
+ function advance(text) {
117
+ const newlines = (text.match(/\n/g) || []).length;
118
+ if (newlines > 0) { line += newlines; character = text.split("\n").pop().length; }
119
+ else character += text.length;
120
+ }
121
+
122
+ function validateIdentifier(id, charPos) {
123
+ if (!/^[a-zA-Z0-9\-_$]+$/.test(id.trim())) {
124
+ lexerError([`[Lexer Error]: Invalid Identifier: '${id.trim()}' at line ${line + 1}, col ${charPos || character}`]);
198
125
  }
126
+ }
199
127
 
200
- const updateMetadata = text => {
201
- const newlines = updateNewLine(text) || 0;
202
- if (newlines > 0) {
203
- const lines = text.split("\n");
204
- const lastLineLength = lines[lines.length - 1].length;
205
- start = end + 1;
206
- end = lastLineLength;
207
- line += newlines;
208
- } else {
209
- const cols = updateColumn(end, text.length);
210
- start = cols.start;
211
- end = cols.end;
212
- }
213
- };
128
+ // ========================================================================== //
129
+ // Main Tokenization Loop //
130
+ // ========================================================================== //
214
131
 
215
- for (let i = 0; i < src.length; i++) {
216
- let current_char = src[i];
217
- // ========================================================================== //
218
- // Token: Open Bracket //
219
- // ========================================================================== //
220
- if (current_char === "[" && !scope_state && previous_value !== "(") {
221
- // Update Metadata
222
- updateMetadata(current_char);
223
- // i + 1 -> skip current character
224
- temp_str = concatChar(src, i + 1, ["]"]);
225
- if (temp_str && temp_str.length > 0) {
226
- if (temp_str.trim() !== end_keyword) {
227
- depth_stack.push("Block");
228
- }
229
- }
230
- addToken(TOKEN_TYPES.OPEN_BRACKET, current_char);
231
- // is next token end keyword?
232
- if (temp_str.trim() === end_keyword) {
233
- previous_value = block_end;
234
- } else {
235
- previous_value = current_char;
236
- }
237
- }
238
- // ========================================================================== //
239
- // Token: Equal Sign //
240
- // ========================================================================== //
241
- else if (current_char === "=" && !scope_state) {
242
- // Update Metadata
243
- updateMetadata(current_char);
244
- addToken(TOKEN_TYPES.EQUAL, current_char);
245
- previous_value = current_char;
246
- }
247
- // ========================================================================== //
248
- // Token: Close Bracket //
249
- // ========================================================================== //
250
- else if (current_char === "]" && !scope_state) {
251
- // Update Metadata
252
- updateMetadata(current_char);
253
- addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
254
- if (previous_value === end_keyword) {
255
- depth_stack.pop();
256
- }
257
- previous_value = current_char;
258
- }
259
- // ========================================================================== //
260
- // Token: Open Parenthesis '(' //
261
- // ========================================================================== //
262
- else if (current_char === "(" && !scope_state) {
263
- // Update Metadata
264
- updateMetadata(current_char);
265
- addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
266
- if (previous_value !== "->") {
267
- previous_value = current_char;
268
- }
269
- }
270
- // ========================================================================== //
271
- // Token: Thin Arrow '->' //
272
- // ========================================================================== //
273
- else if (current_char === "-" && peek(src, i, 1) === ">") {
274
- temp_str = current_char + peek(src, i, 1);
275
- i += temp_str.length - 1;
276
- // Update Metadata
277
- updateMetadata(temp_str);
278
- addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
279
- previous_value = temp_str;
280
- }
281
- // ========================================================================== //
282
- // Token: Close Parenthesis ')' //
283
- // ========================================================================== //
284
- else if (current_char === ")" && !scope_state) {
285
- // Update Metadata
286
- updateMetadata(current_char);
287
- addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
288
- previous_value = current_char;
289
- }
290
- // ========================================================================== //
291
- // Token: Open At '@_' //
292
- // ========================================================================== //
293
- else if (
294
- current_char === "@" &&
295
- peek(src, i, 1) === "_" &&
296
- (!scope_state || isAtBlockEnd(src, i))
297
- ) {
298
- temp_str = current_char + peek(src, i, 1);
299
- i += temp_str.length - 1;
300
- // Update Metadata
301
- updateMetadata(temp_str);
302
- addToken(TOKEN_TYPES.OPEN_AT, temp_str);
303
- // is next token end keyword?
304
- if (isAtBlockEnd(src, i - 1)) {
305
- previous_value = at_end;
306
- } else {
307
- previous_value = temp_str;
308
- }
309
- }
310
- // ========================================================================== //
311
- // Token: Close At '_@' //
312
- // ========================================================================== //
313
- else if (current_char === "_" && peek(src, i, 1) === "@") {
314
- temp_str = current_char + peek(src, i, 1);
315
- i += temp_str.length - 1;
316
- // Update Metadata
317
- updateMetadata(temp_str);
318
- addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
319
- switch (previous_value) {
320
- case at_id:
321
- previous_value = temp_str + "+";
322
- break;
323
- default:
324
- previous_value = temp_str;
325
- break;
326
- }
327
- }
328
- // ========================================================================== //
329
- // Token: Colon ':' //
330
- // ========================================================================== //
331
- else if (
332
- current_char === ":" &&
333
- (previous_value === "_@+" ||
334
- previous_value === BLOCKCOMMA ||
335
- previous_value === block_id_2 ||
336
- previous_value === inline_id_2 ||
337
- previous_value === at_id_2 ||
338
- previous_value === at_value ||
339
- previous_value === BLOCKCOLON ||
340
- previous_value === ATBLOCKCOLON ||
341
- previous_value === INLINECOLON) &&
342
- !scope_state
343
- ) {
344
- // Update Metadata
345
- updateMetadata(current_char);
346
- addToken(TOKEN_TYPES.COLON, current_char);
347
- switch (previous_value) {
348
- case block_id_2:
349
- previous_value = BLOCKCOLON;
350
- break;
351
- case "_@+":
352
- previous_value = ATBLOCKCOLON;
353
- break;
354
- case at_id_2:
355
- previous_value = ATBLOCKCOLON;
356
- break;
357
- case inline_id_2:
358
- previous_value = INLINECOLON;
359
- break;
360
- }
361
- }
362
- // ========================================================================== //
363
- // Token: Comma ',' //
364
- // ========================================================================== //
365
- else if (
366
- current_char === "," &&
367
- (previous_value === block_value ||
368
- previous_value === at_value ||
369
- previous_value === inline_value ||
370
- previous_value === BLOCKCOMMA ||
371
- previous_value === ATBLOCKCOMMA ||
372
- previous_value === INLINECOMMA)
373
- ) {
374
- // Update Metadata
375
- updateMetadata(current_char);
376
- addToken(TOKEN_TYPES.COMMA, current_char);
377
- switch (previous_value) {
378
- case "=":
379
- previous_value = BLOCKCOMMA;
380
- break;
381
- case block_value:
382
- previous_value = BLOCKCOMMA;
383
- break;
384
- case at_value:
385
- previous_value = ATBLOCKCOMMA;
386
- break;
387
- case inline_value:
388
- previous_value = INLINECOMMA;
389
- break;
390
- }
132
+ for (let i = 0; i < src.length; i++) {
133
+ const char = src[i];
134
+ const next = peek(src, i, 1);
135
+
136
+ // ========================================================================== //
137
+ // Look back at previous tokens to determine current context //
138
+ // ========================================================================== //
139
+ let prev_type = "", prev_prev_type = "", count = 0;
140
+ for (let j = tokens.length - 1; j >= 0; j--) {
141
+ const t = tokens[j];
142
+ if (t.type !== TOKEN_TYPES.TEXT && t.type !== TOKEN_TYPES.COMMENT) {
143
+ if (count === 0) prev_type = t.type;
144
+ else if (count === 1) prev_prev_type = t.type;
145
+ count++; if (count >= 2) break;
391
146
  }
392
- // ========================================================================== //
393
- // Token: Semi-colon ';' //
394
- // ========================================================================== //
395
- else if (
396
- (current_char === ";" && previous_value === at_value) ||
397
- (current_char === ";" && previous_value === "_@+") || // New: Allow semicolon directly after identifier
398
- (current_char === ";" && previous_value === ";") ||
399
- (current_char === ";" && previous_value === ATBLOCKCOMMA)
400
- ) {
401
- // Update Metadata
402
- updateMetadata(current_char);
403
- addToken(TOKEN_TYPES.SEMICOLON, current_char);
404
- scope_state = true;
405
- previous_value = current_char;
406
- }
407
- // ========================================================================== //
408
- // Token: Escape Character '\' //
409
- // ========================================================================== //
410
- else if (current_char === "\\") {
411
- temp_str = concatEscape(src, i);
412
- i += temp_str.length - 1;
413
- updateMetadata(temp_str);
414
- temp_str = temp_str.trim();
415
- if (temp_str && temp_str.length > 0) {
416
- // Add Token
417
- addToken(TOKEN_TYPES.ESCAPE, temp_str);
147
+ }
148
+
149
+ // ========================================================================== //
150
+ // Check for structural characters ([ ], ( ), @_, _@) //
151
+ // ========================================================================== //
152
+
153
+ if (char === "[" && !isInAtBlockBody) {
154
+ let idPeek = ""; for (let j = i + 1; j < src.length && !/[=\]:#]/.test(src[j]); j++) idPeek += src[j];
155
+ if (idPeek.trim() !== end_keyword) depth_stack.push("B");
156
+ addToken(TOKEN_TYPES.OPEN_BRACKET, char); isInHeader = true;
157
+ } else if (char === "]" && isInHeader) {
158
+ addToken(TOKEN_TYPES.CLOSE_BRACKET, char); isInHeader = false;
159
+ // Reliable depth pop on [end]
160
+ for (let j = tokens.length - 1; j >= 0; j--) {
161
+ const t = tokens[j];
162
+ if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
163
+ if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
164
+ break;
418
165
  }
419
166
  }
420
- // ========================================================================== //
421
- // Count Newlines //
422
- // ========================================================================== //
423
- else if (current_char === "\n") {
424
- if (!scope_state) {
425
- line++;
426
- start = 1;
427
- end = 0;
428
- continue;
167
+ } else if (char === "(" && !isInAtBlockBody) {
168
+ addToken(TOKEN_TYPES.OPEN_PAREN, char); isInHeader = true;
169
+ } else if (char === ")" && isInHeader) {
170
+ addToken(TOKEN_TYPES.CLOSE_PAREN, char); isInHeader = false;
171
+ } else if (char === "@" && next === "_" && (!isInAtBlockBody || isAtBlockEnd(src, i))) {
172
+ let idPeek = ""; for (let j = i + 2; j < src.length && !/[_@:#]/.test(src[j]); j++) idPeek += src[j];
173
+ if (idPeek.trim() !== end_keyword) depth_stack.push("A");
174
+ addToken(TOKEN_TYPES.OPEN_AT, "@_"); i++; isInHeader = true;
175
+ } else if (char === "_" && next === "@" && (isInHeader || isInAtBlockBody)) {
176
+ addToken(TOKEN_TYPES.CLOSE_AT, "_@"); i++;
177
+ for (let j = tokens.length - 1; j >= 0; j--) {
178
+ const t = tokens[j];
179
+ if (t.type === TOKEN_TYPES.IDENTIFIER || t.type === TOKEN_TYPES.END_KEYWORD) {
180
+ if (t.type === TOKEN_TYPES.END_KEYWORD || t.value.trim() === end_keyword) depth_stack.pop();
181
+ break;
429
182
  }
430
183
  }
184
+ isInHeader = true; isInAtBlockBody = false;
185
+ } else if (char === ";" && isInHeader) {
186
+ addToken(TOKEN_TYPES.SEMICOLON, char); isInHeader = false; isInAtBlockBody = true;
187
+ } else if (char === "=" && isInHeader && !isInAtBlockBody) {
188
+ addToken(TOKEN_TYPES.EQUAL, char);
189
+ } else if (char === ":" && isInHeader && !isInAtBlockBody && (prev_type === TOKEN_TYPES.IDENTIFIER || prev_type === TOKEN_TYPES.CLOSE_AT)) {
190
+ addToken(TOKEN_TYPES.COLON, char);
191
+ } else if (char === "," && isInHeader) {
192
+ addToken(TOKEN_TYPES.COMMA, char);
193
+ } else if (char === "-" && next === ">" && (isInHeader || prev_type === TOKEN_TYPES.CLOSE_PAREN)) {
194
+ addToken(TOKEN_TYPES.THIN_ARROW, "->"); i++;
195
+ } else if (char === "\"" && isInHeader) {
196
+ const quote = concatQuote(src, i); addToken(TOKEN_TYPES.VALUE, quote); i += quote.length - 1;
197
+ } else if (char === "\\") {
198
+ const esc = concatEscape(src, i); addToken(TOKEN_TYPES.ESCAPE, esc); i += esc.length - 1;
199
+ } else if (char === "#" && !isInAtBlockBody) {
200
+ let comm = ""; for (; i < src.length && src[i] !== "\n"; i++) comm += src[i];
201
+ addToken(TOKEN_TYPES.COMMENT, comm, comm); i--;
202
+ } else if (char === "\n" && !isInAtBlockBody) {
203
+ advance(char);
204
+ } else {
431
205
  // ========================================================================== //
432
- // +++++++++++++++++ //
433
- // ========================================================================== //
434
- else {
435
- // ========================================================================== //
436
- // Token: Block Identifier //
437
- // ========================================================================== //
438
- if (previous_value === "[" && !scope_state) {
439
- temp_str = concatChar(src, i, ["=", "]"]);
440
- i += temp_str.length - 1;
441
- // Update Metadata
442
- updateMetadata(temp_str);
443
- if (temp_str.trim()) {
444
- const trimmedStr = temp_str.trim();
445
- if (trimmedStr !== end_keyword) {
446
- validateIdentifier(trimmedStr, "Block Identifier");
447
- }
448
- // Add Token
449
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedStr);
450
- // Update Previous Value
451
- previous_value = block_id;
452
- }
453
- }
454
- // ========================================================================== //
455
- // Token: Block Value //
456
- // ========================================================================== //
457
- else if (
458
- (previous_value === "=" ||
459
- previous_value === BLOCKCOMMA ||
460
- previous_value === BLOCKCOLON ||
461
- previous_value === block_value) &&
462
- !scope_state
463
- ) {
464
- temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
465
- i += temp_str.length - 1;
466
- const nextToken = peek(src, i, 1);
467
- // Update Metadata
468
- updateMetadata(temp_str);
469
- if (temp_str.trim()) {
470
- // Add token
471
- switch (nextToken) {
472
- case ":":
473
- const trimmedKey = temp_str.trim();
474
- validateIdentifier(trimmedKey, "Argument Key");
475
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
476
- previous_value = block_id_2;
477
- break;
478
- default:
479
- addToken(TOKEN_TYPES.VALUE, temp_str);
480
- previous_value = block_value;
481
- break;
482
- }
483
- }
484
- }
485
- // ========================================================================== //
486
- // Token: Inline Identifier //
487
- // ========================================================================== //
488
- else if (previous_value === "->" && !scope_state) {
489
- temp_str = concatChar(src, i, ["(", ")", ":"]);
490
- i += temp_str.length - 1;
491
- const nextToken = peek(src, i, 1);
492
- // Update Metadata
493
- updateMetadata(temp_str);
494
- if (temp_str.trim()) {
495
- // Add Token
496
- switch (nextToken) {
497
- case ":":
498
- const trimmedKey = temp_str.trim();
499
- validateIdentifier(trimmedKey, "Argument Key");
500
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
501
- previous_value = inline_id_2;
502
- break;
503
- default:
504
- const trimmedId = temp_str.trim();
505
- validateIdentifier(trimmedId, "Inline Identifier");
506
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedId);
507
- previous_value = inline_id;
508
- break;
509
- }
510
- }
511
- }
512
- // ========================================================================== //
513
- // Token: Inline Value //
514
- // ========================================================================== //
515
- else if (
516
- (previous_value === "(" ||
517
- previous_value === INLINECOLON ||
518
- previous_value === INLINECOMMA ||
519
- previous_value === inline_value) &&
520
- !scope_state
521
- ) {
522
- temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
523
- i += temp_str.length - 1;
524
- // Update Metadata
525
- updateMetadata(temp_str);
526
- if (temp_str.trim()) {
527
- // Add Token
528
- addToken(TOKEN_TYPES.VALUE, temp_str);
529
- // Update Previous Value
530
- previous_value = inline_value;
531
- }
532
- }
533
- // ========================================================================== //
534
- // Token: At Identifier //
535
- // ========================================================================== //
536
- else if (previous_value === "@_") {
537
- temp_str = concatChar(src, i, ["_", ":"]);
538
- i += temp_str.length - 1;
539
- // Update Metadata
540
- updateMetadata(temp_str);
541
- if (temp_str.trim()) {
542
- const trimmedStr = temp_str.trim();
543
- if (trimmedStr !== end_keyword) {
544
- validateIdentifier(trimmedStr, "At-Block Identifier");
206
+ // Capture plain text or Identifier values //
207
+ // ========================================================================== //
208
+ const isValueContext = (prev_type === TOKEN_TYPES.COLON || prev_type === TOKEN_TYPES.EQUAL);
209
+ const context = concatText(src, i, isInHeader, isInAtBlockBody, isValueContext);
210
+ if (context.length > 0) {
211
+ if (isInHeader) {
212
+ const trimmed = context.trim();
213
+ if ((prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT) && trimmed === end_keyword) {
214
+ addToken(TOKEN_TYPES.END_KEYWORD, trimmed, context);
215
+ } else if (trimmed.length > 0) {
216
+ let isNextColon = false;
217
+ for (let j = i + context.length; j < src.length; j++) {
218
+ const c = src[j];
219
+ if (c === " " || c === "\t" || c === "\n") continue;
220
+ if (c === ":") isNextColon = true;
221
+ break;
545
222
  }
546
- // Add Token
547
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedStr);
548
- previous_value = at_id;
549
- }
550
- }
551
- // ========================================================================== //
552
- // Token: At Value //
553
- // ========================================================================== //
554
- else if (previous_value === ATBLOCKCOLON || previous_value === ATBLOCKCOMMA || previous_value === at_value) {
555
- temp_str = concatChar(src, i, [";", "\\", ",", ":"]);
556
- i += temp_str.length - 1;
557
- const nextToken = peek(src, i, 1);
558
- // Update Metadata
559
- updateMetadata(temp_str);
560
- if (temp_str.trim()) {
561
- switch (nextToken) {
562
- case ":":
563
- const trimmedKey = temp_str.trim();
564
- validateIdentifier(trimmedKey, "Argument Key");
565
- addToken(TOKEN_TYPES.IDENTIFIER, trimmedKey);
566
- previous_value = at_id_2;
567
- break;
568
- default:
569
- addToken(TOKEN_TYPES.VALUE, temp_str);
570
- previous_value = at_value;
571
- break;
223
+
224
+ const isBlockStart = (prev_type === TOKEN_TYPES.OPEN_BRACKET || prev_type === TOKEN_TYPES.OPEN_AT);
225
+ const isMapperHead = (prev_type === TOKEN_TYPES.OPEN_PAREN && prev_prev_type === TOKEN_TYPES.THIN_ARROW);
226
+ const isMandatoryId = (isNextColon || prev_type === TOKEN_TYPES.THIN_ARROW);
227
+
228
+ if (isBlockStart || isMapperHead || isMandatoryId) {
229
+ validateIdentifier(trimmed, character + context.indexOf(trimmed));
230
+ addToken(TOKEN_TYPES.IDENTIFIER, trimmed, context);
231
+ } else {
232
+ addToken(TOKEN_TYPES.VALUE, trimmed, context);
572
233
  }
234
+ } else {
235
+ advance(context);
573
236
  }
237
+ } else {
238
+ addToken(TOKEN_TYPES.TEXT, context);
574
239
  }
575
- // ========================================================================== //
576
- // Token:End Keyword //
577
- // ========================================================================== //
578
- else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
579
- temp_str = concatChar(src, i, ["]", "_"]);
580
- i += temp_str.length - 1;
581
- // Update Metadata
582
- updateMetadata(temp_str);
583
- if (temp_str.trim()) {
584
- addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
585
- // Update Previous Value
586
- previous_value = end_keyword;
587
- scope_state = false;
588
- }
589
- }
590
- // ========================================================================== //
591
- // Token: Comment //
592
- // ========================================================================== //
593
- else if (current_char === "#") {
594
- temp_str = concatChar(src, i, ["\n"]);
595
- // Update Metadata
596
- updateMetadata(temp_str);
597
- if (temp_str.trim()) {
598
- i += temp_str.length - 1;
599
- addToken(TOKEN_TYPES.COMMENT, temp_str);
600
- }
601
- }
602
- // ========================================================================== //
603
- // Token: Text //
604
- // ========================================================================== //
605
- else {
606
- if (previous_value === "_@+") {
607
- // Strictly wait for semicolon or arguments on the same line.
608
- // No more heuristic lookahead.
609
- }
610
- context = concatText(src, i, scope_state, [
611
- [":", previous_value === inline_id_2],
612
- [",", previous_value === block_value || previous_value === at_value || previous_value === inline_value],
613
- [":", (previous_value === "_@+" && !scope_state) || previous_value === at_value],
614
- [";", previous_value === at_value],
615
- [")", previous_value === inline_value]
616
- ]);
617
- i += context.length - 1;
618
- // Update Metadata
619
- updateMetadata(context);
620
- if (context.trim()) {
621
- addToken(TOKEN_TYPES.TEXT, context);
622
- }
623
- }
240
+ i += context.length - 1;
241
+ } else {
242
+ addToken(TOKEN_TYPES.TEXT, char);
624
243
  }
625
- context = "";
626
- temp_str = "";
627
244
  }
628
- return tokens;
629
- } else {
630
- lexerError([
631
- `{line}<$red:Invalid SomMark syntax:$> ${src === "" ? "<$yellow: Got empty string '' $>" : `<$yellow:Expected source input to be a string, got$> <$blue: '${typeof src}'$>`}{line}`
632
- ]);
633
245
  }
246
+ // ========================================================================== //
247
+ // Finalize with End-of-File token //
248
+ // ========================================================================== //
249
+ addToken(TOKEN_TYPES.EOF, "");
250
+ return tokens;
634
251
  }
635
252
 
636
253
  export default lexer;