sommark 3.1.0 → 3.2.0

package/core/lexer.js CHANGED
@@ -175,9 +175,8 @@ function lexer(src) {
  if (src && typeof src === "string") {
  const tokens = [];
  let scope_state = false;
- let line = 1;
- let start = 1;
- let end = 0;
+ let line = 0;
+ let character = 0;
  let depth_stack = [];
  let context = "",
  temp_str = "",
@@ -194,23 +193,36 @@ function lexer(src) {
  }

  function addToken(type, value) {
- tokens.push({ type, value, line, start, end, depth: depth_stack.length });
+ const startPos = { line, character };
+ // Update position based on value length and newlines
+ const newlines = (value.match(/\n/g) || []).length;
+ if (newlines > 0) {
+ line += newlines;
+ const parts = value.split("\n");
+ character = parts[parts.length - 1].length;
+ } else {
+ character += value.length;
+ }
+ const endPos = { line, character };
+ tokens.push({
+ type,
+ value,
+ range: { start: startPos, end: endPos },
+ depth: depth_stack.length
+ });
  }

- const updateMetadata = text => {
- const newlines = updateNewLine(text) || 0;
+ // Helper to advance position without adding a token (e.g., for whitespace/newlines that don't emit tokens)
+ function advance(text) {
+ const newlines = (text.match(/\n/g) || []).length;
  if (newlines > 0) {
- const lines = text.split("\n");
- const lastLineLength = lines[lines.length - 1].length;
- start = end + 1;
- end = lastLineLength;
  line += newlines;
+ const parts = text.split("\n");
+ character = parts[parts.length - 1].length;
  } else {
- const cols = updateColumn(end, text.length);
- start = cols.start;
- end = cols.end;
+ character += text.length;
  }
- };
+ }

  for (let i = 0; i < src.length; i++) {
  let current_char = src[i];
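
The two helpers above replace the old `updateMetadata`/`updateColumn` bookkeeping: `addToken` now computes a zero-based `{ line, character }` range for every token it emits, and `advance` moves the cursor for characters that emit no token. A minimal standalone sketch of the position arithmetic (illustrative only, not the package's code):

function measure(value, pos) {
  // pos is { line, character }, zero-based, mirroring the new lexer state
  const start = { ...pos };
  const newlines = (value.match(/\n/g) || []).length;
  if (newlines > 0) {
    pos.line += newlines;
    pos.character = value.split("\n").pop().length;
  } else {
    pos.character += value.length;
  }
  return { start, end: { ...pos } };
}

const pos = { line: 0, character: 0 };
measure("[note]", pos); // => { start: { line: 0, character: 0 }, end: { line: 0, character: 6 } }
measure("a\nbc", pos);  // crosses a newline => end is { line: 1, character: 2 }
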
@@ -218,8 +230,6 @@ function lexer(src) {
  // Token: Open Bracket //
  // ========================================================================== //
  if (current_char === "[" && !scope_state && previous_value !== "(") {
- // Update Metadata
- updateMetadata(current_char);
  // i + 1 -> skip current character
  temp_str = concatChar(src, i + 1, ["]"]);
  if (temp_str && temp_str.length > 0) {
@@ -239,8 +249,6 @@ function lexer(src) {
  // Token: Equal Sign //
  // ========================================================================== //
  else if (current_char === "=" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.EQUAL, current_char);
  previous_value = current_char;
  }
@@ -248,8 +256,6 @@ function lexer(src) {
  // Token: Close Bracket //
  // ========================================================================== //
  else if (current_char === "]" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
  if (previous_value === end_keyword) {
  depth_stack.pop();
@@ -260,8 +266,6 @@ function lexer(src) {
  // Token: Open Parenthesis '(' //
  // ========================================================================== //
  else if (current_char === "(" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
  if (previous_value !== "->") {
  previous_value = current_char;
@@ -273,8 +277,6 @@ function lexer(src) {
  else if (current_char === "-" && peek(src, i, 1) === ">") {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
  previous_value = temp_str;
  }
@@ -282,8 +284,6 @@ function lexer(src) {
  // Token: Close Parenthesis ')' //
  // ========================================================================== //
  else if (current_char === ")" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
  previous_value = current_char;
  }
@@ -297,8 +297,6 @@ function lexer(src) {
  ) {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.OPEN_AT, temp_str);
  // is next token end keyword?
  if (isAtBlockEnd(src, i - 1)) {
@@ -313,8 +311,6 @@ function lexer(src) {
  else if (current_char === "_" && peek(src, i, 1) === "@") {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
  switch (previous_value) {
  case at_id:
@@ -341,8 +337,6 @@ function lexer(src) {
  previous_value === INLINECOLON) &&
  !scope_state
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.COLON, current_char);
  switch (previous_value) {
  case block_id_2:
@@ -371,8 +365,6 @@ function lexer(src) {
  previous_value === ATBLOCKCOMMA ||
  previous_value === INLINECOMMA)
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.COMMA, current_char);
  switch (previous_value) {
  case "=":
@@ -398,8 +390,6 @@ function lexer(src) {
  (current_char === ";" && previous_value === ";") ||
  (current_char === ";" && previous_value === ATBLOCKCOMMA)
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.SEMICOLON, current_char);
  scope_state = true;
  previous_value = current_char;
@@ -410,21 +400,17 @@ function lexer(src) {
  else if (current_char === "\\") {
  temp_str = concatEscape(src, i);
  i += temp_str.length - 1;
- updateMetadata(temp_str);
  temp_str = temp_str.trim();
  if (temp_str && temp_str.length > 0) {
- // Add Token
  addToken(TOKEN_TYPES.ESCAPE, temp_str);
  }
  }
  // ========================================================================== //
- // Count Newlines //
+ // Count Newlines and Whitespace (No Tokens) //
  // ========================================================================== //
  else if (current_char === "\n") {
  if (!scope_state) {
- line++;
- start = 1;
- end = 0;
+ advance(current_char);
  continue;
  }
  }
@@ -438,8 +424,6 @@ function lexer(src) {
  if (previous_value === "[" && !scope_state) {
  temp_str = concatChar(src, i, ["=", "]"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  const trimmedStr = temp_str.trim();
  if (trimmedStr !== end_keyword) {
@@ -464,8 +448,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add token
  switch (nextToken) {
@@ -489,8 +471,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, ["(", ")", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add Token
  switch (nextToken) {
@@ -521,8 +501,6 @@ function lexer(src) {
  ) {
  temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add Token
  addToken(TOKEN_TYPES.VALUE, temp_str);
@@ -536,8 +514,6 @@ function lexer(src) {
  else if (previous_value === "@_") {
  temp_str = concatChar(src, i, ["_", ":"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  const trimmedStr = temp_str.trim();
  if (trimmedStr !== end_keyword) {
@@ -555,8 +531,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, [";", "\\", ",", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  switch (nextToken) {
  case ":":
@@ -578,8 +552,6 @@ function lexer(src) {
  else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
  temp_str = concatChar(src, i, ["]", "_"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
  // Update Previous Value
@@ -592,8 +564,6 @@ function lexer(src) {
  // ========================================================================== //
  else if (current_char === "#") {
  temp_str = concatChar(src, i, ["\n"]);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  i += temp_str.length - 1;
  addToken(TOKEN_TYPES.COMMENT, temp_str);
@@ -615,8 +585,6 @@ function lexer(src) {
  [")", previous_value === inline_value]
  ]);
  i += context.length - 1;
- // Update Metadata
- updateMetadata(context);
  if (context.trim()) {
  addToken(TOKEN_TYPES.TEXT, context);
  }
@@ -625,6 +593,16 @@ function lexer(src) {
  context = "";
  temp_str = "";
  }
+
+ // Ensure EOF token
+ const eofPos = { line, character };
+ tokens.push({
+ type: TOKEN_TYPES.EOF,
+ value: "",
+ range: { start: eofPos, end: eofPos },
+ depth: depth_stack.length
+ });
+
  return tokens;
  } else {
  lexerError([
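
Taken together, the lexer changes replace the flat `line`/`start`/`end` token fields with a nested `range`, switch to zero-based positions, and guarantee a trailing `EOF` token. A 3.2.0 token should therefore look roughly like this (shape inferred from the hunks above; the concrete values are illustrative):

// Illustrative token shape, inferred from the diff
{
  type: "IDENTIFIER",
  value: "note",
  range: {
    start: { line: 0, character: 1 },  // zero-based line and column
    end: { line: 0, character: 5 }
  },
  depth: 1
}

The synthetic `EOF` token closes the stream with an empty `value` and a zero-width range (`start` equals `end`) at the final cursor position, giving consumers a stable end-of-input anchor.
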
package/core/parser.js CHANGED
@@ -38,7 +38,11 @@ function makeBlockNode() {
  id: "",
  args: [],
  body: [],
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -46,7 +50,11 @@ function makeTextNode() {
  return {
  type: TEXT,
  text: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -54,7 +62,11 @@ function makeCommentNode() {
  return {
  type: COMMENT,
  text: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -64,7 +76,11 @@ function makeInlineNode() {
  value: "",
  id: "",
  args: [],
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -74,30 +90,34 @@ function makeAtBlockNode() {
  id: "",
  args: [],
  content: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

  let end_stack = [];
  let tokens_stack = [];
- let line = 1,
- start = 1,
- end = 1,
+ let range = {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ },
  value = "";

  const fallback = {
  value: "Unknown",
- line: "Unknown",
- start: "Unknown",
- end: "Unknown",
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ },
  tokens_stack: ["--Empty--"]
  };
  const updateData = (tokens, i) => {
  if (tokens[i]) {
  tokens_stack.push(tokens[i].value);
- line = tokens[i].line;
- start = tokens[i].start;
- end = tokens[i].end;
+ range = tokens[i].range;
  value = tokens[i].value;
  }
  };
@@ -130,11 +150,11 @@ const errorMessage = (tokens, i, expectedValue, behindValue, frontText) => {

  return [
  `<$blue:{line}$><$red:Here where error occurred:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}{N}`,
- `<$red:${frontText ? frontText : "Expected token"}$> <$blue:'${expectedValue}'$> ${behindValue ? "after <$blue:'" + behindValue + "'$>" : ""} at line <$yellow:${line}$>,`,
- ` from column <$yellow: ${start}$> to <$yellow: ${end}$>`,
+ `<$red:${frontText ? frontText : "Expected token"}$> <$blue:'${expectedValue}'$> ${behindValue ? "after <$blue:'" + behindValue + "'$>" : ""} at line <$yellow:${current.range.start.line + 1}$>,`,
+ ` from column <$yellow: ${current.range.start.character}$> to <$yellow: ${current.range.end.character}$>`,
  `{N}<$yellow:Received:$> <$blue:'${value === "\n" ? "\\n' (newline)" : value}'$>`,
- ` at line <$yellow:${current.line}$>,`,
- ` from column <$yellow: ${current.start}$> to <$yellow: ${current.end}$>{N}`,
+ ` at line <$yellow:${current.range.start.line + 1}$>,`,
+ ` from column <$yellow: ${current.range.start.character}$> to <$yellow: ${current.range.end.character}$>{N}`,
  "<$blue:{line}$>"
  ];
  };
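
Note the `+ 1` applied to `range.start.line` in the rewritten messages: positions are now stored zero-based, so the error formatter converts lines back to one-based for display while columns are printed as stored. A hypothetical helper (not in the package) capturing the same convention:

// Hypothetical: format a zero-based range the way the new error messages do
function formatRange(range) {
  return `line ${range.start.line + 1}, ` +
    `from column ${range.start.character} to ${range.end.character}`;
}

formatRange({ start: { line: 0, character: 4 }, end: { line: 0, character: 9 } });
// => "line 1, from column 4 to 9"
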
@@ -233,21 +253,21 @@ function parseSemiColon(tokens, i, afterChar = "") {
  // ========================================================================== //
  function parseBlock(tokens, i) {
  const blockNode = makeBlockNode();
+ const openBracketToken = current_token(tokens, i);
  // ========================================================================== //
  // consume '[' //
  // ========================================================================== //
  i++;
  updateData(tokens, i);
- if (!current_token(tokens, i) || (current_token(tokens, i) && current_token(tokens, i).type !== TOKEN_TYPES.IDENTIFIER)) {
- parserError(errorMessage(tokens, i, block_id, "["));
- }
- const id = current_token(tokens, i).value;
+ const idToken = current_token(tokens, i);
+ const id = idToken.value;
  if (id.trim() === end_keyword) {
  parserError(errorMessage(tokens, i, id, "", `'${id.trim()}' is a reserved keyword and cannot be used as an identifier.`));
  }
  blockNode.id = id.trim();
  validateName(blockNode.id);
- blockNode.depth = current_token(tokens, i).depth;
+ blockNode.depth = idToken.depth;
+ blockNode.range.start = openBracketToken.range.start;
  end_stack.push(id);
  // ========================================================================== //
  // consume Block Identifier //
@@ -403,8 +423,10 @@ function parseBlock(tokens, i) {
  // ========================================================================== //
  // consume ']' //
  // ========================================================================== //
+ const closeBracketToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ blockNode.range.end = closeBracketToken.range.end;
  break;
  } else {
  const [childNode, nextIndex] = parseNode(tokens, i);
@@ -423,6 +445,8 @@ function parseBlock(tokens, i) {
  // ========================================================================== //
  function parseInline(tokens, i) {
  const inlineNode = makeInlineNode();
+ const openParenToken = current_token(tokens, i);
+ inlineNode.range.start = openParenToken.range.start;
  // ========================================================================== //
  // consume '(' //
  // ========================================================================== //
@@ -569,8 +593,10 @@ function parseInline(tokens, i) {
  // ========================================================================== //
  // consume ')' //
  // ========================================================================== //
+ const finalParenToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ inlineNode.range.end = finalParenToken.range.end;
  tokens_stack.length = 0;
  return [inlineNode, i];
  }
@@ -579,7 +605,9 @@ function parseInline(tokens, i) {
  // ========================================================================== //
  function parseText(tokens, i, options = {}) {
  const textNode = makeTextNode();
- textNode.depth = current_token(tokens, i).depth;
+ const startToken = current_token(tokens, i);
+ textNode.range.start = startToken.range.start;
+ textNode.depth = startToken.depth;
  const { selectiveUnescape = false } = options;

  while (i < tokens.length) {
@@ -604,6 +632,7 @@ function parseText(tokens, i, options = {}) {
  } else {
  break;
  }
+ textNode.range.end = current_token(tokens, i - 1).range.end;
  }
  return [textNode, i];
  }
@@ -612,6 +641,8 @@ function parseText(tokens, i, options = {}) {
  // ========================================================================== //
  function parseAtBlock(tokens, i) {
  const atBlockNode = makeAtBlockNode();
+ const openAtToken = current_token(tokens, i);
+ atBlockNode.range.start = openAtToken.range.start;
  // ========================================================================== //
  // consume '@_' //
  // ========================================================================== //
@@ -754,8 +785,10 @@ function parseAtBlock(tokens, i) {
  // ========================================================================== //
  // consume '_@' //
  // ========================================================================== //
+ const closeAtToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ atBlockNode.range.end = closeAtToken.range.end;
  tokens_stack.length = 0;
  return [atBlockNode, i];
  }
@@ -764,9 +797,11 @@ function parseAtBlock(tokens, i) {
  // ========================================================================== //
  function parseCommentNode(tokens, i) {
  const commentNode = makeCommentNode();
- if (current_token(tokens, i) && current_token(tokens, i).type === TOKEN_TYPES.COMMENT) {
- commentNode.text = current_token(tokens, i).value;
- commentNode.depth = current_token(tokens, i).depth;
+ const token = current_token(tokens, i);
+ if (token && token.type === TOKEN_TYPES.COMMENT) {
+ commentNode.text = token.value;
+ commentNode.depth = token.depth;
+ commentNode.range = token.range;
  }
  // ========================================================================== //
  // consume Comment '#' //
@@ -825,9 +860,10 @@ function parseNode(tokens, i) {
  function parser(tokens) {
  end_stack = [];
  tokens_stack = [];
- line = 1;
- start = 1;
- end = 1;
+ range = {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ };
  value = "";
  let ast = [];
  let i = 0;
@@ -16,7 +16,8 @@ const TOKEN_TYPES = {
  COMMA: "COMMA",
  SEMICOLON: "SEMICOLON",
  COMMENT: "COMMENT",
- ESCAPE: "ESCAPE"
+ ESCAPE: "ESCAPE",
+ EOF: "EOF"
  };

  export default TOKEN_TYPES;
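
On the parser side, every node factory now seeds a zeroed `range`, and each parse function stamps `range.start` from its opening token and `range.end` from its closing token. A hedged sketch of a resulting block node (the node-type constant and positions are illustrative, not taken from the package):

// Illustrative only — the range spans from the opening '[' to the closing ']'
{
  type: "BLOCK",          // assumed node-type constant
  id: "note",
  args: [],
  body: [ /* child nodes, each carrying its own range */ ],
  depth: 0,
  range: {
    start: { line: 0, character: 0 },
    end: { line: 2, character: 5 }
  }
}

Together with the new `EOF` token type, this gives downstream tools precise source spans for every token and AST node.
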
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "sommark",
- "version": "3.1.0",
+ "version": "3.2.0",
  "description": "SomMark is a declarative, extensible markup language for structured content that can be converted to HTML, Markdown, MDX, JSON, and more.",
  "main": "index.js",
  "directories": {