uniorg-parse 3.0.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/parser.js CHANGED
@@ -19,10 +19,10 @@ import { Reader } from './reader.js';
19
19
  * By default, all Elements except `Headline`, `Planning`, `PropertyDrawer`, `ListItem`, `TableRow`, and
20
20
  * `NodeProperty` are supported.
21
21
  *
22
- * If the documentation of the mode say allows”—the specified elements are supported in additional to default
22
+ * If the documentation of the mode says "allows"—the specified elements are supported in addition to default
23
23
  * elements.
24
24
  *
25
- * If the documentation of the mode says expecting”—only that elements are allowed (default elements are
25
+ * If the documentation of the mode says "expecting"—only those elements are allowed (default elements are
26
26
  * not).
27
27
  */
28
28
  var ParseMode;
@@ -53,10 +53,28 @@ class Parser {
53
53
  this.options = { ...defaultOptions, ...options };
54
54
  this.re = new OrgRegexUtils(this.options);
55
55
  }
56
+ /**
57
+ * Adds position information to node data when trackPosition is enabled
58
+ */
59
+ addPosition(data, startOffset, endOffset) {
60
+ if (!this.options.trackPosition) {
61
+ return data;
62
+ }
63
+ const position = this.r.positionFromOffsets(startOffset, endOffset);
64
+ if (!position) {
65
+ return data;
66
+ }
67
+ return {
68
+ ...data,
69
+ position,
70
+ };
71
+ }
56
72
  parse() {
57
73
  this.parseEmptyLines();
74
+ const startOffset = 0;
58
75
  const children = this.parseElements(ParseMode.TopComment);
59
- return u('org-data', { contentsBegin: 0, contentsEnd: this.r.endOffset() }, children);
76
+ const endOffset = this.r.endOffset();
77
+ return u('org-data', this.addPosition({ contentsBegin: startOffset, contentsEnd: endOffset }, startOffset, endOffset), children);
60
78
  }
61
79
  // General parsing structure
62
80
  parseElements(mode, structure) {
@@ -66,7 +84,7 @@ class Parser {
66
84
  const offset = this.r.offset();
67
85
  if (offset === prevOffset) {
68
86
  console.log('elements:', elements, 'rest:', JSON.stringify(this.r.rest()));
69
- throw new Error('no progress (elements)');
87
+ throw new Error('no progress (elements), if you see this, please report an issue to https://github.com/rasendubi/uniorg/issues');
70
88
  }
71
89
  prevOffset = offset;
72
90
  const element = this.parseElement(mode, structure);
@@ -85,7 +103,7 @@ class Parser {
85
103
  element.structure
86
104
  : undefined));
87
105
  this.r.widen();
88
- // Delete structure from lists. Its only here to facilitate
106
+ // Delete structure from lists. It's only here to facilitate
89
107
  // parsing and should not be exposed to the user.
90
108
  // @ts-expect-error Property 'structure' does not exist on type 'OrgData'
91
109
  if (element.structure) {
@@ -287,13 +305,13 @@ class Parser {
287
305
  // cursor. This is always a programming error and leads to
288
306
  // infinite loop here.
289
307
  if (this.r.offset() === prevOffset) {
290
- throw new Error(`no progress (parseObject): ${JSON.stringify(mobject)}, text: ${JSON.stringify(this.r.rest())}, objects: ${JSON.stringify(objects, null, 2)}`);
308
+ throw new Error(`no progress (parseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues with the following information: ${JSON.stringify(mobject)}, text: ${JSON.stringify(this.r.rest())}, objects: ${JSON.stringify(objects, null, 2)}`);
291
309
  }
292
310
  const [objectBegin, o] = mobject;
293
311
  if (objectBegin !== prevEnd) {
294
312
  // parse text before object
295
313
  const value = this.r.substring(prevEnd, objectBegin);
296
- objects.push(u('text', { value }));
314
+ objects.push(u('text', this.addPosition({ value }, prevEnd, objectBegin)));
297
315
  }
298
316
  // @ts-expect-error contentsBegin is not defined for "literals"
299
317
  const cbeg = o.contentsBegin;
@@ -312,7 +330,7 @@ class Parser {
312
330
  const text = this.r.rest();
313
331
  this.r.advance(text.length);
314
332
  if (text.trim().length) {
315
- objects.push(u('text', { value: text }));
333
+ objects.push(u('text', this.addPosition({ value: text }, prevEnd, this.r.offset())));
316
334
  }
317
335
  return objects;
318
336
  }
@@ -399,7 +417,7 @@ class Parser {
399
417
  const o = this.tryParseObject(restriction);
400
418
  if (o) {
401
419
  if (begin === this.r.offset()) {
402
- throw new Error('no progress (tryParseObject)');
420
+ throw new Error('no progress (tryParseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues.');
403
421
  }
404
422
  return [begin, o];
405
423
  }
@@ -548,7 +566,7 @@ class Parser {
548
566
  ? this.r.offset() + endOfSubtree.index
549
567
  : this.r.endOffset();
550
568
  this.r.resetOffset(contentsEnd);
551
- return u('section', { contentsBegin, contentsEnd }, []);
569
+ return u('section', this.addPosition({ contentsBegin, contentsEnd }, contentsBegin, contentsEnd), []);
552
570
  }
553
571
  parseHeadline() {
554
572
  const begin = this.r.offset();
@@ -576,7 +594,7 @@ class Parser {
576
594
  // Reset line restriction.
577
595
  this.r.widen();
578
596
  this.parseEmptyLines();
579
- return u('headline', {
597
+ return u('headline', this.addPosition({
580
598
  level,
581
599
  todoKeyword,
582
600
  priority,
@@ -585,10 +603,12 @@ class Parser {
585
603
  tags,
586
604
  contentsBegin,
587
605
  contentsEnd,
588
- }, []);
606
+ }, begin, titleEnd), []);
589
607
  }
590
608
  parsePlanning() {
591
609
  this.r.narrow(this.r.offset(), this.r.offset() + this.r.line().length);
610
+ this.r.advance(this.r.match(/^[ \t]*/));
611
+ const begin = this.r.offset();
592
612
  let scheduled = null;
593
613
  let deadline = null;
594
614
  let closed = null;
@@ -607,20 +627,23 @@ class Parser {
607
627
  if (keyword === 'CLOSED:')
608
628
  closed = time;
609
629
  }
630
+ const end = this.r.offset();
610
631
  this.r.widen();
611
632
  this.r.advance(this.r.line());
612
633
  this.parseEmptyLines();
613
- return u('planning', { scheduled, deadline, closed });
634
+ return u('planning', this.addPosition({ scheduled, deadline, closed }, begin, end));
614
635
  }
615
636
  parsePropertyDrawer() {
637
+ const begin = this.r.offset();
616
638
  this.r.advance(this.r.line());
617
639
  const contentsBegin = this.r.offset();
618
640
  const endM = this.r.forceMatch(/^[ \t]*:END:[ \t]*$/im);
619
641
  this.r.advance(endM.index);
620
642
  const contentsEnd = this.r.offset();
621
643
  this.r.advance(this.r.line());
644
+ const end = this.r.offset();
622
645
  this.parseEmptyLines();
623
- return u('property-drawer', { contentsBegin, contentsEnd }, []);
646
+ return u('property-drawer', this.addPosition({ contentsBegin, contentsEnd }, begin, end), []);
624
647
  }
625
648
  parseBlock(type, pattern, affiliated) {
626
649
  const endM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${pattern}[ \\t]*$`, 'im'));
@@ -633,13 +656,14 @@ class Parser {
633
656
  const contentsEnd = begin + endM.index;
634
657
  this.r.resetOffset(contentsEnd);
635
658
  this.r.advance(this.r.line());
659
+ const end = this.r.offset();
636
660
  this.parseEmptyLines();
637
- const _end = this.r.offset();
638
- return u(type, { affiliated, contentsBegin, contentsEnd }, []);
661
+ return u(type, this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []);
639
662
  }
640
663
  parseComment() {
641
664
  let valueLines = [];
642
665
  this.r.advance(this.r.forceLookingAt(/^[ \t]*# ?/));
666
+ const start = this.r.offset();
643
667
  valueLines.push(this.r.advance(this.r.line()));
644
668
  while (true) {
645
669
  const m = this.r.advance(this.r.lookingAt(/^[ \t]*#( |$)/m));
@@ -647,14 +671,19 @@ class Parser {
647
671
  break;
648
672
  valueLines.push(this.r.advance(this.r.line()));
649
673
  }
674
+ let end = this.r.offset();
675
+ if (this.r.substring(end - 1, end) === '\n') {
676
+ end -= 1;
677
+ }
650
678
  let value = valueLines.join('');
651
679
  if (value[value.length - 1] === '\n') {
652
680
  value = value.substring(0, value.length - 1);
653
681
  }
654
- return u('comment', { value: value });
682
+ return u('comment', this.addPosition({ value }, start, end));
655
683
  }
656
684
  parseFixedWidth(affiliated) {
657
685
  let valueLines = [];
686
+ const begin = this.r.offset();
658
687
  while (true) {
659
688
  const m = this.r.lookingAt(/^[ \t]*: ?(.*)$/m);
660
689
  if (!m)
@@ -663,7 +692,11 @@ class Parser {
663
692
  valueLines.push(m[1]);
664
693
  }
665
694
  const value = valueLines.join('\n');
666
- return u('fixed-width', { affiliated, value });
695
+ let end = this.r.offset();
696
+ if (this.r.substring(end - 1, end) === '\n') {
697
+ end -= 1;
698
+ }
699
+ return u('fixed-width', this.addPosition({ affiliated, value }, begin, end));
667
700
  }
668
701
  parseCommentBlock(affiliated) {
669
702
  const comment = this.parseBlock('comment-block', 'comment', affiliated);
@@ -671,8 +704,9 @@ class Parser {
671
704
  // parsed as paragraph
672
705
  return comment;
673
706
  }
674
- const value = this.r.substring(comment.contentsBegin, comment.contentsEnd);
675
- return u('comment-block', { affiliated, value });
707
+ const { type: _, contentsBegin, contentsEnd, children, ...rest } = comment;
708
+ const value = this.r.substring(contentsBegin, contentsEnd);
709
+ return u('comment-block', { ...rest, value, affiliated });
676
710
  }
677
711
  parseSrcBlock(affiliated) {
678
712
  const endM = this.r.match(/^[ \t]*#\+end_src[ \t]*$/im);
@@ -688,16 +722,16 @@ class Parser {
688
722
  const value = unescapeCodeInString(this.r.substring(contentsBegin, contentsEnd));
689
723
  this.r.resetOffset(contentsEnd);
690
724
  this.r.advance(this.r.line());
725
+ const end = begin + endM.index + endM[0].length;
691
726
  this.parseEmptyLines();
692
- const _end = this.r.offset();
693
- return u('src-block', {
727
+ return u('src-block', this.addPosition({
694
728
  affiliated,
695
729
  language,
696
730
  switches: switches?.trim() ?? null,
697
731
  // using || to convert empty strings to null as well
698
732
  parameters: parameters.trim() || null,
699
733
  value,
700
- });
734
+ }, begin, end));
701
735
  }
702
736
  parseExampleBlock(affiliated) {
703
737
  // TODO: parse switches
@@ -706,8 +740,9 @@ class Parser {
706
740
  // parsed as paragraph
707
741
  return block;
708
742
  }
709
- const value = this.r.substring(block.contentsBegin, block.contentsEnd);
710
- return u('example-block', { affiliated, value });
743
+ const { type: _, contentsBegin, contentsEnd, children, ...rest } = block;
744
+ const value = this.r.substring(contentsBegin, contentsEnd);
745
+ return u('example-block', { ...rest, value, affiliated });
711
746
  }
712
747
  parseExportBlock(affiliated) {
713
748
  const endM = this.r.match(/^[ \t]*#\+end_export[ \t]*$/im);
@@ -724,8 +759,8 @@ class Parser {
724
759
  this.r.resetOffset(contentsEnd);
725
760
  this.r.advance(this.r.line());
726
761
  this.parseEmptyLines();
727
- const _end = this.r.offset();
728
- return u('export-block', { affiliated, backend, value });
762
+ const end = begin + endM.index + endM[0].length;
763
+ return u('export-block', this.addPosition({ affiliated, backend, value }, begin, end));
729
764
  }
730
765
  parseSpecialBlock(affiliated) {
731
766
  const blockType = this.r.forceLookingAt(/[ \t]*#\+begin_(\S+)/i)[1];
@@ -741,8 +776,8 @@ class Parser {
741
776
  this.r.resetOffset(contentsEnd);
742
777
  this.r.advance(this.r.line());
743
778
  this.parseEmptyLines();
744
- const _end = this.r.offset();
745
- return u('special-block', { affiliated, blockType, contentsBegin, contentsEnd }, []);
779
+ const end = begin + endM.index + endM[0].length;
780
+ return u('special-block', this.addPosition({ affiliated, blockType, contentsBegin, contentsEnd }, begin, end), []);
746
781
  }
747
782
  parseAffiliatedKeywords() {
748
783
  const offset = this.r.offset();
@@ -765,7 +800,7 @@ class Parser {
765
800
  this.r.widen();
766
801
  this.r.advance(this.r.line());
767
802
  const isDual = dualKeywords.has(keyword);
768
- const dualValue = isDual ? keywordM.groups.dualValue ?? null : null;
803
+ const dualValue = isDual ? (keywordM.groups.dualValue ?? null) : null;
769
804
  const value = dualValue === null ? mainValue : [mainValue, dualValue];
770
805
  if (multipleKeywords.has(keyword) ||
771
806
  // Attributes can always appear on multiple lines.
@@ -789,9 +824,11 @@ class Parser {
789
824
  const m = this.r.forceLookingAt(/[ \t]*#\+(\S+):(.*)/);
790
825
  const key = m[1].toUpperCase();
791
826
  const value = m[2].trim();
827
+ const begin = this.r.offset();
792
828
  this.r.advance(this.r.line());
829
+ const end = this.r.offset();
793
830
  this.parseEmptyLines();
794
- return u('keyword', { affiliated, key, value });
831
+ return u('keyword', this.addPosition({ affiliated, key, value }, begin, end));
795
832
  }
796
833
  parseLatexEnvironment(affiliated) {
797
834
  const beginOffset = this.r.offset();
@@ -807,15 +844,17 @@ class Parser {
807
844
  const endOffset = this.r.offset();
808
845
  this.parseEmptyLines();
809
846
  const value = this.r.substring(beginOffset, endOffset);
810
- return u('latex-environment', { affiliated, value });
847
+ return u('latex-environment', this.addPosition({ affiliated, value }, beginOffset, endOffset));
811
848
  }
812
849
  parseDrawer(affiliated) {
850
+ const start = this.r.offset();
813
851
  const endM = this.r.match(/^[ \t]*:END:[ \t]*$/im);
814
852
  if (!endM) {
815
853
  this.r.message('incomplete drawer', this.r.offset(), 'uniorg');
816
854
  // Incomplete drawer: parse it as a paragraph.
817
855
  return this.parseParagraph(affiliated);
818
856
  }
857
+ const end = start + endM.index + endM[0].length;
819
858
  const contentsEnd = this.r.offset() + endM.index;
820
859
  const name = this.r.forceLookingAt(drawerRe)[1];
821
860
  this.r.advance(this.r.line());
@@ -823,28 +862,33 @@ class Parser {
823
862
  this.r.resetOffset(contentsEnd);
824
863
  this.r.advance(this.r.line());
825
864
  this.parseEmptyLines();
826
- return u('drawer', { affiliated, name, contentsBegin, contentsEnd }, []);
865
+ return u('drawer', this.addPosition({ affiliated, name, contentsBegin, contentsEnd }, start, end), []);
827
866
  }
828
867
  parseClock() {
868
+ const start = this.r.offset();
829
869
  this.r.advance(this.r.forceMatch(/^[ \t]*CLOCK:[ \t]*/));
830
870
  const value = this.parseTimestamp();
831
871
  this.r.advance(this.r.match(/^[ \t]+=>[ \t]*/));
832
872
  const durationM = this.r.advance(this.r.lookingAt(/^(\S+)[ \t]*$/m));
833
873
  const duration = durationM ? durationM[1] : null;
834
874
  const status = duration ? 'closed' : 'running';
875
+ const end = this.r.offset();
835
876
  this.parseEmptyLines();
836
- return u('clock', { value, duration, status });
877
+ return u('clock', this.addPosition({ value, duration, status }, start, end));
837
878
  }
838
879
  parseNodeProperty() {
880
+ const start = this.r.offset();
839
881
  const propertyRe = /^[ \t]*:(?<key>\S+):(?:(?<value1>$)|[ \t]+(?<value2>.*?))[ \t]*$/m;
840
882
  const m = this.r.forceLookingAt(propertyRe);
841
883
  const key = m.groups['key'];
842
884
  const value = m.groups['value1'] ?? m.groups['value2'];
885
+ const end = this.r.offset() + m.index + m[0].length;
843
886
  this.r.advance(this.r.line());
844
- return u('node-property', { key, value });
887
+ return u('node-property', this.addPosition({ key, value }, start, end));
845
888
  }
846
889
  parseParagraph(affiliated) {
847
- const contentsBegin = this.r.offset();
890
+ const begin = this.r.offset();
891
+ const contentsBegin = begin;
848
892
  this.r.advance(this.r.line());
849
893
  let next = null;
850
894
  while ((next = this.r.match(this.re.paragraphSeparateRe()))) {
@@ -893,11 +937,13 @@ class Parser {
893
937
  break;
894
938
  }
895
939
  const contentsEnd = next ? this.r.offset() : this.r.endOffset();
940
+ const end = contentsEnd;
896
941
  this.r.resetOffset(contentsEnd);
897
942
  this.parseEmptyLines();
898
- return u('paragraph', { affiliated, contentsBegin, contentsEnd }, []);
943
+ return u('paragraph', this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []);
899
944
  }
900
945
  parseFootnoteDefinition(affiliated) {
946
+ const start = this.r.offset();
901
947
  const m = this.r.forceLookingAt(footnoteDefinitionRe);
902
948
  const label = m[1];
903
949
  const begin = this.r.offset();
@@ -924,26 +970,32 @@ class Parser {
924
970
  }
925
971
  contentsEnd = this.r.offset();
926
972
  }
973
+ const end = contentsEnd;
927
974
  this.r.narrow(begin, contentsEnd);
928
975
  this.r.advance(this.r.forceMatch(/\][ \r\t\n]*/m));
929
976
  const contentsBegin = this.r.offset();
930
977
  this.r.widen();
931
978
  this.r.resetOffset(contentsEnd);
932
979
  this.parseEmptyLines();
933
- return u('footnote-definition', { affiliated, label, contentsBegin, contentsEnd }, []);
980
+ return u('footnote-definition', this.addPosition({ affiliated, label, contentsBegin, contentsEnd }, start, end), []);
934
981
  }
935
982
  parseHorizontalRule(affiliated) {
983
+ const start = this.r.offset();
936
984
  this.r.advance(this.r.line());
985
+ const end = this.r.offset();
937
986
  this.parseEmptyLines();
938
- return u('horizontal-rule', { affiliated });
987
+ return u('horizontal-rule', this.addPosition({ affiliated }, start, end));
939
988
  }
940
989
  parseDiarySexp(affiliated) {
990
+ const start = this.r.offset();
941
991
  const value = this.r.forceLookingAt(/^(%%\(.*)[ \t]*$/m)[1];
942
992
  this.r.advance(this.r.line());
993
+ const end = this.r.offset();
943
994
  this.parseEmptyLines();
944
- return u('diary-sexp', { affiliated, value });
995
+ return u('diary-sexp', this.addPosition({ affiliated, value }, start, end));
945
996
  }
946
997
  parseTable(affiliated) {
998
+ const start = this.r.offset();
947
999
  const contentsBegin = this.r.offset();
948
1000
  const tableType = this.r.lookingAt(/^[ \t]*\|/)
949
1001
  ? 'org'
@@ -960,40 +1012,46 @@ class Parser {
960
1012
  tblfm = tblfm + tblfmM[1];
961
1013
  this.r.advance(this.r.line());
962
1014
  }
1015
+ const end = this.r.offset();
963
1016
  this.parseEmptyLines();
964
1017
  if (tableType === 'org') {
965
- return u('table', { tableType, tblfm, contentsBegin, contentsEnd }, []);
1018
+ return u('table', this.addPosition({ tableType, tblfm, contentsBegin, contentsEnd }, start, end), []);
966
1019
  }
967
1020
  else {
968
- return u('table', {
1021
+ return u('table', this.addPosition({
969
1022
  affiliated,
970
1023
  tableType,
971
1024
  tblfm,
972
1025
  value: this.r.substring(contentsBegin, contentsEnd),
973
- });
1026
+ }, start, end));
974
1027
  }
975
1028
  }
976
1029
  parseTableRow() {
1030
+ const start = this.r.offset();
977
1031
  const rowType = this.r.lookingAt(/^[ \t]*\|-/)
978
1032
  ? 'rule'
979
1033
  : 'standard';
980
1034
  this.r.advance(this.r.forceMatch(/\|/));
981
1035
  const contentsBegin = this.r.offset();
982
1036
  this.r.advance(this.r.forceMatch(/^.*?[ \t]*$/m));
1037
+ const end = this.r.offset();
983
1038
  // A table rule has no contents. In that case, ensure
984
1039
  // contentsBegin matches contentsEnd.
985
1040
  const contentsEnd = rowType === 'rule' ? contentsBegin : this.r.offset();
986
1041
  this.r.advance(this.r.line());
987
- return u('table-row', { rowType, contentsBegin, contentsEnd }, []);
1042
+ return u('table-row', this.addPosition({ rowType, contentsBegin, contentsEnd }, start, end), []);
988
1043
  }
989
1044
  parseTableCell() {
1045
+ const start = this.r.offset();
990
1046
  this.r.advance(this.r.forceLookingAt(/^[ \t]*/));
991
1047
  const contentsBegin = this.r.offset();
992
1048
  const m = this.r.advance(this.r.forceLookingAt(/(.*?)[ \t]*(?:\||$)/m));
993
1049
  const contentsEnd = contentsBegin + m[1].length;
994
- return u('table-cell', { contentsBegin, contentsEnd }, []);
1050
+ const end = contentsBegin + m[0].length;
1051
+ return u('table-cell', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
995
1052
  }
996
1053
  parseList(structure, affiliated) {
1054
+ const start = this.r.offset();
997
1055
  const contentsBegin = this.r.offset();
998
1056
  const item = structure.find((x) => x.begin === contentsBegin);
999
1057
  if (!item) {
@@ -1014,7 +1072,8 @@ class Parser {
1014
1072
  }
1015
1073
  const contentsEnd = pos;
1016
1074
  this.r.resetOffset(contentsEnd);
1017
- return u('plain-list', {
1075
+ const end = this.r.offset();
1076
+ return u('plain-list', this.addPosition({
1018
1077
  affiliated,
1019
1078
  indent,
1020
1079
  listType,
@@ -1023,10 +1082,10 @@ class Parser {
1023
1082
  // Exposing structure here is temporary as it gets removed in parseElements(). It is only exposed so
1024
1083
  // that parseElements() can pick it up and use it for parsing list items.
1025
1084
  structure,
1026
- }, []);
1085
+ }, start, end), []);
1027
1086
  }
1028
1087
  parseListItem(structure) {
1029
- const offset = this.r.offset();
1088
+ const start = this.r.offset();
1030
1089
  const m = this.r.advance(this.r.forceMatch(this.re.fullListItemRe()));
1031
1090
  const bullet = m.groups.bullet;
1032
1091
  const counter = m.groups.counter ?? null;
@@ -1037,11 +1096,12 @@ class Parser {
1037
1096
  : m.groups.checkbox === '[-]'
1038
1097
  ? 'trans'
1039
1098
  : null;
1040
- const item = structure.find((x) => x.begin === offset);
1099
+ const item = structure.find((x) => x.begin === start);
1041
1100
  const contentsBegin = this.r.offset();
1042
1101
  const contentsEnd = item.end;
1043
1102
  this.r.resetOffset(contentsEnd);
1044
- return u('list-item', {
1103
+ const end = this.r.offset();
1104
+ return u('list-item', this.addPosition({
1045
1105
  indent: item.indent,
1046
1106
  bullet,
1047
1107
  counter,
@@ -1049,7 +1109,7 @@ class Parser {
1049
1109
  contentsBegin,
1050
1110
  contentsEnd,
1051
1111
  structure,
1052
- }, item.tag ? [item.tag] : []);
1112
+ }, start, end), item.tag ? [item.tag] : []);
1053
1113
  }
1054
1114
  parseListStructure() {
1055
1115
  const items = [];
@@ -1152,8 +1212,8 @@ class Parser {
1152
1212
  const begin = start + m[1].length;
1153
1213
  const contentsBegin = begin + m[2].length + (inside ? 1 : 0);
1154
1214
  const contentsEnd = begin + m[2].length + m[3].length - (inside ? 1 : 0);
1155
- const _end = this.r.offset();
1156
- return u('superscript', { contentsBegin, contentsEnd, children: [] });
1215
+ const end = this.r.offset();
1216
+ return u('superscript', this.addPosition({ contentsBegin, contentsEnd }, begin, end), []);
1157
1217
  }
1158
1218
  parseSubscript() {
1159
1219
  if (!this.options.useSubSuperscripts) {
@@ -1170,10 +1230,11 @@ class Parser {
1170
1230
  const begin = start + m[1].length;
1171
1231
  const contentsBegin = begin + m[2].length + (inside ? 1 : 0);
1172
1232
  const contentsEnd = begin + m[2].length + m[3].length - (inside ? 1 : 0);
1173
- const _end = this.r.offset();
1174
- return u('subscript', { contentsBegin, contentsEnd, children: [] });
1233
+ const end = this.r.offset();
1234
+ return u('subscript', this.addPosition({ contentsBegin, contentsEnd }, begin, end), []);
1175
1235
  }
1176
1236
  parseUnderline() {
1237
+ const start = this.r.offset();
1177
1238
  // backoff one char to check border
1178
1239
  this.r.backoff(1);
1179
1240
  const m = this.r.lookingAt(this.re.emphRe());
@@ -1182,9 +1243,11 @@ class Parser {
1182
1243
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1183
1244
  const contentsEnd = contentsBegin + m[4].length;
1184
1245
  this.r.resetOffset(contentsEnd + 1);
1185
- return u('underline', { contentsBegin, contentsEnd }, []);
1246
+ const end = this.r.offset();
1247
+ return u('underline', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
1186
1248
  }
1187
1249
  parseBold() {
1250
+ const start = this.r.offset();
1188
1251
  // backoff one char to check border
1189
1252
  this.r.backoff(1);
1190
1253
  const m = this.r.lookingAt(this.re.emphRe());
@@ -1193,9 +1256,11 @@ class Parser {
1193
1256
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1194
1257
  const contentsEnd = contentsBegin + m[4].length;
1195
1258
  this.r.resetOffset(contentsEnd + 1);
1196
- return u('bold', { contentsBegin, contentsEnd }, []);
1259
+ const end = this.r.offset();
1260
+ return u('bold', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
1197
1261
  }
1198
1262
  parseItalic() {
1263
+ const start = this.r.offset();
1199
1264
  // backoff one char to check border
1200
1265
  this.r.backoff(1);
1201
1266
  const m = this.r.lookingAt(this.re.emphRe());
@@ -1204,9 +1269,11 @@ class Parser {
1204
1269
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1205
1270
  const contentsEnd = contentsBegin + m[4].length;
1206
1271
  this.r.resetOffset(contentsEnd + 1);
1207
- return u('italic', { contentsBegin, contentsEnd }, []);
1272
+ const end = this.r.offset();
1273
+ return u('italic', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
1208
1274
  }
1209
1275
  parseCode() {
1276
+ const start = this.r.offset();
1210
1277
  // backoff one char to check border
1211
1278
  this.r.backoff(1);
1212
1279
  const m = this.r.lookingAt(this.re.verbatimRe());
@@ -1216,9 +1283,11 @@ class Parser {
1216
1283
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1217
1284
  const contentsEnd = contentsBegin + m[4].length;
1218
1285
  this.r.resetOffset(contentsEnd + 1);
1219
- return u('code', { value }, []);
1286
+ const end = this.r.offset();
1287
+ return u('code', this.addPosition({ value }, start, end), []);
1220
1288
  }
1221
1289
  parseVerbatim() {
1290
+ const start = this.r.offset();
1222
1291
  this.r.backoff(1);
1223
1292
  const m = this.r.lookingAt(this.re.verbatimRe());
1224
1293
  if (!m)
@@ -1227,9 +1296,11 @@ class Parser {
1227
1296
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1228
1297
  const contentsEnd = contentsBegin + m[4].length;
1229
1298
  this.r.resetOffset(contentsEnd + 1);
1230
- return u('verbatim', { value }, []);
1299
+ const end = this.r.offset();
1300
+ return u('verbatim', this.addPosition({ value }, start, end), []);
1231
1301
  }
1232
1302
  parseStrikeThrough() {
1303
+ const start = this.r.offset();
1233
1304
  // backoff one char to check border
1234
1305
  this.r.backoff(1);
1235
1306
  const m = this.r.lookingAt(this.re.emphRe());
@@ -1238,7 +1309,8 @@ class Parser {
1238
1309
  const contentsBegin = this.r.offset() + m.index + m[1].length + m[3].length;
1239
1310
  const contentsEnd = contentsBegin + m[4].length;
1240
1311
  this.r.resetOffset(contentsEnd + 1);
1241
- return u('strike-through', { contentsBegin, contentsEnd }, []);
1312
+ const end = this.r.offset();
1313
+ return u('strike-through', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
1242
1314
  }
1243
1315
  parseStatisticsCookie() {
1244
1316
  const begin = this.r.offset();
@@ -1249,9 +1321,10 @@ class Parser {
1249
1321
  const value = this.r.substring(begin, end);
1250
1322
  // skip trailing whitespace
1251
1323
  const postBlank = this.r.advance(this.r.forceLookingAt(/\s*/))[0].length;
1252
- return u('statistics-cookie', { begin, end, value, postBlank });
1324
+ return u('statistics-cookie', this.addPosition({ begin, end, value, postBlank }, begin, end));
1253
1325
  }
1254
1326
  parseEntity() {
1327
+ const start = this.r.offset();
1255
1328
  const m = this.r.advance(this.r.lookingAt(/^\\(?:(?<value1>_ +)|(?<value2>there4|sup[123]|frac[13][24]|[a-zA-Z]+)(?<brackets>$|\{\}|\P{Letter}))/mu));
1256
1329
  if (!m)
1257
1330
  return null;
@@ -1262,12 +1335,14 @@ class Parser {
1262
1335
  // as text later.
1263
1336
  this.r.backoff(m.groups.brackets.length);
1264
1337
  }
1338
+ const end = this.r.offset();
1265
1339
  const value = getOrgEntity(m.groups.value1 ?? m.groups.value2);
1266
1340
  if (!value)
1267
1341
  return null;
1268
- return u('entity', { useBrackets: hasBrackets, ...value });
1342
+ return u('entity', this.addPosition({ useBrackets: hasBrackets, ...value }, start, end));
1269
1343
  }
1270
1344
  parseExportSnippet() {
1345
+ const start = this.r.offset();
1271
1346
  const m = this.r.advance(this.r.lookingAt(/@@([-A-Za-z0-9]+):/));
1272
1347
  if (!m)
1273
1348
  return null;
@@ -1276,9 +1351,10 @@ class Parser {
1276
1351
  const mend = this.r.advance(this.r.match(/@@/));
1277
1352
  if (!mend)
1278
1353
  return null;
1279
- const contentsEnd = this.r.offset() - 2; // exclude @@
1354
+ const end = this.r.offset();
1355
+ const contentsEnd = end - 2; // exclude @@
1280
1356
  const value = this.r.substring(contentsBegin, contentsEnd);
1281
- return u('export-snippet', { backEnd, value });
1357
+ return u('export-snippet', this.addPosition({ backEnd, value }, start, end));
1282
1358
  }
1283
1359
  parseLatexFragment() {
1284
1360
  const begin = this.r.offset();
@@ -1324,9 +1400,10 @@ class Parser {
1324
1400
  if (begin === end)
1325
1401
  return null;
1326
1402
  const value = this.r.substring(begin, end);
1327
- return u('latex-fragment', { value, contents: contents ?? value });
1403
+ return u('latex-fragment', this.addPosition({ value, contents: contents ?? value }, begin, end));
1328
1404
  }
1329
1405
  parseLineBreak() {
1406
+ const start = this.r.offset();
1330
1407
  const m = this.r.lookingAt(/\\\\[ \t]*$/m);
1331
1408
  if (!m)
1332
1409
  return null;
@@ -1334,8 +1411,9 @@ class Parser {
1334
1411
  this.r.backoff(1);
1335
1412
  if (this.r.peek(1) === '\\')
1336
1413
  return null;
1414
+ const end = start + m[0].length;
1337
1415
  this.r.advance(this.r.line());
1338
- return u('line-break');
1416
+ return u('line-break', this.addPosition({}, start, end));
1339
1417
  }
1340
1418
  parseFootnoteReference() {
1341
1419
  const begin = this.r.offset();
@@ -1366,21 +1444,17 @@ class Parser {
1366
1444
  ? 'inline'
1367
1445
  : 'standard';
1368
1446
  const label = footnoteType === 'inline'
1369
- ? m.groups.label_inline ?? null
1447
+ ? (m.groups.label_inline ?? null)
1370
1448
  : m.groups.label;
1371
1449
  if (footnoteType === 'inline') {
1372
- return u('footnote-reference', {
1373
- label,
1374
- footnoteType,
1375
- contentsBegin,
1376
- contentsEnd,
1377
- }, []);
1450
+ return u('footnote-reference', this.addPosition({ label, footnoteType, contentsBegin, contentsEnd }, begin, end), []);
1378
1451
  }
1379
1452
  else {
1380
- return u('footnote-reference', { label, footnoteType }, []);
1453
+ return u('footnote-reference', this.addPosition({ label, footnoteType }, begin, end), []);
1381
1454
  }
1382
1455
  }
1383
1456
  parseCitation() {
1457
+ const start = this.r.offset();
1384
1458
  let m = this.r.lookingAt(this.re.citationPrefixRe());
1385
1459
  if (!m)
1386
1460
  return null;
@@ -1400,7 +1474,7 @@ class Parser {
1400
1474
  return null;
1401
1475
  }
1402
1476
  this.r.resetOffset(end);
1403
- const cite = {
1477
+ const cite = this.addPosition({
1404
1478
  type: 'citation',
1405
1479
  style,
1406
1480
  begin,
@@ -1410,7 +1484,7 @@ class Parser {
1410
1484
  contentsBegin,
1411
1485
  contentsEnd,
1412
1486
  children: [],
1413
- };
1487
+ }, start, end);
1414
1488
  return cite;
1415
1489
  }
1416
1490
  parseCitationCommonPrefix() {
@@ -1428,12 +1502,12 @@ class Parser {
1428
1502
  return null;
1429
1503
  }
1430
1504
  const end = contentsEnd + ';'.length;
1431
- const prefix = {
1505
+ const prefix = this.addPosition({
1432
1506
  type: 'citation-common-prefix',
1433
1507
  contentsBegin,
1434
1508
  contentsEnd,
1435
1509
  children: [],
1436
- };
1510
+ }, begin, end);
1437
1511
  this.r.resetOffset(end);
1438
1512
  return prefix;
1439
1513
  }
@@ -1449,7 +1523,7 @@ class Parser {
1449
1523
  const separator = mSeparator ? this.r.offset() + mSeparator.index : null;
1450
1524
  const contentsEnd = separator ?? this.r.endOffset();
1451
1525
  const end = separator ? separator + 1 : this.r.endOffset();
1452
- const reference = {
1526
+ const reference = this.addPosition({
1453
1527
  type: 'citation-reference',
1454
1528
  key,
1455
1529
  begin,
@@ -1457,7 +1531,7 @@ class Parser {
1457
1531
  contentsBegin,
1458
1532
  contentsEnd,
1459
1533
  children: [],
1460
- };
1534
+ }, begin, end);
1461
1535
  this.r.resetOffset(end);
1462
1536
  return reference;
1463
1537
  }
@@ -1472,12 +1546,12 @@ class Parser {
1472
1546
  const contentsEnd = end;
1473
1547
  if (begin === end)
1474
1548
  return null;
1475
- const prefix = {
1549
+ const prefix = this.addPosition({
1476
1550
  type: 'citation-prefix',
1477
1551
  contentsBegin,
1478
1552
  contentsEnd,
1479
1553
  children: [],
1480
- };
1554
+ }, begin, end);
1481
1555
  return prefix;
1482
1556
  }
1483
1557
  parseCitationCommonSuffix() {
@@ -1488,23 +1562,25 @@ class Parser {
1488
1562
  if (contentsBegin === contentsEnd)
1489
1563
  return null;
1490
1564
  this.r.resetOffset(contentsEnd);
1491
- return {
1565
+ return this.addPosition({
1492
1566
  type: 'citation-common-suffix',
1493
1567
  contentsBegin,
1494
1568
  contentsEnd,
1495
1569
  children: [],
1496
- };
1570
+ }, contentsBegin, contentsEnd);
1497
1571
  }
1498
1572
  parseCitationKey() {
1573
+ const start = this.r.offset();
1499
1574
  const m = this.r.match(this.re.citationKeyRe());
1500
1575
  if (!m)
1501
1576
  return null;
1502
1577
  this.r.advance(m);
1578
+ const end = this.r.offset();
1503
1579
  const key = m.groups['key'];
1504
- return {
1580
+ return this.addPosition({
1505
1581
  type: 'citation-key',
1506
1582
  key,
1507
- };
1583
+ }, start, end);
1508
1584
  }
1509
1585
  parseCitationSuffix() {
1510
1586
  // this is called after key, so just parse till the end of
@@ -1514,12 +1590,12 @@ class Parser {
1514
1590
  if (contentsBegin === contentsEnd)
1515
1591
  return null;
1516
1592
  this.r.resetOffset(contentsEnd);
1517
- return {
1593
+ return this.addPosition({
1518
1594
  type: 'citation-suffix',
1519
1595
  contentsBegin,
1520
1596
  contentsEnd,
1521
1597
  children: [],
1522
- };
1598
+ }, contentsBegin, contentsEnd);
1523
1599
  }
1524
1600
  scanLists() {
1525
1601
  const start = this.r.offset();
@@ -1539,7 +1615,7 @@ class Parser {
1539
1615
  this.r.resetOffset(start);
1540
1616
  return depth === 0
1541
1617
  ? end
1542
- : // didnt find matching closing parenthesis
1618
+ : // didn't find matching closing parenthesis
1543
1619
  null;
1544
1620
  }
1545
1621
  parseLink() {
@@ -1573,13 +1649,13 @@ class Parser {
1573
1649
  .replace(/(\\+)([\[\]])/g, (p1, p2) => '\\'.repeat(p1.length / 2) + p2);
1574
1650
  // TODO: org-link-expand-abbrev
1575
1651
  const { linkType, path } = this.linkType(rawLink);
1576
- return u('link', {
1652
+ return u('link', this.addPosition({
1577
1653
  format: 'bracket',
1578
1654
  linkType,
1579
1655
  rawLink,
1580
1656
  path,
1581
1657
  ...contents,
1582
- }, []);
1658
+ }, initialOffset, this.r.offset()), []);
1583
1659
  }
1584
1660
  // TODO: this is different from OrgRegexUtils.linkPlainRe
1585
1661
  // Type 3: Plain link, e.g., https://orgmode.org
@@ -1587,12 +1663,12 @@ class Parser {
1587
1663
  const plainM = this.r.advance(this.r.lookingAt(linkPlainRe));
1588
1664
  if (plainM) {
1589
1665
  const m = plainM;
1590
- return u('link', {
1666
+ return u('link', this.addPosition({
1591
1667
  format: 'plain',
1592
1668
  linkType: m[1],
1593
1669
  rawLink: m[0],
1594
1670
  path: m[2],
1595
- }, []);
1671
+ }, initialOffset, this.r.offset()), []);
1596
1672
  }
1597
1673
  // Type 4: Angular link, e.g., <https://orgmode.org>. Unlike
1598
1674
  // bracket links, follow RFC 3986 and remove any extra whitespace
@@ -1604,7 +1680,7 @@ class Parser {
1604
1680
  const linkType = m[1];
1605
1681
  const rawLink = m[0].substring(1, m[0].length - 1); // strip < >
1606
1682
  const path = m[2].replace(/[ \t]*\n[ \t]*/g, '');
1607
- return u('link', { format: 'angle', linkType, rawLink, path }, []);
1683
+ return u('link', this.addPosition({ format: 'angle', linkType, rawLink, path }, initialOffset, this.r.offset()), []);
1608
1684
  }
1609
1685
  return null;
1610
1686
  }
@@ -1643,6 +1719,7 @@ class Parser {
1643
1719
  ].join('|'));
1644
1720
  if (!this.r.lookingAt(timestampRe))
1645
1721
  return null;
1722
+ const contentsBegin = this.r.offset();
1646
1723
  const active = this.r.substring(this.r.offset(), this.r.offset() + 1) === '<';
1647
1724
  const m = this.r.advance(this.r.match(/^([<[](%%)?.*?)[\]>](?:--([<[].*?[\]>]))?/));
1648
1725
  if (!m)
@@ -1677,12 +1754,13 @@ class Parser {
1677
1754
  : timeRange
1678
1755
  ? { ...start, ...timeRange }
1679
1756
  : null;
1680
- return u('timestamp', {
1757
+ const contentsEnd = this.r.offset();
1758
+ return u('timestamp', this.addPosition({
1681
1759
  timestampType,
1682
1760
  rawValue,
1683
1761
  start,
1684
1762
  end,
1685
- });
1763
+ }, contentsBegin, contentsEnd));
1686
1764
  }
1687
1765
  // Helpers
1688
1766
  static parseDate(s) {