prettier-plugin-wolfram 0.7.4 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prettier-plugin-wolfram",
3
- "version": "0.7.4",
3
+ "version": "0.7.6",
4
4
  "description": "Prettier plugin for Wolfram Language using tree-sitter",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -9,9 +9,12 @@ const GROUP_CLOSE_LEAF = { "}": "Token`CloseCurly", ")": "Token`CloseParen", "]"
9
9
 
10
10
  // preprocessedSource is the version passed to tree-sitter (may contain U+2062 InvisibleTimes characters);
11
11
  // source is the original — used only for the unformattable fallback.
12
- export function adapt(tree, source, preprocessedSource) {
12
+ // map (optional) translates preprocessed char offsets back to original offsets;
13
+ // it is attached to lineIndex so nodeSource can record exact original positions.
14
+ export function adapt(tree, source, preprocessedSource, map) {
13
15
  const ps = preprocessedSource ?? source;
14
16
  const lineIndex = makeLineIndex(ps);
17
+ lineIndex.map = map;
15
18
  const ctx = { source: ps, lineIndex };
16
19
  const root = tree.rootNode;
17
20
  if (subtreeHasError(root)) {
@@ -21,12 +24,14 @@ export function adapt(tree, source, preprocessedSource) {
21
24
  const src = nodeSource(root, errLineIndex);
22
25
  return { type: "ContainerNode", kind: "String", children: [{ type: "Unknown", kind: "SyntaxErrorNode[]", source: src }], source: src };
23
26
  }
24
- // Hoist top-level semicolon chains that end with a trailing ";" (MISSING rhs) into separate
25
- // ContainerNode children, matching the CodeParser output structure.
26
- // Only hoist if the outer infix has a trailing MISSING (i.e. ends with ";").
27
+ // Hoist top-level semicolon chains into separate ContainerNode children,
28
+ // matching the CodeParser output structure. This must happen for multiline
29
+ // chains even when the final expression is not followed by a trailing ";",
30
+ // otherwise the printer sees one giant CompoundExpression and cannot apply
31
+ // top-level definition spacing between adjacent definitions.
27
32
  const children = [];
28
33
  for (const c of namedChildren(root)) {
29
- if (c.type === "infix" && operatorLiteral(c, ctx) === ";" && hasTrailingSemicolon(c)) {
34
+ if (shouldHoistTopLevelSemicolonChain(c, ctx)) {
30
35
  hoistSemicolonChildren(c, ctx, children);
31
36
  } else {
32
37
  children.push(adaptNode(c, ctx));
@@ -40,12 +45,22 @@ export function adapt(tree, source, preprocessedSource) {
40
45
  };
41
46
  }
42
47
 
48
+ function shouldHoistTopLevelSemicolonChain(node, ctx) {
49
+ if (node.type !== "infix" || operatorLiteral(node, ctx) !== ";") return false;
50
+ return hasTrailingSemicolon(node) || spansMultipleLines(node, ctx);
51
+ }
52
+
43
53
  // Returns true if the infix node ends with a MISSING node (i.e. has a trailing ";").
44
54
  function hasTrailingSemicolon(node) {
45
55
  const last = node.child(node.childCount - 1);
46
56
  return last !== null && last.isMissing;
47
57
  }
48
58
 
59
+ function spansMultipleLines(node, ctx) {
60
+ const source = nodeSource(node, ctx.lineIndex);
61
+ return source?.[0]?.[0] !== source?.[1]?.[0];
62
+ }
63
+
49
64
  // Collect all leaf statements from a semicolon infix chain (possibly left-recursive),
50
65
  // flattening the left-associative structure into a linear list of segments.
51
66
  // Each segment is { nodes: TSNode[], semiToken: TSNode|null, leadingComments: TSNode[], trailingComments: TSNode[] } where semiToken is the ";" that follows.
@@ -53,7 +68,8 @@ function collectSemicolonSegments(node, ctx, segments) {
53
68
  // A semicolon infix has children: [lhs, ";", rhs] or with comments interspersed.
54
69
  // The lhs may itself be a semicolon infix (left-assoc chaining).
55
70
  let lhs = null, semiToken = null, rhs = null;
56
- const pendingComments = [];
71
+ const leadingComments = [];
72
+ const boundaryComments = [];
57
73
 
58
74
  for (let i = 0; i < node.childCount; i++) {
59
75
  const c = node.child(i);
@@ -63,7 +79,8 @@ function collectSemicolonSegments(node, ctx, segments) {
63
79
  } else if (c.isMissing) {
64
80
  // trailing implicit null — rhs is nothing (trailing semicolon)
65
81
  } else if (c.type === "comment") {
66
- pendingComments.push(c);
82
+ if (lhs === null) leadingComments.push(c);
83
+ else boundaryComments.push(c);
67
84
  } else if (lhs === null) {
68
85
  lhs = c;
69
86
  } else {
@@ -71,31 +88,50 @@ function collectSemicolonSegments(node, ctx, segments) {
71
88
  }
72
89
  }
73
90
 
91
+ const trailingComments = [];
92
+ const rhsLeadingComments = [];
93
+ for (const c of boundaryComments) {
94
+ if (rhs !== null && isLeadingCommentForNextSegment(c, semiToken, ctx)) {
95
+ rhsLeadingComments.push(c);
96
+ } else {
97
+ trailingComments.push(c);
98
+ }
99
+ }
100
+
74
101
  if (lhs !== null && lhs.type === "infix" && operatorLiteral(lhs, ctx) === ";") {
102
+ const segmentStart = segments.length;
75
103
  // Flatten the left subtree
76
104
  collectSemicolonSegments(lhs, ctx, segments);
105
+ if (segments.length > segmentStart) {
106
+ segments[segmentStart].leadingComments.unshift(...leadingComments);
107
+ }
77
108
  // The last segment from lhs should get the semiToken we found
78
109
  if (segments.length > 0 && semiToken !== null) {
79
110
  segments[segments.length - 1].semiToken = semiToken;
80
111
  }
81
- // Add any comments from between lhs and rhs to the last segment's pending
82
- for (const c of pendingComments) {
112
+ for (const c of trailingComments) {
83
113
  if (segments.length > 0) segments[segments.length - 1].trailingComments.push(c);
84
114
  }
85
115
  } else {
86
116
  // Push lhs as a new segment
87
117
  const nodes = [];
88
118
  if (lhs !== null) nodes.push(lhs);
89
- segments.push({ nodes, semiToken, trailingComments: [] });
90
- for (const c of pendingComments) segments[segments.length - 1].trailingComments.push(c);
119
+ segments.push({ nodes, semiToken, leadingComments, trailingComments });
91
120
  }
92
121
 
93
122
  // Push rhs as the next segment (no semiToken yet — will be set by caller if needed)
94
123
  if (rhs !== null) {
95
- segments.push({ nodes: [rhs], semiToken: null, trailingComments: [] });
124
+ segments.push({ nodes: [rhs], semiToken: null, leadingComments: rhsLeadingComments, trailingComments: [] });
96
125
  }
97
126
  }
98
127
 
128
+ function isLeadingCommentForNextSegment(comment, semiToken, ctx) {
129
+ if (!semiToken) return false;
130
+ const semiEndLine = nodeSource(semiToken, ctx.lineIndex)?.[1]?.[0];
131
+ const commentStartLine = nodeSource(comment, ctx.lineIndex)?.[0]?.[0];
132
+ return Number.isFinite(semiEndLine) && Number.isFinite(commentStartLine) && commentStartLine > semiEndLine;
133
+ }
134
+
99
135
  // Hoist top-level semicolon-separated statements into separate ContainerNode children.
100
136
  // For `a; b; c;`, produces [CompoundExpr(a;Null), CompoundExpr(b;Null), CompoundExpr(c;Null)].
101
137
  // For `a; b`, produces [CompoundExpr(a;Null), b].
@@ -104,6 +140,7 @@ function hoistSemicolonChildren(node, ctx, out) {
104
140
  collectSemicolonSegments(node, ctx, segments);
105
141
 
106
142
  for (const seg of segments) {
143
+ for (const c of seg.leadingComments ?? []) out.push(leaf(c, ctx));
107
144
  if (seg.nodes.length === 0) continue;
108
145
 
109
146
  const adaptedNodes = seg.nodes.map(n => adaptNode(n, ctx));
@@ -20,10 +20,24 @@ async function getLanguage() {
20
20
 
21
21
  // Replace space-based implicit multiplication (a b) with U+2062 (InvisibleTimes)
22
22
  // so the grammar can parse it. Skip content inside strings and nested comments.
23
- export function preprocessInvisibleTimes(src) {
23
+ //
24
+ // Returns { text, map } where `text` is the preprocessed source and `map` is an
25
+ // index translation table: map[i] is the original-source character offset that
26
+ // corresponds to preprocessed-text offset i (map[text.length] === src.length).
27
+ // Because the only length-changing transform collapses a run of spaces into a
28
+ // single InvisibleTimes char, this map lets callers translate tree-sitter node
29
+ // positions (computed on the preprocessed text) back to exact offsets in the
30
+ // original source, without lossy line/col round-trips.
31
+ export function preprocess(src) {
24
32
  let result = "";
25
- let i = 0;
33
+ const map = [];
26
34
  const n = src.length;
35
+ // Copy src[start..end) verbatim, recording the source offset of each char.
36
+ const copyVerbatim = (start, end) => {
37
+ for (let k = start; k < end; k++) map.push(k);
38
+ result += src.slice(start, end);
39
+ };
40
+ let i = 0;
27
41
  while (i < n) {
28
42
  // Skip quoted string
29
43
  if (src[i] === '"') {
@@ -33,7 +47,7 @@ export function preprocessInvisibleTimes(src) {
33
47
  i++;
34
48
  }
35
49
  if (i < n) i++;
36
- result += src.slice(start, i);
50
+ copyVerbatim(start, i);
37
51
  continue;
38
52
  }
39
53
  // Skip nested WL comment (* ... *)
@@ -46,7 +60,7 @@ export function preprocessInvisibleTimes(src) {
46
60
  else if (src[i] === "*" && src[i + 1] === ")") { depth--; i += 2; }
47
61
  else i++;
48
62
  }
49
- result += src.slice(start, i);
63
+ copyVerbatim(start, i);
50
64
  continue;
51
65
  }
52
66
  // Two or more spaces between word chars on same line → InvisibleTimes
@@ -59,14 +73,22 @@ export function preprocessInvisibleTimes(src) {
59
73
  while (j < n && src[j] === " ") j++;
60
74
  if (j < n && /\w/.test(src[j])) {
61
75
  result += "⁢"; // InvisibleTimes, spaces stripped (they're extras)
76
+ map.push(i); // the single char maps to the start of the run
62
77
  i = j;
63
78
  continue;
64
79
  }
65
80
  }
66
81
  }
82
+ map.push(i);
67
83
  result += src[i++];
68
84
  }
69
- return result;
85
+ map.push(n); // sentinel for end offsets (node endIndex === text.length)
86
+ return { text: result, map };
87
+ }
88
+
89
+ // Backward-compatible wrapper returning only the preprocessed text.
90
+ export function preprocessInvisibleTimes(src) {
91
+ return preprocess(src).text;
70
92
  }
71
93
 
72
94
  export class WolframParser {
@@ -74,8 +96,8 @@ export class WolframParser {
74
96
  const lang = await getLanguage();
75
97
  const parser = new Parser();
76
98
  parser.setLanguage(lang);
77
- const preprocessed = preprocessInvisibleTimes(sourceText);
99
+ const { text: preprocessed, map } = preprocess(sourceText);
78
100
  const tree = parser.parse(preprocessed);
79
- return adapt(tree, sourceText, preprocessed);
101
+ return adapt(tree, sourceText, preprocessed, map);
80
102
  }
81
103
  }
@@ -55,8 +55,26 @@ export function offsetToLineCol(lineIndex, charOffset) {
55
55
  }
56
56
 
57
57
  export function nodeSource(tsNode, lineIndex) {
58
- return [
58
+ const source = [
59
59
  offsetToLineCol(lineIndex, tsNode.startIndex),
60
60
  offsetToLineCol(lineIndex, tsNode.endIndex),
61
61
  ];
62
+ // When a preprocessing offset map is available, record the exact original
63
+ // character offsets (non-enumerably, so node.source stays a [[l,c],[l,c]]
64
+ // pair for lint rules). These bypass the lossy WL-byte/visual-column line/col
65
+ // round-trip in addOffsets, which otherwise mismaps offsets whenever the
66
+ // preprocessed text differs from the original (collapsed spaces, tabs, or
67
+ // non-ASCII characters earlier on the line).
68
+ const map = lineIndex?.map;
69
+ if (map) {
70
+ const charStart = map[tsNode.startIndex];
71
+ const charEnd = map[tsNode.endIndex];
72
+ if (typeof charStart === "number" && typeof charEnd === "number") {
73
+ Object.defineProperties(source, {
74
+ charStart: { value: charStart, enumerable: false },
75
+ charEnd: { value: charEnd, enumerable: false },
76
+ });
77
+ }
78
+ }
79
+ return source;
62
80
  }
@@ -57,6 +57,14 @@ export function lineColToOffset(table, line, col) {
57
57
 
58
58
  function sourceToOffsets(source, table) {
59
59
  if (!Array.isArray(source) || source.length !== 2) return null;
60
+ // Exact original char offsets recorded by the parser (see nodeSource) are
61
+ // authoritative when present — they avoid the lossy line/col conversion below.
62
+ if (
63
+ typeof source.charStart === "number" &&
64
+ typeof source.charEnd === "number"
65
+ ) {
66
+ return { locStart: source.charStart, locEnd: source.charEnd };
67
+ }
60
68
  const [start, end] = source;
61
69
  if (!Array.isArray(start) || !Array.isArray(end)) return null;
62
70
  const [startLine, startCol] = start;