critique 0.1.129 → 0.1.134

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -13,9 +13,13 @@ export declare function countDelimiter(code: string, delimiter: string): number;
13
13
  * Pass 1 (tokenize): for each hunk, extract content lines and count
14
14
  * delimiter occurrences.
15
15
  *
16
- * Pass 2 (fix): if a hunk has an odd count for any delimiter, prepend
17
- * that delimiter to the first content line so tree-sitter sees balanced
18
- * pairs. No header adjustment needed since no new lines are added.
16
+ * Pass 2 (repair): if a hunk has an odd count for any symmetric delimiter,
17
+ * classify the unmatched boundary token as a likely opener or closer and
18
+ * escape that token in place.
19
+ *
20
+ * Pass 3 (hunk isolation): if a hunk leaves an asymmetric delimiter open,
21
+ * append its closing token to the last content line so the next hunk starts
22
+ * from a clean parser state.
19
23
  */
20
24
  export declare function balanceDelimiters(rawDiff: string, filetype?: string): string;
21
25
  //# sourceMappingURL=balance-delimiters.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"balance-delimiters.d.ts","sourceRoot":"","sources":["../src/balance-delimiters.ts"],"names":[],"mappings":"AAqCA;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CActE;AAOD;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,CA4D5E"}
1
+ {"version":3,"file":"balance-delimiters.d.ts","sourceRoot":"","sources":["../src/balance-delimiters.ts"],"names":[],"mappings":"AA4DA;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CActE;AAmPD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,CAqE5E"}
@@ -1,37 +1,50 @@
1
1
  // Delimiter balancing for syntax highlighting in diff hunks.
2
2
  //
3
- // When a diff hunk starts inside a paired delimiter (template literal,
4
- // triple-quoted string, fenced code block, etc.), tree-sitter sees an
5
- // odd number of that delimiter and misparses everything after the first
6
- // occurrence.
3
+ // When a diff hunk starts or ends inside a paired delimiter (template
4
+ // literal, triple-quoted string, fenced code block, etc.), tree-sitter can
5
+ // misparse everything after the unmatched token.
7
6
  //
8
- // Two-pass fix:
7
+ // Boundary repair strategy:
9
8
  // 1. Tokenizer: count delimiter occurrences in each hunk's content,
10
9
  // skipping escaped characters.
11
- // 2. Fix: if a hunk has an odd count, prepend a balancing delimiter to
12
- // the first content line so tree-sitter sees balanced pairs.
10
+ // 2. Repair symmetric delimiters by escaping the unmatched boundary token.
11
+ // 3. Repair asymmetric delimiters by appending the closing token to the
12
+ // last content line in the hunk so later hunks do not inherit state.
13
13
  //
14
- // Why no string/comment state tracking: the tokenizer processes partial
15
- // code (diff hunks) that may start mid-string. Tracking quote states
16
- // causes false positives when template text contains apostrophes
17
- // (`it's`) or quotes (`class="foo"`). Tracking comment states breaks
18
- // on `://` in URL templates. The \-escape handler already covers
19
- // escaped delimiters, and unescaped delimiters in "wrong" contexts
20
- // (strings, comments) are rare enough that the tradeoff is worth it.
14
+ // Why this is safer than prepending a synthetic opener: prepending fixes hunks
15
+ // that begin inside a string, but it corrupts hunks that end inside an open
16
+ // string/fence/docstring. Escaping the actual unmatched token keeps the repair
17
+ // local and avoids duplicating delimiters like ``` -> ``````.
18
+ const cStyleBlockCommentRule = {
19
+ token: "/*",
20
+ closeToken: "*/",
21
+ };
22
+ const htmlCommentRule = {
23
+ token: "<!--",
24
+ closeToken: "-->",
25
+ };
21
26
  /**
22
27
  * Delimiters to balance per language filetype.
23
28
  *
24
- * Each entry maps a filetype (from detectFiletype) to the list of
25
- * delimiters that come in open/close pairs and can span lines.
29
+ * Each entry maps a filetype (from detectFiletype) to the list of delimiters
30
+ * that come in open/close pairs and can span lines.
26
31
  */
27
32
  const LANGUAGE_DELIMITERS = {
28
- typescript: ["`"],
29
- python: ['"""', "'''"],
30
- markdown: ["```"],
31
- go: ["`"],
32
- scala: ['"""'],
33
- swift: ['"""'],
34
- julia: ['"""'],
33
+ typescript: [{ token: "`" }, cStyleBlockCommentRule],
34
+ python: [{ token: '"""' }, { token: "'''" }],
35
+ markdown: [{ token: "```" }],
36
+ go: [{ token: "`" }, cStyleBlockCommentRule],
37
+ rust: [cStyleBlockCommentRule],
38
+ cpp: [cStyleBlockCommentRule],
39
+ csharp: [cStyleBlockCommentRule],
40
+ c: [cStyleBlockCommentRule],
41
+ java: [cStyleBlockCommentRule],
42
+ php: [cStyleBlockCommentRule],
43
+ scala: [{ token: '"""' }, cStyleBlockCommentRule],
44
+ html: [htmlCommentRule],
45
+ css: [cStyleBlockCommentRule],
46
+ swift: [{ token: '"""' }, cStyleBlockCommentRule],
47
+ julia: [{ token: '"""' }],
35
48
  };
36
49
  /**
37
50
  * Count unescaped occurrences of a delimiter in a code string.
@@ -49,11 +62,183 @@ export function countDelimiter(code, delimiter) {
49
62
  }
50
63
  else if (code.startsWith(delimiter, i)) {
51
64
  count++;
52
- i += len - 1; // -1 because the loop increments
65
+ i += len - 1;
53
66
  }
54
67
  }
55
68
  return count;
56
69
  }
70
+ function isDiffContentLine(line) {
71
+ return line[0] === " " || line[0] === "+" || line[0] === "-";
72
+ }
73
+ function getContentLines(lines) {
74
+ return lines.flatMap((line, hunkLineIndex) => isDiffContentLine(line)
75
+ ? [{ hunkLineIndex, content: line.slice(1) }]
76
+ : []);
77
+ }
78
+ function findDelimiterOccurrences(contentLines, delimiter) {
79
+ const occurrences = [];
80
+ for (const [contentLineIndex, line] of contentLines.entries()) {
81
+ const content = line.content;
82
+ const len = delimiter.length;
83
+ for (let column = 0; column < content.length; column++) {
84
+ if (content[column] === "\\") {
85
+ column++;
86
+ continue;
87
+ }
88
+ if (!content.startsWith(delimiter, column)) {
89
+ continue;
90
+ }
91
+ occurrences.push({
92
+ contentLineIndex,
93
+ hunkLineIndex: line.hunkLineIndex,
94
+ column,
95
+ });
96
+ column += len - 1;
97
+ }
98
+ }
99
+ return occurrences;
100
+ }
101
+ function findAnyDelimiterOccurrences(contentLines, delimiters) {
102
+ const ordered = [...delimiters].sort((a, b) => b.length - a.length);
103
+ const occurrences = [];
104
+ for (const [contentLineIndex, line] of contentLines.entries()) {
105
+ const content = line.content;
106
+ for (let column = 0; column < content.length; column++) {
107
+ if (content[column] === "\\") {
108
+ column++;
109
+ continue;
110
+ }
111
+ const matched = ordered.find((delimiter) => content.startsWith(delimiter, column));
112
+ if (!matched) {
113
+ continue;
114
+ }
115
+ occurrences.push({
116
+ contentLineIndex,
117
+ hunkLineIndex: line.hunkLineIndex,
118
+ column,
119
+ });
120
+ column += matched.length - 1;
121
+ }
122
+ }
123
+ return occurrences;
124
+ }
125
+ function getPreviousNonWhitespaceChar(content, column) {
126
+ for (let i = column - 1; i >= 0; i--) {
127
+ const char = content[i];
128
+ if (char && !/\s/.test(char)) {
129
+ return char;
130
+ }
131
+ }
132
+ return undefined;
133
+ }
134
+ function getNextNonWhitespaceChar(content, column) {
135
+ for (let i = column; i < content.length; i++) {
136
+ const char = content[i];
137
+ if (char && !/\s/.test(char)) {
138
+ return char;
139
+ }
140
+ }
141
+ return undefined;
142
+ }
143
+ function hasNonEmptyContentBefore(contentLines, contentLineIndex) {
144
+ return contentLines.slice(0, contentLineIndex).some((line) => line.content.trim() !== "");
145
+ }
146
+ function hasNonEmptyContentAfter(contentLines, contentLineIndex) {
147
+ return contentLines.slice(contentLineIndex + 1).some((line) => line.content.trim() !== "");
148
+ }
149
+ function classifyOccurrence(contentLines, occurrence, token) {
150
+ const content = contentLines[occurrence.contentLineIndex]?.content;
151
+ if (content === undefined) {
152
+ return "unknown";
153
+ }
154
+ const before = getPreviousNonWhitespaceChar(content, occurrence.column);
155
+ const after = getNextNonWhitespaceChar(content, occurrence.column + token.length);
156
+ const trimmed = content.trim();
157
+ const hasBeforeLines = hasNonEmptyContentBefore(contentLines, occurrence.contentLineIndex);
158
+ const hasAfterLines = hasNonEmptyContentAfter(contentLines, occurrence.contentLineIndex);
159
+ if (token.length > 1) {
160
+ if (trimmed === token) {
161
+ if (hasBeforeLines)
162
+ return "close";
163
+ if (hasAfterLines)
164
+ return "open";
165
+ return "unknown";
166
+ }
167
+ if (trimmed.startsWith(token)) {
168
+ if (hasBeforeLines && (!after || /[.\])};:,]/.test(after))) {
169
+ return "close";
170
+ }
171
+ if (after) {
172
+ return "open";
173
+ }
174
+ }
175
+ if (trimmed.endsWith(token)) {
176
+ return "close";
177
+ }
178
+ return "unknown";
179
+ }
180
+ if (!before && after) {
181
+ return "open";
182
+ }
183
+ if (before && !after) {
184
+ return "close";
185
+ }
186
+ if (after && /[$A-Za-z0-9_{[(]/.test(after)) {
187
+ return "open";
188
+ }
189
+ if (before && after && /[)\]};:.,]/.test(after)) {
190
+ return "close";
191
+ }
192
+ return "unknown";
193
+ }
194
+ function escapeDelimiterAt(lines, hunkLineIndex, column) {
195
+ return lines.map((line, index) => {
196
+ if (index !== hunkLineIndex || !isDiffContentLine(line)) {
197
+ return line;
198
+ }
199
+ const prefix = line[0] ?? "";
200
+ const content = line.slice(1);
201
+ return prefix + content.slice(0, column) + "\\" + content.slice(column);
202
+ });
203
+ }
204
+ function getRuleOpenTokens(rule) {
205
+ return rule.openTokens ?? [rule.token];
206
+ }
207
+ function getRuleCloseToken(rule) {
208
+ return rule.closeToken ?? rule.token;
209
+ }
210
+ function isSymmetricRule(rule) {
211
+ const openTokens = getRuleOpenTokens(rule);
212
+ const closeToken = getRuleCloseToken(rule);
213
+ return openTokens.length === 1 && openTokens[0] === closeToken;
214
+ }
215
+ function getUnclosedTokenCount(lines, rule) {
216
+ const contentLines = getContentLines(lines);
217
+ const openTokens = getRuleOpenTokens(rule);
218
+ const closeToken = getRuleCloseToken(rule);
219
+ if (isSymmetricRule(rule)) {
220
+ return 0;
221
+ }
222
+ const openCount = findAnyDelimiterOccurrences(contentLines, openTokens).length;
223
+ const closeCount = findDelimiterOccurrences(contentLines, closeToken).length;
224
+ return Math.max(0, openCount - closeCount);
225
+ }
226
+ function appendClosingTokensToLastContentLine(lines, closeToken, count) {
227
+ if (count <= 0) {
228
+ return [...lines];
229
+ }
230
+ const lastContentLineIndex = [...lines].findLastIndex(isDiffContentLine);
231
+ if (lastContentLineIndex === -1) {
232
+ return [...lines];
233
+ }
234
+ const closingSuffix = Array.from({ length: count }, () => closeToken).join(" ");
235
+ return lines.map((line, index) => {
236
+ if (index !== lastContentLineIndex || !isDiffContentLine(line)) {
237
+ return line;
238
+ }
239
+ return `${line} ${closingSuffix}`;
240
+ });
241
+ }
57
242
  /**
58
243
  * Balance paired delimiters in a unified diff patch for correct syntax
59
244
  * highlighting.
@@ -61,20 +246,23 @@ export function countDelimiter(code, delimiter) {
61
246
  * Pass 1 (tokenize): for each hunk, extract content lines and count
62
247
  * delimiter occurrences.
63
248
  *
64
- * Pass 2 (fix): if a hunk has an odd count for any delimiter, prepend
65
- * that delimiter to the first content line so tree-sitter sees balanced
66
- * pairs. No header adjustment needed since no new lines are added.
249
+ * Pass 2 (repair): if a hunk has an odd count for any symmetric delimiter,
250
+ * classify the unmatched boundary token as a likely opener or closer and
251
+ * escape that token in place.
252
+ *
253
+ * Pass 3 (hunk isolation): if a hunk leaves an asymmetric delimiter open,
254
+ * append its closing token to the last content line so the next hunk starts
255
+ * from a clean parser state.
67
256
  */
68
257
  export function balanceDelimiters(rawDiff, filetype) {
69
258
  if (!filetype)
70
259
  return rawDiff;
71
- const delimiters = LANGUAGE_DELIMITERS[filetype];
72
- if (!delimiters)
260
+ const rules = LANGUAGE_DELIMITERS[filetype];
261
+ if (!rules)
73
262
  return rawDiff;
74
263
  const lines = rawDiff.split("\n");
75
264
  const fileHeader = [];
76
265
  const hunks = [];
77
- // Split into file header + hunks
78
266
  for (const line of lines) {
79
267
  if (line.startsWith("@@")) {
80
268
  hunks.push({ header: line, lines: [] });
@@ -88,39 +276,42 @@ export function balanceDelimiters(rawDiff, filetype) {
88
276
  }
89
277
  if (hunks.length === 0)
90
278
  return rawDiff;
91
- // Pass 2: check each hunk and fix if needed
92
279
  const result = [...fileHeader];
93
280
  for (const hunk of hunks) {
94
- // Extract content text from diff lines (strip the +/-/space prefix)
95
- const content = hunk.lines
96
- .filter(l => l[0] === " " || l[0] === "+" || l[0] === "-")
97
- .map(l => l.slice(1))
98
- .join("\n");
99
- // Find the first delimiter with an odd count
100
- let unbalanced;
101
- for (const delim of delimiters) {
102
- if (countDelimiter(content, delim) % 2 !== 0) {
103
- unbalanced = delim;
281
+ const contentLines = getContentLines(hunk.lines);
282
+ let repairedLines = hunk.lines;
283
+ for (const rule of rules) {
284
+ if (!isSymmetricRule(rule)) {
285
+ continue;
286
+ }
287
+ const occurrences = findDelimiterOccurrences(contentLines, rule.token);
288
+ if (occurrences.length % 2 === 0) {
289
+ continue;
290
+ }
291
+ const first = occurrences[0];
292
+ const last = occurrences[occurrences.length - 1];
293
+ if (!first || !last) {
294
+ continue;
295
+ }
296
+ const firstBoundary = classifyOccurrence(contentLines, first, rule.token);
297
+ const lastBoundary = classifyOccurrence(contentLines, last, rule.token);
298
+ if (firstBoundary === "close") {
299
+ repairedLines = escapeDelimiterAt(repairedLines, first.hunkLineIndex, first.column);
104
300
  break;
105
301
  }
106
- }
107
- result.push(hunk.header);
108
- if (unbalanced) {
109
- // Prepend the balancing delimiter to the first content line
110
- let fixed = false;
111
- for (const line of hunk.lines) {
112
- if (!fixed && (line[0] === " " || line[0] === "+" || line[0] === "-")) {
113
- result.push(line[0] + unbalanced + line.slice(1));
114
- fixed = true;
115
- }
116
- else {
117
- result.push(line);
118
- }
302
+ if (lastBoundary === "open") {
303
+ repairedLines = escapeDelimiterAt(repairedLines, last.hunkLineIndex, last.column);
304
+ break;
119
305
  }
120
306
  }
121
- else {
122
- result.push(...hunk.lines);
307
+ for (const rule of rules) {
308
+ const unclosedCount = getUnclosedTokenCount(repairedLines, rule);
309
+ if (unclosedCount > 0) {
310
+ repairedLines = appendClosingTokensToLastContentLine(repairedLines, getRuleCloseToken(rule), unclosedCount);
311
+ }
123
312
  }
313
+ result.push(hunk.header);
314
+ result.push(...repairedLines);
124
315
  }
125
316
  return result.join("\n");
126
317
  }