@pi-unipi/utility 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,191 @@
1
+ /**
2
+ * @pi-unipi/utility — Diff Parser
3
+ *
4
+ * Parses unified diffs from structuredPatch and provides word-level diff analysis.
5
+ * Wraps the `diff` library for structured output.
6
+ */
7
+
8
+ import * as Diff from "diff";
9
+
10
+ // ─── Types ──────────────────────────────────────────────────────────────────────
11
+
/**
 * One entry of a parsed diff: an added, removed or unchanged line,
 * or a hunk header that separates two hunks.
 */
export interface DiffLine {
  /** Kind of entry: `add`, `remove`, `context`, or a `hunk` separator. */
  type: "add" | "remove" | "context" | "hunk";
  /**
   * Line number in the old text; `null` on added lines.
   * For `hunk` entries `parseDiff` stores the hunk's old start line here.
   */
  oldLine: number | null;
  /**
   * Line number in the new text; `null` on removed lines.
   * For `hunk` entries `parseDiff` stores the hunk's new start line here.
   */
  newLine: number | null;
  /** Text of the line with the leading `+` / `-` / space marker removed. */
  content: string;
}
23
+
/** Structured outcome of diffing two texts. */
export interface ParsedDiff {
  /** Label of the old (left-hand) file. */
  oldName: string;
  /** Label of the new (right-hand) file. */
  newName: string;
  /** Diff entries in display order, hunk separators included. */
  lines: DiffLine[];
  /** Number of `add` entries in `lines`. */
  additions: number;
  /** Number of `remove` entries in `lines`. */
  deletions: number;
}
37
+
/** Outcome of a word-level comparison of two strings. */
export interface WordDiffResult {
  /** Similarity score between 0 and 1; 1 means the strings are identical. */
  similarity: number;
  /**
   * Inserted spans, as offsets into the new text.
   * `start` is inclusive and `end` exclusive, both counted in string (UTF-16) units.
   */
  addedRanges: { start: number; end: number }[];
  /**
   * Deleted spans, as offsets into the old text.
   * `start` is inclusive and `end` exclusive, both counted in string (UTF-16) units.
   */
  removedRanges: { start: number; end: number }[];
}
47
+
48
+ // ─── Diff Parsing ───────────────────────────────────────────────────────────────
49
+
/**
 * Split file content into lines. A final newline terminates the last line
 * rather than starting a new empty one, so "a\nb\n" yields ["a", "b"].
 */
function splitContentLines(content: string): string[] {
  const parts = content.split("\n");
  if (parts.length > 1 && parts[parts.length - 1] === "") {
    parts.pop();
  }
  return parts;
}

/**
 * Diff two content strings and return the result as structured lines.
 *
 * Identical inputs yield an empty diff. When one side is the empty string the
 * other side is reported as all additions / all removals without hunk headers.
 * Otherwise the hunks of `Diff.structuredPatch` are flattened into `DiffLine`
 * entries, each hunk preceded by a `hunk` separator entry.
 *
 * @param oldContent - Original content
 * @param newContent - Modified content
 * @param context - Number of context lines around changes (default 3)
 * @param oldName - Label for old file
 * @param newName - Label for new file
 * @returns The parsed diff with per-line numbers and addition/deletion totals
 */
export function parseDiff(
  oldContent: string,
  newContent: string,
  context: number = 3,
  oldName: string = "old",
  newName: string = "new",
): ParsedDiff {
  if (oldContent === newContent) {
    return { oldName, newName, lines: [], additions: 0, deletions: 0 };
  }

  if (oldContent === "") {
    // New file: every line is an addition.
    const added: DiffLine[] = splitContentLines(newContent).map((content, i) => ({
      type: "add" as const,
      oldLine: null,
      newLine: i + 1,
      content,
    }));
    return { oldName, newName, lines: added, additions: added.length, deletions: 0 };
  }

  if (newContent === "") {
    // Deleted file: every line is a removal.
    const removed: DiffLine[] = splitContentLines(oldContent).map((content, i) => ({
      type: "remove" as const,
      oldLine: i + 1,
      newLine: null,
      content,
    }));
    return { oldName, newName, lines: removed, additions: 0, deletions: removed.length };
  }

  const patch = Diff.structuredPatch(oldName, newName, oldContent, newContent, "", "", { context });

  const lines: DiffLine[] = [];
  let additions = 0;
  let deletions = 0;

  for (const hunk of patch.hunks) {
    // Separator entry carrying the unified-diff hunk header.
    lines.push({
      type: "hunk",
      oldLine: hunk.oldStart,
      newLine: hunk.newStart,
      content: `@@ -${hunk.oldStart},${hunk.oldLines} +${hunk.newStart},${hunk.newLines} @@`,
    });

    let oldLine = hunk.oldStart;
    let newLine = hunk.newStart;

    for (const raw of hunk.lines) {
      const marker = raw[0];
      const content = raw.substring(1);

      if (marker === "\\") {
        // "\ No newline at end of file" annotates the preceding line; it is
        // not a line of either file and must not advance the line counters.
        continue;
      }

      if (marker === "+") {
        lines.push({ type: "add", oldLine: null, newLine, content });
        additions++;
        newLine++;
      } else if (marker === "-") {
        lines.push({ type: "remove", oldLine, newLine: null, content });
        deletions++;
        oldLine++;
      } else {
        // Context line (space marker): present on both sides.
        lines.push({ type: "context", oldLine, newLine, content });
        oldLine++;
        newLine++;
      }
    }
  }

  return { oldName, newName, lines, additions, deletions };
}
138
+
/**
 * Word-level diff analysis between two strings.
 *
 * Uses `Diff.diffWords` and reports which character spans of `b` were added
 * and which spans of `a` were removed. Range offsets are positions in the
 * respective string; `end` is exclusive.
 *
 * Similarity is `2 * unchanged / (a.length + b.length)`, clamped to [0, 1].
 *
 * @param a - Old text
 * @param b - New text
 * @returns Similarity score plus added/removed character ranges
 */
export function wordDiffAnalysis(a: string, b: string): WordDiffResult {
  // Identical strings (this also covers two empty strings).
  if (a === b) {
    return { similarity: 1, addedRanges: [], removedRanges: [] };
  }

  // Exactly one side is empty: everything on the other side changed.
  if (a.length === 0) {
    return { similarity: 0, addedRanges: [{ start: 0, end: b.length }], removedRanges: [] };
  }
  if (b.length === 0) {
    return { similarity: 0, addedRanges: [], removedRanges: [{ start: 0, end: a.length }] };
  }

  const addedRanges: Array<{ start: number; end: number }> = [];
  const removedRanges: Array<{ start: number; end: number }> = [];
  let aPos = 0;
  let bPos = 0;
  let unchangedChars = 0;

  for (const change of Diff.diffWords(a, b)) {
    const len = change.value?.length ?? 0;

    if (change.added) {
      // Present only in `b`.
      addedRanges.push({ start: bPos, end: bPos + len });
      bPos += len;
    } else if (change.removed) {
      // Present only in `a`.
      removedRanges.push({ start: aPos, end: aPos + len });
      aPos += len;
    } else {
      // Shared by both strings: advances both cursors.
      unchangedChars += len;
      aPos += len;
      bPos += len;
    }
  }

  // Both strings are non-empty here, so the denominator is at least 2.
  const similarity = (unchangedChars * 2) / (a.length + b.length);

  return {
    similarity: Math.min(1, Math.max(0, similarity)),
    addedRanges,
    removedRanges,
  };
}
@@ -0,0 +1,422 @@
1
+ /**
2
+ * @pi-unipi/utility — Diff Renderer
3
+ *
4
+ * ANSI diff rendering: split (side-by-side) and unified (stacked) views.
5
+ * Includes ANSI utilities, background injection, and adaptive wrapping.
6
+ */
7
+
8
+ import type { ParsedDiff, DiffLine } from "./parser.js";
9
+ import type { DiffColors } from "./theme.js";
10
+ import { hexToBgAnsi, hexToFgAnsi } from "./theme.js";
11
+ import { hlBlock, detectLanguage } from "./highlighter.js";
12
+
13
+ // ─── Constants ──────────────────────────────────────────────────────────────────
14
+
/** Upper bound on the number of diff lines shown in an inline preview. */
export const MAX_PREVIEW_LINES = 60;

/** Upper bound on the total number of diff lines rendered. */
export const MAX_RENDER_LINES = 150;

/** Narrowest terminal (in columns) for which the split view is considered. */
export const SPLIT_MIN_WIDTH = 150;

/** Narrowest acceptable code column (in columns) inside the split view. */
export const SPLIT_MIN_CODE_WIDTH = 60;
26
+
27
+ // ─── ANSI Utilities ─────────────────────────────────────────────────────────────
28
+
/**
 * Strip ANSI CSI escape sequences (`ESC [ params letter`, e.g. SGR colors)
 * from a string.
 */
export function strip(s: string): string {
  // eslint-disable-next-line no-control-regex
  return s.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
}

/**
 * Length in UTF-16 units of the CSI escape sequence that starts at `index`,
 * or 0 when no complete sequence starts there.
 *
 * Recognises exactly the sequences `strip` removes, so width measurement,
 * truncation and wrapping all agree on what is invisible.
 */
function csiLengthAt(s: string, index: number): number {
  // ESC followed by "["
  if (s.charCodeAt(index) !== 0x1b || s.charCodeAt(index + 1) !== 0x5b) return 0;

  let end = index + 2;
  for (; end < s.length; end++) {
    const c = s.charCodeAt(end);
    const isParam = (c >= 0x30 && c <= 0x39) || c === 0x3b; // digits or ";"
    if (!isParam) break;
  }
  if (end >= s.length) return 0;

  const terminator = s.charCodeAt(end);
  const isLetter =
    (terminator >= 0x41 && terminator <= 0x5a) || (terminator >= 0x61 && terminator <= 0x7a);
  return isLetter ? end - index + 1 : 0;
}

/**
 * Terminal column width of one code point: 2 for the CJK / fullwidth ranges
 * below and for everything outside the BMP (emoji, CJK extension B), else 1.
 * This is a deliberately small approximation, not a full East Asian Width table.
 */
function codePointWidth(code: number): number {
  const wide =
    (code >= 0x4e00 && code <= 0x9fff) || // CJK Unified Ideographs
    (code >= 0x3000 && code <= 0x30ff) || // CJK symbols, Hiragana, Katakana
    (code >= 0xff00 && code <= 0xffef) || // Halfwidth and Fullwidth Forms block
    (code >= 0xf900 && code <= 0xfaff) || // CJK Compatibility Ideographs
    (code >= 0x2e80 && code <= 0x2eff) || // CJK Radicals Supplement
    (code >= 0x3400 && code <= 0x4dbf) || // CJK Extension A
    code > 0xffff; // astral planes
  return wide ? 2 : 1;
}

/**
 * Get the visible width of a string in terminal columns.
 * CSI escape sequences count as zero; wide characters count as two.
 */
export function visibleWidth(s: string): number {
  let width = 0;
  // for...of iterates by code point, so surrogate pairs stay together.
  for (const char of strip(s)) {
    width += codePointWidth(char.codePointAt(0) ?? 0);
  }
  return width;
}

/**
 * Fit a string to exactly `targetWidth` visible columns.
 *
 * Shorter input is padded with trailing spaces. Longer input is cut at a
 * character boundary (never inside an escape sequence or a surrogate pair),
 * padded with one space if a wide character did not fit in the last column,
 * and terminated with an SGR reset so styling cannot leak past the cut.
 *
 * @param s - Text, possibly containing ANSI escape sequences
 * @param targetWidth - Desired visible width in columns
 */
export function fit(s: string, targetWidth: number): string {
  const vw = visibleWidth(s);
  if (vw === targetWidth) return s;
  if (vw < targetWidth) return s + " ".repeat(targetWidth - vw);

  let kept = "";
  let used = 0;
  let i = 0;
  while (i < s.length && used < targetWidth) {
    const seqLen = csiLengthAt(s, i);
    if (seqLen > 0) {
      // Escape sequences occupy no columns; keep them so colors survive.
      kept += s.substring(i, i + seqLen);
      i += seqLen;
      continue;
    }

    const code = s.codePointAt(i) ?? 0;
    const width = codePointWidth(code);
    if (used + width > targetWidth) break; // a wide char would overshoot

    const units = code > 0xffff ? 2 : 1; // astral code points span two UTF-16 units
    kept += s.substring(i, i + units);
    used += width;
    i += units;
  }

  const pad = " ".repeat(Math.max(0, targetWidth - used));
  return `${kept}${pad}\x1b[0m`;
}

/**
 * Wrap an ANSI string to a maximum visible width.
 *
 * Every line except the last ends with an SGR reset; continuation lines start
 * by replaying the SGR sequences that were active at the break, so foreground
 * and background both carry over. A character wider than `maxWidth` is placed
 * on a line of its own instead of producing empty lines.
 *
 * @param s - Text, possibly containing ANSI escape sequences
 * @param maxWidth - Maximum visible columns per line
 * @returns The wrapped lines (always at least one element)
 */
export function wrapAnsi(s: string, maxWidth: number): string[] {
  if (visibleWidth(s) <= maxWidth) return [s];

  const lines: string[] = [];
  let current = "";
  let currentWidth = 0;
  let activeStyle = ""; // SGR sequences in effect since the last reset

  let i = 0;
  while (i < s.length) {
    const seqLen = csiLengthAt(s, i);
    if (seqLen > 0) {
      const seq = s.substring(i, i + seqLen);
      if (seq.endsWith("m")) {
        const params = seq.slice(2, -1);
        if (params === "" || params === "0") {
          activeStyle = ""; // plain reset
        } else if (params.startsWith("0;")) {
          activeStyle = seq; // reset combined with new attributes
        } else {
          activeStyle += seq;
        }
      }
      current += seq;
      i += seqLen;
      continue;
    }

    const code = s.codePointAt(i) ?? 0;
    const units = code > 0xffff ? 2 : 1;
    const width = codePointWidth(code);

    // Break only when the line already holds something visible.
    if (currentWidth > 0 && currentWidth + width > maxWidth) {
      lines.push(current + "\x1b[0m");
      current = activeStyle;
      currentWidth = 0;
    }
    current += s.substring(i, i + units);
    currentWidth += width;
    i += units;
  }

  if (strip(current).length > 0) {
    lines.push(current);
  }

  return lines.length > 0 ? lines : [""];
}
153
+
/**
 * Get the SGR (color/style) state in effect at the end of a string.
 *
 * Returns the concatenation of all SGR sequences (`ESC [ ... m`) seen since
 * the last reset (`ESC[0m` or `ESC[m`), so that prepending the result to
 * following text restores foreground, background and attributes together.
 * Non-SGR control sequences (cursor movement, erase, ...) are ignored.
 *
 * @param s - Text, possibly containing ANSI escape sequences
 * @returns The active SGR sequences, or "" when no styling is active
 */
export function ansiState(s: string): string {
  // eslint-disable-next-line no-control-regex
  const sgr = /\x1b\[([0-9;]*)m/g;
  let active = "";

  for (let match = sgr.exec(s); match !== null; match = sgr.exec(s)) {
    const params = match[1] ?? "";
    if (params === "" || params === "0") {
      active = ""; // plain reset clears everything
    } else if (params.startsWith("0;")) {
      active = match[0]; // reset combined with new attributes
    } else {
      active += match[0];
    }
  }

  return active;
}
170
+
/**
 * Render a line number right-aligned in a fixed-width column.
 * `null` produces a blank column of the same width; numbers with more
 * digits than `width` are returned unpadded and untruncated.
 */
export function lnum(n: number | null, width: number = 4): string {
  return n === null ? " ".repeat(width) : String(n).padStart(width, " ");
}
178
+
/**
 * Prefix a row with a two-column stripe gutter.
 * Even rows get a dimmed middle dot followed by a space; odd rows get two spaces.
 */
export function stripes(line: string, even: boolean): string {
  if (!even) {
    return `  ${line}`;
  }
  const marker = "\x1b[2m" + "·" + "\x1b[0m";
  return marker + " " + line;
}
187
+
188
+ // ─── Terminal Width ─────────────────────────────────────────────────────────────
189
+
/**
 * Width of the terminal in columns as reported by `process.stdout.columns`.
 * Falls back to 80 when the value is missing or zero, or when reading it throws.
 */
export function termW(): number {
  const fallback = 80;
  try {
    const columns = process.stdout.columns;
    return columns ? columns : fallback;
  } catch {
    return fallback;
  }
}
200
+
201
+ // ─── Background Injection ───────────────────────────────────────────────────────
202
+
/**
 * Put a diff background colour underneath an already highlighted line.
 *
 * The background is applied to the whole line: `baseBg` when `ranges` is
 * empty, `hlBg` as soon as at least one range is given. The individual
 * range offsets are not used — per-character injection would require
 * parsing the ANSI stream. The result always ends with an SGR reset.
 *
 * @param ansiLine - Line that may already contain ANSI foreground colours
 * @param ranges - Changed character ranges; only emptiness is inspected
 * @param baseBg - Background colour used when there are no ranges
 * @param hlBg - Background colour used when there is at least one range
 */
export function injectBg(
  ansiLine: string,
  ranges: Array<{ start: number; end: number }>,
  baseBg: string,
  hlBg: string,
): string {
  const background = ranges.length === 0 ? baseBg : hlBg;
  return `${hexToBgAnsi(background)}${ansiLine}\x1b[0m`;
}
229
+
230
+ // ─── Adaptive Wrap ──────────────────────────────────────────────────────────────
231
+
/**
 * Wrap rows that are wider than `maxWidth`, keeping one output row per
 * input row when it already fits.
 *
 * The first segment of a wrapped row uses the full width. Continuation
 * segments are indented by two spaces and are therefore wrapped two columns
 * narrower, so that no output row exceeds `maxWidth`. When `maxWidth` is too
 * small to leave room for the indent, continuation segments are not indented.
 *
 * @param rows - Rows to wrap; may contain ANSI escape sequences
 * @param maxWidth - Maximum visible columns per output row
 * @returns The wrapped rows in order
 */
export function adaptiveWrapRows(rows: string[], maxWidth: number): string[] {
  const indent = maxWidth > 2 ? "  " : "";
  const result: string[] = [];

  for (const row of rows) {
    if (visibleWidth(row) <= maxWidth) {
      result.push(row);
      continue;
    }

    const wrapped = wrapAnsi(row, maxWidth);
    result.push(wrapped[0] ?? "");

    const rest = wrapped.slice(1);
    if (rest.length === 0) continue;

    if (indent === "") {
      for (const part of rest) result.push(part);
      continue;
    }

    // Each wrapped segment is a self-contained ANSI string (reset at the end,
    // style replayed at the start), so the segments can be concatenated and
    // wrapped again at the narrower continuation width.
    for (const part of wrapAnsi(rest.join(""), maxWidth - indent.length)) {
      result.push(indent + part);
    }
  }

  return result;
}
250
+
251
+ // ─── Split View Heuristic ───────────────────────────────────────────────────────
252
+
/**
 * Determine whether to use split (side-by-side) view.
 *
 * Split view is rejected when the terminal is narrower than
 * `SPLIT_MIN_WIDTH`, when the resulting code column would be narrower than
 * `SPLIT_MIN_CODE_WIDTH`, or when the longest non-hunk line is more than
 * 1.5 times the code column width.
 *
 * @param diff - The parsed diff
 * @param tw - Terminal width
 * @param max - Maximum render lines (currently not used by the heuristic)
 * @returns `true` when split view should be used
 */
export function shouldUseSplit(diff: ParsedDiff, tw: number, max: number): boolean {
  // Too narrow for split
  if (tw < SPLIT_MIN_WIDTH) return false;

  // 6 columns are reserved for padding / line numbers
  const codeWidth = Math.floor((tw - 6) / 2);
  if (codeWidth < SPLIT_MIN_CODE_WIDTH) return false;

  // Longest content line, found with a plain loop: spreading the whole array
  // into Math.max passes one argument per line and can overflow the engine's
  // argument limit on very large diffs.
  let longest = 0;
  for (const line of diff.lines) {
    if (line.type !== "hunk" && line.content.length > longest) {
      longest = line.content.length;
    }
  }

  // Lines that long would be truncated heavily; unified reads better.
  return longest <= codeWidth * 1.5;
}
282
+
283
+ // ─── Renderers ──────────────────────────────────────────────────────────────────
284
+
/**
 * Render a diff in unified (stacked single-column) view.
 *
 * Each row shows a marker column (`+`, `-` or blank), the old and new line
 * numbers, a `│` separator and the line content. At most `max` diff entries
 * are rendered; when entries were cut off a trailing summary row reports how
 * many were omitted.
 *
 * @param diff - Parsed diff
 * @param language - Language id; accepted for API symmetry, currently unused here
 * @param max - Maximum lines to render
 * @param dc - Diff colors
 * @returns The rendered rows joined with newlines
 */
export function renderUnified(
  diff: ParsedDiff,
  language: string,
  max: number,
  dc: DiffColors,
): string {
  const reset = "\x1b[0m";
  const addBg = hexToBgAnsi(dc.addBg);
  const remBg = hexToBgAnsi(dc.remBg);
  const addFg = hexToFgAnsi(dc.addFg);
  const remFg = hexToFgAnsi(dc.remFg);
  const hunkFg = hexToFgAnsi(dc.hunkFg);
  const headerFg = hexToFgAnsi(dc.headerFg);

  const blank = lnum(null, 4);
  const shown = Math.min(diff.lines.length, max);
  const rows: string[] = [];

  for (let i = 0; i < shown; i++) {
    const line = diff.lines[i];

    switch (line.type) {
      case "hunk":
        rows.push(`${hunkFg}${line.content}${reset}`);
        break;
      case "add":
        rows.push(`${addBg}${addFg}+${reset}${addBg} ${blank} ${lnum(line.newLine, 4)} │ ${line.content}${reset}`);
        break;
      case "remove":
        rows.push(`${remBg}${remFg}-${reset}${remBg} ${lnum(line.oldLine, 4)} ${blank} │ ${line.content}${reset}`);
        break;
      case "context":
        // Two leading spaces stand in for the marker and its padding so the
        // line-number gutter lines up with the add/remove rows.
        rows.push(`  ${headerFg}${lnum(line.oldLine, 4)} ${lnum(line.newLine, 4)}${reset} │ ${line.content}`);
        break;
    }
  }

  if (diff.lines.length > max) {
    rows.push(`${headerFg}... (${diff.lines.length - max} more lines)${reset}`);
  }

  return rows.join("\n");
}
336
+
/**
 * Render a diff in split (side-by-side) view.
 *
 * Falls back to `renderUnified` when `shouldUseSplit` rejects the current
 * terminal width or the diff's line lengths. Otherwise every diff entry
 * becomes one row with an old (left) and a new (right) cell separated by
 * ` │ `: removals fill the left cell, additions the right cell, context
 * lines both, and hunk headers the left cell only. All left cells are
 * exactly `codeWidth` columns wide so the separator forms a straight line.
 *
 * @param diff - Parsed diff
 * @param language - Language id; only forwarded to the unified fallback
 * @param max - Maximum lines to render
 * @param dc - Diff colors
 * @returns The rendered rows joined with newlines
 */
export function renderSplit(
  diff: ParsedDiff,
  language: string,
  max: number,
  dc: DiffColors,
): string {
  const tw = termW();

  // Auto-fallback to unified
  if (!shouldUseSplit(diff, tw, max)) {
    return renderUnified(diff, language, max, dc);
  }

  const reset = "\x1b[0m";
  const addBg = hexToBgAnsi(dc.addBg);
  const remBg = hexToBgAnsi(dc.remBg);
  const addFg = hexToFgAnsi(dc.addFg);
  const remFg = hexToFgAnsi(dc.remFg);
  const hunkFg = hexToFgAnsi(dc.hunkFg);
  const headerFg = hexToFgAnsi(dc.headerFg);

  // 12 columns are reserved for line numbers, separators and padding.
  const codeWidth = Math.floor((tw - 12) / 2);
  // Changed rows spend 9 columns on "marker, space, 4-digit number, ' │ '".
  const changedTextWidth = codeWidth - 9;
  // Context rows spend 7 columns on "4-digit number, ' │ '".
  const contextTextWidth = codeWidth - 7;
  const blankCell = `${headerFg}${" ".repeat(codeWidth)}${reset}`;

  const shown = Math.min(diff.lines.length, max);
  const rows: string[] = [];

  for (let i = 0; i < shown; i++) {
    const line = diff.lines[i];
    let left: string;
    let right: string;

    switch (line.type) {
      case "hunk":
        // Hunk header lives in the left cell; the right cell stays empty.
        left = fit(` ${hunkFg}${line.content}${reset}`, codeWidth);
        right = "";
        break;
      case "add":
        left = blankCell;
        right = `${addBg}${addFg}+${reset}${addBg} ${lnum(line.newLine, 4)} │ ${fit(line.content, changedTextWidth)}${reset}`;
        break;
      case "remove":
        left = `${remBg}${remFg}-${reset}${remBg} ${lnum(line.oldLine, 4)} │ ${fit(line.content, changedTextWidth)}${reset}`;
        right = blankCell;
        break;
      case "context":
        // Same text on both sides, each with its own line number.
        left = `${headerFg}${lnum(line.oldLine, 4)}${reset} │ ${fit(line.content, contextTextWidth)}`;
        right = `${headerFg}${lnum(line.newLine, 4)}${reset} │ ${fit(line.content, contextTextWidth)}`;
        break;
      default:
        continue;
    }

    rows.push(`${left} │ ${right}`);
  }

  if (diff.lines.length > max) {
    rows.push(`${headerFg}... (${diff.lines.length - max} more lines)${reset}`);
  }

  return rows.join("\n");
}