@fresh-editor/fresh-editor 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,19 +36,21 @@ const NS_OVERLAY = "live-diff-overlay";
36
36
  // on the same line — but in practice users will run one or the other.
37
37
  const PRIORITY = 9;
38
38
 
39
- // Theme keys for backgrounds and virtual-line foregrounds. These are
40
- // resolved at render time by the editor, so the diff colors track
41
- // the active theme automatically. All bundled themes provide
42
- // `editor.diff_*_bg` (defaulted via serde) and `ui.file_status_*_fg`
43
- // (falls through to `diagnostic.{info,warning,error}_fg` when the
44
- // theme doesn't override).
39
+ // Theme keys for backgrounds and "on top of bg" foregrounds. These
40
+ // are resolved at render time by the editor, so the diff colors track
41
+ // the active theme automatically. The `editor.diff_*_fg` keys are
42
+ // purpose-built for "text drawn on top of the matching diff bg" —
43
+ // they default to `ui.file_status_*_fg` so themes that haven't been
44
+ // updated still work, but themes whose `file_status_*_fg` collides
45
+ // with `diff_*_bg` (e.g. `terminal`, where both resolve to ANSI Red)
46
+ // override `editor.diff_*_fg` to a contrasting color.
45
47
  const THEME = {
46
48
  addedBg: "editor.diff_add_bg",
47
- addedFg: "ui.file_status_added_fg",
49
+ addedFg: "editor.diff_add_fg",
48
50
  modifiedBg: "editor.diff_modify_bg",
49
- modifiedFg: "ui.file_status_modified_fg",
51
+ modifiedFg: "editor.diff_modify_fg",
50
52
  removedBg: "editor.diff_remove_bg",
51
- removedFg: "ui.file_status_deleted_fg",
53
+ removedFg: "editor.diff_remove_fg",
52
54
  };
53
55
 
54
56
  // `setLineIndicator` only accepts RGB triples (not theme keys), so the
@@ -75,6 +77,21 @@ const MAX_DIFF_LINES = 20_000;
75
77
  // Soft cap on the LCS DP table; past this we stop computing virtual lines.
76
78
  const MAX_DP_CELLS = 4_000_000;
77
79
 
80
+ // Similarity (Sørensen–Dice over character LCS) above which a 1:1
81
+ // modified pair is rendered as "modified" (bg-only highlight on the
82
+ // new line, no deletion virtual line). Below this we split the pair
83
+ // into a `removed` (virtual deletion line) + `added` (bg-highlighted)
84
+ // hunk pair so the change reads as a rewrite, not an in-place edit.
85
+ //
86
+ // 0.5 matches `difflib.SequenceMatcher.ratio()`-style heuristics used
87
+ // by VS Code, IntelliJ and most diff viewers.
88
+ const SIMILARITY_THRESHOLD = 0.5;
89
+ // Bail out of char-LCS on huge lines; cost is O(m * n).
90
+ const MAX_LINE_LCS_CHARS = 2000;
91
+ // Bail out of word-LCS when either side has more tokens than this;
92
+ // O(m * n) in tokens.
93
+ const MAX_WORD_TOKENS = 1000;
94
+
78
95
  // =============================================================================
79
96
  // Types
80
97
  // =============================================================================
@@ -86,6 +103,16 @@ type DiffMode =
86
103
 
87
104
  type HunkKind = "added" | "removed" | "modified";
88
105
 
/**
 * A span of bytes inside one new-side line that the word-level diff
 * wants emphasised (bold + underline).
 *
 * Both offsets are UTF-8 byte offsets counted from the start of the
 * line, not from the start of the buffer; `renderHunks` adds the
 * line's own byte offset before handing the span to `addOverlay`.
 */
interface WordRange {
  /** Byte offset, relative to the line start, where the span begins. */
  start: number;
  /** Byte offset, relative to the line start, where the span ends. */
  end: number;
}
115
+
89
116
  interface Hunk {
90
117
  kind: HunkKind;
91
118
  /** First changed new-side line (0-indexed). */
@@ -94,6 +121,12 @@ interface Hunk {
94
121
  newCount: number;
95
122
  /** Old-side text, line by line, no trailing newline. */
96
123
  oldLines: string[];
124
+ /** Word-level diff results, one entry per new-side line in this
125
+ * hunk. Set only on `modified` hunks above the similarity threshold
126
+ * — where we suppress the virtual deletion line and instead bold +
127
+ * underline the actually-changed words on the new line. `undefined`
128
+ * for unrefined hunks and for `added`/`removed` hunks. */
129
+ wordRanges?: WordRange[][];
97
130
  }
98
131
 
99
132
  interface BufferDiffState {
@@ -433,6 +466,229 @@ function fillOldLines(hunks: Hunk[], oldLines: string[]): void {
433
466
  }
434
467
  }
435
468
 
469
+ // =============================================================================
470
+ // Similarity + word-level diff
471
+ // =============================================================================
472
+
/**
 * Sørensen–Dice-style similarity ratio over a character LCS:
 *
 *   ratio = 2 * |LCS(a, b)| / (|a| + |b|)
 *
 * Lengths and comparisons are in UTF-16 code units.
 *
 * @param a Old-side line text.
 * @param b New-side line text.
 * @returns A value in `0.0..1.0`. Empty vs. empty is `1.0`; exactly one
 *   empty side is `0.0`; any side longer than `MAX_LINE_LCS_CHARS` is
 *   reported as `0.0` so the quadratic LCS never runs on huge lines
 *   (minified JS, base64 blobs) and the caller falls back to
 *   "split into removed + added".
 */
function lineSimilarity(a: string, b: string): number {
  if (a.length === 0 && b.length === 0) return 1.0;
  if (a.length === 0 || b.length === 0) return 0.0;
  if (a.length > MAX_LINE_LCS_CHARS || b.length > MAX_LINE_LCS_CHARS) {
    return 0.0;
  }

  // Strip the common prefix and suffix first so "abcdef" vs "abcXYZdef"
  // only pays for the differing middle. The stripped characters always
  // belong to some LCS, so they are simply added back at the end.
  let prefix = 0;
  const minLen = Math.min(a.length, b.length);
  while (prefix < minLen && a[prefix] === b[prefix]) prefix++;

  let aEnd = a.length;
  let bEnd = b.length;
  while (aEnd > prefix && bEnd > prefix && a[aEnd - 1] === b[bEnd - 1]) {
    aEnd--;
    bEnd--;
  }

  const equal = prefix + (a.length - aEnd);
  const m = aEnd - prefix;
  const n = bEnd - prefix;
  const totalLength = a.length + b.length;
  if (m === 0 || n === 0) {
    // One middle is empty: the LCS is exactly the stripped prefix + suffix.
    return (2 * equal) / totalLength;
  }

  // Only the LCS *length* is needed, so keep two rolling rows instead of
  // the full (m + 1) x (n + 1) table. Index 0 of each row is the
  // "empty prefix" column and is never written, so it stays 0.
  let prev = new Int32Array(n + 1);
  let curr = new Int32Array(n + 1);
  for (let i = 1; i <= m; i++) {
    const ai = a[prefix + i - 1];
    for (let j = 1; j <= n; j++) {
      if (ai === b[prefix + j - 1]) {
        curr[j] = prev[j - 1] + 1;
      } else {
        const up = prev[j];
        const left = curr[j - 1];
        curr[j] = up >= left ? up : left;
      }
    }
    // The row just filled becomes "previous"; the stale row is fully
    // overwritten (columns 1..n) on the next pass.
    [prev, curr] = [curr, prev];
  }

  const middleLcs = prev[n];
  return (2 * (equal + middleLcs)) / totalLength;
}
523
+
/**
 * One lexical piece of a line: a run of word characters, a run of
 * whitespace, or a punctuation character, together with the UTF-8
 * byte offsets it occupies inside the string it was cut from.
 */
interface Token {
  /** The exact substring this token covers. */
  text: string;
  /** UTF-8 byte offset of the token's first byte within its source string. */
  byteStart: number;
  /** UTF-8 byte offset just past the token's last byte. */
  byteEnd: number;
}
531
+
532
+ const WORD_CHAR = /[A-Za-z0-9_]/;
533
+ const WHITESPACE_CHAR = /\s/;
534
+
/**
 * Split a line into tokens: word runs (`WORD_CHAR`, i.e. ASCII
 * `[A-Za-z0-9_]+`), whitespace runs (`\s+`), and single characters
 * that are neither.
 *
 * Byte offsets are computed once per token via `editor.utf8ByteLength`
 * so downstream overlays can index without re-scanning the string.
 *
 * @param s Line text to tokenize.
 * @returns Tokens in source order; consecutive tokens are contiguous
 *   (`tokens[k].byteEnd === tokens[k + 1].byteStart`). Empty input
 *   yields an empty array.
 */
function tokenize(s: string): Token[] {
  const tokens: Token[] = [];
  let i = 0;
  let bytePos = 0;
  while (i < s.length) {
    let j = i;
    const c = s[i];
    if (WHITESPACE_CHAR.test(c)) {
      while (j < s.length && WHITESPACE_CHAR.test(s[j])) j++;
    } else if (WORD_CHAR.test(c)) {
      while (j < s.length && WORD_CHAR.test(s[j])) j++;
    } else {
      // Characters outside the BMP (emoji, many CJK extensions) occupy
      // two UTF-16 code units. Keep the surrogate pair together:
      // slicing it in half would produce two lone surrogates, which are
      // not valid UTF-8 on their own, so measuring each half separately
      // (presumably as a replacement character; `editor.utf8ByteLength`
      // is not visible here) skews the byte offset of every later token.
      const codePoint = s.codePointAt(i);
      j = i + (codePoint !== undefined && codePoint > 0xffff ? 2 : 1);
    }
    const text = s.slice(i, j);
    const byteLen = editor.utf8ByteLength(text);
    tokens.push({ text, byteStart: bytePos, byteEnd: bytePos + byteLen });
    bytePos += byteLen;
    i = j;
  }
  return tokens;
}
561
+
/**
 * Find the parts of the new-side line that changed relative to the
 * old-side line, at token granularity.
 *
 * Both lines are tokenized and a longest-common-subsequence of token
 * texts is computed; every new-side token outside that subsequence is
 * reported. Whitespace tokens are never reported (whitespace changes
 * mid-line look like noise; whole-line whitespace edits are handled by
 * the line-level diff). Touching unmatched tokens are coalesced, so a
 * renamed `foo.bar.baz` becomes one range rather than several.
 *
 * @param oldS Old-side line text.
 * @param newS New-side line text.
 * @returns Byte ranges (relative to the start of `newS`) to emphasise,
 *   in ascending order. Empty when `newS` has no tokens.
 */
function computeWordDiff(oldS: string, newS: string): WordRange[] {
  const before = tokenize(oldS);
  const after = tokenize(newS);
  const rows = before.length;
  const cols = after.length;
  // A token is a homogeneous run, so its first character classifies it.
  const isBlank = (t: Token): boolean => WHITESPACE_CHAR.test(t.text[0] ?? "");

  if (cols === 0) return [];

  const overBudget = rows > MAX_WORD_TOKENS || cols > MAX_WORD_TOKENS;
  if (rows === 0 || overBudget) {
    // Nothing to compare against, or the token DP would dwarf the
    // line-level pass. Flag every non-whitespace token so the user
    // still sees *something*.
    return collapseRanges(after.filter((t) => !isBlank(t)));
  }

  // Flattened (rows + 1) x (cols + 1) LCS-length table; row 0 and
  // column 0 stay zero.
  const width = cols + 1;
  const table: number[] = new Array((rows + 1) * width).fill(0);
  for (let r = 1; r <= rows; r++) {
    const oldText = before[r - 1].text;
    const here = r * width;
    const above = here - width;
    for (let c = 1; c <= cols; c++) {
      if (oldText === after[c - 1].text) {
        table[here + c] = table[above + c - 1] + 1;
      } else {
        table[here + c] = Math.max(table[above + c], table[here + c - 1]);
      }
    }
  }

  // Walk back from the bottom-right corner, marking the new-side tokens
  // that take part in the LCS pairing. On ties, step up (drop an old
  // token) before stepping left.
  const inLcs: boolean[] = new Array(cols).fill(false);
  let r = rows;
  let c = cols;
  while (r > 0 && c > 0) {
    if (before[r - 1].text === after[c - 1].text) {
      inLcs[c - 1] = true;
      r--;
      c--;
      continue;
    }
    const up = table[(r - 1) * width + c];
    const left = table[r * width + (c - 1)];
    if (up >= left) {
      r--;
    } else {
      c--;
    }
  }

  const changed = after.filter((t, k) => !inLcs[k] && !isBlank(t));
  return collapseRanges(changed);
}
624
+
/**
 * Fold a sequence of tokens into byte ranges, merging each token into
 * the previous range when it starts exactly where that range ends.
 * Downstream overlay creation then costs O(runs) rather than O(tokens).
 *
 * @param tokens Tokens in source order.
 * @returns Freshly allocated ranges; the input tokens are not modified.
 */
function collapseRanges(tokens: Token[]): WordRange[] {
  const merged: WordRange[] = [];
  // The most recently pushed range, i.e. the only one a token can extend.
  let open: WordRange | undefined;
  for (const { byteStart, byteEnd } of tokens) {
    if (open !== undefined && open.end === byteStart) {
      open.end = byteEnd;
      continue;
    }
    open = { start: byteStart, end: byteEnd };
    merged.push(open);
  }
  return merged;
}
639
+
/**
 * Post-process `opsToHunks` output, deciding line by line how each 1:1
 * `modified` pair should be presented:
 *
 * - similarity at or above `SIMILARITY_THRESHOLD`: a single-line
 *   `modified` hunk with no old lines (so no virtual deletion line is
 *   rendered) plus a `wordRanges` entry that drives the bold +
 *   underline word-level overlay;
 * - otherwise: a `removed` hunk carrying the old line followed by an
 *   `added` hunk for the new line, both at the same new-side position.
 *
 * Hunks that are not `modified`, or whose old and new line counts
 * differ (e.g. 3 old lines becoming 2 new ones), are passed through as
 * the same object: the pairing is ambiguous there, and forcing a split
 * would only create misleading "removed" lines.
 *
 * @param hunks Hunks with `oldLines` already filled in.
 * @param newLines New-side buffer text, one entry per line.
 * @returns A new array of hunks in the original order.
 */
function refineHunks(hunks: Hunk[], newLines: string[]): Hunk[] {
  const refined: Hunk[] = [];
  for (const hunk of hunks) {
    const isOneToOne =
      hunk.kind === "modified" && hunk.oldLines.length === hunk.newCount;
    if (!isOneToOne) {
      refined.push(hunk);
      continue;
    }

    for (let offset = 0; offset < hunk.newCount; offset++) {
      const lineNo = hunk.newStart + offset;
      const before = hunk.oldLines[offset];
      const after = newLines[lineNo] ?? "";
      const similar = lineSimilarity(before, after) >= SIMILARITY_THRESHOLD;

      if (!similar) {
        // Reads as a rewrite: show the old text as a deletion, then
        // the new line as an addition.
        refined.push(
          { kind: "removed", newStart: lineNo, newCount: 0, oldLines: [before] },
          { kind: "added", newStart: lineNo, newCount: 1, oldLines: [] },
        );
        continue;
      }

      // In-place edit: keep it `modified`, drop the old text, and
      // record which words changed.
      refined.push({
        kind: "modified",
        newStart: lineNo,
        newCount: 1,
        oldLines: [],
        wordRanges: [computeWordDiff(before, after)],
      });
    }
  }
  return refined;
}
691
+
436
692
  // =============================================================================
437
693
  // Rendering
438
694
  // =============================================================================
@@ -490,20 +746,18 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
490
746
  const lineCount = lineStarts.length;
491
747
 
492
748
  // Group new-side lines per kind for batched setLineIndicators.
749
+ // `removed` hunks have no new-side line they belong on — their
750
+ // indicator rides directly on the virtual deletion line itself
751
+ // via `addVirtualLine`'s `gutterGlyph`, so it sits next to the
752
+ // deleted content instead of on the source line that happens to
753
+ // follow it.
493
754
  const addedLines: number[] = [];
494
755
  const modifiedLines: number[] = [];
495
- const removedAnchors: number[] = [];
496
756
 
497
757
  for (const h of state.hunks) {
498
- if (h.kind === "removed") {
499
- // Anchor on the line that took the deletion's place. If newStart
500
- // is past EOF, step back to the last real line.
501
- let anchor = h.newStart;
502
- if (anchor >= lineCount) anchor = Math.max(0, lineCount - 1);
503
- removedAnchors.push(anchor);
504
- } else if (h.kind === "added") {
758
+ if (h.kind === "added") {
505
759
  for (let i = 0; i < h.newCount; i++) addedLines.push(h.newStart + i);
506
- } else {
760
+ } else if (h.kind === "modified") {
507
761
  for (let i = 0; i < h.newCount; i++) modifiedLines.push(h.newStart + i);
508
762
  }
509
763
  }
@@ -520,17 +774,20 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
520
774
  GUTTER_COLORS.modified[0], GUTTER_COLORS.modified[1], GUTTER_COLORS.modified[2], PRIORITY,
521
775
  );
522
776
  }
523
- if (removedAnchors.length > 0) {
524
- editor.setLineIndicators(
525
- bid, removedAnchors, NS_GUTTER, SYMBOLS.removed,
526
- GUTTER_COLORS.removed[0], GUTTER_COLORS.removed[1], GUTTER_COLORS.removed[2], PRIORITY,
527
- );
528
- }
529
777
 
530
778
  // Background highlights and virtual lines, all sync now.
531
779
  for (const h of state.hunks) {
532
780
  if (h.kind === "added" || h.kind === "modified") {
533
781
  const bg = h.kind === "added" ? THEME.addedBg : THEME.modifiedBg;
782
+ // Passing `fg` as a theme key lets each theme decide whether to
783
+ // override the cell's existing fg: themes that DEFINE
784
+ // `editor.diff_*_fg` (e.g. `terminal`, where the ANSI bg would
785
+ // otherwise collide with same-named syntax colors) get a
786
+ // contrasting fg painted on; themes that don't define the key
787
+ // resolve to `None` in `OverlayFace::ThemedStyle`, so the
788
+ // overlay leaves the cell's fg alone and syntax highlighting
789
+ // shows through unchanged.
790
+ const fg = h.kind === "added" ? THEME.addedFg : THEME.modifiedFg;
534
791
  for (let i = 0; i < h.newCount; i++) {
535
792
  const line = h.newStart + i;
536
793
  if (line >= lineCount) break;
@@ -547,6 +804,7 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
547
804
  if (end <= start) end = start + 1;
548
805
  editor.addOverlay(bid, NS_OVERLAY, start, end, {
549
806
  bg,
807
+ fg,
550
808
  underline: false,
551
809
  bold: false,
552
810
  italic: false,
@@ -554,6 +812,36 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
554
812
  extendToLineEnd: true,
555
813
  });
556
814
  }
815
+
816
+ // Word-level diff: bold + underline the changed words on the
817
+ // new-side line of a refined high-similarity modified hunk.
818
+ // `wordRanges` is set only by `refineHunks` and uses byte
819
+ // offsets relative to each new-side line's start, so we add the
820
+ // line's own start byte before passing to `addOverlay`.
821
+ if (h.wordRanges) {
822
+ for (let i = 0; i < h.newCount; i++) {
823
+ const line = h.newStart + i;
824
+ if (line >= lineCount) break;
825
+ const lineByteStart = lineStarts[line];
826
+ const ranges = h.wordRanges[i];
827
+ if (!ranges) continue;
828
+ for (const r of ranges) {
829
+ editor.addOverlay(
830
+ bid,
831
+ NS_OVERLAY,
832
+ lineByteStart + r.start,
833
+ lineByteStart + r.end,
834
+ {
835
+ bold: true,
836
+ underline: true,
837
+ italic: false,
838
+ strikethrough: false,
839
+ extendToLineEnd: false,
840
+ },
841
+ );
842
+ }
843
+ }
844
+ }
557
845
  }
558
846
 
559
847
  if (h.oldLines.length === 0) continue;
@@ -569,9 +857,9 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
569
857
  const anchor = lineStarts[anchorLine];
570
858
 
571
859
  for (let i = 0; i < h.oldLines.length; i++) {
572
- // No "- " prefix the red bg/fg is the visual signal, and the user
573
- // prefers any "-" indicator to live in the gutter rather than
574
- // inside the buffer content.
860
+ // No "- " prefix in the line text — the indicator goes in the
861
+ // gutter via `gutterGlyph` so it sits next to the deletion
862
+ // line itself, not on the source line that follows it.
575
863
  editor.addVirtualLine(
576
864
  bid,
577
865
  anchor,
@@ -579,6 +867,8 @@ function renderHunks(state: BufferDiffState, newLines: string[]): void {
579
867
  {
580
868
  fg: THEME.removedFg,
581
869
  bg: THEME.removedBg,
870
+ gutterGlyph: SYMBOLS.removed,
871
+ gutterColor: GUTTER_COLORS.removed,
582
872
  },
583
873
  above,
584
874
  NS_VLINE,
@@ -650,8 +940,13 @@ async function recompute(bufferId: number): Promise<void> {
650
940
  return;
651
941
  }
652
942
 
653
- const hunks = opsToHunks(ops);
654
- fillOldLines(hunks, state.oldLines);
943
+ const rawHunks = opsToHunks(ops);
944
+ fillOldLines(rawHunks, state.oldLines);
945
+ // Decide per-line whether each `modified` pair is a similar
946
+ // in-place edit (keep as `modified`, drop the virtual deletion
947
+ // line, mark changed words) or a low-similarity rewrite (split
948
+ // into separate `removed` + `added` hunks).
949
+ const hunks = refineHunks(rawHunks, newLines);
655
950
 
656
951
  // Skip 2: same hunks as last render. The user can edit inside an
657
952
  // already-flagged region without changing line counts (e.g., typing