@mcarvin/smart-diff 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -122,8 +122,30 @@ const markdown = await summarizeGitDiff({
122
122
  | `provider` | `LlmProviderId` — wins over `LLM_PROVIDER` env and auto-detection. |
123
123
  | `model` | Chat model id; overrides `LLM_MODEL` and the provider default. |
124
124
  | `maxDiffChars` | Caps unified diff size for the request. |
125
+ | `contextLines` | Number of context lines around each change (`git diff -U<n>`). Lower values (1 or 0) are the single biggest token saver on modification-heavy diffs. |
126
+ | `ignoreWhitespace` | Passes `-w` / `--ignore-all-space` to `git diff` so pure-whitespace hunks don't consume tokens. Also applies to `--numstat` / `--name-status` so counts stay consistent. |
127
+ | `stripDiffPreamble` | Removes low-value lines from the unified diff (`diff --git`, `index`, file-mode lines, and `similarity index` / `dissimilarity index` / `rename from|to` / `copy from|to` metadata). `--- a/…`, `+++ b/…`, and `@@` hunk headers are kept. |
128
+ | `maxHunkLines` | Caps the body of each hunk; anything past the limit is replaced with a single elision marker. The `@@` header and `DiffSummary` totals are preserved. |
129
+ | `excludeDefaultNoise` | Merges the built-in `DEFAULT_NOISE_EXCLUDES` list (lockfiles, `dist`, `build`, `out`, `coverage`, `node_modules`, `__snapshots__`) into `excludeFolders`. |
125
130
  | `llmModelProvider` | `() => Promise<LanguageModel>` — bypass env-based resolution entirely; hand-wire a Vercel AI SDK `LanguageModel` (required in tests or custom setups). |
126
131
 
132
+ #### Reducing tokens
133
+
134
+ For most repos, the cheapest wins are:
135
+
136
+ ```ts
137
+ await summarizeGitDiff({
138
+ from: 'origin/main',
139
+ contextLines: 1, // -U1 cuts 30-60% of tokens on typical diffs
140
+ ignoreWhitespace: true, // drop pure-whitespace hunks entirely
141
+ stripDiffPreamble: true, // kill `index`/`mode`/`similarity` lines
142
+ maxHunkLines: 400, // truncate monster hunks but keep the @@ header
143
+ excludeDefaultNoise: true // skip lockfiles, dist/, coverage/, node_modules/
144
+ });
145
+ ```
146
+
147
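+ `contextLines`, `stripDiffPreamble`, and `maxHunkLines` only reshape the *unified diff text* — the structured `DiffSummary` still reports file counts and line totals for everything in scope, so the model always sees the full change inventory. `ignoreWhitespace` and `excludeDefaultNoise` also feed the summary queries: `-w` is passed to `--numstat` / `--name-status`, and the default noise paths are merged into `excludeFolders`, so `DiffSummary` reflects those two filters as well.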
148
+
127
149
  ### Injecting your own `LanguageModel`
128
150
 
129
151
  If you want full control — for example, to configure retries or middleware, or to hit an in-process mock — pass `llmModelProvider`:
@@ -150,7 +172,7 @@ const md = await summarizeGitDiff({
150
172
 
151
173
  The package also exports helpers for building a custom pipeline on top of the same git and LLM behavior:
152
174
 
153
- - **Git**: `createGitClient`, `getRepoRoot`, `getCommits`, `getDiff`, `getDiffSummary`, `getChangedFiles`, `filterCommitsByMessageRegexes`, `buildDiffPathspecs`
175
+ - **Git**: `createGitClient`, `getRepoRoot`, `getCommits`, `getDiff`, `getDiffSummary`, `getChangedFiles`, `filterCommitsByMessageRegexes`, `buildDiffPathspecs`, `buildDiffShapingGitArgs`, `shapeUnifiedDiff`, `DEFAULT_NOISE_EXCLUDES`
154
176
  - **AI**: `generateSummary`, `resolveLlmMaxDiffChars`, `truncateUnifiedDiffForLlm`
155
177
  - **Provider resolution**: `resolveLanguageModel`, `detectLlmProvider`, `isLlmProviderConfigured`, `defaultModelForProvider`, `resolveLlmBaseUrl`, `parseLlmDefaultHeadersFromEnv`
156
178
  - **Constants / types**: `DEFAULT_GIT_DIFF_SYSTEM_PROMPT`, `LLM_GATEWAY_REQUIRED_MESSAGE`, `LlmProviderId`, `LlmModelProvider`, `ResolveLanguageModelOptions`, `GenerateSummaryInput`, `SummarizeFlags`
package/dist/index.cjs CHANGED
@@ -401,7 +401,6 @@ function buildUserContent(flags, commits, fileNames, diffText, diffSummary) {
401
401
  }
402
402
  function callLlm(userContent, systemPrompt, maxOutputTokens, llmModelProvider, flags) {
403
403
  return __awaiter(this, void 0, void 0, function* () {
404
- var _a, _b;
405
404
  const model = llmModelProvider
406
405
  ? yield llmModelProvider()
407
406
  : yield resolveLanguageModel(Object.assign(Object.assign({}, (flags.provider ? { provider: flags.provider } : {})), (flags.model ? { model: flags.model } : {})));
@@ -412,7 +411,7 @@ function callLlm(userContent, systemPrompt, maxOutputTokens, llmModelProvider, f
412
411
  temperature: 0.2,
413
412
  maxOutputTokens,
414
413
  });
415
- const text = (_b = (_a = result.text) === null || _a === void 0 ? void 0 : _a.trim()) !== null && _b !== void 0 ? _b : "";
414
+ const text = result.text.trim();
416
415
  return text.length > 0 ? text : "No summary generated by the model.";
417
416
  });
418
417
  }
@@ -495,6 +494,130 @@ function filterCommitsByMessageRegexes(commits, includePatterns, excludePatterns
495
494
  return commits.filter((c) => commitMessagePassesFilters(c.message, includeRes, excludeRes));
496
495
  }
497
496
 
497
+ const DEFAULT_NOISE_EXCLUDES = [
498
+ "package-lock.json",
499
+ "yarn.lock",
500
+ "pnpm-lock.yaml",
501
+ "npm-shrinkwrap.json",
502
+ "bun.lockb",
503
+ "go.sum",
504
+ "Cargo.lock",
505
+ "Gemfile.lock",
506
+ "composer.lock",
507
+ "Pipfile.lock",
508
+ "poetry.lock",
509
+ "uv.lock",
510
+ "Podfile.lock",
511
+ "node_modules",
512
+ "dist",
513
+ "build",
514
+ "out",
515
+ "coverage",
516
+ "__snapshots__",
517
+ ];
518
+ function normalizeContextLines(raw) {
519
+ if (!Number.isFinite(raw) || raw < 0)
520
+ return 0;
521
+ return Math.trunc(raw);
522
+ }
523
+ function buildDiffShapingGitArgs(shaping) {
524
+ const args = [];
525
+ if ((shaping === null || shaping === void 0 ? void 0 : shaping.contextLines) !== undefined) {
526
+ args.push(`-U${normalizeContextLines(shaping.contextLines)}`);
527
+ }
528
+ if (shaping === null || shaping === void 0 ? void 0 : shaping.ignoreWhitespace) {
529
+ args.push("-w");
530
+ }
531
+ return args;
532
+ }
533
+ const PREAMBLE_NOISE_PREFIXES = [
534
+ "diff --git ",
535
+ "index ",
536
+ "new file mode ",
537
+ "deleted file mode ",
538
+ "old mode ",
539
+ "new mode ",
540
+ "similarity index ",
541
+ "dissimilarity index ",
542
+ "rename from ",
543
+ "rename to ",
544
+ "copy from ",
545
+ "copy to ",
546
+ ];
547
+ function isPreambleNoiseLine(line) {
548
+ for (const prefix of PREAMBLE_NOISE_PREFIXES) {
549
+ if (line.startsWith(prefix))
550
+ return true;
551
+ }
552
+ return false;
553
+ }
554
+ function stripPreambleLines(text) {
555
+ return text
556
+ .split(/\r?\n/)
557
+ .filter((line) => !isPreambleNoiseLine(line))
558
+ .join("\n");
559
+ }
560
+ function isFileHeaderLine(line) {
561
+ return (/^--- (a\/|b\/|"a\/|"b\/|\/dev\/null)/.test(line) ||
562
+ /^\+\+\+ (a\/|b\/|"a\/|"b\/|\/dev\/null)/.test(line));
563
+ }
564
+ function elideLargeHunks(text, maxHunkLines) {
565
+ const limit = normalizeContextLines(maxHunkLines);
566
+ const lines = text.split(/\r?\n/);
567
+ const out = [];
568
+ let inHunk = false;
569
+ let hunkBuf = [];
570
+ const flushHunk = () => {
571
+ if (hunkBuf.length > limit) {
572
+ const elided = hunkBuf.length - limit;
573
+ out.push(...hunkBuf.slice(0, limit));
574
+ out.push(`[... ${elided} diff line${elided === 1 ? "" : "s"} elided ...]`);
575
+ }
576
+ else {
577
+ out.push(...hunkBuf);
578
+ }
579
+ hunkBuf = [];
580
+ inHunk = false;
581
+ };
582
+ for (const line of lines) {
583
+ if (line.startsWith("@@")) {
584
+ if (inHunk)
585
+ flushHunk();
586
+ out.push(line);
587
+ inHunk = true;
588
+ continue;
589
+ }
590
+ if (line.startsWith("diff --git ") || isFileHeaderLine(line)) {
591
+ if (inHunk)
592
+ flushHunk();
593
+ out.push(line);
594
+ continue;
595
+ }
596
+ if (inHunk) {
597
+ hunkBuf.push(line);
598
+ }
599
+ else {
600
+ out.push(line);
601
+ }
602
+ }
603
+ if (inHunk)
604
+ flushHunk();
605
+ return out.join("\n");
606
+ }
607
+ function shapeUnifiedDiff(text, shaping) {
608
+ if (!(shaping === null || shaping === void 0 ? void 0 : shaping.stripDiffPreamble) && (shaping === null || shaping === void 0 ? void 0 : shaping.maxHunkLines) === undefined) {
609
+ return text;
610
+ }
611
+ let out = text;
612
+ if (shaping.stripDiffPreamble) {
613
+ out = stripPreambleLines(out);
614
+ }
615
+ if (shaping.maxHunkLines !== undefined) {
616
+ out = elideLargeHunks(out, shaping.maxHunkLines);
617
+ }
618
+ return out;
619
+ }
620
+
498
621
  const GIT_STATUS_BY_FIRST_CHAR = {
499
622
  A: "added",
500
623
  D: "deleted",
@@ -525,25 +648,17 @@ function mergeStatus(existing, next) {
525
648
  }
526
649
 
527
650
  function parseNameStatusLine(line) {
528
- var _a;
529
651
  const parts = line.split("\t");
530
652
  let entry = null;
531
653
  if (parts.length >= 2) {
532
- const statusToken = (_a = parts[0]) !== null && _a !== void 0 ? _a : "";
654
+ const statusToken = parts[0];
533
655
  const status = mapGitStatus(statusToken);
534
656
  const isRenameOrCopy = statusToken.startsWith("R") || statusToken.startsWith("C");
535
657
  if (isRenameOrCopy && parts.length >= 3) {
536
- const oldPath = parts[1];
537
- const newPath = parts[2];
538
- if (oldPath !== undefined && newPath !== undefined) {
539
- entry = { path: newPath, status, oldPath };
540
- }
658
+ entry = { path: parts[2], status, oldPath: parts[1] };
541
659
  }
542
660
  else if (!isRenameOrCopy) {
543
- const pathOnly = parts[1];
544
- if (pathOnly !== undefined) {
545
- entry = { path: pathOnly, status };
546
- }
661
+ entry = { path: parts[1], status };
547
662
  }
548
663
  }
549
664
  return entry;
@@ -588,12 +703,11 @@ function numStatPathToLookupKey(pathField) {
588
703
  return `${dirRaw}${toSeg}`;
589
704
  }
590
705
  function parseNumStatLine(line) {
591
- var _a, _b;
592
706
  const parts = line.split("\t");
593
707
  if (parts.length < 3)
594
708
  return null;
595
- const addStr = (_a = parts[0]) !== null && _a !== void 0 ? _a : "";
596
- const delStr = (_b = parts[1]) !== null && _b !== void 0 ? _b : "";
709
+ const addStr = parts[0];
710
+ const delStr = parts[1];
597
711
  const pathField = parts.slice(2).join("\t");
598
712
  const additions = addStr !== "-" ? Number.parseInt(addStr, 10) || 0 : 0;
599
713
  const deletions = delStr !== "-" ? Number.parseInt(delStr, 10) || 0 : 0;
@@ -618,11 +732,10 @@ function accumulateNumStat(numStatOutput, into) {
618
732
  }
619
733
 
620
734
  function parseTabDiffSummaryLine(line) {
621
- var _a;
622
735
  const parts = line.split("\t");
623
736
  if (parts.length < 3)
624
737
  return null;
625
- const statusToken = (_a = parts.shift()) !== null && _a !== void 0 ? _a : "";
738
+ const statusToken = parts.shift();
626
739
  const status = mapGitStatus(statusToken);
627
740
  const add0 = parts[0];
628
741
  const del0 = parts[1];
@@ -740,31 +853,48 @@ function getDiffPathContext(git, pathFilter, repoRootOverride) {
740
853
  }
741
854
  function getDiff(git, query) {
742
855
  return __awaiter(this, void 0, void 0, function* () {
743
- const { from, to, commits, filterByCommits, pathFilter, repoRootOverride } = query;
856
+ const { from, to, commits, filterByCommits, pathFilter, repoRootOverride, shaping, } = query;
744
857
  const { specs } = yield getDiffPathContext(git, pathFilter, repoRootOverride);
858
+ const shapingArgs = buildDiffShapingGitArgs(shaping);
745
859
  if (!filterByCommits) {
746
- return git.diff([`${from}..${to}`, "--", ...specs]);
860
+ const raw = yield git.diff([
861
+ ...shapingArgs,
862
+ `${from}..${to}`,
863
+ "--",
864
+ ...specs,
865
+ ]);
866
+ return shapeUnifiedDiff(raw, shaping);
747
867
  }
748
- const patches = yield Promise.all(commits.map((c) => git.diff([`${c.hash}^!`, "--", ...specs])));
749
- return patches.filter(Boolean).join("\n");
868
+ const patches = yield Promise.all(commits.map((c) => git.diff([...shapingArgs, `${c.hash}^!`, "--", ...specs])));
869
+ return patches
870
+ .map((p) => shapeUnifiedDiff(p, shaping))
871
+ .filter(Boolean)
872
+ .join("\n");
750
873
  });
751
874
  }
752
875
  function getDiffSummary(git, query) {
753
876
  return __awaiter(this, void 0, void 0, function* () {
754
- const { from, to, commits, filterByCommits, pathFilter, repoRootOverride } = query;
877
+ const { from, to, commits, filterByCommits, pathFilter, repoRootOverride, shaping, } = query;
755
878
  const { specs } = yield getDiffPathContext(git, pathFilter, repoRootOverride);
879
+ const whitespaceArgs = (shaping === null || shaping === void 0 ? void 0 : shaping.ignoreWhitespace) ? ["-w"] : [];
756
880
  if (!filterByCommits) {
757
881
  const [numOutput, nameOutput] = yield Promise.all([
758
- git.diff(["--numstat", `${from}..${to}`, "--", ...specs]),
759
- git.diff(["--name-status", `${from}..${to}`, "--", ...specs]),
882
+ git.diff([...whitespaceArgs, "--numstat", `${from}..${to}`, "--", ...specs]),
883
+ git.diff([
884
+ ...whitespaceArgs,
885
+ "--name-status",
886
+ `${from}..${to}`,
887
+ "--",
888
+ ...specs,
889
+ ]),
760
890
  ]);
761
891
  return buildDiffSummaryFromGitOutputs(nameOutput, numOutput);
762
892
  }
763
893
  const pairs = yield Promise.all(commits.map((c) => __awaiter(this, void 0, void 0, function* () {
764
894
  const range = `${c.hash}^!`;
765
895
  const [numOutput, nameOutput] = yield Promise.all([
766
- git.diff(["--numstat", range, "--", ...specs]),
767
- git.diff(["--name-status", range, "--", ...specs]),
896
+ git.diff([...whitespaceArgs, "--numstat", range, "--", ...specs]),
897
+ git.diff([...whitespaceArgs, "--name-status", range, "--", ...specs]),
768
898
  ]);
769
899
  return { numOutput, nameOutput };
770
900
  })));
@@ -814,6 +944,37 @@ function getChangedFiles(git, query) {
814
944
  });
815
945
  }
816
946
 
947
+ function buildShapingFromOptions(options) {
948
+ const shaping = {};
949
+ if (options.contextLines !== undefined) {
950
+ shaping.contextLines = options.contextLines;
951
+ }
952
+ if (options.ignoreWhitespace)
953
+ shaping.ignoreWhitespace = true;
954
+ if (options.stripDiffPreamble)
955
+ shaping.stripDiffPreamble = true;
956
+ if (options.maxHunkLines !== undefined) {
957
+ shaping.maxHunkLines = options.maxHunkLines;
958
+ }
959
+ return Object.keys(shaping).length > 0 ? shaping : undefined;
960
+ }
961
+ function buildEffectiveExcludeFolders(options) {
962
+ var _a;
963
+ const userExcludes = (_a = options.excludeFolders) !== null && _a !== void 0 ? _a : [];
964
+ if (!options.excludeDefaultNoise) {
965
+ return userExcludes.length > 0 ? userExcludes : undefined;
966
+ }
967
+ const seen = new Set();
968
+ const merged = [];
969
+ for (const p of [...DEFAULT_NOISE_EXCLUDES, ...userExcludes]) {
970
+ const key = p.trim();
971
+ if (!key || seen.has(key))
972
+ continue;
973
+ seen.add(key);
974
+ merged.push(p);
975
+ }
976
+ return merged;
977
+ }
817
978
  function hasNonEmptyTrimmed(arr) {
818
979
  return (arr !== null && arr !== void 0 ? arr : []).some((s) => s.trim().length > 0);
819
980
  }
@@ -830,22 +991,25 @@ function summarizeGitDiff(options) {
830
991
  const git = (_a = options.git) !== null && _a !== void 0 ? _a : createGitClient(options.cwd);
831
992
  const from = options.from;
832
993
  const to = (_b = options.to) !== null && _b !== void 0 ? _b : "HEAD";
994
+ const effectiveExcludeFolders = buildEffectiveExcludeFolders(options);
833
995
  const pathFilter = hasNonEmptyTrimmed(options.includeFolders) ||
834
- hasNonEmptyTrimmed(options.excludeFolders)
996
+ hasNonEmptyTrimmed(effectiveExcludeFolders)
835
997
  ? {
836
998
  includeFolders: options.includeFolders,
837
- excludeFolders: options.excludeFolders,
999
+ excludeFolders: effectiveExcludeFolders,
838
1000
  }
839
1001
  : undefined;
840
1002
  const allCommits = yield getCommits(git, from, to);
841
1003
  const filteredCommits = filterCommitsByMessageRegexes(allCommits, options.commitMessageIncludeRegexes, options.commitMessageExcludeRegexes);
842
1004
  const filterByCommits = shouldFilterByCommits(allCommits, filteredCommits, options);
1005
+ const shaping = buildShapingFromOptions(options);
843
1006
  const rangeQuery = {
844
1007
  from,
845
1008
  to,
846
1009
  commits: filteredCommits,
847
1010
  filterByCommits,
848
1011
  pathFilter,
1012
+ shaping,
849
1013
  };
850
1014
  const [diffText, fileNames, diffSummary] = yield Promise.all([
851
1015
  getDiff(git, rangeQuery),
@@ -875,8 +1039,10 @@ function summarizeGitDiff(options) {
875
1039
  }
876
1040
 
877
1041
  exports.DEFAULT_GIT_DIFF_SYSTEM_PROMPT = DEFAULT_GIT_DIFF_SYSTEM_PROMPT;
1042
+ exports.DEFAULT_NOISE_EXCLUDES = DEFAULT_NOISE_EXCLUDES;
878
1043
  exports.LLM_GATEWAY_REQUIRED_MESSAGE = LLM_GATEWAY_REQUIRED_MESSAGE;
879
1044
  exports.buildDiffPathspecs = buildDiffPathspecs;
1045
+ exports.buildDiffShapingGitArgs = buildDiffShapingGitArgs;
880
1046
  exports.createGitClient = createGitClient;
881
1047
  exports.defaultModelForProvider = defaultModelForProvider;
882
1048
  exports.detectLlmProvider = detectLlmProvider;
@@ -892,6 +1058,7 @@ exports.parseLlmDefaultHeadersFromEnv = parseLlmDefaultHeadersFromEnv;
892
1058
  exports.resolveLanguageModel = resolveLanguageModel;
893
1059
  exports.resolveLlmBaseUrl = resolveLlmBaseUrl;
894
1060
  exports.resolveLlmMaxDiffChars = resolveLlmMaxDiffChars;
1061
+ exports.shapeUnifiedDiff = shapeUnifiedDiff;
895
1062
  exports.summarizeGitDiff = summarizeGitDiff;
896
1063
  exports.truncateUnifiedDiffForLlm = truncateUnifiedDiffForLlm;
897
1064
  //# sourceMappingURL=index.cjs.map