@de-otio/repo-aegis-core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/dist/age.d.ts +32 -0
  2. package/dist/age.d.ts.map +1 -0
  3. package/dist/age.js +98 -0
  4. package/dist/age.js.map +1 -0
  5. package/dist/audit-log.d.ts +50 -0
  6. package/dist/audit-log.d.ts.map +1 -0
  7. package/dist/audit-log.js +183 -0
  8. package/dist/audit-log.js.map +1 -0
  9. package/dist/audit-log.test.d.ts +2 -0
  10. package/dist/audit-log.test.d.ts.map +1 -0
  11. package/dist/audit-log.test.js +181 -0
  12. package/dist/audit-log.test.js.map +1 -0
  13. package/dist/deny-set.d.ts +43 -0
  14. package/dist/deny-set.d.ts.map +1 -0
  15. package/dist/deny-set.js +165 -0
  16. package/dist/deny-set.js.map +1 -0
  17. package/dist/deny-set.test.d.ts +2 -0
  18. package/dist/deny-set.test.d.ts.map +1 -0
  19. package/dist/deny-set.test.js +155 -0
  20. package/dist/deny-set.test.js.map +1 -0
  21. package/dist/exceptions.d.ts +96 -0
  22. package/dist/exceptions.d.ts.map +1 -0
  23. package/dist/exceptions.js +143 -0
  24. package/dist/exceptions.js.map +1 -0
  25. package/dist/exit-codes.d.ts +4 -0
  26. package/dist/exit-codes.d.ts.map +1 -0
  27. package/dist/exit-codes.js +6 -0
  28. package/dist/exit-codes.js.map +1 -0
  29. package/dist/first-touch.d.ts +57 -0
  30. package/dist/first-touch.d.ts.map +1 -0
  31. package/dist/first-touch.js +112 -0
  32. package/dist/first-touch.js.map +1 -0
  33. package/dist/import-graph.test.d.ts +2 -0
  34. package/dist/import-graph.test.d.ts.map +1 -0
  35. package/dist/import-graph.test.js +210 -0
  36. package/dist/import-graph.test.js.map +1 -0
  37. package/dist/index.d.ts +37 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +68 -0
  40. package/dist/index.js.map +1 -0
  41. package/dist/lock.d.ts +22 -0
  42. package/dist/lock.d.ts.map +1 -0
  43. package/dist/lock.js +86 -0
  44. package/dist/lock.js.map +1 -0
  45. package/dist/lock.test.d.ts +2 -0
  46. package/dist/lock.test.d.ts.map +1 -0
  47. package/dist/lock.test.js +125 -0
  48. package/dist/lock.test.js.map +1 -0
  49. package/dist/paths.d.ts +22 -0
  50. package/dist/paths.d.ts.map +1 -0
  51. package/dist/paths.js +46 -0
  52. package/dist/paths.js.map +1 -0
  53. package/dist/paths.test.d.ts +2 -0
  54. package/dist/paths.test.d.ts.map +1 -0
  55. package/dist/paths.test.js +78 -0
  56. package/dist/paths.test.js.map +1 -0
  57. package/dist/redaction.d.ts +29 -0
  58. package/dist/redaction.d.ts.map +1 -0
  59. package/dist/redaction.js +48 -0
  60. package/dist/redaction.js.map +1 -0
  61. package/dist/redaction.test.d.ts +2 -0
  62. package/dist/redaction.test.d.ts.map +1 -0
  63. package/dist/redaction.test.js +67 -0
  64. package/dist/redaction.test.js.map +1 -0
  65. package/dist/regex-safety.d.ts +87 -0
  66. package/dist/regex-safety.d.ts.map +1 -0
  67. package/dist/regex-safety.js +322 -0
  68. package/dist/regex-safety.js.map +1 -0
  69. package/dist/regex-safety.test.d.ts +2 -0
  70. package/dist/regex-safety.test.d.ts.map +1 -0
  71. package/dist/regex-safety.test.js +149 -0
  72. package/dist/regex-safety.test.js.map +1 -0
  73. package/dist/registry-mutate.d.ts +35 -0
  74. package/dist/registry-mutate.d.ts.map +1 -0
  75. package/dist/registry-mutate.js +149 -0
  76. package/dist/registry-mutate.js.map +1 -0
  77. package/dist/registry-mutate.test.d.ts +2 -0
  78. package/dist/registry-mutate.test.d.ts.map +1 -0
  79. package/dist/registry-mutate.test.js +96 -0
  80. package/dist/registry-mutate.test.js.map +1 -0
  81. package/dist/registry.d.ts +64 -0
  82. package/dist/registry.d.ts.map +1 -0
  83. package/dist/registry.js +120 -0
  84. package/dist/registry.js.map +1 -0
  85. package/dist/registry.test.d.ts +2 -0
  86. package/dist/registry.test.d.ts.map +1 -0
  87. package/dist/registry.test.js +316 -0
  88. package/dist/registry.test.js.map +1 -0
  89. package/dist/remote-url.d.ts +18 -0
  90. package/dist/remote-url.d.ts.map +1 -0
  91. package/dist/remote-url.js +66 -0
  92. package/dist/remote-url.js.map +1 -0
  93. package/dist/remote-url.test.d.ts +2 -0
  94. package/dist/remote-url.test.d.ts.map +1 -0
  95. package/dist/remote-url.test.js +116 -0
  96. package/dist/remote-url.test.js.map +1 -0
  97. package/dist/render.d.ts +54 -0
  98. package/dist/render.d.ts.map +1 -0
  99. package/dist/render.js +182 -0
  100. package/dist/render.js.map +1 -0
  101. package/dist/render.test.d.ts +2 -0
  102. package/dist/render.test.d.ts.map +1 -0
  103. package/dist/render.test.js +152 -0
  104. package/dist/render.test.js.map +1 -0
  105. package/dist/repo.d.ts +40 -0
  106. package/dist/repo.d.ts.map +1 -0
  107. package/dist/repo.js +214 -0
  108. package/dist/repo.js.map +1 -0
  109. package/dist/repo.test.d.ts +2 -0
  110. package/dist/repo.test.d.ts.map +1 -0
  111. package/dist/repo.test.js +234 -0
  112. package/dist/repo.test.js.map +1 -0
  113. package/dist/scan.d.ts +103 -0
  114. package/dist/scan.d.ts.map +1 -0
  115. package/dist/scan.js +436 -0
  116. package/dist/scan.js.map +1 -0
  117. package/dist/scan.test.d.ts +2 -0
  118. package/dist/scan.test.d.ts.map +1 -0
  119. package/dist/scan.test.js +437 -0
  120. package/dist/scan.test.js.map +1 -0
  121. package/dist/schemas.d.ts +50 -0
  122. package/dist/schemas.d.ts.map +1 -0
  123. package/dist/schemas.js +190 -0
  124. package/dist/schemas.js.map +1 -0
  125. package/dist/secret-markers.d.ts +34 -0
  126. package/dist/secret-markers.d.ts.map +1 -0
  127. package/dist/secret-markers.js +118 -0
  128. package/dist/secret-markers.js.map +1 -0
  129. package/dist/secret-markers.test.d.ts +2 -0
  130. package/dist/secret-markers.test.d.ts.map +1 -0
  131. package/dist/secret-markers.test.js +154 -0
  132. package/dist/secret-markers.test.js.map +1 -0
  133. package/dist/trust-boundary.d.ts +33 -0
  134. package/dist/trust-boundary.d.ts.map +1 -0
  135. package/dist/trust-boundary.js +77 -0
  136. package/dist/trust-boundary.js.map +1 -0
  137. package/dist/trust-boundary.test.d.ts +2 -0
  138. package/dist/trust-boundary.test.d.ts.map +1 -0
  139. package/dist/trust-boundary.test.js +170 -0
  140. package/dist/trust-boundary.test.js.map +1 -0
  141. package/dist/types.d.ts +47 -0
  142. package/dist/types.d.ts.map +1 -0
  143. package/dist/types.js +8 -0
  144. package/dist/types.js.map +1 -0
  145. package/dist/working-tree.d.ts +38 -0
  146. package/dist/working-tree.d.ts.map +1 -0
  147. package/dist/working-tree.js +133 -0
  148. package/dist/working-tree.js.map +1 -0
  149. package/dist/working-tree.test.d.ts +2 -0
  150. package/dist/working-tree.test.d.ts.map +1 -0
  151. package/dist/working-tree.test.js +162 -0
  152. package/dist/working-tree.test.js.map +1 -0
  153. package/package.json +40 -0
  154. package/src/age.ts +113 -0
  155. package/src/audit-log.test.ts +222 -0
  156. package/src/audit-log.ts +215 -0
  157. package/src/deny-set.test.ts +208 -0
  158. package/src/deny-set.ts +231 -0
  159. package/src/exceptions.ts +134 -0
  160. package/src/exit-codes.ts +5 -0
  161. package/src/first-touch.ts +172 -0
  162. package/src/import-graph.test.ts +239 -0
  163. package/src/index.ts +191 -0
  164. package/src/lock.test.ts +151 -0
  165. package/src/lock.ts +88 -0
  166. package/src/paths.test.ts +94 -0
  167. package/src/paths.ts +55 -0
  168. package/src/redaction.test.ts +81 -0
  169. package/src/redaction.ts +49 -0
  170. package/src/regex-safety.test.ts +194 -0
  171. package/src/regex-safety.ts +349 -0
  172. package/src/registry-mutate.test.ts +134 -0
  173. package/src/registry-mutate.ts +185 -0
  174. package/src/registry.test.ts +460 -0
  175. package/src/registry.ts +178 -0
  176. package/src/remote-url.test.ts +121 -0
  177. package/src/remote-url.ts +78 -0
  178. package/src/render.test.ts +206 -0
  179. package/src/render.ts +215 -0
  180. package/src/repo.test.ts +275 -0
  181. package/src/repo.ts +245 -0
  182. package/src/scan.test.ts +580 -0
  183. package/src/scan.ts +531 -0
  184. package/src/schemas.ts +207 -0
  185. package/src/secret-markers.test.ts +183 -0
  186. package/src/secret-markers.ts +145 -0
  187. package/src/trust-boundary.test.ts +198 -0
  188. package/src/trust-boundary.ts +98 -0
  189. package/src/types.ts +55 -0
  190. package/src/working-tree.test.ts +193 -0
  191. package/src/working-tree.ts +130 -0
@@ -0,0 +1,580 @@
1
+ // SPDX-License-Identifier: GPL-3.0-or-later
2
+ // Copyright (C) 2026 Richard Myers and contributors.
3
+ import { describe, it, before, after } from "node:test";
4
+ import assert from "node:assert/strict";
5
+ import { execFileSync } from "node:child_process";
6
+ import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, rmSync } from "node:fs";
7
+ import { tmpdir } from "node:os";
8
+ import { join } from "node:path";
9
+ import { scanText, scanFile, scanStagedDiff, scanRange, scanHistory } from "./scan.js";
10
+ import type { DenySet } from "./deny-set.js";
11
+ import type { RepoConfig } from "./repo.js";
12
+
13
/**
 * Test helper: wraps a list of patterns in a minimal DenySet with no
 * source files and no warnings. The combined regex is simply the
 * patterns joined with `|`.
 */
const denySetWithPatterns = (patterns: string[]): DenySet => {
  const alternation = patterns.join("|");
  return {
    files: [],
    patterns,
    combinedRegex: alternation,
    warnings: [],
  };
};
19
+
20
+ let tmp: string;
21
+
22
+ before(() => {
23
+ tmp = mkdtempSync(join(tmpdir(), "repo-aegis-scan-"));
24
+ });
25
+
26
+ after(() => {
27
+ rmSync(tmp, { recursive: true, force: true });
28
+ });
29
+
30
// Unit tests for scanText: matching, positions, redaction, allow
// comments, and engagement attribution.
describe("scanText", () => {
  it("returns no hits with empty deny set", () => {
    const emptyDenySet: DenySet = { files: [], patterns: [], combinedRegex: "", warnings: [] };
    const found = scanText("anything", emptyDenySet);
    assert.deepEqual(found, []);
  });

  it("returns no hits when text doesn't match", () => {
    const found = scanText("hello world", denySetWithPatterns(["acme-corp", "betaco"]));
    assert.deepEqual(found, []);
  });

  it("returns one hit per matching line", () => {
    const input = "first line\nsecond has acme-corp\nthird line\nfourth has betaco";
    const found = scanText(input, denySetWithPatterns(["acme-corp", "betaco"]));
    assert.equal(found.length, 2);
    assert.equal(found[0]!.line, 2);
    assert.equal(found[1]!.line, 4);
  });

  it("computes accurate column number", () => {
    const found = scanText("padding acme-corp here", denySetWithPatterns(["acme-corp"]));
    assert.equal(found.length, 1);
    assert.equal(found[0]!.column, 9);
  });

  it("redacts match by default (preview format)", () => {
    const found = scanText("see acme-corp", denySetWithPatterns(["acme-corp"]));
    const preview = found[0]!.matchPreview;
    assert.equal(preview, "acm***9");
    assert.ok(!found[0]!.matchPreview.includes("corp"));
  });

  it("reveals literal when revealMatches is true", () => {
    const options = { revealMatches: true };
    const found = scanText("see acme-corp", denySetWithPatterns(["acme-corp"]), undefined, options);
    assert.equal(found[0]!.matchPreview, "acme-corp");
  });

  it("redaction never echoes the literal", () => {
    const found = scanText(
      "contains very-specific-customer-name here",
      denySetWithPatterns(["very-specific-customer-name"]),
    );
    for (const fragment of ["specific", "customer"]) {
      assert.ok(!found[0]!.matchPreview.includes(fragment));
    }
  });

  it("propagates path field when provided", () => {
    const sourcePath = "src/foo.ts";
    const found = scanText("acme!", denySetWithPatterns(["acme"]), sourcePath);
    assert.equal(found[0]!.path, "src/foo.ts");
  });

  it("case-insensitive matching", () => {
    const found = scanText("see ACME-Corp", denySetWithPatterns(["acme"]));
    assert.equal(found.length, 1);
  });

  it("respects per-line allow comments by default", () => {
    // Only the middle line carries the allow comment, so lines 1 and 3
    // are expected to be reported.
    const lines = [
      "this acme-corp has hit",
      "this acme-corp is fine // repo-aegis: allow synthetic fixture",
      "this acme-corp is also a hit",
    ];
    const found = scanText(lines.join("\n"), denySetWithPatterns(["acme-corp"]));
    assert.equal(found.length, 2);
    assert.equal(found[0]!.line, 1);
    assert.equal(found[1]!.line, 3);
  });

  it("recognises the allow comment in any comment style", () => {
    const denySet = denySetWithPatterns(["acme-corp"]);
    const variants = [
      "acme-corp # repo-aegis: allow",
      "acme-corp /* repo-aegis: allow */",
      "acme-corp <!-- repo-aegis: allow -->",
      "acme-corp ;; repo-aegis: allow",
    ];
    variants.forEach((text) => {
      assert.equal(scanText(text, denySet).length, 0, `should suppress: ${text}`);
    });
  });

  it("does NOT suppress when allow token is malformed", () => {
    // The colon after "repo-aegis" is missing, so this is not an allow comment.
    const malformed = "acme-corp # repo-aegis allow";
    const found = scanText(malformed, denySetWithPatterns(["acme-corp"]));
    assert.equal(found.length, 1);
  });

  it("respectAllowComments=false bypasses suppression", () => {
    const found = scanText(
      "acme-corp // repo-aegis: allow",
      denySetWithPatterns(["acme-corp"]),
      undefined,
      { respectAllowComments: false },
    );
    assert.equal(found.length, 1);
  });

  it("attributes the matched pattern to its source engagement", () => {
    // Two patterns, each tagged with a different source. A hit on the
    // second pattern should report engagement = "customer-b".
    const patterns = ["alpha-marker", "bravo-marker"];
    const taggedDenySet: DenySet = {
      files: [],
      patterns,
      patternSources: ["customer-a", "customer-b"],
      combinedRegex: "alpha-marker|bravo-marker",
      warnings: [],
    };
    const found = scanText("see bravo-marker", taggedDenySet);
    assert.equal(found.length, 1);
    assert.equal(found[0]!.engagement, "customer-b");
  });

  it("omits engagement when patternSources is missing", () => {
    const found = scanText("see lone-marker", denySetWithPatterns(["lone-marker"]));
    assert.equal(found.length, 1);
    assert.equal(found[0]!.engagement, undefined);
  });

  it("omits engagement when patternSources length mismatches", () => {
    // One source for two patterns: the lengths disagree.
    const mismatched: DenySet = {
      files: [],
      patterns: ["a-marker", "b-marker"],
      patternSources: ["only-one"],
      combinedRegex: "a-marker|b-marker",
      warnings: [],
    };
    const found = scanText("see a-marker", mismatched);
    assert.equal(found[0]!.engagement, undefined);
  });
});
161
+
162
// Tests for scanFile against real files in the shared temp directory:
// a normal hit plus the three skip reasons asserted below.
describe("scanFile", () => {
  it("returns hits from a real file", () => {
    const target = join(tmp, "test.txt");
    writeFileSync(target, "first line\nsecond has acme-corp here");
    const result = scanFile(target, denySetWithPatterns(["acme-corp"]));
    assert.equal(result.hits.length, 1);
    assert.equal(result.skipped.length, 0);
  });

  it("skips binary files (NUL byte heuristic)", () => {
    const target = join(tmp, "binary.bin");
    const bytes = Buffer.from([0x00, 0x01, 0x02, 0x03, 0x04]);
    writeFileSync(target, bytes);
    const result = scanFile(target, denySetWithPatterns(["a"]));
    assert.equal(result.hits.length, 0);
    assert.equal(result.skipped.length, 1);
    assert.equal(result.skipped[0]!.reason, "binary");
  });

  it("skips files over the size limit", () => {
    // 100 bytes on disk against a 50-byte limit.
    const target = join(tmp, "big.txt");
    writeFileSync(target, "a".repeat(100));
    const limits = { maxFileBytes: 50 };
    const result = scanFile(target, denySetWithPatterns(["a"]), limits);
    assert.equal(result.hits.length, 0);
    assert.equal(result.skipped[0]!.reason, "too-large");
  });

  it("reports unreadable for missing files", () => {
    const missing = join(tmp, "doesnotexist.txt");
    const result = scanFile(missing, denySetWithPatterns(["a"]));
    assert.equal(result.skipped[0]!.reason, "unreadable");
  });
});
197
+
198
/**
 * Test helper: builds a RepoConfig for a git repository rooted at `cwd`,
 * with an explicitly set "private-strict" class and no engagements.
 */
function makeRepoConfig(cwd: string): RepoConfig {
  const config: RepoConfig = {
    engagements: [],
    classExplicit: true,
    class: "private-strict",
    isGitRepo: true,
    cwd,
  };
  return config;
}
207
+
208
/**
 * Test helper: creates `dir` (including missing parents) and initialises a
 * git repository in it on branch `main`.
 *
 * Identity and signing are configured repo-locally so that test commits do
 * not depend on the developer's global git configuration.
 */
function gitInit(dir: string): void {
  mkdirSync(dir, { recursive: true });
  execFileSync("git", ["init", "-q", "-b", "main"], { cwd: dir });
  const localConfig: ReadonlyArray<readonly [string, string]> = [
    ["user.email", "test@example.com"],
    ["user.name", "test"],
    // A global `commit.gpgsign=true` would make every fixture commit try
    // to sign; override it for this throwaway repository.
    ["commit.gpgsign", "false"],
  ];
  for (const [key, value] of localConfig) {
    execFileSync("git", ["config", key, value], { cwd: dir });
  }
}
214
+
215
/**
 * Test helper: writes each entry of `files` (path relative to `dir` mapped
 * to its contents), stages it, and records a single commit.
 *
 * @param dir   Working tree of an already-initialised git repository.
 * @param files Relative path → file contents. Nested paths are supported.
 * @param msg   Commit message.
 * @returns The SHA printed by `git rev-parse HEAD` after the commit.
 */
function commit(dir: string, files: Record<string, string>, msg: string): string {
  for (const [relPath, contents] of Object.entries(files)) {
    // Ensure the parent directory exists so paths like "a/b.txt" work. For
    // a top-level file this resolves to `dir` itself, which is a no-op.
    mkdirSync(join(dir, relPath, ".."), { recursive: true });
    writeFileSync(join(dir, relPath), contents);
    // `--` ends option parsing, so a name starting with `-` is staged as a path.
    execFileSync("git", ["add", "--", relPath], { cwd: dir });
  }
  execFileSync("git", ["commit", "-q", "-m", msg], { cwd: dir });
  return execFileSync("git", ["rev-parse", "HEAD"], { cwd: dir, encoding: "utf8" }).trim();
}
223
+
224
// Tests for scanRange over a two-commit range (`<base>..<head>`) in a
// throwaway repository.
describe("scanRange", () => {
  it("returns no hits when range has no matching additions", () => {
    const repoDir = join(tmp, "range-clean");
    gitInit(repoDir);
    const base = commit(repoDir, { "f.txt": "hello\n" }, "init");
    const head = commit(repoDir, { "f.txt": "hello\nworld\n" }, "add world");
    const denySet = denySetWithPatterns(["secret-marker"]);
    const result = scanRange(makeRepoConfig(repoDir), denySet, `${base}..${head}`);
    assert.equal(result.hits.length, 0);
  });

  it("flags an added line that contains a marker", () => {
    const repoDir = join(tmp, "range-hit");
    gitInit(repoDir);
    const base = commit(repoDir, { "f.txt": "hello\n" }, "init");
    const head = commit(repoDir, { "f.txt": "hello\nleaked-marker-here\n" }, "leak");
    const denySet = denySetWithPatterns(["leaked-marker"]);
    const result = scanRange(makeRepoConfig(repoDir), denySet, `${base}..${head}`);
    assert.equal(result.hits.length, 1);
  });

  it("does NOT flag a removed line (only + lines)", () => {
    // The marker exists only at the base commit; the head commit empties the file.
    const repoDir = join(tmp, "range-removed");
    gitInit(repoDir);
    const base = commit(repoDir, { "f.txt": "removed-marker\n" }, "init");
    const head = commit(repoDir, { "f.txt": "" }, "remove");
    const denySet = denySetWithPatterns(["removed-marker"]);
    const result = scanRange(makeRepoConfig(repoDir), denySet, `${base}..${head}`);
    assert.equal(result.hits.length, 0);
  });

  it("redacts the literal in matchPreview by default", () => {
    const repoDir = join(tmp, "range-redact");
    gitInit(repoDir);
    const base = commit(repoDir, { "f.txt": "x\n" }, "init");
    const head = commit(repoDir, { "f.txt": "x\nspecific-leak-string\n" }, "leak");
    const denySet = denySetWithPatterns(["specific-leak-string"]);
    const result = scanRange(makeRepoConfig(repoDir), denySet, `${base}..${head}`);
    const preview = result.hits[0]!.matchPreview;
    assert.ok(!preview.includes("specific-leak"));
  });
});
273
+
274
+ describe("scanRange diff parsing edge cases", () => {
275
+ it("does NOT flag the literal '+++ b/<name>' header even when the path matches a pattern", () => {
276
+ // Construct a scenario where the new filename itself contains the
277
+ // marker. The hand-rolled `+`-line filter would have read
278
+ // `+++ b/leaked-marker.txt` as added content (after slicing the
279
+ // first `+`, the content `++ b/leaked-marker.txt` no longer starts
280
+ // with `+`, but historically `+++` was special-cased only for the
281
+ // header itself — pattern-matching the path is the false positive
282
+ // here). parse-diff treats `+++ b/...` as a header and never emits
283
+ // it as an `add` change.
284
+ const dir = join(tmp, "range-rename-header");
285
+ gitInit(dir);
286
+ const a = commit(dir, { "ordinary.txt": "hello\n" }, "init");
287
+ // Rename ordinary.txt to a name that contains the marker. No
288
+ // content changes, so no `add` content lines should be emitted.
289
+ execFileSync("git", ["mv", "ordinary.txt", "leaked-marker.txt"], { cwd: dir });
290
+ execFileSync("git", ["commit", "-q", "-m", "rename"], { cwd: dir });
291
+ const b = execFileSync("git", ["rev-parse", "HEAD"], {
292
+ cwd: dir,
293
+ encoding: "utf8",
294
+ }).trim();
295
+ const r = scanRange(
296
+ makeRepoConfig(dir),
297
+ denySetWithPatterns(["leaked-marker"]),
298
+ `${a}..${b}`,
299
+ );
300
+ assert.equal(
301
+ r.hits.length,
302
+ 0,
303
+ "filename in '+++ b/...' header must not be scanned as content",
304
+ );
305
+ });
306
+
307
+ it("a pure rename (no content changes) yields no additions", () => {
308
+ const dir = join(tmp, "range-rename-only");
309
+ gitInit(dir);
310
+ const a = commit(dir, { "old.txt": "the-secret-marker\n" }, "init");
311
+ execFileSync("git", ["mv", "old.txt", "new.txt"], { cwd: dir });
312
+ execFileSync("git", ["commit", "-q", "-m", "rename"], { cwd: dir });
313
+ const b = execFileSync("git", ["rev-parse", "HEAD"], {
314
+ cwd: dir,
315
+ encoding: "utf8",
316
+ }).trim();
317
+ const r = scanRange(
318
+ makeRepoConfig(dir),
319
+ denySetWithPatterns(["the-secret-marker"]),
320
+ `${a}..${b}`,
321
+ );
322
+ assert.equal(
323
+ r.hits.length,
324
+ 0,
325
+ "pure rename has no `add` changes; pre-existing content must not be re-flagged",
326
+ );
327
+ });
328
+
329
+ it("flags an added line whose literal content begins with '+'", () => {
330
+ // A hunk line of `++added-marker` is, in unified-diff terms, an
331
+ // `add` whose content is `+added-marker`. The hand-rolled filter
332
+ // sliced one `+` off the front (correct), giving `+added-marker`,
333
+ // so it would still flag — but the property under test is that
334
+ // parse-diff also flags it (i.e. we didn't regress the
335
+ // "`+`-prefixed in-hunk content gets scanned" case).
336
+ const dir = join(tmp, "range-plus-prefixed");
337
+ gitInit(dir);
338
+ const a = commit(dir, { "f.txt": "hello\n" }, "init");
339
+ const b = commit(
340
+ dir,
341
+ { "f.txt": "hello\n+added-marker\n" },
342
+ "add plus-prefixed line",
343
+ );
344
+ const r = scanRange(
345
+ makeRepoConfig(dir),
346
+ denySetWithPatterns(["added-marker"]),
347
+ `${a}..${b}`,
348
+ );
349
+ assert.equal(r.hits.length, 1, "added line whose content starts with '+' must be flagged");
350
+ });
351
+
352
+ it("does NOT flag context (unchanged) lines whose content begins with '+'", () => {
353
+ // With -U0 there are no context lines in scanRange's git invocation,
354
+ // and feeding a synthetic diff through extractAdditions via
355
+ // scanStagedDiff would require staging. Instead, go through scanRange
356
+ // with a file that already contains a '+'-prefixed line and append
357
+ // one unrelated line; the pre-existing line is unchanged, so it
358
+ // must not be flagged.
359
+ const dir = join(tmp, "range-context-plus");
360
+ gitInit(dir);
361
+ // Pre-existing line "+context-marker" is committed (so it lives in
362
+ // the file at base), then an unrelated benign change is added.
363
+ const a = commit(
364
+ dir,
365
+ { "f.txt": "alpha\n+context-marker\nbeta\n" },
366
+ "init with plus-prefixed context",
367
+ );
368
+ const b = commit(
369
+ dir,
370
+ { "f.txt": "alpha\n+context-marker\nbeta\nharmless\n" },
371
+ "append harmless line",
372
+ );
373
+ const r = scanRange(
374
+ makeRepoConfig(dir),
375
+ denySetWithPatterns(["context-marker"]),
376
+ `${a}..${b}`,
377
+ );
378
+ assert.equal(
379
+ r.hits.length,
380
+ 0,
381
+ "context line containing '+context-marker' must not be flagged when only an unrelated line was added",
382
+ );
383
+ });
384
+ });
385
+
386
// Tests for scanStagedDiff: changes are staged with `git add` / `git mv`
// but deliberately left uncommitted.
describe("scanStagedDiff", () => {
  it("flags an added line in the staged diff", () => {
    const repoDir = join(tmp, "staged-hit");
    gitInit(repoDir);
    commit(repoDir, { "f.txt": "hello\n" }, "init");
    const trackedFile = join(repoDir, "f.txt");
    writeFileSync(trackedFile, "hello\nstaged-leak-marker\n");
    execFileSync("git", ["add", "f.txt"], { cwd: repoDir });
    const denySet = denySetWithPatterns(["staged-leak-marker"]);
    const result = scanStagedDiff(makeRepoConfig(repoDir), denySet);
    assert.equal(result.hits.length, 1);
  });

  it("does NOT flag the new filename in a staged rename whose path matches a pattern", () => {
    const repoDir = join(tmp, "staged-rename");
    gitInit(repoDir);
    commit(repoDir, { "ordinary.txt": "hello\n" }, "init");
    // `git mv` stages the rename; nothing is committed afterwards.
    execFileSync("git", ["mv", "ordinary.txt", "leaked-marker.txt"], { cwd: repoDir });
    const denySet = denySetWithPatterns(["leaked-marker"]);
    const result = scanStagedDiff(makeRepoConfig(repoDir), denySet);
    const failureMessage = "filename in '+++ b/...' header must not be scanned as content";
    assert.equal(result.hits.length, 0, failureMessage);
  });
});
417
+
418
// Tests for scanHistory: each case builds a small throwaway repository
// and checks which commits are reported for the given patterns.
describe("scanHistory", () => {
  it("finds a commit whose diff contains a marker pattern", () => {
    const dir = join(tmp, "hist-hit");
    gitInit(dir);
    commit(dir, { "f.txt": "x\n" }, "init");
    const sha = commit(dir, { "f.txt": "x\nhistorical-marker-leak\n" }, "leaks-it");
    const hits = scanHistory(makeRepoConfig(dir), denySetWithPatterns(["historical-marker-leak"]));
    assert.ok(hits.length >= 1);
    // Reject an empty commitSha explicitly: every string starts with "",
    // so a bare startsWith check would pass vacuously.
    assert.ok(hits.some(h => h.commitSha !== "" && sha.startsWith(h.commitSha)));
  });

  it("returns empty when no commits match any pattern", () => {
    const dir = join(tmp, "hist-clean");
    gitInit(dir);
    commit(dir, { "f.txt": "boring\n" }, "init");
    const hits = scanHistory(makeRepoConfig(dir), denySetWithPatterns(["never-existed"]));
    assert.equal(hits.length, 0);
  });

  it("redacts the pattern by default", () => {
    const dir = join(tmp, "hist-redact");
    gitInit(dir);
    commit(dir, { "f.txt": "secret-pattern-x\n" }, "leak");
    const hits = scanHistory(makeRepoConfig(dir), denySetWithPatterns(["secret-pattern-x"]));
    assert.ok(hits.length >= 1);
    assert.ok(!hits[0]!.pattern.includes("secret-pattern"));
  });

  it("reveals literal pattern with revealMatches=true", () => {
    const dir = join(tmp, "hist-reveal");
    gitInit(dir);
    commit(dir, { "f.txt": "secret-pattern-y\n" }, "leak");
    const hits = scanHistory(
      makeRepoConfig(dir),
      denySetWithPatterns(["secret-pattern-y"]),
      { revealMatches: true },
    );
    assert.equal(hits[0]!.pattern, "secret-pattern-y");
  });

  it("multi-pattern scan: all patterns attributed in a single git invocation", () => {
    // Two distinct patterns, each leaked in a different commit. The
    // refactor combines patterns into one `git log -G '<a>|<b>'`
    // invocation; we verify that with a counting `git` shim on PATH
    // and assert exactly ONE git invocation drove the whole scan.
    const dir = join(tmp, "hist-multi");
    gitInit(dir);
    commit(dir, { "f.txt": "init\n" }, "init");
    const shaA = commit(
      dir,
      { "f.txt": "init\nalpha-leak-marker-here\n" },
      "leak alpha",
    );
    const shaB = commit(
      dir,
      { "f.txt": "init\nalpha-leak-marker-here\nbravo-leak-marker-here\n" },
      "leak bravo",
    );

    // Build a fake `git` that appends its arguments to a log file and
    // then exec()s the real git, so behaviour is unchanged. The real
    // binary is resolved here, before PATH is modified.
    const shimDir = mkdtempSync(join(tmp, "shim-"));
    const logPath = join(shimDir, "git-calls.log");
    const shimPath = join(shimDir, "git");
    const realGit = execFileSync("which", ["git"], { encoding: "utf8" }).trim();
    // The real git path is quoted so an install location containing
    // spaces still execs correctly.
    const shim = `#!/usr/bin/env bash\necho "$@" >> "${logPath}"\nexec "${realGit}" "$@"\n`;
    // Create the shim already executable rather than spawning `chmod`.
    writeFileSync(shimPath, shim, { mode: 0o755 });

    const prevPath = process.env["PATH"];
    process.env["PATH"] = `${shimDir}:${prevPath ?? ""}`;
    let hits: ReturnType<typeof scanHistory>;
    try {
      hits = scanHistory(
        makeRepoConfig(dir),
        denySetWithPatterns(["alpha-leak-marker", "bravo-leak-marker"]),
        { revealMatches: true },
      );
    } finally {
      // Always restore PATH so later tests resolve the real git directly.
      if (prevPath !== undefined) process.env["PATH"] = prevPath;
      else delete process.env["PATH"];
    }

    // The gitInit / commit calls above also run `git` via PATH, but they
    // finished before the shim directory was prepended, so they were not
    // recorded. Only invocations made while scanHistory ran appear in
    // the log, and exactly one is expected.
    const calls = readFileSync(logPath, "utf8")
      .split("\n")
      .filter(Boolean);
    assert.equal(
      calls.length,
      1,
      `expected exactly 1 git invocation, got ${calls.length}: ${calls.join(" | ")}`,
    );
    assert.ok(calls[0]!.includes("log"), `expected log subcommand, got: ${calls[0]}`);

    // Each pattern must attribute to the commit that *introduced* it.
    // (`git log -G` filters to commits where the diff content
    // matched. Once a line is present, subsequent commits that don't
    // touch it won't appear for that pattern.)
    const alphaHits = hits.filter(h => h.pattern === "alpha-leak-marker");
    const bravoHits = hits.filter(h => h.pattern === "bravo-leak-marker");
    assert.ok(
      alphaHits.some(h => h.commitSha !== "" && shaA.startsWith(h.commitSha)),
      `alpha pattern should attribute to commit ${shaA.slice(0, 7)}`,
    );
    assert.ok(
      bravoHits.some(h => h.commitSha !== "" && shaB.startsWith(h.commitSha)),
      `bravo pattern should attribute to commit ${shaB.slice(0, 7)}`,
    );
    assert.ok(
      !bravoHits.some(h => shaA.startsWith(h.commitSha)),
      `bravo pattern must NOT attribute to commit ${shaA.slice(0, 7)} (not introduced there)`,
    );
  });
});
538
+
539
// Smoke test for scanRange on a large diff.
describe("scanRange streaming", () => {
  it("handles a multi-MB diff without OOM (streaming, not buffered whole)", () => {
    // Build a synthetic diff several MB in size by committing many
    // benign lines and one marker line. The streaming implementation
    // walks the diff in 64 KiB chunks; the prior buffer-the-whole-
    // diff implementation would still complete here, but the test is
    // worthwhile as a smoke test for the new code path on realistic
    // large inputs.
    const dir = join(tmp, "range-large");
    gitInit(dir);
    // `commit` returns the SHA of the commit it just created, so both
    // range endpoints come straight from the helper.
    const a = commit(dir, { "f.txt": "seed\n" }, "init");

    // ~3 MB of benign added content: 60_000 lines × ~50 bytes each,
    // then the marker line, then 100 short trailing lines.
    const benignLines: string[] = [
      "seed",
      ...Array.from(
        { length: 60_000 },
        (_, i) => `benign-line-${i}-padding-padding-padding-padding-padding`,
      ),
      "hidden-streaming-marker-line",
      ...Array.from({ length: 100 }, (_, i) => `tail-${i}`),
    ];
    const b = commit(dir, { "f.txt": benignLines.join("\n") + "\n" }, "huge");

    const r = scanRange(
      makeRepoConfig(dir),
      denySetWithPatterns(["hidden-streaming-marker"]),
      `${a}..${b}`,
    );
    assert.equal(r.hits.length, 1, "marker buried in MB of additions must still be found");
  });
});