@de-otio/repo-aegis-core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/dist/age.d.ts +32 -0
  2. package/dist/age.d.ts.map +1 -0
  3. package/dist/age.js +98 -0
  4. package/dist/age.js.map +1 -0
  5. package/dist/audit-log.d.ts +50 -0
  6. package/dist/audit-log.d.ts.map +1 -0
  7. package/dist/audit-log.js +183 -0
  8. package/dist/audit-log.js.map +1 -0
  9. package/dist/audit-log.test.d.ts +2 -0
  10. package/dist/audit-log.test.d.ts.map +1 -0
  11. package/dist/audit-log.test.js +181 -0
  12. package/dist/audit-log.test.js.map +1 -0
  13. package/dist/deny-set.d.ts +43 -0
  14. package/dist/deny-set.d.ts.map +1 -0
  15. package/dist/deny-set.js +165 -0
  16. package/dist/deny-set.js.map +1 -0
  17. package/dist/deny-set.test.d.ts +2 -0
  18. package/dist/deny-set.test.d.ts.map +1 -0
  19. package/dist/deny-set.test.js +155 -0
  20. package/dist/deny-set.test.js.map +1 -0
  21. package/dist/exceptions.d.ts +96 -0
  22. package/dist/exceptions.d.ts.map +1 -0
  23. package/dist/exceptions.js +143 -0
  24. package/dist/exceptions.js.map +1 -0
  25. package/dist/exit-codes.d.ts +4 -0
  26. package/dist/exit-codes.d.ts.map +1 -0
  27. package/dist/exit-codes.js +6 -0
  28. package/dist/exit-codes.js.map +1 -0
  29. package/dist/first-touch.d.ts +57 -0
  30. package/dist/first-touch.d.ts.map +1 -0
  31. package/dist/first-touch.js +112 -0
  32. package/dist/first-touch.js.map +1 -0
  33. package/dist/import-graph.test.d.ts +2 -0
  34. package/dist/import-graph.test.d.ts.map +1 -0
  35. package/dist/import-graph.test.js +210 -0
  36. package/dist/import-graph.test.js.map +1 -0
  37. package/dist/index.d.ts +37 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +68 -0
  40. package/dist/index.js.map +1 -0
  41. package/dist/lock.d.ts +22 -0
  42. package/dist/lock.d.ts.map +1 -0
  43. package/dist/lock.js +86 -0
  44. package/dist/lock.js.map +1 -0
  45. package/dist/lock.test.d.ts +2 -0
  46. package/dist/lock.test.d.ts.map +1 -0
  47. package/dist/lock.test.js +125 -0
  48. package/dist/lock.test.js.map +1 -0
  49. package/dist/paths.d.ts +22 -0
  50. package/dist/paths.d.ts.map +1 -0
  51. package/dist/paths.js +46 -0
  52. package/dist/paths.js.map +1 -0
  53. package/dist/paths.test.d.ts +2 -0
  54. package/dist/paths.test.d.ts.map +1 -0
  55. package/dist/paths.test.js +78 -0
  56. package/dist/paths.test.js.map +1 -0
  57. package/dist/redaction.d.ts +29 -0
  58. package/dist/redaction.d.ts.map +1 -0
  59. package/dist/redaction.js +48 -0
  60. package/dist/redaction.js.map +1 -0
  61. package/dist/redaction.test.d.ts +2 -0
  62. package/dist/redaction.test.d.ts.map +1 -0
  63. package/dist/redaction.test.js +67 -0
  64. package/dist/redaction.test.js.map +1 -0
  65. package/dist/regex-safety.d.ts +87 -0
  66. package/dist/regex-safety.d.ts.map +1 -0
  67. package/dist/regex-safety.js +322 -0
  68. package/dist/regex-safety.js.map +1 -0
  69. package/dist/regex-safety.test.d.ts +2 -0
  70. package/dist/regex-safety.test.d.ts.map +1 -0
  71. package/dist/regex-safety.test.js +149 -0
  72. package/dist/regex-safety.test.js.map +1 -0
  73. package/dist/registry-mutate.d.ts +35 -0
  74. package/dist/registry-mutate.d.ts.map +1 -0
  75. package/dist/registry-mutate.js +149 -0
  76. package/dist/registry-mutate.js.map +1 -0
  77. package/dist/registry-mutate.test.d.ts +2 -0
  78. package/dist/registry-mutate.test.d.ts.map +1 -0
  79. package/dist/registry-mutate.test.js +96 -0
  80. package/dist/registry-mutate.test.js.map +1 -0
  81. package/dist/registry.d.ts +64 -0
  82. package/dist/registry.d.ts.map +1 -0
  83. package/dist/registry.js +120 -0
  84. package/dist/registry.js.map +1 -0
  85. package/dist/registry.test.d.ts +2 -0
  86. package/dist/registry.test.d.ts.map +1 -0
  87. package/dist/registry.test.js +316 -0
  88. package/dist/registry.test.js.map +1 -0
  89. package/dist/remote-url.d.ts +18 -0
  90. package/dist/remote-url.d.ts.map +1 -0
  91. package/dist/remote-url.js +66 -0
  92. package/dist/remote-url.js.map +1 -0
  93. package/dist/remote-url.test.d.ts +2 -0
  94. package/dist/remote-url.test.d.ts.map +1 -0
  95. package/dist/remote-url.test.js +116 -0
  96. package/dist/remote-url.test.js.map +1 -0
  97. package/dist/render.d.ts +54 -0
  98. package/dist/render.d.ts.map +1 -0
  99. package/dist/render.js +182 -0
  100. package/dist/render.js.map +1 -0
  101. package/dist/render.test.d.ts +2 -0
  102. package/dist/render.test.d.ts.map +1 -0
  103. package/dist/render.test.js +152 -0
  104. package/dist/render.test.js.map +1 -0
  105. package/dist/repo.d.ts +40 -0
  106. package/dist/repo.d.ts.map +1 -0
  107. package/dist/repo.js +214 -0
  108. package/dist/repo.js.map +1 -0
  109. package/dist/repo.test.d.ts +2 -0
  110. package/dist/repo.test.d.ts.map +1 -0
  111. package/dist/repo.test.js +234 -0
  112. package/dist/repo.test.js.map +1 -0
  113. package/dist/scan.d.ts +103 -0
  114. package/dist/scan.d.ts.map +1 -0
  115. package/dist/scan.js +436 -0
  116. package/dist/scan.js.map +1 -0
  117. package/dist/scan.test.d.ts +2 -0
  118. package/dist/scan.test.d.ts.map +1 -0
  119. package/dist/scan.test.js +437 -0
  120. package/dist/scan.test.js.map +1 -0
  121. package/dist/schemas.d.ts +50 -0
  122. package/dist/schemas.d.ts.map +1 -0
  123. package/dist/schemas.js +190 -0
  124. package/dist/schemas.js.map +1 -0
  125. package/dist/secret-markers.d.ts +34 -0
  126. package/dist/secret-markers.d.ts.map +1 -0
  127. package/dist/secret-markers.js +118 -0
  128. package/dist/secret-markers.js.map +1 -0
  129. package/dist/secret-markers.test.d.ts +2 -0
  130. package/dist/secret-markers.test.d.ts.map +1 -0
  131. package/dist/secret-markers.test.js +154 -0
  132. package/dist/secret-markers.test.js.map +1 -0
  133. package/dist/trust-boundary.d.ts +33 -0
  134. package/dist/trust-boundary.d.ts.map +1 -0
  135. package/dist/trust-boundary.js +77 -0
  136. package/dist/trust-boundary.js.map +1 -0
  137. package/dist/trust-boundary.test.d.ts +2 -0
  138. package/dist/trust-boundary.test.d.ts.map +1 -0
  139. package/dist/trust-boundary.test.js +170 -0
  140. package/dist/trust-boundary.test.js.map +1 -0
  141. package/dist/types.d.ts +47 -0
  142. package/dist/types.d.ts.map +1 -0
  143. package/dist/types.js +8 -0
  144. package/dist/types.js.map +1 -0
  145. package/dist/working-tree.d.ts +38 -0
  146. package/dist/working-tree.d.ts.map +1 -0
  147. package/dist/working-tree.js +133 -0
  148. package/dist/working-tree.js.map +1 -0
  149. package/dist/working-tree.test.d.ts +2 -0
  150. package/dist/working-tree.test.d.ts.map +1 -0
  151. package/dist/working-tree.test.js +162 -0
  152. package/dist/working-tree.test.js.map +1 -0
  153. package/package.json +40 -0
  154. package/src/age.ts +113 -0
  155. package/src/audit-log.test.ts +222 -0
  156. package/src/audit-log.ts +215 -0
  157. package/src/deny-set.test.ts +208 -0
  158. package/src/deny-set.ts +231 -0
  159. package/src/exceptions.ts +134 -0
  160. package/src/exit-codes.ts +5 -0
  161. package/src/first-touch.ts +172 -0
  162. package/src/import-graph.test.ts +239 -0
  163. package/src/index.ts +191 -0
  164. package/src/lock.test.ts +151 -0
  165. package/src/lock.ts +88 -0
  166. package/src/paths.test.ts +94 -0
  167. package/src/paths.ts +55 -0
  168. package/src/redaction.test.ts +81 -0
  169. package/src/redaction.ts +49 -0
  170. package/src/regex-safety.test.ts +194 -0
  171. package/src/regex-safety.ts +349 -0
  172. package/src/registry-mutate.test.ts +134 -0
  173. package/src/registry-mutate.ts +185 -0
  174. package/src/registry.test.ts +460 -0
  175. package/src/registry.ts +178 -0
  176. package/src/remote-url.test.ts +121 -0
  177. package/src/remote-url.ts +78 -0
  178. package/src/render.test.ts +206 -0
  179. package/src/render.ts +215 -0
  180. package/src/repo.test.ts +275 -0
  181. package/src/repo.ts +245 -0
  182. package/src/scan.test.ts +580 -0
  183. package/src/scan.ts +531 -0
  184. package/src/schemas.ts +207 -0
  185. package/src/secret-markers.test.ts +183 -0
  186. package/src/secret-markers.ts +145 -0
  187. package/src/trust-boundary.test.ts +198 -0
  188. package/src/trust-boundary.ts +98 -0
  189. package/src/types.ts +55 -0
  190. package/src/working-tree.test.ts +193 -0
  191. package/src/working-tree.ts +130 -0
@@ -0,0 +1,194 @@
1
+ // SPDX-License-Identifier: GPL-3.0-or-later
2
+ // Copyright (C) 2026 Richard Myers and contributors.
3
+ import { describe, it, after } from "node:test";
4
+ import assert from "node:assert/strict";
5
+ import {
6
+ validatePattern,
7
+ validatePatterns,
8
+ getRegexBackend,
9
+ setRegexBackendForTesting,
10
+ } from "./regex-safety.js";
11
+
12
// Unit tests for the single-pattern, in-process validator. Covers the
// happy path, each rejection reason, and both sides of the length cap.
describe("validatePattern", () => {
  it("accepts ordinary patterns", () => {
    assert.equal(validatePattern("acme-corp").ok, true);
    assert.equal(validatePattern("\\d{12}").ok, true);
    assert.equal(validatePattern("[a-z]+@example\\.com").ok, true);
  });

  it("rejects empty patterns", () => {
    const r = validatePattern("");
    assert.equal(r.ok, false);
    assert.match(r.reason!, /empty/);
  });

  it("rejects non-string input", () => {
    // Callers reading untyped YAML/JSON can hand us anything; the
    // validator must reject rather than throw.
    const r = validatePattern(undefined as unknown as string);
    assert.equal(r.ok, false);
  });

  it("rejects syntactically invalid regex", () => {
    const r = validatePattern("(unclosed");
    assert.equal(r.ok, false);
    assert.match(r.reason!, /invalid regex/);
  });

  it("rejects patterns over the length cap", () => {
    // 2049 is the first length past the 2048-character cap.
    const r = validatePattern("a".repeat(2049));
    assert.equal(r.ok, false);
    assert.match(r.reason!, /exceeds|length/i);
  });

  it("accepts patterns just under the length cap", () => {
    const r = validatePattern("a".repeat(2000));
    assert.equal(r.ok, true);
  });

  it("accepts patterns exactly at the length cap", () => {
    // Boundary case: the cap is inclusive (the validator rejects only
    // when length > 2048), so a 2048-character pattern must pass. This
    // pins the comparison operator against off-by-one regressions.
    const r = validatePattern("a".repeat(2048));
    assert.equal(r.ok, true, `expected a 2048-char pattern to validate; reason=${r.reason}`);
  });
});
47
+
48
// Non-strict batch validation: each input must land in exactly one of
// the two result buckets.
describe("validatePatterns", () => {
  it("splits valid and invalid patterns", () => {
    const { valid, invalid } = validatePatterns(["acme-corp", "(unclosed", "\\d+"]);
    assert.equal(valid.length, 2);
    assert.equal(invalid.length, 1);
    assert.equal(invalid[0]!.pattern, "(unclosed");
  });

  it("returns empty when all patterns are valid", () => {
    // "empty" here refers to the invalid bucket.
    const { valid, invalid } = validatePatterns(["a", "b", "c"]);
    assert.equal(valid.length, 3);
    assert.equal(invalid.length, 0);
  });

  it("returns empty when no patterns provided", () => {
    const { valid, invalid } = validatePatterns([]);
    assert.equal(valid.length, 0);
    assert.equal(invalid.length, 0);
  });
});
68
+
69
// Strict mode: the backtracking stress test runs in a child process that
// the parent can kill, so these tests may use genuinely catastrophic
// patterns without hanging the test runner.
describe("validatePatterns({ strict: true })", () => {
  const strictOpts = { strict: true };
  // Nested unbounded quantifier followed by a literal the all-'a' stress
  // input can never satisfy: the engine has to try every split.
  const CATASTROPHIC = "^(a+)+b$";

  it("accepts ordinary patterns", () => {
    const { valid, invalid } = validatePatterns(["acme-corp", "\\d{12}"], strictOpts);
    assert.equal(valid.length, 2);
    assert.equal(invalid.length, 0);
  });

  it("rejects syntactically invalid regex without spawning", () => {
    const { invalid } = validatePatterns(["(unclosed"], strictOpts);
    assert.equal(invalid.length, 1);
    assert.match(invalid[0]!.reason, /invalid regex/);
  });

  it("rejects patterns over the length cap without spawning", () => {
    const { invalid } = validatePatterns(["a".repeat(3000)], strictOpts);
    assert.equal(invalid.length, 1);
    assert.match(invalid[0]!.reason, /exceeds/);
  });

  it("flags catastrophic-backtracking patterns via subprocess", () => {
    const { valid, invalid } = validatePatterns([CATASTROPHIC], strictOpts);
    assert.equal(
      invalid.length,
      1,
      `expected the pattern to be rejected; got valid=${JSON.stringify(valid)}`,
    );
    assert.match(invalid[0]!.reason, /catastrophic|timed out|>/i);
  });

  it("returns empty for empty input without spawning", () => {
    const { valid, invalid } = validatePatterns([], strictOpts);
    assert.equal(valid.length, 0);
    assert.equal(invalid.length, 0);
  });

  it("preserves order across mixed valid/invalid input", () => {
    const { valid, invalid } = validatePatterns(["acme-corp", "(bad", "\\d+", ""], strictOpts);
    assert.deepEqual(valid, ["acme-corp", "\\d+"]);
    assert.equal(invalid.length, 2);
  });

  it("reports trailing patterns when subprocess is killed mid-batch", () => {
    // Truncation contract: when the worker hangs on a catastrophic
    // pattern and is killed, nothing may silently disappear. Input is
    // [good, catastrophic, good]; the last entry sits behind the one
    // that hangs, so the worker most likely never evaluates it.
    const trailing = "\\d{12}";
    const { valid, invalid } = validatePatterns(["acme-corp", CATASTROPHIC, trailing], strictOpts);

    // The leading well-formed pattern finishes before the hang.
    assert.ok(
      valid.includes("acme-corp"),
      `expected first pattern to validate; got valid=${JSON.stringify(valid)}`,
    );

    // The catastrophic pattern must be rejected with a reason naming
    // the failure mode.
    const culprit = invalid.find((entry) => entry.pattern === CATASTROPHIC);
    assert.ok(culprit, `expected ^(a+)+b$ in invalid; got invalid=${JSON.stringify(invalid)}`);
    assert.match(culprit!.reason, /timed out|catastrophic|>/i);

    // The trailing pattern may land in either bucket (valid if the
    // worker somehow got that far, invalid if it was cut off), but it
    // has to be reported somewhere.
    const trailingReported =
      valid.includes(trailing) || invalid.some((entry) => entry.pattern === trailing);
    assert.ok(
      trailingReported,
      `third pattern must be reported in valid or invalid; ` +
        `valid=${JSON.stringify(valid)} invalid=${JSON.stringify(invalid)}`,
    );

    // Total accounting: three inputs in, three verdicts out.
    assert.equal(valid.length + invalid.length, 3);
  });
});
156
+
157
// Backend detection and the test-only override. Every test clears the
// override when it finishes; the `after` hook is a final safety net.
describe("getRegexBackend", () => {
  const clearOverride = (): void => setRegexBackendForTesting(null);

  after(() => clearOverride());

  it("returns 're2' or 'in-process' depending on optional dep availability", () => {
    clearOverride();
    const detected = getRegexBackend();
    const isExpected = detected === "re2" || detected === "in-process";
    assert.ok(isExpected, `unexpected backend: ${detected}`);
  });

  it("respects setRegexBackendForTesting override", () => {
    for (const forced of ["in-process", "re2"] as const) {
      setRegexBackendForTesting(forced);
      assert.equal(getRegexBackend(), forced);
    }
    clearOverride();
  });

  it("validatePattern accepts ordinary patterns under both backends", () => {
    for (const forced of ["in-process", "re2"] as const) {
      setRegexBackendForTesting(forced);
      assert.equal(validatePattern("acme-corp").ok, true);
    }
    clearOverride();
  });

  it("validatePattern falls back to time-budget when re2 rejects (e.g. lookahead)", () => {
    // re2 has no lookahead support, but native RegExp does. The pattern
    // must validate whether or not re2 is actually installed.
    setRegexBackendForTesting("re2");
    const verdict = validatePattern("foo(?=bar)");
    assert.equal(
      verdict.ok,
      true,
      `expected lookahead pattern to validate; reason=${verdict.reason}`,
    );
    clearOverride();
  });
});
@@ -0,0 +1,349 @@
1
+ // SPDX-License-Identifier: GPL-3.0-or-later
2
+ // Copyright (C) 2026 Richard Myers and contributors.
3
+ import { spawnSync } from "node:child_process";
4
+ import { createRequire } from "node:module";
5
+
6
/** Verdict returned by the validators in this module. */
export interface PatternValidationResult {
  /** `true` when the input passed every check that was run. */
  ok: boolean;
  /** Human-readable explanation; the validators here set it whenever `ok` is `false`. */
  reason?: string;
}

/** Name of a validation strategy, as reported by `getRegexBackend`. */
export type RegexBackend = "re2" | "in-process" | "subprocess";
12
+
13
// `re2` is loaded through `createRequire` so the lookup is synchronous
// and validatePattern never has to become async. The probe runs on the
// first call to probeRe2() and its outcome is cached. `re2` is an
// optionalDependency: if it failed to install (e.g. no native build
// toolchain) the require throws and we stay on the in-process fallback.
const _require = createRequire(import.meta.url);

/** The only part of the `re2` export this module relies on. */
interface Re2Constructor {
  new (pattern: string, flags?: string): { test(s: string): boolean };
}

// Memoised probe state: `re2Probed` flips on the first attempt so a
// failed load is not retried on every call.
let re2Ctor: Re2Constructor | null = null;
let re2Probed = false;

/**
 * Return the `re2` constructor, or `null` when the module cannot be loaded.
 * Accepts both a bare constructor export and a `{ default }` wrapper.
 */
function probeRe2(): Re2Constructor | null {
  if (!re2Probed) {
    re2Probed = true;
    try {
      const loaded = _require("re2") as Re2Constructor | { default: Re2Constructor };
      if (typeof loaded === "function") {
        re2Ctor = loaded;
      } else if ("default" in loaded) {
        re2Ctor = loaded.default;
      } else {
        re2Ctor = loaded as unknown as Re2Constructor;
      }
    } catch {
      re2Ctor = null;
    }
  }
  return re2Ctor;
}
39
+
40
// Backend pinned by tests; `null` means "detect automatically".
let forcedBackend: RegexBackend | null = null;

/**
 * Test-only hook: pin the backend reported by {@link getRegexBackend}
 * (and therefore used by anything that consults it), regardless of what
 * is actually installed. Pass `null` to return to auto-detection.
 * Production callers must not use this.
 *
 * @param backend Backend to force, or `null` to clear the override.
 * @internal
 */
export function setRegexBackendForTesting(backend: RegexBackend | null): void {
  forcedBackend = backend;
}
53
+
54
/**
 * Report which backend is in effect for the *additional* pattern-safety
 * validation layer.
 *
 * Resolution order:
 *   1. A value set through {@link setRegexBackendForTesting} always wins.
 *      This is the only way this function ever returns `"subprocess"`;
 *      otherwise the subprocess is an internal detail of
 *      {@link validatePatterns} with `{ strict: true }`.
 *   2. `"re2"` when the optional `re2` dependency can be loaded.
 *   3. `"in-process"` otherwise: a best-effort timer that is read only
 *      after the stress match returns, so it may overrun its budget.
 *
 * Note: the backend concerns *validation* only. Per this module's design
 * notes the scanner keeps using Node's native RegExp, because marker
 * patterns may legitimately use lookahead constructs re2 cannot parse.
 */
export function getRegexBackend(): RegexBackend {
  if (forcedBackend !== null) {
    return forcedBackend;
  }
  if (probeRe2() === null) {
    return "in-process";
  }
  return "re2";
}
80
+
81
// Limits shared by the validators in this module.
const MAX_PATTERN_LENGTH = 2048; // max characters in a single pattern
const MAX_COMBINED_BYTES = 128 * 1024; // max UTF-8 bytes of a combined alternation
const REDOS_STRESS_LENGTH = 1000; // length of the all-'a' stress input
const REDOS_TIMEOUT_MS = 100; // nominal per-pattern budget (enforced with 10x slack)
const STRICT_BATCH_TIMEOUT_MS = 5000; // wall-clock cap for one strict subprocess batch

/**
 * Validate a single regex pattern for use as a marker.
 *
 * Checks, in order:
 *   1. Input is a non-empty string.
 *   2. Length <= 2048 characters.
 *   3. Compiles as a JavaScript RegExp (flag `i`) without throwing.
 *   4. A native-RegExp match against `'a'.repeat(1000)` finishes within
 *      the time budget. The nominal budget is 100ms; the enforced
 *      threshold is 10x that to tolerate noisy CI machines.
 *
 * The stress test in step 4 always runs on the native engine, even when
 * the optional `re2` dependency is installed. re2 compiling a pattern
 * only proves that *re2* would evaluate it in linear time; the scanner
 * evaluates markers with native RegExp, which backtracks. A pattern such
 * as `^(a+)+b$` is valid re2 syntax and still exponential under native
 * RegExp, so treating "re2 accepted it" as a pass would let exactly the
 * patterns this check exists for reach the hot path.
 *
 * Run at `render` time; bad patterns must not reach the hot path of `check`.
 *
 * Best-effort only: the timer is read after the match returns, so a
 * genuinely catastrophic pattern blocks this call for as long as the
 * match takes. Do not call on adversary-controlled input; see the
 * SECURITY WARNING on isInTimeBudget.
 *
 * @param pattern Regex source text to validate.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, reason }`.
 *
 * @internal Prefer {@link validatePatterns} (which can run strict,
 * subprocess-backed validation that is preemptable on catastrophic
 * backtracking). This single-pattern, in-process helper is exposed for
 * intra-repo callers that already pre-validate adversary-trust boundaries
 * but is not part of the supported public API.
 */
export function validatePattern(pattern: string): PatternValidationResult {
  // Non-strings are reported with the same reason as the empty string.
  if (typeof pattern !== "string" || pattern.length === 0) {
    return { ok: false, reason: "empty pattern" };
  }
  if (pattern.length > MAX_PATTERN_LENGTH) {
    return { ok: false, reason: `pattern exceeds ${MAX_PATTERN_LENGTH} characters` };
  }
  try {
    new RegExp(pattern, "i");
  } catch (err) {
    return { ok: false, reason: `invalid regex: ${(err as Error).message}` };
  }
  // Synchronous in-process timing check against the engine the scanner
  // actually uses. A worker-based watchdog would add startup overhead
  // disproportionate to per-pattern cost; for the expected marker-list
  // sizes (tens to low hundreds) this is acceptable for
  // trusted-by-policy operator input.
  if (!isInTimeBudget(pattern, REDOS_STRESS_LENGTH, REDOS_TIMEOUT_MS)) {
    return {
      ok: false,
      reason:
        `pattern took >${REDOS_TIMEOUT_MS}ms on stress input ` +
        `(possible catastrophic backtracking; consider re-anchoring)`,
    };
  }
  return { ok: true };
}
149
+
150
/** Options accepted by {@link validatePatterns}. */
export interface ValidatePatternsOptions {
  /**
   * When true, the backtracking stress test runs in a child process that
   * can be killed on timeout, so catastrophic patterns are caught
   * preemptively instead of after the fact. Costs roughly 50-200ms of
   * process-spawn overhead per batch.
   * Default: false (in-process timer).
   */
  strict?: boolean;
}

/**
 * Validate a list of patterns and partition them into accepted and
 * rejected entries.
 *
 * `strict: true` delegates the whole batch to the subprocess-backed
 * validator. That mode suits `render` and other one-off paths, not the
 * per-scan hot path.
 *
 * @param patterns Regex source strings to check.
 * @param opts See {@link ValidatePatternsOptions}.
 * @returns `valid` holds accepted patterns; `invalid` pairs each rejected
 *   pattern with the reason it was rejected.
 */
export function validatePatterns(
  patterns: string[],
  opts: ValidatePatternsOptions = {},
): { valid: string[]; invalid: { pattern: string; reason: string }[] } {
  if (opts.strict) {
    return validatePatternsStrict(patterns);
  }

  const accepted: string[] = [];
  const rejected: { pattern: string; reason: string }[] = [];
  for (let idx = 0; idx < patterns.length; idx++) {
    const candidate = patterns[idx] as string;
    const verdict = validatePattern(candidate);
    if (!verdict.ok) {
      rejected.push({ pattern: candidate, reason: verdict.reason ?? "unknown" });
      continue;
    }
    accepted.push(candidate);
  }
  return { valid: accepted, invalid: rejected };
}
185
+
186
/**
 * Subprocess-backed strict validation.
 *
 * Cheap checks (type, length, syntax) run in-process first. The
 * survivors are handed to a child `node` process that stress-tests them
 * one after another and prints one JSON line per finished pattern. The
 * child is killed if the whole batch exceeds STRICT_BATCH_TIMEOUT_MS.
 *
 * Because the child works strictly in order, the first pattern with no
 * result line after a timeout is the one that was in flight when the
 * child was killed. Only that pattern is reported as the suspected
 * catastrophic one. Later patterns were never evaluated; they are still
 * rejected (fail-closed) but with a reason that says so, so an operator
 * is not sent hunting for problems in patterns that are fine.
 *
 * NOTE(review): the batch timeout is fixed and does not scale with the
 * number of patterns, so a very large batch of individually acceptable
 * but slow patterns could exhaust it; confirm expected batch sizes.
 *
 * @param patterns Regex source strings to check.
 * @returns Same shape as {@link validatePatterns}. Rejections from the
 *   in-process pre-checks come first, followed by subprocess verdicts in
 *   input order.
 */
function validatePatternsStrict(
  patterns: string[],
): { valid: string[]; invalid: { pattern: string; reason: string }[] } {
  if (patterns.length === 0) return { valid: [], invalid: [] };

  // First pass: catch syntax + length errors in-process so we don't
  // pay process-spawn cost for them.
  const valid: string[] = [];
  const invalid: { pattern: string; reason: string }[] = [];
  const toCheckRedos: string[] = [];
  for (const p of patterns) {
    if (typeof p !== "string" || p.length === 0) {
      invalid.push({ pattern: p, reason: "empty pattern" });
      continue;
    }
    if (p.length > MAX_PATTERN_LENGTH) {
      invalid.push({
        pattern: p,
        reason: `pattern exceeds ${MAX_PATTERN_LENGTH} characters`,
      });
      continue;
    }
    try {
      new RegExp(p, "i");
    } catch (err) {
      invalid.push({ pattern: p, reason: `invalid regex: ${(err as Error).message}` });
      continue;
    }
    toCheckRedos.push(p);
  }

  if (toCheckRedos.length === 0) {
    return { valid, invalid };
  }

  // Worker source, run via `node -e`. It reads the job description from
  // stdin and emits `{ i, ok, reason? }` per pattern, where `i` indexes
  // into the `patterns` array that was sent.
  const script = `
    const fs = require('fs');
    const input = JSON.parse(fs.readFileSync(0, 'utf8'));
    const { patterns, stressLength, perPatternBudgetMs } = input;
    const stress = 'a'.repeat(stressLength);
    for (let i = 0; i < patterns.length; i++) {
      const p = patterns[i];
      const start = Date.now();
      let outcome;
      try {
        new RegExp(p, 'i').test(stress);
        const elapsed = Date.now() - start;
        if (elapsed > perPatternBudgetMs * 10) {
          outcome = { i, ok: false, reason:
            'pattern took >' + perPatternBudgetMs + 'ms on stress input ' +
            '(possible catastrophic backtracking; consider re-anchoring)' };
        } else {
          outcome = { i, ok: true };
        }
      } catch (err) {
        outcome = { i, ok: false, reason: 'invalid regex: ' + err.message };
      }
      process.stdout.write(JSON.stringify(outcome) + '\\n');
    }
  `;
  const result = spawnSync(process.execPath, ["-e", script], {
    input: JSON.stringify({
      patterns: toCheckRedos,
      stressLength: REDOS_STRESS_LENGTH,
      perPatternBudgetMs: REDOS_TIMEOUT_MS,
    }),
    encoding: "utf8",
    timeout: STRICT_BATCH_TIMEOUT_MS,
    maxBuffer: 16 * 1024 * 1024,
  });

  // Parse whatever stdout we got (one JSON object per line). A line cut
  // short by the kill fails to parse and is treated as "no result".
  const seenResults = new Map<number, PatternValidationResult>();
  for (const line of (result.stdout ?? "").split("\n")) {
    if (!line.trim()) continue;
    try {
      const obj = JSON.parse(line) as { i: number; ok: boolean; reason?: string };
      seenResults.set(obj.i, { ok: obj.ok, reason: obj.reason });
    } catch {
      /* skip malformed line */
    }
  }

  // spawnSync reports its own timeout through `error.code`; the signal
  // check is kept as a fallback.
  const timedOut =
    (result.error as NodeJS.ErrnoException | undefined)?.code === "ETIMEDOUT" ||
    result.signal === "SIGTERM" ||
    result.signal === "SIGKILL";

  // After a timeout, the first index with no result is the in-flight
  // pattern; anything after it was never started.
  let inFlightBlamed = false;
  for (let i = 0; i < toCheckRedos.length; i++) {
    const p = toCheckRedos[i]!;
    const r = seenResults.get(i);
    if (r) {
      if (r.ok) valid.push(p);
      else invalid.push({ pattern: p, reason: r.reason ?? "unknown" });
      continue;
    }

    let reason: string;
    if (!timedOut) {
      // Worker exited (or never started) without reporting this pattern.
      reason = "strict validation produced no result";
    } else if (!inFlightBlamed) {
      inFlightBlamed = true;
      reason = "strict validation timed out on this pattern (likely catastrophic backtracking)";
    } else {
      reason =
        "strict validation timed out before reaching this pattern " +
        "(an earlier pattern exhausted the batch budget; fix it and re-validate)";
    }
    invalid.push({ pattern: p, reason });
  }
  return { valid, invalid };
}
294
+
295
/**
 * Check that a combined alternation regex stays within the size cap.
 * The size is measured in UTF-8 bytes, not characters. Serves as a
 * safety net for render and the deny-set computation.
 *
 * @param combined The combined regex source text.
 * @returns `{ ok: true }` when within the cap, otherwise `{ ok: false, reason }`.
 */
export function validateCombinedSize(combined: string): PatternValidationResult {
  const sizeInBytes = Buffer.byteLength(combined, "utf8");
  return sizeInBytes > MAX_COMBINED_BYTES
    ? { ok: false, reason: `combined regex exceeds ${MAX_COMBINED_BYTES} bytes` }
    : { ok: true };
}
308
+
309
/**
 * SECURITY WARNING — adversary input.
 *
 * `isInTimeBudget` (and therefore the non-strict {@link validatePattern}
 * that calls it) is **not** a preemptive ReDoS guard. Node's regex
 * engine has no timeout: this function runs the pattern in-process
 * against a stress input and reads the clock only *after* the match
 * returns. A genuinely catastrophic pattern blocks the event loop for
 * as long as the match takes, and nothing else in the process makes
 * progress until it finishes.
 *
 * Consequently the non-strict {@link validatePattern} **must not** be
 * called on adversary-controlled input. Use it only for marker patterns
 * the operator has authored (registry / `engagements.yaml`), which are
 * trusted-by-policy.
 *
 * For any path that takes pattern strings from outside that trust
 * boundary — third-party config, network input, future MCP tool input —
 * use the strict mode of {@link validatePatterns} (`{ strict: true }`),
 * which spawns a subprocess that the parent can kill on timeout.
 *
 * @param pattern Regex source; must already be known to compile (an
 *   invalid pattern makes the `RegExp` constructor throw here).
 * @param stressLength Length of the all-`'a'` stress string.
 * @param budgetMs Nominal budget in milliseconds; the match passes if
 *   it took at most 10x this value.
 * @returns `true` if the match finished within the allowed time,
 *   `false` if it overran or the match itself threw.
 */
function isInTimeBudget(pattern: string, stressLength: number, budgetMs: number): boolean {
  const re = new RegExp(pattern, "i");
  const stress = "a".repeat(stressLength);
  // Use the monotonic clock: wall-clock time (Date.now) can jump when
  // the system clock is adjusted, which would make a fast pattern look
  // slow or hide a slow one.
  const startNs = process.hrtime.bigint();
  try {
    re.test(stress);
  } catch {
    return false;
  }
  const elapsedMs = Number(process.hrtime.bigint() - startNs) / 1e6;
  // Generous 10x slack so noisy CI machines don't produce false alarms.
  // This flags patterns that show signs of trouble; it does not bound
  // how long the match above was allowed to run.
  return elapsedMs <= budgetMs * 10;
}
@@ -0,0 +1,134 @@
1
+ // SPDX-License-Identifier: GPL-3.0-or-later
2
+ // Copyright (C) 2026 Richard Myers and contributors.
3
+ import { describe, it, before, after, beforeEach } from "node:test";
4
+ import assert from "node:assert/strict";
5
+ import {
6
+ mkdtempSync,
7
+ mkdirSync,
8
+ readFileSync,
9
+ writeFileSync,
10
+ rmSync,
11
+ } from "node:fs";
12
+ import { tmpdir } from "node:os";
13
+ import { join } from "node:path";
14
+ import { addMarkerPattern, addMarkerPatterns } from "./registry-mutate.js";
15
+ import {
16
+ EngagementNotFoundError,
17
+ PatternValidationError,
18
+ } from "./exceptions.js";
19
+
20
+ let tmp: string;
21
+ let home: string;
22
+ let registryPath: string;
23
+
24
+ const STUB = `\
25
+ schemaVersion: 2
26
+ always_block: []
27
+ engagements:
28
+ - id: foo-corp
29
+ name: Foo Corp
30
+ started: 2026-01-01
31
+ markers: [\\bfoo\\b]
32
+ - id: bar-co
33
+ name: Bar Co
34
+ started: 2026-01-15
35
+ markers: []
36
+ `;
37
+
38
// Whatever REPO_AEGIS_HOME was before this file touched it (possibly
// unset). Captured at load time so `after` can put it back and the
// override below does not leak into other code sharing this process.
const originalAegisHome = process.env["REPO_AEGIS_HOME"];

// One scratch directory for the whole file; `home` inside it is rebuilt
// before every test.
before(() => {
  tmp = mkdtempSync(join(tmpdir(), "repo-aegis-registry-mutate-"));
  home = join(tmp, "home");
  registryPath = join(home, "engagements.yaml");
});

after(() => {
  rmSync(tmp, { recursive: true, force: true });
  // Restore the environment exactly: assigning `undefined` would store
  // the string "undefined", so delete the key when it was unset.
  if (originalAegisHome === undefined) {
    delete process.env["REPO_AEGIS_HOME"];
  } else {
    process.env["REPO_AEGIS_HOME"] = originalAegisHome;
  }
});

// Fresh registry per test: wipe `home`, recreate the directories the
// tests expect, write the stub registry and point REPO_AEGIS_HOME at it.
beforeEach(() => {
  rmSync(home, { recursive: true, force: true });
  mkdirSync(join(home, "markers"), { recursive: true });
  mkdirSync(join(home, "state"), { recursive: true });
  writeFileSync(registryPath, STUB);
  process.env["REPO_AEGIS_HOME"] = home;
});
55
+
56
// Single-pattern add against the stub registry's `bar-co` engagement.
describe("addMarkerPattern — happy path", () => {
  const pattern = "\\bbar-co\\b";

  it("appends a single pattern and renders markers", () => {
    const { added, skipped } = addMarkerPattern("bar-co", pattern, { registryPath });
    assert.deepEqual(added, [pattern]);
    assert.deepEqual(skipped, []);
    // The pattern text must now appear verbatim in the registry file.
    const registryText = readFileSync(registryPath, "utf8");
    assert.match(registryText, /\\bbar-co\\b/);
  });

  it("idempotent — re-adding the same pattern is a no-op", () => {
    addMarkerPattern("bar-co", pattern, { registryPath });
    const second = addMarkerPattern("bar-co", pattern, { registryPath });
    assert.deepEqual(second.added, []);
    assert.deepEqual(second.skipped, [pattern]);
  });
});
72
+
73
// Bulk add: one pattern is seeded first so the batch mixes a duplicate
// with two genuinely new entries.
describe("addMarkerPatterns — bulk add", () => {
  it("appends multiple patterns with mixed new/duplicate handling", () => {
    const existing = "\\bbar-co\\b";
    const fresh = ["\\bbar\\.example\\b", "\\bBC-[0-9]+\\b"];
    addMarkerPattern("bar-co", existing, { registryPath });

    const { added, skipped } = addMarkerPatterns("bar-co", [existing, ...fresh], {
      registryPath,
    });

    assert.equal(added.length, 2);
    assert.equal(skipped.length, 1);
    for (const pattern of fresh) {
      assert.ok(added.includes(pattern));
    }
  });
});
87
+
88
// Failure modes: unknown engagement, bad regex, and all-or-nothing
// behaviour when a batch contains a bad pattern.
describe("addMarkerPatterns — error paths", () => {
  it("throws EngagementNotFoundError for unknown id", () => {
    const addToMissing = (): unknown =>
      addMarkerPattern("nonexistent", "\\bfoo\\b", { registryPath });
    assert.throws(addToMissing, EngagementNotFoundError);
  });

  it("throws PatternValidationError on invalid regex", () => {
    const addBroken = (): unknown =>
      addMarkerPattern("foo-corp", "(?<unclosed", { registryPath });
    assert.throws(addBroken, PatternValidationError);
  });

  it("validates ALL patterns before mutating any", () => {
    // A good pattern followed by a bad one: the call must throw and the
    // registry file must be byte-for-byte what it was beforehand.
    const snapshotBefore = readFileSync(registryPath, "utf8");
    const addMixed = (): unknown =>
      addMarkerPatterns("bar-co", ["\\bgood\\b", "(?<bad"], { registryPath });
    assert.throws(addMixed, PatternValidationError);
    const snapshotAfter = readFileSync(registryPath, "utf8");
    assert.equal(snapshotBefore, snapshotAfter);
  });
});
115
+
116
describe("addMarkerPatterns — [SEC M-3] lock scope", () => {
  it("two parallel calls on different engagements both succeed without lost updates", async () => {
    // Each add is deferred to a microtask and the two are awaited
    // together. The adds themselves are synchronous, so this only
    // approximates parallelism: in practice they run back to back. What
    // is asserted is that neither write clobbers the other.
    const deferAdd = (id: string, patterns: string[]) =>
      Promise.resolve().then(() => addMarkerPatterns(id, patterns, { registryPath }));

    const pendingFoo = deferAdd("foo-corp", ["\\bnew-foo\\b"]);
    const pendingBar = deferAdd("bar-co", ["\\bnew-bar\\b"]);
    const [fooResult, barResult] = await Promise.all([pendingFoo, pendingBar]);

    assert.deepEqual(fooResult.added, ["\\bnew-foo\\b"]);
    assert.deepEqual(barResult.added, ["\\bnew-bar\\b"]);

    // Both additions must be present in the file on disk.
    const registryText = readFileSync(registryPath, "utf8");
    assert.match(registryText, /\\bnew-foo\\b/);
    assert.match(registryText, /\\bnew-bar\\b/);
  });
});