auditor-lambda 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- import type { CoverageFileRecord, CoverageMatrix, FileCoverageRecord, Lens } from "./types.js";
1
+ import type { ClassificationStatus, CoverageFileRecord, CoverageMatrix, FileCoverageRecord, Lens } from "./types.js";
2
2
  export declare function createCoverageMatrix(paths: string[]): CoverageMatrix;
3
- export declare function markExcludedPath(matrix: CoverageMatrix, path: string, classificationStatus: string): void;
3
+ export declare function markExcludedPath(matrix: CoverageMatrix, path: string, classificationStatus: ClassificationStatus): void;
4
4
  export declare function applyUnitCoverage(matrix: CoverageMatrix, path: string, unitId: string, requiredLenses: Lens[]): void;
5
5
  export declare function applyFileCoverage(matrix: CoverageMatrix, fileCoverage: FileCoverageRecord[]): void;
6
6
  export declare function findUncoveredFiles(matrix: CoverageMatrix): CoverageFileRecord[];
@@ -1,9 +1,75 @@
1
+ import type { SpawnSyncOptionsWithStringEncoding, SpawnSyncReturns } from "node:child_process";
1
2
  import type { RepoManifest } from "../types.js";
2
3
  import type { FileDisposition, FileDispositionStatus } from "@audit-tools/shared";
4
+ /**
5
+ * Explicit out-of-scope reason for files excluded because the repository's own
6
+ * VCS ignore rules (.gitignore et al.) cover them.
7
+ */
8
+ export declare const VCS_IGNORED_REASON = "vcs_ignored";
9
+ /**
10
+ * At or below this many vcs-ignored files, the disposition carries one
11
+ * per-file record per ignored file. Above it, ignored files are aggregated by
12
+ * directory prefix so file_disposition.json stays bounded regardless of how
13
+ * many files the ignore rules cover.
14
+ */
15
+ export declare const VCS_IGNORED_PER_FILE_LIMIT = 200;
16
+ /**
17
+ * Guard threshold: when `git check-ignore` reports more than this share of all
18
+ * candidate files as ignored, the gitignore rule is skipped (guard branch
19
+ * `share_exceeded`) and only the existing targeted exclusions apply. A share
20
+ * of exactly 1.0 means the audit root itself is effectively ignored (guard
21
+ * branch `root_ignored`).
22
+ */
23
+ export declare const VCS_IGNORED_MAX_SHARE = 0.9;
24
+ export type VcsIgnoreGuardBranch = "root_ignored" | "share_exceeded";
25
+ /** Bounded directory-prefix aggregate used above VCS_IGNORED_PER_FILE_LIMIT. */
26
+ export interface VcsIgnoredAggregate {
27
+ /** Top-level directory prefix ("." for root-level files). */
28
+ prefix: string;
29
+ /** Number of vcs-ignored files under the prefix. */
30
+ count: number;
31
+ reason: typeof VCS_IGNORED_REASON;
32
+ }
33
+ /**
34
+ * Outcome record for the gitignore disposition rule, persisted alongside the
35
+ * per-file records so the scope pre-digest / intent checkpoint can surface
36
+ * skipped-rule and guard decisions.
37
+ */
38
+ export interface VcsIgnoreSummary {
39
+ /** True when gitignore-based exclusions were applied to the disposition. */
40
+ applied: boolean;
41
+ /** Number of candidate files `git check-ignore` reported as ignored. */
42
+ ignored_count: number;
43
+ /** Why the gitignore rule was skipped (clean fallback or guard). */
44
+ skipped_reason?: string;
45
+ /** Which guard branch fired when a guard skipped the rule. */
46
+ guard_branch?: VcsIgnoreGuardBranch;
47
+ /** Directory-prefix aggregates emitted above VCS_IGNORED_PER_FILE_LIMIT. */
48
+ aggregates?: VcsIgnoredAggregate[];
49
+ }
50
+ /** FileDisposition enriched with the gitignore-rule outcome record. */
51
+ export interface FileDispositionWithVcsIgnore extends FileDisposition {
52
+ vcs_ignore?: VcsIgnoreSummary;
53
+ }
54
+ /** Injection seam for the single batched `git check-ignore` spawn (tests). */
55
+ export type CheckIgnoreSpawn = (command: string, args: readonly string[], options: SpawnSyncOptionsWithStringEncoding) => SpawnSyncReturns<string>;
56
+ export interface BuildFileDispositionOptions {
57
+ /**
58
+ * Audit root. When provided (and a git work tree), enables the batched
59
+ * `git check-ignore --stdin` pass that classifies vcs-ignored files
60
+ * out of scope. Omit for the heuristics-only disposition.
61
+ */
62
+ root?: string;
63
+ /** Test seam: replacement for child_process.spawnSync. */
64
+ spawn?: CheckIgnoreSpawn;
65
+ }
3
66
  /**
4
67
  * Applies shared path heuristics to mark files that should be excluded or
5
- * down-scoped before audit planning begins.
68
+ * down-scoped before audit planning begins. When `options.root` is provided,
69
+ * additionally classifies vcs-ignored files out of scope via one batched
70
+ * `git check-ignore --stdin` pass, with clean fallback to the heuristics-only
71
+ * disposition whenever git is unavailable or a safety guard fires.
6
72
  */
7
- export declare function buildFileDisposition(repoManifest: RepoManifest): FileDisposition;
73
+ export declare function buildFileDisposition(repoManifest: RepoManifest, options?: BuildFileDispositionOptions): FileDispositionWithVcsIgnore;
8
74
  export declare function buildDispositionMap(disposition?: FileDisposition): Map<string, FileDispositionStatus>;
9
- export declare function isAuditExcludedStatus(status: FileDispositionStatus): boolean;
75
+ export declare function isAuditExcludedStatus(status: FileDispositionStatus): status is Exclude<FileDispositionStatus, "included">;
@@ -1,3 +1,4 @@
1
+ import { spawnSync } from "node:child_process";
1
2
  import { isNodeModulesOrGit, isPackageManagerCachePath, isTmpPath, isBuildOutput, isVendorPath, isBinaryArtifact, isLicensePath, isLockfilePath, isLogPath, isDocPath, isGeneratedPath, isAuditArtifactPath, isAuditToolOutputArtifact, isGeneratedTestArtifactPath, isGeneratedInstallArtifactPath, isExamplesOrFixturesPath, normalizeExtractorPath, } from "./pathPatterns.js";
2
3
  function inferDisposition(path) {
3
4
  const normalized = normalizeExtractorPath(path);
@@ -87,13 +88,174 @@ function inferDisposition(path) {
87
88
  reason: "Default included source or config artifact.",
88
89
  };
89
90
  }
91
+ /**
92
+ * Explicit out-of-scope reason for files excluded because the repository's own
93
+ * VCS ignore rules (.gitignore et al.) cover them.
94
+ */
95
+ export const VCS_IGNORED_REASON = "vcs_ignored";
96
+ /**
97
+ * At or below this many vcs-ignored files, the disposition carries one
98
+ * per-file record per ignored file. Above it, ignored files are aggregated by
99
+ * directory prefix so file_disposition.json stays bounded regardless of how
100
+ * many files the ignore rules cover.
101
+ */
102
+ export const VCS_IGNORED_PER_FILE_LIMIT = 200;
103
+ /**
104
+ * Guard threshold: when `git check-ignore` reports more than this share of all
105
+ * candidate files as ignored, the gitignore rule is skipped (guard branch
106
+ * `share_exceeded`) and only the existing targeted exclusions apply. A share
107
+ * of exactly 1.0 means the audit root itself is effectively ignored (guard
108
+ * branch `root_ignored`).
109
+ */
110
+ export const VCS_IGNORED_MAX_SHARE = 0.9;
111
+ function toPosixPath(path) {
112
+ return path.replace(/\\/g, "/");
113
+ }
114
+ /**
115
+ * Evaluates every candidate path through ONE batched
116
+ * `git check-ignore --stdin -z` invocation (never per-file). Exit-code
117
+ * contract: 0 = some paths ignored, 1 = none ignored (success, empty set),
118
+ * anything else (128 / git absent / not a work tree) = clean fallback —
119
+ * the caller keeps only the existing targeted exclusions. Never throws.
120
+ */
121
+ function evaluateVcsIgnored(root, candidatePosixPaths, spawn) {
122
+ if (candidatePosixPaths.length === 0) {
123
+ return { ok: true, ignored: new Set() };
124
+ }
125
+ let result;
126
+ try {
127
+ result = spawn("git", ["check-ignore", "--stdin", "-z"], {
128
+ cwd: root,
129
+ input: candidatePosixPaths.map((path) => `${path}\0`).join(""),
130
+ encoding: "utf8",
131
+ maxBuffer: 256 * 1024 * 1024,
132
+ windowsHide: true,
133
+ });
134
+ }
135
+ catch (error) {
136
+ const message = error instanceof Error ? error.message : String(error);
137
+ return { ok: false, reason: `git check-ignore spawn failed: ${message}` };
138
+ }
139
+ if (result.error) {
140
+ const code = result.error.code;
141
+ return {
142
+ ok: false,
143
+ reason: code === "ENOENT"
144
+ ? "git executable not found (ENOENT)"
145
+ : `git check-ignore failed to start: ${result.error.message}`,
146
+ };
147
+ }
148
+ if (result.status === 0 || result.status === 1) {
149
+ // 0 = some paths ignored (stdout has the set); 1 = no paths ignored.
150
+ const ignored = new Set((result.stdout ?? "").split("\0").filter((path) => path.length > 0));
151
+ return { ok: true, ignored };
152
+ }
153
+ const stderrFirstLine = (result.stderr ?? "").trim().split(/\r?\n/, 1)[0] ?? "";
154
+ return {
155
+ ok: false,
156
+ reason: `git check-ignore exited ${result.status ?? "by signal"}` +
157
+ (stderrFirstLine ? `: ${stderrFirstLine}` : ""),
158
+ };
159
+ }
160
+ function topLevelPrefix(posixPath) {
161
+ const slash = posixPath.indexOf("/");
162
+ return slash === -1 ? "." : posixPath.slice(0, slash);
163
+ }
164
+ function aggregateByPrefix(posixPaths) {
165
+ const counts = new Map();
166
+ for (const path of posixPaths) {
167
+ const prefix = topLevelPrefix(path);
168
+ counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
169
+ }
170
+ return [...counts.entries()]
171
+ .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
172
+ .map(([prefix, count]) => ({ prefix, count, reason: VCS_IGNORED_REASON }));
173
+ }
90
174
  /**
91
175
  * Applies shared path heuristics to mark files that should be excluded or
92
- * down-scoped before audit planning begins.
176
+ * down-scoped before audit planning begins. When `options.root` is provided,
177
+ * additionally classifies vcs-ignored files out of scope via one batched
178
+ * `git check-ignore --stdin` pass, with clean fallback to the heuristics-only
179
+ * disposition whenever git is unavailable or a safety guard fires.
93
180
  */
94
- export function buildFileDisposition(repoManifest) {
181
+ export function buildFileDisposition(repoManifest, options = {}) {
182
+ const baseline = repoManifest.files.map((file) => inferDisposition(file.path));
183
+ if (!options.root) {
184
+ return { files: baseline };
185
+ }
186
+ const candidatePosix = repoManifest.files.map((file) => toPosixPath(file.path));
187
+ const evaluation = evaluateVcsIgnored(options.root, candidatePosix, options.spawn ?? spawnSync);
188
+ if (!evaluation.ok) {
189
+ // Clean fallback: keep the existing targeted exclusions only.
190
+ return {
191
+ files: baseline,
192
+ vcs_ignore: {
193
+ applied: false,
194
+ ignored_count: 0,
195
+ skipped_reason: `gitignore rule skipped: ${evaluation.reason}`,
196
+ },
197
+ };
198
+ }
199
+ const total = candidatePosix.length;
200
+ const ignoredCount = candidatePosix.filter((path) => evaluation.ignored.has(path)).length;
201
+ // Root-ignored guard: every candidate ignored means the audit root itself is
202
+ // (effectively) ignored — applying the rule would empty the audit scope.
203
+ if (total > 0 && ignoredCount === total) {
204
+ return {
205
+ files: baseline,
206
+ vcs_ignore: {
207
+ applied: false,
208
+ ignored_count: ignoredCount,
209
+ guard_branch: "root_ignored",
210
+ skipped_reason: "gitignore rule skipped: audit root itself is ignored (every candidate file matched ignore rules)",
211
+ },
212
+ };
213
+ }
214
+ // Share guard: an ignore rule that would exclude more than
215
+ // VCS_IGNORED_MAX_SHARE of candidates is more likely a mis-scope than a
216
+ // legitimate exclusion — skip it and surface the decision.
217
+ if (total > 0 && ignoredCount / total > VCS_IGNORED_MAX_SHARE) {
218
+ return {
219
+ files: baseline,
220
+ vcs_ignore: {
221
+ applied: false,
222
+ ignored_count: ignoredCount,
223
+ guard_branch: "share_exceeded",
224
+ skipped_reason: `gitignore rule skipped: ignored share ${(ignoredCount / total).toFixed(3)} ` +
225
+ `exceeds VCS_IGNORED_MAX_SHARE (${VCS_IGNORED_MAX_SHARE})`,
226
+ },
227
+ };
228
+ }
229
+ // Existing targeted exclusions take precedence; the gitignore rule only
230
+ // re-classifies files the heuristics would otherwise include.
231
+ const newlyIgnoredPosix = [];
232
+ for (let i = 0; i < baseline.length; i++) {
233
+ if (baseline[i].status === "included" &&
234
+ evaluation.ignored.has(candidatePosix[i])) {
235
+ newlyIgnoredPosix.push(candidatePosix[i]);
236
+ }
237
+ }
238
+ if (newlyIgnoredPosix.length <= VCS_IGNORED_PER_FILE_LIMIT) {
239
+ const newlyIgnoredSet = new Set(newlyIgnoredPosix);
240
+ const files = baseline.map((item, i) => newlyIgnoredSet.has(candidatePosix[i]) && item.status === "included"
241
+ ? { path: item.path, status: "excluded", reason: VCS_IGNORED_REASON }
242
+ : item);
243
+ return {
244
+ files,
245
+ vcs_ignore: { applied: true, ignored_count: ignoredCount },
246
+ };
247
+ }
248
+ // Bounded representation: above the per-file limit, drop per-file records
249
+ // for vcs-ignored files and emit directory-prefix aggregates instead.
250
+ const newlyIgnoredSet = new Set(newlyIgnoredPosix);
251
+ const files = baseline.filter((item, i) => !(item.status === "included" && newlyIgnoredSet.has(candidatePosix[i])));
95
252
  return {
96
- files: repoManifest.files.map((file) => inferDisposition(file.path)),
253
+ files,
254
+ vcs_ignore: {
255
+ applied: true,
256
+ ignored_count: ignoredCount,
257
+ aggregates: aggregateByPrefix(newlyIgnoredPosix),
258
+ },
97
259
  };
98
260
  }
99
261
  export function buildDispositionMap(disposition) {
@@ -83,7 +83,7 @@ export async function runIntakeExecutor(bundle, root, artifactsDir) {
83
83
  ignore,
84
84
  hash_files: true,
85
85
  });
86
- const disposition = buildFileDisposition(repoManifest);
86
+ const disposition = buildFileDisposition(repoManifest, { root });
87
87
  const auditableCount = disposition.files.filter((file) => !isAuditExcludedStatus(file.status)).length;
88
88
  if (auditableCount === 0) {
89
89
  throw new Error(`No auditable files found in ${root}. The repository may be empty, generated-only, documentation-only, or filtered by .auditorignore.`);
@@ -10,7 +10,8 @@ export async function runStructureExecutor(bundle, root) {
10
10
  throw new Error("Cannot run structure executor without repo_manifest");
11
11
  }
12
12
  const externalAnalyzerResults = bundle.external_analyzer_results;
13
- const disposition = bundle.file_disposition ?? buildFileDisposition(bundle.repo_manifest);
13
+ const disposition = bundle.file_disposition ??
14
+ buildFileDisposition(bundle.repo_manifest, root ? { root } : {});
14
15
  const unitManifest = buildUnitManifest(bundle.repo_manifest, disposition);
15
16
  const graphBundle = root
16
17
  ? await buildGraphBundleFromFs(bundle.repo_manifest, root, disposition, {
@@ -1,6 +1,53 @@
1
1
  import type { Finding } from "../types.js";
2
2
  /**
3
- * Re-key finalized findings with globally-unique, content-derived ids at the
3
+ * The stable semantic fields a finding's identity may be derived from.
4
+ *
5
+ * Volatile, content-derived values — unit ids, line numbers, pass
6
+ * ordinals/pass_id, timestamps — are deliberately absent from this shape so
7
+ * they can never reach the hash input at any ladder tier. The raw title is
8
+ * accepted but only ever influences identity after aggressive normalization
9
+ * (tier 3), and only when no stronger tier applies.
10
+ */
11
+ export interface FindingIdentityFields {
12
+ /** Repo-relative primary file path of the structural anchor, if any. */
13
+ anchor_path?: string;
14
+ /** Symbol/scope identifier at the anchor (the anchor's unit/scope). */
15
+ anchor_symbol?: string;
16
+ /** Rule/category identifier. */
17
+ category?: string;
18
+ /** Lens — paired with category at tier 2 (the existing category convention). */
19
+ lens?: string;
20
+ /** Title; aggressively normalized before it can influence identity. */
21
+ title?: string;
22
+ }
23
+ /**
24
+ * The single, explicit, deterministic fallback ladder for finding identity.
25
+ * The same semantic finding always yields the same signature across passes
26
+ * and runs; the ladder consults stable semantic fields only:
27
+ *
28
+ * 1. **Structural anchor** — the repo-relative primary file path
29
+ * (separator-normalized, case-folded) together with the anchor's
30
+ * symbol/scope. The unit/scope is part of the hashed identity, so two
31
+ * findings at the same path but different scopes get distinct ids.
32
+ * 2. **Rule/category** — when no structural anchor is available, the
33
+ * rule/category identifier paired with the lens (the existing category
34
+ * convention).
35
+ * 3. **Normalized title** — when neither anchor nor rule/category exists, an
36
+ * aggressively normalized title (see {@link normalizeTitle}).
37
+ *
38
+ * Content-derived unit ids, line numbers, pass ordinals/pass_id, timestamps,
39
+ * and raw (unnormalized) titles are never part of the signature: they do not
40
+ * appear in {@link FindingIdentityFields}, so no tier can hash them.
41
+ *
42
+ * The signature is also independent of a finding's merged affected-file
43
+ * *list*: at most the single structural anchor (primary path + scope) can
44
+ * contribute, never the full file set, so a finding's identity stays put as
45
+ * mergeFindings() unions additional re-emitted files into it across passes
46
+ * and runs.
47
+ */
48
+ export declare function findingIdentitySignature(fields: FindingIdentityFields): string;
49
+ /**
50
+ * Re-key finalized findings with globally-unique, content-addressed ids at the
4
51
  * synthesis boundary.
5
52
  *
6
53
  * Worker packets assign locally-scoped ids (e.g. `MNT-001`) that collide across
@@ -9,10 +56,18 @@ import type { Finding } from "../types.js";
9
56
  * findings into one block), and `work_blocks.finding_ids` / theme `finding_ids` /
10
57
  * the remediator's per-finding addressing can no longer resolve a single finding.
11
58
  *
12
- * The id is `<LENS_PREFIX>-<sha256(content)[:8]>`, deterministic and stable so a
13
- * re-synthesis of the same findings produces the same ids. A vanishingly rare
14
- * hash collision between two *distinct* findings is broken deterministically with
15
- * a numeric suffix (findings arrive in mergeFindings()' stable order).
59
+ * The id is `<LENS_PREFIX>-<sha256(signature)[:8]>`, where the signature comes
60
+ * from the deterministic fallback ladder in {@link findingIdentitySignature} —
61
+ * stable semantic fields only, so the same semantic finding keeps the same id
62
+ * across passes and re-syntheses even when volatile fields (line numbers, pass
63
+ * ordinals, unit ids, timestamps, title phrasing) drift. By the time findings
64
+ * reach this function, mergeFindings() has already collapsed every re-emission
65
+ * of one file-independent identity (exact normalized lens|category|title) into
66
+ * a single multi-file finding, and the hash never covers the merged file list,
67
+ * so the id also stays stable as a finding's merged file set grows. Distinct
68
+ * findings that share a signature (e.g. two issues anchored at the same
69
+ * path + scope) are disambiguated deterministically with a numeric suffix
70
+ * (findings arrive in mergeFindings()' stable order).
16
71
  *
17
72
  * `related_findings`, when present, referenced the old colliding ids and cannot
18
73
  * be remapped unambiguously, so it is dropped rather than left dangling. (It is
@@ -15,26 +15,86 @@ const LENS_ID_PREFIX = {
15
15
  config_deployment: "CFG",
16
16
  observability: "OBS",
17
17
  };
18
+ /** Separator-normalized (always `/`), case-folded, repo-relative path. */
19
+ function normalizeAnchorPath(path) {
20
+ return (path ?? "")
21
+ .trim()
22
+ .replace(/\\/g, "/")
23
+ .replace(/^\.\//, "")
24
+ .toLowerCase();
25
+ }
26
+ /**
27
+ * Aggressively normalize a title so volatile content cannot influence
28
+ * identity: case-folded; embedded file paths (with optional `:line[:col]`
29
+ * suffixes) stripped; counts, line numbers, and all other numerals stripped;
30
+ * punctuation collapsed; whitespace collapsed to single spaces.
31
+ */
32
+ function normalizeTitle(title) {
33
+ return (title ?? "")
34
+ .toLowerCase()
35
+ // File paths, optionally suffixed with :line or :line:col.
36
+ .replace(/[\w.~-]*[\\/][\w.\\/~-]*(:\d+(:\d+)?)?/g, " ")
37
+ // Counts, line numbers, and any other numerals.
38
+ .replace(/\d+/g, " ")
39
+ // Collapse punctuation, then whitespace.
40
+ .replace(/[^a-z\s]/g, " ")
41
+ .replace(/\s+/g, " ")
42
+ .trim();
43
+ }
18
44
  /**
19
- * A stable signature of a finding's identity-bearing content. The same logical
20
- * finding yields the same signature across runs (so its id is reproducible),
21
- * while two distinct findings — which only coexist after surviving merge and
22
- * dedup with different content — yield different signatures.
45
+ * The single, explicit, deterministic fallback ladder for finding identity.
46
+ * The same semantic finding always yields the same signature across passes
47
+ * and runs; the ladder consults stable semantic fields only:
48
+ *
49
+ * 1. **Structural anchor** — the repo-relative primary file path
50
+ * (separator-normalized, case-folded) together with the anchor's
51
+ * symbol/scope. The unit/scope is part of the hashed identity, so two
52
+ * findings at the same path but different scopes get distinct ids.
53
+ * 2. **Rule/category** — when no structural anchor is available, the
54
+ * rule/category identifier paired with the lens (the existing category
55
+ * convention).
56
+ * 3. **Normalized title** — when neither anchor nor rule/category exists, an
57
+ * aggressively normalized title (see {@link normalizeTitle}).
58
+ *
59
+ * Content-derived unit ids, line numbers, pass ordinals/pass_id, timestamps,
60
+ * and raw (unnormalized) titles are never part of the signature: they do not
61
+ * appear in {@link FindingIdentityFields}, so no tier can hash them.
62
+ *
63
+ * The signature is also independent of a finding's merged affected-file
64
+ * *list*: at most the single structural anchor (primary path + scope) can
65
+ * contribute, never the full file set, so a finding's identity stays put as
66
+ * mergeFindings() unions additional re-emitted files into it across passes
67
+ * and runs.
23
68
  */
24
- function contentSignature(finding) {
25
- const files = finding.affected_files
26
- .map((file) => `${file.path}:${file.line_start ?? ""}:${file.line_end ?? ""}:${file.symbol ?? ""}`)
27
- .sort()
28
- .join(",");
29
- return [
30
- finding.lens.trim().toLowerCase(),
31
- finding.category.trim().toLowerCase(),
32
- finding.title.trim().toLowerCase(),
33
- files,
34
- ].join("|");
69
+ export function findingIdentitySignature(fields) {
70
+ // Tier 1: structural anchor (path + symbol/scope).
71
+ const anchorPath = normalizeAnchorPath(fields.anchor_path);
72
+ if (anchorPath !== "") {
73
+ const scope = (fields.anchor_symbol ?? "").trim();
74
+ return `anchor|${anchorPath}|${scope}`;
75
+ }
76
+ // Tier 2: rule/category (+ lens, the existing category convention).
77
+ const category = (fields.category ?? "").trim().toLowerCase();
78
+ if (category !== "") {
79
+ const lens = (fields.lens ?? "").trim().toLowerCase();
80
+ return `rule|${lens}|${category}`;
81
+ }
82
+ // Tier 3: aggressively normalized title.
83
+ return `title|${normalizeTitle(fields.title)}`;
84
+ }
85
+ /** Extract only the stable identity-bearing fields from a finding. */
86
+ function identityFields(finding) {
87
+ const anchor = finding.affected_files[0];
88
+ return {
89
+ anchor_path: anchor?.path,
90
+ anchor_symbol: anchor?.symbol,
91
+ category: finding.category,
92
+ lens: finding.lens,
93
+ title: finding.title,
94
+ };
35
95
  }
36
96
  /**
37
- * Re-key finalized findings with globally-unique, content-derived ids at the
97
+ * Re-key finalized findings with globally-unique, content-addressed ids at the
38
98
  * synthesis boundary.
39
99
  *
40
100
  * Worker packets assign locally-scoped ids (e.g. `MNT-001`) that collide across
@@ -43,10 +103,18 @@ function contentSignature(finding) {
43
103
  * findings into one block), and `work_blocks.finding_ids` / theme `finding_ids` /
44
104
  * the remediator's per-finding addressing can no longer resolve a single finding.
45
105
  *
46
- * The id is `<LENS_PREFIX>-<sha256(content)[:8]>`, deterministic and stable so a
47
- * re-synthesis of the same findings produces the same ids. A vanishingly rare
48
- * hash collision between two *distinct* findings is broken deterministically with
49
- * a numeric suffix (findings arrive in mergeFindings()' stable order).
106
+ * The id is `<LENS_PREFIX>-<sha256(signature)[:8]>`, where the signature comes
107
+ * from the deterministic fallback ladder in {@link findingIdentitySignature} —
108
+ * stable semantic fields only, so the same semantic finding keeps the same id
109
+ * across passes and re-syntheses even when volatile fields (line numbers, pass
110
+ * ordinals, unit ids, timestamps, title phrasing) drift. By the time findings
111
+ * reach this function, mergeFindings() has already collapsed every re-emission
112
+ * of one file-independent identity (exact normalized lens|category|title) into
113
+ * a single multi-file finding, and the hash never covers the merged file list,
114
+ * so the id also stays stable as a finding's merged file set grows. Distinct
115
+ * findings that share a signature (e.g. two issues anchored at the same
116
+ * path + scope) are disambiguated deterministically with a numeric suffix
117
+ * (findings arrive in mergeFindings()' stable order).
50
118
  *
51
119
  * `related_findings`, when present, referenced the old colliding ids and cannot
52
120
  * be remapped unambiguously, so it is dropped rather than left dangling. (It is
@@ -57,7 +125,7 @@ export function assignStableFindingIds(findings) {
57
125
  return findings.map((finding) => {
58
126
  const prefix = LENS_ID_PREFIX[finding.lens.trim().toLowerCase()] ?? "FND";
59
127
  const hash = createHash("sha256")
60
- .update(contentSignature(finding))
128
+ .update(findingIdentitySignature(identityFields(finding)))
61
129
  .digest("hex")
62
130
  .slice(0, 8);
63
131
  let id = `${prefix}-${hash}`;
@@ -34,14 +34,22 @@ function filePathOverlap(a, b) {
34
34
  function primaryPath(finding) {
35
35
  return finding.affected_files[0]?.path ?? "";
36
36
  }
37
+ /**
38
+ * File-independent finding identity. Re-emissions of the same logical finding
39
+ * (exact normalized lens + category + title) across files, units, and passes
40
+ * share one key, so the exact-key merge collapses them into a single finding
41
+ * whose affected_files / evidence are the union of every re-emission.
42
+ *
43
+ * Cross-file merging happens ONLY on this exact equality — the fuzzy
44
+ * (Jaccard-title) dedup passes below stay grouped by primary path, which is
45
+ * what guarantees that distinct problems in different units never collapse
46
+ * on mere similarity.
47
+ */
37
48
  function findingKey(finding) {
38
49
  return [
39
50
  normalizeText(finding.lens),
40
51
  normalizeText(finding.category),
41
52
  normalizeText(finding.title),
42
- primaryPath(finding),
43
- String(finding.affected_files[0]?.line_start ?? ""),
44
- String(finding.affected_files[0]?.line_end ?? ""),
45
53
  ].join("|");
46
54
  }
47
55
  function runtimeSummary(report) {
@@ -213,6 +221,44 @@ function relevantExternalEvidence(finding, results) {
213
221
  .filter((item) => findingPaths.has(item.path))
214
222
  .map((item) => `external:${results.tool}:${item.path}:${item.summary}`);
215
223
  }
224
+ /**
225
+ * Insert a finding into the identity-keyed map, or absorb it into the existing
226
+ * finding with the same identity: affected_files and evidence are unioned,
227
+ * severity / confidence escalate to the maximum rank seen, `systemic` ORs,
228
+ * impact / likelihood backfill, and the longest summary wins.
229
+ */
230
+ function upsertFinding(merged, finding) {
231
+ const key = findingKey(finding);
232
+ const existing = merged.get(key);
233
+ if (!existing) {
234
+ merged.set(key, {
235
+ ...finding,
236
+ affected_files: [...finding.affected_files],
237
+ evidence: [...(finding.evidence ?? [])],
238
+ });
239
+ return;
240
+ }
241
+ if (severityRank(finding.severity) > severityRank(existing.severity)) {
242
+ existing.severity = finding.severity;
243
+ }
244
+ if (confidenceRank(finding.confidence) > confidenceRank(existing.confidence)) {
245
+ existing.confidence = finding.confidence;
246
+ }
247
+ existing.systemic = Boolean(existing.systemic || finding.systemic);
248
+ existing.impact = existing.impact ?? finding.impact;
249
+ existing.likelihood = existing.likelihood ?? finding.likelihood;
250
+ existing.summary =
251
+ existing.summary.length >= finding.summary.length
252
+ ? existing.summary
253
+ : finding.summary;
254
+ mergeAffectedFiles(existing, finding);
255
+ existing.evidence = [
256
+ ...new Set([
257
+ ...(existing.evidence ?? []),
258
+ ...(finding.evidence ?? []),
259
+ ]),
260
+ ];
261
+ }
216
262
  export function mergeFindings(results, runtimeReport, externalAnalyzerResults, designAssessment) {
217
263
  const merged = new Map();
218
264
  const allDesignFindings = [
@@ -220,45 +266,11 @@ export function mergeFindings(results, runtimeReport, externalAnalyzerResults, d
220
266
  ...(designAssessment?.review_findings ?? []),
221
267
  ];
222
268
  for (const finding of allDesignFindings) {
223
- const key = findingKey(finding);
224
- merged.set(key, {
225
- ...finding,
226
- affected_files: [...finding.affected_files],
227
- evidence: [...(finding.evidence ?? [])],
228
- });
269
+ upsertFinding(merged, finding);
229
270
  }
230
271
  for (const result of results) {
231
272
  for (const finding of result.findings) {
232
- const key = findingKey(finding);
233
- const existing = merged.get(key);
234
- if (!existing) {
235
- merged.set(key, {
236
- ...finding,
237
- affected_files: [...finding.affected_files],
238
- evidence: [...(finding.evidence ?? [])],
239
- });
240
- continue;
241
- }
242
- if (severityRank(finding.severity) > severityRank(existing.severity)) {
243
- existing.severity = finding.severity;
244
- }
245
- if (confidenceRank(finding.confidence) > confidenceRank(existing.confidence)) {
246
- existing.confidence = finding.confidence;
247
- }
248
- existing.systemic = Boolean(existing.systemic || finding.systemic);
249
- existing.impact = existing.impact ?? finding.impact;
250
- existing.likelihood = existing.likelihood ?? finding.likelihood;
251
- existing.summary =
252
- existing.summary.length >= finding.summary.length
253
- ? existing.summary
254
- : finding.summary;
255
- mergeAffectedFiles(existing, finding);
256
- existing.evidence = [
257
- ...new Set([
258
- ...(existing.evidence ?? []),
259
- ...(finding.evidence ?? []),
260
- ]),
261
- ];
273
+ upsertFinding(merged, finding);
262
274
  }
263
275
  }
264
276
  for (const finding of merged.values()) {
@@ -55,10 +55,15 @@ function formatCountList(summary) {
55
55
  return parts.length > 0 ? parts.join(", ") : "none";
56
56
  }
57
57
  export function buildAuditReportModel(params) {
58
- // Re-key the finalized findings with globally-unique, content-derived ids
59
- // before anything addresses them by id. buildWorkBlocks keys its union-find on
60
- // finding.id, so the locally-scoped, collision-prone ids worker packets emit
61
- // must be replaced here or unrelated findings fuse into one block.
58
+ // Re-key the finalized findings with globally-unique, content-addressed ids
59
+ // before anything addresses them by id. mergeFindings emits exactly one
60
+ // finding per file-independent identity (exact normalized lens|category|
61
+ // title) across files, units, and passes, and assignStableFindingIds hashes
62
+ // only stable identity signals — never line numbers, pass ids, or the merged
63
+ // file list — so the same logical finding keeps one id across passes and
64
+ // re-syntheses. buildWorkBlocks keys its union-find on finding.id, so the
65
+ // locally-scoped, collision-prone ids worker packets emit must be replaced
66
+ // here or unrelated findings fuse into one block.
62
67
  const findings = assignStableFindingIds(mergeFindings(params.results, params.runtimeValidationReport, params.externalAnalyzerResults, params.designAssessment));
63
68
  const workBlocks = buildWorkBlocks({
64
69
  findings,
package/dist/types.d.ts CHANGED
@@ -57,10 +57,20 @@ export interface FileCoverageRecord {
57
57
  lens?: Lens;
58
58
  agent_role?: string;
59
59
  }
60
+ /** Single source of truth for coverage-matrix classification statuses (mirrors
61
+ * the LENS_REGISTRY-derives-Lens pattern above). The value set is
62
+ * {unclassified, classified} plus the audit-excluded subset of
63
+ * FileDispositionStatus (excluded | generated | vendor | binary | doc_only)
64
+ * plus the scope/trivial-audit statuses written by scope.ts
65
+ * (out_of_scope_delta, out_of_scope_intent) and trivialAudit.ts
66
+ * (excluded_trivial). schemas/coverage_matrix.schema.json must list the same
67
+ * enum — tests/classification-status-drift.test.mjs enforces set equality. */
68
+ export declare const CLASSIFICATION_STATUSES: readonly ["unclassified", "classified", "excluded", "generated", "vendor", "binary", "doc_only", "out_of_scope_delta", "excluded_trivial", "out_of_scope_intent"];
69
+ export type ClassificationStatus = (typeof CLASSIFICATION_STATUSES)[number];
60
70
  export interface CoverageFileRecord {
61
71
  path: string;
62
72
  unit_ids: string[];
63
- classification_status: string;
73
+ classification_status: ClassificationStatus;
64
74
  audit_status: string;
65
75
  required_lenses: Lens[];
66
76
  completed_lenses: Lens[];
package/dist/types.js CHANGED
@@ -26,3 +26,23 @@ export const ENABLED_LENSES = LENS_REGISTRY
26
26
  export function isLens(value) {
27
27
  return (typeof value === "string" && ALL_LENSES.includes(value));
28
28
  }
29
+ /** Single source of truth for coverage-matrix classification statuses (mirrors
30
+ * the LENS_REGISTRY-derives-Lens pattern above). The value set is
31
+ * {unclassified, classified} plus the audit-excluded subset of
32
+ * FileDispositionStatus (excluded | generated | vendor | binary | doc_only)
33
+ * plus the scope/trivial-audit statuses written by scope.ts
34
+ * (out_of_scope_delta, out_of_scope_intent) and trivialAudit.ts
35
+ * (excluded_trivial). schemas/coverage_matrix.schema.json must list the same
36
+ * enum — tests/classification-status-drift.test.mjs enforces set equality. */
37
+ export const CLASSIFICATION_STATUSES = [
38
+ "unclassified",
39
+ "classified",
40
+ "excluded",
41
+ "generated",
42
+ "vendor",
43
+ "binary",
44
+ "doc_only",
45
+ "out_of_scope_delta",
46
+ "excluded_trivial",
47
+ "out_of_scope_intent",
48
+ ];
@@ -107,8 +107,21 @@ export function validateArtifactBundle(bundle) {
107
107
  }
108
108
  if (bundle.repo_manifest && bundle.file_disposition) {
109
109
  const dispositionPaths = new Set(fileDispositionEntries.map((file) => file.path));
110
+ // Above VCS_IGNORED_PER_FILE_LIMIT the disposition drops per-file entries
111
+ // for vcs-ignored files and records directory-prefix aggregates instead;
112
+ // a path covered by an aggregate prefix is accounted for, not missing.
113
+ const aggregatePrefixes = new Set(asArray(bundle.file_disposition
114
+ ?.vcs_ignore?.aggregates).map((aggregate) => aggregate.prefix));
115
+ const coveredByAggregate = (path) => {
116
+ if (aggregatePrefixes.size === 0)
117
+ return false;
118
+ const posix = path.replace(/\\/g, "/");
119
+ const slash = posix.indexOf("/");
120
+ const prefix = slash === -1 ? "." : posix.slice(0, slash);
121
+ return aggregatePrefixes.has(prefix);
122
+ };
110
123
  for (const path of repoPaths) {
111
- if (!dispositionPaths.has(path)) {
124
+ if (!dispositionPaths.has(path) && !coveredByAggregate(path)) {
112
125
  pushIssue(issues, "file_disposition", `Missing disposition entry for ${path}`);
113
126
  }
114
127
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.13.0",
3
+ "version": "0.14.0",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",
@@ -18,7 +18,7 @@
18
18
  },
19
19
  "classification_status": {
20
20
  "type": "string",
21
- "enum": ["unclassified", "classified", "excluded", "generated", "vendor", "binary", "doc_only"]
21
+ "enum": ["unclassified", "classified", "excluded", "generated", "vendor", "binary", "doc_only", "out_of_scope_delta", "excluded_trivial", "out_of_scope_intent"]
22
22
  },
23
23
  "audit_status": {
24
24
  "type": "string",
@@ -85,52 +85,35 @@ const OPENCODE_AUDIT_BASH_PERMISSION = {
85
85
  'rm *': 'deny',
86
86
  };
87
87
 
88
- function replaceBackslashes(value) {
89
- return value.replace(/\\/g, '/');
90
- }
91
-
92
- function renderOpenCodeExternalDirectoryPermission() {
93
- return { '*': 'allow' };
94
- }
95
-
96
88
  function objectValue(value) {
97
89
  return value && typeof value === 'object' && !Array.isArray(value)
98
90
  ? value
99
91
  : {};
100
92
  }
101
93
 
102
- function mergeOpenCodePermissionRule(existingRule, generatedRule, managedRules = {}) {
103
- if (generatedRule && typeof generatedRule === 'object' && !Array.isArray(generatedRule)) {
104
- const generatedObject = generatedRule;
105
- const merged = {};
106
- const existingObject =
107
- existingRule && typeof existingRule === 'object' && !Array.isArray(existingRule)
108
- ? existingRule
109
- : {};
110
-
111
- if (typeof existingRule === 'string') {
112
- merged['*'] = existingRule;
113
- } else {
114
- merged['*'] = existingObject['*'] ?? generatedObject['*'] ?? 'ask';
115
- }
116
-
117
- for (const [key, value] of Object.entries(generatedObject)) {
118
- if (key !== '*') merged[key] = value;
119
- }
120
- for (const [key, value] of Object.entries(existingObject)) {
121
- if (key !== '*') merged[key] = value;
122
- }
123
- for (const [key, value] of Object.entries(managedRules)) {
124
- merged[key] = value;
125
- }
126
-
127
- return merged;
94
+ // The scoped OpenCode permission merge helpers are single-sourced in
95
+ // @audit-tools/shared (global top-level scope vs. auditor agent scope).
96
+ // Resolve them best-effort: on a fresh workspace checkout the shared dist may
97
+ // not be built yet, in which case the OpenCode config deployment below is
98
+ // skipped with a warning instead of failing the whole install.
99
+ let sharedOpenCodePermissions = null;
100
+ try {
101
+ const shared = await import('@audit-tools/shared');
102
+ if (
103
+ typeof shared.mergeOpenCodeAgentPermissionRule === 'function' &&
104
+ typeof shared.mergeOpenCodeGlobalPermissionRule === 'function' &&
105
+ typeof shared.migrateOpenCodeGlobalExternalDirectory === 'function'
106
+ ) {
107
+ sharedOpenCodePermissions = shared;
128
108
  }
129
-
130
- return existingRule ?? generatedRule;
109
+ } catch {
110
+ // Leave null; the OpenCode deployment step reports the skip.
131
111
  }
132
112
 
133
- function mergeOpenCodePermissionConfig(existingPermission, generatedPermission) {
113
+ // Auditor agent scope: broad-allow-with-denylist, unchanged. Managed rules
114
+ // (including the wildcard) always win at this scope.
115
+ function mergeOpenCodeAgentPermissionConfig(existingPermission, generatedPermission) {
116
+ const { mergeOpenCodeAgentPermissionRule } = sharedOpenCodePermissions;
134
117
  if (!existingPermission || typeof existingPermission !== 'object' || Array.isArray(existingPermission)) {
135
118
  return generatedPermission;
136
119
  }
@@ -141,17 +124,17 @@ function mergeOpenCodePermissionConfig(existingPermission, generatedPermission)
141
124
  read: generatedPermission.read,
142
125
  glob: generatedPermission.glob,
143
126
  grep: generatedPermission.grep,
144
- external_directory: mergeOpenCodePermissionRule(
127
+ external_directory: mergeOpenCodeAgentPermissionRule(
145
128
  existingPermission.external_directory,
146
129
  generatedPermission.external_directory,
147
130
  OPENCODE_AUDIT_EXTERNAL_DIRECTORY_PERMISSION,
148
131
  ),
149
- edit: mergeOpenCodePermissionRule(
132
+ edit: mergeOpenCodeAgentPermissionRule(
150
133
  existingPermission.edit,
151
134
  generatedPermission.edit,
152
135
  OPENCODE_AUDIT_EDIT_PERMISSION,
153
136
  ),
154
- bash: mergeOpenCodePermissionRule(
137
+ bash: mergeOpenCodeAgentPermissionRule(
155
138
  existingPermission.bash,
156
139
  generatedPermission.bash,
157
140
  OPENCODE_AUDIT_BASH_PERMISSION,
@@ -159,12 +142,52 @@ function mergeOpenCodePermissionConfig(existingPermission, generatedPermission)
159
142
  };
160
143
  }
161
144
 
145
+ // Global top-level scope: never seeds bash['*']='allow' or
146
+ // external_directory['*']='allow', keeps the denylist hygiene rules, and
147
+ // migrates away previously deployed broad rules whose value exactly matches
148
+ // the historically managed value ('allow'). Non-matching values are untouched.
149
+ function mergeOpenCodeGlobalPermissionConfig(existingPermission, generatedPermission) {
150
+ const {
151
+ mergeOpenCodeAgentPermissionRule,
152
+ mergeOpenCodeGlobalPermissionRule,
153
+ migrateOpenCodeGlobalExternalDirectory,
154
+ } = sharedOpenCodePermissions;
155
+ const existing = objectValue(existingPermission);
156
+
157
+ const merged = {
158
+ ...generatedPermission,
159
+ ...existing,
160
+ read: generatedPermission.read,
161
+ glob: generatedPermission.glob,
162
+ grep: generatedPermission.grep,
163
+ edit: mergeOpenCodeAgentPermissionRule(
164
+ existing.edit,
165
+ generatedPermission.edit,
166
+ OPENCODE_AUDIT_EDIT_PERMISSION,
167
+ ),
168
+ bash: mergeOpenCodeGlobalPermissionRule(
169
+ existing.bash,
170
+ generatedPermission.bash,
171
+ OPENCODE_AUDIT_BASH_PERMISSION,
172
+ ),
173
+ };
174
+
175
+ const externalDirectory = migrateOpenCodeGlobalExternalDirectory(existing.external_directory);
176
+ if (externalDirectory === undefined) {
177
+ delete merged.external_directory;
178
+ } else {
179
+ merged.external_directory = externalDirectory;
180
+ }
181
+
182
+ return merged;
183
+ }
184
+
162
185
  function renderOpenCodePermissionConfig() {
163
186
  return {
164
187
  read: 'allow',
165
188
  glob: 'allow',
166
189
  grep: 'allow',
167
- external_directory: renderOpenCodeExternalDirectoryPermission(),
190
+ external_directory: { ...OPENCODE_AUDIT_EXTERNAL_DIRECTORY_PERMISSION },
168
191
  edit: { ...OPENCODE_AUDIT_EDIT_PERMISSION },
169
192
  bash: { ...OPENCODE_AUDIT_BASH_PERMISSION },
170
193
  };
@@ -190,10 +213,7 @@ function mergeOpenCodeGlobalConfig(existing) {
190
213
  subtask: false,
191
214
  },
192
215
  },
193
- permission: {
194
- ...mergeOpenCodePermissionConfig(parsed.permission, auditPermission),
195
- external_directory: { '*': 'allow' },
196
- },
216
+ permission: mergeOpenCodeGlobalPermissionConfig(parsed.permission, auditPermission),
197
217
  agent: {
198
218
  ...(parsed.agent && typeof parsed.agent === 'object' && !Array.isArray(parsed.agent)
199
219
  ? parsed.agent
@@ -202,7 +222,7 @@ function mergeOpenCodeGlobalConfig(existing) {
202
222
  ...existingAuditor,
203
223
  description: 'Read-heavy audit orchestration agent for the /audit-code workflow.',
204
224
  permission: {
205
- ...mergeOpenCodePermissionConfig(existingAuditor.permission, auditPermission),
225
+ ...mergeOpenCodeAgentPermissionConfig(existingAuditor.permission, auditPermission),
206
226
  external_directory: { '*': 'allow' },
207
227
  'auditor_*': 'allow',
208
228
  question: 'allow',
@@ -288,6 +308,11 @@ for (const install of installs) {
288
308
  // Install OpenCode global command and MCP via merged config
289
309
  const opencodeGlobalConfig = join(homedir(), '.config', 'opencode', 'opencode.json');
290
310
  try {
311
+ if (!sharedOpenCodePermissions) {
312
+ throw new Error(
313
+ '@audit-tools/shared is unavailable (build the shared workspace first); skipping OpenCode config deployment',
314
+ );
315
+ }
291
316
  const action = installMergedJson(opencodeGlobalConfig, (existing) =>
292
317
  mergeOpenCodeGlobalConfig(existing),
293
318
  );