auditor-lambda 0.10.2 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
- import { readFile, readdir } from "node:fs/promises";
1
+ import { readFile, readdir, rm } from "node:fs/promises";
2
+ import { existsSync } from "node:fs";
2
3
  import { join, resolve } from "node:path";
3
4
  import { isFileMissingError, readJsonFile, writeJsonFile } from "@audit-tools/shared";
4
5
  import { validateAuditResults } from "../validation/auditResults.js";
5
6
  import { runAuditStep } from "./auditStep.js";
6
7
  import { DISPATCH_RESULT_MAP_FILENAME, ACTIVE_DISPATCH_FILENAME, loadDispatchResultMap, entriesByTaskId, buildPendingAuditTasks, } from "./dispatch.js";
7
8
  import { addFileLineCountHints } from "./lineIndex.js";
8
- import { isCanonicalResultFilename, getArtifactsDir, getFlag } from "./args.js";
9
+ import { isCanonicalResultFilename, taskResultPath, getArtifactsDir, getFlag } from "./args.js";
9
10
  import { buildWorkerResult } from "./workerResult.js";
10
11
  import { PACKET_SCHEMA_FILENAMES } from "../io/runArtifacts.js";
11
12
  // Schema pointer files prepare-dispatch copies into task-results/ for optional
@@ -38,8 +39,31 @@ export async function cmdMergeAndIngest(argv) {
38
39
  throw e;
39
40
  }
40
41
  if (priorSummary) {
41
- console.log(JSON.stringify({ ...priorSummary, idempotent_replay: true }, null, 2));
42
- return;
42
+ // A completion marker can go stale. Selective deepening appends new pending
43
+ // tasks to the SAME run-id, and — in the no-progress-loop bug — their answers
44
+ // already sit on disk under canonical per-task names while the marker says the
45
+ // run is done. If any pending task has a recoverable on-disk result, the marker
46
+ // no longer reflects reality: discard it and re-process so those answers ingest
47
+ // instead of replaying a no-op forever. A genuinely terminal run (no pending
48
+ // tasks, or pending tasks not yet answered — e.g. a new round handled under a
49
+ // different run-id) still replays cleanly.
50
+ let pendingWithResults = 0;
51
+ try {
52
+ const pending = await readJsonFile(tasksPath);
53
+ for (const task of pending) {
54
+ if (existsSync(taskResultPath(taskResultsDir, task.task_id))) {
55
+ pendingWithResults++;
56
+ }
57
+ }
58
+ }
59
+ catch { /* no pending-tasks file — treat as terminal and replay */ }
60
+ if (pendingWithResults === 0) {
61
+ console.log(JSON.stringify({ ...priorSummary, idempotent_replay: true }, null, 2));
62
+ return;
63
+ }
64
+ process.stderr.write(`[merge-and-ingest] completion marker for ${runId} is stale: ` +
65
+ `${pendingWithResults} pending task(s) have un-ingested on-disk results; re-processing.\n`);
66
+ await rm(mergeCompletePath, { force: true });
43
67
  }
44
68
  const workerTask = await readJsonFile(taskPath);
45
69
  const resultMap = await loadDispatchResultMap(runDir);
@@ -116,36 +140,48 @@ export async function cmdMergeAndIngest(argv) {
116
140
  }
117
141
  for (const task of allTasks) {
118
142
  const entry = entryByTaskId.get(task.task_id);
119
- if (!entry) {
120
- // No result-map entry => this pending task was not dispatched this round.
121
- // Leave it pending for the next dispatch; it is not a failure.
122
- notDispatched.push(task.task_id);
123
- continue;
124
- }
125
- const filePath = entry.result_path;
126
143
  let obj;
127
- try {
128
- obj = JSON.parse(await readFile(filePath, "utf8"));
129
- }
130
- catch (e) {
131
- if (isFileMissingError(e)) {
132
- const fallback = fallbackByTaskId.get(task.task_id);
133
- if (fallback) {
134
- process.stderr.write(`[merge-and-ingest] Recovered result for '${task.task_id}' from unexpected file (matched by task_id)\n`);
135
- obj = fallback;
144
+ if (entry) {
145
+ const filePath = entry.result_path;
146
+ try {
147
+ obj = JSON.parse(await readFile(filePath, "utf8"));
148
+ }
149
+ catch (e) {
150
+ if (isFileMissingError(e)) {
151
+ const fallback = fallbackByTaskId.get(task.task_id);
152
+ if (fallback) {
153
+ process.stderr.write(`[merge-and-ingest] Recovered result for '${task.task_id}' from unexpected file (matched by task_id)\n`);
154
+ obj = fallback;
155
+ }
156
+ else {
157
+ failing.push({
158
+ task_id: task.task_id,
159
+ errors: ["Missing audit result for assigned task."],
160
+ });
161
+ continue;
162
+ }
136
163
  }
137
164
  else {
138
- failing.push({
139
- task_id: task.task_id,
140
- errors: ["Missing audit result for assigned task."],
141
- });
165
+ failing.push({ task_id: task.task_id, errors: [`Invalid JSON: ${e.message}`] });
142
166
  continue;
143
167
  }
144
168
  }
145
- else {
146
- failing.push({ task_id: task.task_id, errors: [`Invalid JSON: ${e.message}`] });
169
+ }
170
+ else {
171
+ // No result-map entry => this pending task was not dispatched this round.
172
+ // But its answer may already exist on disk under a canonical per-task name
173
+ // (e.g. a selective-deepening task answered in a prior round whose dispatch
174
+ // manifest was later regenerated empty — the no-progress loop this guards
175
+ // against). Recover it by task_id so it ingests instead of looping forever
176
+ // as "pending"; only when no such file exists is the task genuinely held
177
+ // back for the next dispatch (not a failure).
178
+ const fallback = fallbackByTaskId.get(task.task_id);
179
+ if (!fallback) {
180
+ notDispatched.push(task.task_id);
147
181
  continue;
148
182
  }
183
+ process.stderr.write(`[merge-and-ingest] Recovered un-dispatched task '${task.task_id}' from on-disk result file (matched by task_id)\n`);
184
+ obj = fallback;
149
185
  }
150
186
  const record = obj && typeof obj === "object" && !Array.isArray(obj)
151
187
  ? obj
@@ -278,6 +314,12 @@ export async function cmdMergeAndIngest(argv) {
278
314
  // failures stay replayable for retry, and a canary (notDispatched > 0) must NOT
279
315
  // be marked complete or the fan-out merge on the same run-id would short-circuit
280
316
  // to an idempotent replay and silently drop the fan-out results.
317
+ //
318
+ // Selective deepening appends new pending tasks to the SAME run-id; this marker
319
+ // can therefore go stale once those tasks are later dispatched and answered. The
320
+ // replay guard at the top detects that (a pending task with an on-disk result)
321
+ // and re-processes, so a premature marker self-heals instead of stranding the
322
+ // deepening answers behind an idempotent replay (the no-progress loop).
281
323
  if (failing.length === 0 && notDispatched.length === 0) {
282
324
  await writeJsonFile(mergeCompletePath, summaryPayload);
283
325
  }
@@ -0,0 +1,21 @@
1
+ import type { Finding } from "../types.js";
2
+ /**
3
+ * Re-key finalized findings with globally-unique, content-derived ids at the
4
+ * synthesis boundary.
5
+ *
6
+ * Worker packets assign locally-scoped ids (e.g. `MNT-001`) that collide across
7
+ * packets once merged, which breaks `audit-findings.json` as a machine contract:
8
+ * `buildWorkBlocks` keys its union-find on `id` (so colliding ids fuse unrelated
9
+ * findings into one block), and `work_blocks.finding_ids` / theme `finding_ids` /
10
+ * the remediator's per-finding addressing can no longer resolve a single finding.
11
+ *
12
+ * The id is `<LENS_PREFIX>-<sha256(content)[:8]>`, deterministic and stable so a
13
+ * re-synthesis of the same findings produces the same ids. If an id is already
14
+ * taken — a vanishingly rare hash collision between two *distinct* findings, or two findings whose identity-bearing content is identical — the later finding gets
15
+ * a deterministic numeric suffix (findings arrive in mergeFindings()' stable order).
16
+ *
17
+ * `related_findings`, when present, referenced the old colliding ids and cannot
18
+ * be remapped unambiguously, so it is dropped rather than left dangling. (It is
19
+ * unpopulated by every current extractor.)
20
+ */
21
+ export declare function assignStableFindingIds(findings: Finding[]): Finding[];
@@ -0,0 +1,72 @@
1
+ import { createHash } from "node:crypto";
2
// Stable lens -> id prefix. The lens is the canonical addressing axis, so the
// prefix always matches it (no convention drift) and the content hash that
// follows guarantees global uniqueness.
//
// The table has a null prototype so that a lookup keyed by an arbitrary lens
// string (e.g. "constructor", "toString", "__proto__") can never resolve to an
// inherited Object.prototype member; unknown lenses always read as `undefined`.
// It is frozen because it is shared, module-level constant data.
const LENS_ID_PREFIX = Object.freeze(Object.assign(Object.create(null), {
    correctness: "COR",
    architecture: "ARC",
    maintainability: "MNT",
    security: "SEC",
    reliability: "REL",
    performance: "PRF",
    data_integrity: "DAT",
    tests: "TST",
    operability: "OPR",
    config_deployment: "CFG",
    observability: "OBS",
}));
18
/**
 * A stable signature of a finding's identity-bearing content. The same logical
 * finding yields the same signature across runs (so its id is reproducible).
 *
 * The signature is `lens|category|title|files`, where the three text fields are
 * trimmed and lower-cased and `files` is the sorted, comma-joined list of
 * `path:line_start:line_end:symbol` entries (absent line/symbol parts are
 * rendered as empty strings). Sorting makes the result independent of the
 * order in which affected files are listed.
 *
 * Missing inputs are tolerated rather than thrown on: a nullish `lens`,
 * `category` or `title` contributes an empty segment, and a missing or
 * non-array `affected_files` contributes an empty file list. For well-formed
 * findings the output is unchanged, so previously minted ids stay stable.
 *
 * @param {object} finding - Finding record; reads `lens`, `category`, `title`
 *   and `affected_files`.
 * @returns {string} The `|`-joined signature string.
 */
function contentSignature(finding) {
    // Normalise a text field; nullish values become "" instead of throwing.
    const normalize = (value) => String(value ?? "").trim().toLowerCase();
    const affectedFiles = Array.isArray(finding.affected_files) ? finding.affected_files : [];
    const files = affectedFiles
        .map((file) => `${file.path}:${file.line_start ?? ""}:${file.line_end ?? ""}:${file.symbol ?? ""}`)
        .sort()
        .join(",");
    return [
        normalize(finding.lens),
        normalize(finding.category),
        normalize(finding.title),
        files,
    ].join("|");
}
36
/**
 * Re-key finalized findings with globally-unique, content-derived ids at the
 * synthesis boundary.
 *
 * Worker packets assign locally-scoped ids (e.g. `MNT-001`) that collide across
 * packets once merged, which breaks `audit-findings.json` as a machine contract:
 * `buildWorkBlocks` keys its union-find on `id` (so colliding ids fuse unrelated
 * findings into one block), and `work_blocks.finding_ids` / theme `finding_ids` /
 * the remediator's per-finding addressing can no longer resolve a single finding.
 *
 * The id is `<LENS_PREFIX>-<sha256(content)[:8]>`, deterministic and stable so a
 * re-synthesis of the same findings produces the same ids. Lenses without an
 * entry in the prefix table (including names that only exist on
 * `Object.prototype`, such as `constructor`) use the generic `FND` prefix.
 *
 * If an id is already taken within this call — a hash collision, or two
 * findings whose content signature is identical — the later finding gets a
 * numeric suffix (`-2`, `-3`, ...). The suffix depends on input order, so it is
 * only deterministic when findings arrive in a stable order.
 *
 * `related_findings`, when present, referenced the old colliding ids and cannot
 * be remapped unambiguously, so it is dropped rather than left dangling.
 *
 * @param {object[]} findings - Finalized findings; the input objects are not
 *   mutated.
 * @returns {object[]} Shallow copies of the findings, in the same order, with
 *   `id` replaced and `related_findings` removed.
 */
export function assignStableFindingIds(findings) {
    const used = new Set();
    return findings.map((finding) => {
        const lens = String(finding.lens ?? "").trim().toLowerCase();
        // Own-property lookup only: a plain-object index would also resolve
        // inherited members (e.g. "constructor") and produce a non-string prefix.
        const prefix = Object.hasOwn(LENS_ID_PREFIX, lens) ? LENS_ID_PREFIX[lens] : "FND";
        const hash = createHash("sha256")
            .update(contentSignature(finding))
            .digest("hex")
            .slice(0, 8);
        const baseId = `${prefix}-${hash}`;
        let id = baseId;
        // First occurrence keeps the bare id; repeats get -2, -3, ...
        for (let n = 2; used.has(id); n++) {
            id = `${baseId}-${n}`;
        }
        used.add(id);
        // Work on a shallow copy so the caller's finding objects stay untouched.
        const reKeyed = { ...finding, id };
        delete reKeyed.related_findings;
        return reKeyed;
    });
}
@@ -1,6 +1,7 @@
1
1
  import { AUDITOR_REPORT_MARKER } from "@audit-tools/shared";
2
2
  import { buildWorkBlocks } from "./workBlocks.js";
3
3
  import { mergeFindings } from "./mergeFindings.js";
4
+ import { assignStableFindingIds } from "./findingIdentity.js";
4
5
  /** Contract version stamped onto the canonical `audit-findings.json`. */
5
6
  export const AUDIT_FINDINGS_CONTRACT_VERSION = "audit-tools/audit-findings/v1";
6
7
  function countBy(items, selectKey) {
@@ -37,7 +38,11 @@ function formatSeverityList(summary) {
37
38
  return parts.length > 0 ? parts.join(", ") : "none";
38
39
  }
39
40
  export function buildAuditReportModel(params) {
40
- const findings = mergeFindings(params.results, params.runtimeValidationReport, params.externalAnalyzerResults, params.designAssessment);
41
+ // Re-key the finalized findings with globally-unique, content-derived ids
42
+ // before anything addresses them by id. buildWorkBlocks keys its union-find on
43
+ // finding.id, so the locally-scoped, collision-prone ids worker packets emit
44
+ // must be replaced here or unrelated findings fuse into one block.
45
+ const findings = assignStableFindingIds(mergeFindings(params.results, params.runtimeValidationReport, params.externalAnalyzerResults, params.designAssessment));
41
46
  const workBlocks = buildWorkBlocks({
42
47
  findings,
43
48
  unitManifest: params.unitManifest,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "auditor-lambda",
3
- "version": "0.10.2",
3
+ "version": "0.10.3",
4
4
  "private": false,
5
5
  "description": "Portable hybrid code-auditing framework for arbitrary repositories.",
6
6
  "type": "module",
@@ -50,7 +50,8 @@
50
50
  "path": { "type": "string" },
51
51
  "line_start": { "type": "integer", "minimum": 1 },
52
52
  "line_end": { "type": "integer", "minimum": 1 },
53
- "symbol": { "type": "string" }
53
+ "symbol": { "type": "string" },
54
+ "hash_at_plan_time": { "type": "string" }
54
55
  },
55
56
  "additionalProperties": false
56
57
  }