auditor-lambda 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -21
- package/dist/cli/auditStep.js +7 -1
- package/dist/cli.d.ts +0 -1
- package/dist/cli.js +0 -2
- package/dist/extractors/graph.js +12 -2
- package/dist/io/artifacts.d.ts +3 -1
- package/dist/io/artifacts.js +18 -2
- package/dist/orchestrator/advance.js +2 -1
- package/dist/orchestrator/artifactFreshness.js +12 -2
- package/dist/orchestrator/autoFixExecutor.d.ts +1 -1
- package/dist/orchestrator/autoFixExecutor.js +10 -0
- package/dist/orchestrator/executorResult.d.ts +12 -0
- package/dist/orchestrator/executorResult.js +1 -0
- package/dist/orchestrator/fileIntegrity.d.ts +1 -0
- package/dist/orchestrator/fileIntegrity.js +12 -3
- package/dist/orchestrator/graphEnrichmentExecutor.d.ts +1 -1
- package/dist/orchestrator/graphEnrichmentExecutor.js +3 -1
- package/dist/orchestrator/internalExecutors.d.ts +1 -18
- package/dist/orchestrator/internalExecutors.js +1 -158
- package/dist/orchestrator/reviewPacketGraph.d.ts +31 -0
- package/dist/orchestrator/reviewPacketGraph.js +691 -0
- package/dist/orchestrator/reviewPackets.d.ts +2 -15
- package/dist/orchestrator/reviewPackets.js +3 -685
- package/dist/orchestrator/runtimeCommand.d.ts +11 -0
- package/dist/orchestrator/runtimeCommand.js +79 -0
- package/dist/orchestrator/scope.js +1 -1
- package/dist/orchestrator/syntaxResolutionExecutor.d.ts +1 -1
- package/dist/orchestrator/synthesisExecutors.d.ts +12 -0
- package/dist/orchestrator/synthesisExecutors.js +90 -0
- package/docs/development.md +35 -139
- package/docs/history.md +26 -0
- package/docs/product.md +41 -108
- package/package.json +1 -1
- package/schemas/audit_findings.schema.json +3 -2
- package/schemas/dispatch_quota.schema.json +2 -0
- package/schemas/external_analyzer_results.schema.json +2 -2
- package/schemas/repo_manifest.schema.json +1 -1
- package/docs/handoff.md +0 -204
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Run a runtime-validation command in `cwd` and classify its outcome.
 *
 * @param command Argv-style command: the executable followed by its arguments.
 * @param cwd Working directory for the spawned process.
 * @param options When `opentoken` is truthy the command is run through
 *   `opentoken wrap`.
 * @returns `status` is `"confirmed"` when the process exits with code 0,
 *   `"not_confirmed"` when it ends any other way, and `"inconclusive"` when the
 *   command could not be executed. `evidence` holds at most the last 10 lines
 *   of the combined stdout/stderr output.
 */
export declare function runCommand(command: string[], cwd: string, options?: {
|
|
2
|
+
opentoken?: boolean;
|
|
3
|
+
}): Promise<{
|
|
4
|
+
status: "confirmed" | "not_confirmed" | "inconclusive";
|
|
5
|
+
summary: string;
|
|
6
|
+
evidence: string[];
|
|
7
|
+
}>;
|
|
8
|
+
/**
 * Resolve an argv-style command to the executable/args pair passed to `spawn`.
 *
 * @param command Executable followed by its arguments; an empty array yields
 *   `{ command: "", args: [] }`.
 * @param platform Target platform; defaults to `process.platform`.
 * @param shellCommand Shell used for the Windows wrapper; defaults to
 *   `process.env.ComSpec`, falling back to `"cmd.exe"`.
 * @returns On `win32`, `npm`/`npx`/`pnpm`/`yarn` (optionally with a `.cmd` or
 *   `.bat` suffix) are routed through `shellCommand /d /s /c "<quoted line>"`;
 *   every other command is returned unchanged.
 */
export declare function resolveRuntimeValidationSpawnCommand(command: string[], platform?: NodeJS.Platform, shellCommand?: string): {
|
|
9
|
+
command: string;
|
|
10
|
+
args: string[];
|
|
11
|
+
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
// Deterministic runtime-validation command execution: resolve a command to a
|
|
3
|
+
// platform-correct spawn invocation (Windows package-manager shims need a
|
|
4
|
+
// cmd.exe wrapper), optionally wrap it for opentoken accounting, and run it
|
|
5
|
+
// capturing a confirmed/not_confirmed/inconclusive outcome. Hoisted out of
|
|
6
|
+
// internalExecutors.ts as a shared, side-effect-only helper module.
|
|
7
|
+
/**
 * Re-target an already-resolved spawn invocation so it runs under
 * `opentoken wrap`.
 *
 * @param resolved The `{ command, args }` pair to wrap.
 * @param platform Target platform; defaults to `process.platform`.
 * @returns On `win32`, a shell invocation (`ComSpec` or `cmd.exe`) whose `/c`
 *   payload is `opentoken wrap` followed by the quoted command line; on every
 *   other platform, a direct `opentoken wrap <command> <args...>` invocation.
 */
function resolveOpentokenWrap(resolved, platform = process.platform) {
    const parts = [resolved.command, ...resolved.args];
    if (platform !== "win32") {
        return { command: "opentoken", args: ["wrap", ...parts] };
    }
    // Tokens made only of these characters are passed through unquoted.
    const safeToken = /^[A-Za-z0-9_./:=@+-]+$/;
    const quoted = [];
    for (const part of parts) {
        if (safeToken.test(part)) {
            quoted.push(part);
        }
        else {
            // Wrap in double quotes and caret-escape cmd metacharacters.
            quoted.push(`"${part.replace(/(["^&|<>%])/g, "^$1")}"`);
        }
    }
    const comspec = process.env.ComSpec ?? "cmd.exe";
    return {
        command: comspec,
        args: ["/d", "/s", "/c", `opentoken wrap ${quoted.join(" ")}`],
    };
}
|
|
17
|
+
/**
 * Execute a deterministic runtime-validation command and classify the result.
 *
 * The command is resolved to a platform-correct spawn invocation and, when
 * `options.opentoken` is truthy, wrapped in `opentoken wrap`. The returned
 * promise never rejects: every failure mode is reported through `status`.
 *
 * @param command Argv-style command: executable followed by its arguments.
 * @param cwd Working directory for the child process.
 * @param options Optional flags; `opentoken` wraps the command for opentoken
 *   accounting.
 * @returns `confirmed` when the process exits with code 0, `not_confirmed` for
 *   any other termination, and `inconclusive` when the command is empty or
 *   cannot be spawned. `evidence` holds the last 10 lines of the combined
 *   stdout/stderr output.
 */
export async function runCommand(command, cwd, options = {}) {
    const displayCommand = command.join(" ");
    const inconclusive = (reason) => ({
        status: "inconclusive",
        summary: `Failed to execute ${displayCommand}: ${reason}`,
        evidence: [],
    });
    let spawnCommand = resolveRuntimeValidationSpawnCommand(command);
    if (!spawnCommand.command) {
        // An empty executable makes spawn() throw synchronously; report it as an
        // outcome instead of letting the promise reject.
        return inconclusive("no executable was provided");
    }
    if (options.opentoken) {
        spawnCommand = resolveOpentokenWrap(spawnCommand);
    }
    return await new Promise((resolve) => {
        let child;
        try {
            child = spawn(spawnCommand.command, spawnCommand.args, {
                cwd,
                env: process.env,
                stdio: ["ignore", "pipe", "pipe"],
            });
        }
        catch (error) {
            // Invalid arguments are raised synchronously rather than via "error".
            resolve(inconclusive(error instanceof Error ? error.message : String(error)));
            return;
        }
        let stdout = "";
        let stderr = "";
        child.stdout.on("data", (chunk) => {
            stdout += String(chunk);
        });
        child.stderr.on("data", (chunk) => {
            stderr += String(chunk);
        });
        // If both "error" and "close" fire, the first resolve() wins.
        child.on("error", (error) => {
            resolve(inconclusive(error.message));
        });
        // "close" (not "exit") fires only after the stdio streams have ended, so
        // the captured evidence cannot miss trailing output.
        child.on("close", (code, signal) => {
            const output = `${stdout}\n${stderr}`.trim();
            const evidence = output.length > 0 ? output.split(/\r?\n/).slice(-10) : [];
            let summary;
            if (code === 0) {
                summary = `Deterministic runtime command succeeded: ${displayCommand}`;
            }
            else if (code === null) {
                // A null exit code means the process was ended by a signal.
                summary = `Deterministic runtime command was terminated by signal ${signal}: ${displayCommand}`;
            }
            else {
                summary = `Deterministic runtime command failed with exit code ${code}: ${displayCommand}`;
            }
            resolve({
                status: code === 0 ? "confirmed" : "not_confirmed",
                summary,
                evidence,
            });
        });
    });
}
|
|
57
|
+
/**
 * Translate an argv-style runtime-validation command into the executable/args
 * pair handed to `spawn`.
 *
 * @param command Executable followed by its arguments.
 * @param platform Target platform; defaults to `process.platform`.
 * @param shellCommand Shell used for the Windows wrapper; defaults to
 *   `process.env.ComSpec`, falling back to `"cmd.exe"`.
 * @returns `{ command: "", args: [] }` for an empty command. On `win32`,
 *   `npm`/`npx`/`pnpm`/`yarn` (with or without a `.cmd`/`.bat` suffix, any
 *   letter case) are run through `shellCommand /d /s /c "<quoted line>"`.
 *   Everything else is returned as-is.
 */
export function resolveRuntimeValidationSpawnCommand(command, platform = process.platform, shellCommand = process.env.ComSpec ?? "cmd.exe") {
    const [program, ...rest] = command;
    if (!program) {
        return { command: "", args: [] };
    }
    const isWindows = platform === "win32";
    // Package-manager entry points that must go through the command shell.
    const shimNames = new Set(["npm", "npx", "pnpm", "yarn"]);
    const baseName = program.replace(/\.(cmd|bat)$/i, "").toLowerCase();
    if (!isWindows || !shimNames.has(baseName)) {
        return { command: program, args: rest };
    }
    const commandLine = command.map((part) => quoteCmdArg(part)).join(" ");
    return {
        command: shellCommand,
        args: ["/d", "/s", "/c", commandLine],
    };
}
/**
 * Quote a single argument for inclusion in a cmd.exe command line.
 *
 * Values made only of letters, digits and `_ . / : = + -` are returned
 * unchanged; anything else is wrapped in double quotes with `" ^ & | < > %`
 * prefixed by a caret.
 */
function quoteCmdArg(value) {
    const needsQuoting = !/^[A-Za-z0-9_./:=+-]+$/.test(value);
    return needsQuoting ? `"${value.replace(/(["^&|<>%])/g, "^$1")}"` : value;
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { changedFiles, gitRefExists, isGitRepo } from "@audit-tools/shared";
|
|
2
2
|
import { buildDispositionMap } from "../extractors/disposition.js";
|
|
3
3
|
import { buildPathLookup } from "../extractors/graph.js";
|
|
4
|
-
import { HIGH_FAN_DEGREE_THRESHOLD, buildGraphDegreeIndex, collectGraphEdges, graphEdgeConfidence, normalizeGraphPath, } from "./
|
|
4
|
+
import { HIGH_FAN_DEGREE_THRESHOLD, buildGraphDegreeIndex, collectGraphEdges, graphEdgeConfidence, normalizeGraphPath, } from "./reviewPacketGraph.js";
|
|
5
5
|
/** Default cap on in-scope files (seeds + expanded) before expansion stops. */
|
|
6
6
|
export const DEFAULT_SCOPE_MAX_FILES = 200;
|
|
7
7
|
/** Graph edges below this confidence are never traversed during expansion. */
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { ArtifactBundle } from "../io/artifacts.js";
|
|
2
|
-
import type { ExecutorRunResult } from "./
|
|
2
|
+
import type { ExecutorRunResult } from "./executorResult.js";
|
|
3
3
|
export declare function runSyntaxResolutionExecutor(bundle: ArtifactBundle, root: string): ExecutorRunResult;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ArtifactBundle } from "../io/artifacts.js";
|
|
2
|
+
import type { AuditResult } from "../types.js";
|
|
3
|
+
import type { ExecutorRunResult } from "./executorResult.js";
|
|
4
|
+
import type { SynthesisNarrative } from "@audit-tools/shared";
|
|
5
|
+
/**
 * Build the canonical findings report and render the human audit report from
 * it, without any narrative layer.
 *
 * @param bundle Artifact bundle to read from and extend.
 * @param results Audit results to report on; when omitted the bundle's
 *   `audit_results` are used, falling back to an empty list.
 * @returns The updated bundle together with the written artifact names
 *   (`audit-findings.json`, `audit-report.md`) and a progress summary.
 */
export declare function runSynthesisExecutor(bundle: ArtifactBundle, results?: AuditResult[]): ExecutorRunResult;
|
|
6
|
+
/**
|
|
7
|
+
* Resolve the optional synthesis-narrative obligation. When a host/provider
|
|
8
|
+
* narrative is supplied it is merged into the canonical findings report and the
|
|
9
|
+
* human report is re-rendered with themes/executive-summary/top-risks; without
|
|
10
|
+
* one the narrative is recorded as omitted and the deterministic report stands.
|
|
11
|
+
*/
|
|
12
|
+
export declare function runSynthesisNarrativeExecutor(bundle: ArtifactBundle, narrative?: SynthesisNarrative): ExecutorRunResult;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { applyNarrative, buildAuditFindingsReport, buildAuditReportModel, renderAuditReportMarkdown, } from "../reporting/synthesis.js";
|
|
2
|
+
/**
 * Assemble the narrative-free findings report for a bundle.
 *
 * Collects the report inputs from the bundle, builds the audit report model
 * from them and converts that model into the findings report.
 *
 * @param bundle Artifact bundle supplying the manifest, graph, flows, coverage,
 *   runtime-validation, external-analyzer and design-assessment inputs.
 * @param results Audit results to include in the report.
 */
function buildBaseFindingsReport(bundle, results) {
    const modelInput = {
        results,
        unitManifest: bundle.unit_manifest,
        graphBundle: bundle.graph_bundle,
        criticalFlows: bundle.critical_flows,
        coverageMatrix: bundle.coverage_matrix,
        runtimeValidationReport: bundle.runtime_validation_report,
        externalAnalyzerResults: bundle.external_analyzer_results,
        designAssessment: bundle.design_assessment,
    };
    const reportModel = buildAuditReportModel(modelInput);
    return buildAuditFindingsReport(reportModel);
}
|
|
14
|
+
/**
 * Produce the deterministic synthesis outputs: the canonical findings report
 * and the Markdown report rendered from it. No narrative is applied here; that
 * is layered on by the synthesis-narrative obligation.
 *
 * @param bundle Artifact bundle to read from and extend.
 * @param results Audit results to report on; defaults to the bundle's
 *   `audit_results`, then to an empty list.
 * @returns The updated bundle, the artifacts written and a progress summary.
 */
export function runSynthesisExecutor(bundle, results) {
    const finalResults = results ?? bundle.audit_results ?? [];
    const findings = buildBaseFindingsReport(bundle, finalResults);
    // audit_results is deliberately NOT written back. Synthesis only renders
    // findings and does not own that artifact. Because audit_results is absent
    // from artifacts_written, computeArtifactMetadata would reuse its previous
    // hash and the stored value would drift from its metadata entry. With zero
    // results it would also create an empty audit_results.jsonl that never
    // existed. Either effect keeps re-staling coverage_matrix → planning, which
    // forces a planning re-run that rewrites runtime_validation_report.json
    // (the finalization-oscillation engine). The ingested value stays as-is.
    const updated = { ...bundle, audit_findings: findings };
    updated.audit_report = renderAuditReportMarkdown(findings, { scope: bundle.scope });
    const artifactsWritten = ["audit-findings.json", "audit-report.md"];
    return {
        updated,
        artifacts_written: artifactsWritten,
        progress_summary: `Rendered deterministic audit report and canonical findings for ${finalResults.length} audit result entries.`,
    };
}
|
|
36
|
+
/**
 * Settle the optional synthesis-narrative obligation.
 *
 * With a non-empty narrative (at least one theme, a non-blank executive
 * summary, or at least one top risk) the narrative is applied to the findings
 * report and the Markdown report is rendered again from the enriched findings.
 * Otherwise the narrative is recorded as omitted and the deterministic
 * findings report is kept.
 *
 * @param bundle Artifact bundle; its `audit_findings` are reused when present,
 *   otherwise a base report is built from `audit_results`.
 * @param narrative Optional host/provider narrative.
 * @returns The updated bundle, the artifacts written and a progress summary.
 */
export function runSynthesisNarrativeExecutor(bundle, narrative) {
    const baseReport = bundle.audit_findings ??
        buildBaseFindingsReport(bundle, bundle.audit_results ?? []);
    const needsBaseWrite = !bundle.audit_findings;
    // Checks run in order and stop at the first populated field.
    const narrativeHasContent = (candidate) => {
        if (!candidate) {
            return false;
        }
        if ((candidate.themes?.length ?? 0) > 0) {
            return true;
        }
        if ((candidate.executive_summary?.trim().length ?? 0) > 0) {
            return true;
        }
        return (candidate.top_risks?.length ?? 0) > 0;
    };
    if (narrativeHasContent(narrative)) {
        const enriched = applyNarrative(baseReport, narrative);
        const appliedRecord = {
            status: "applied",
            theme_count: enriched.themes?.length ?? 0,
            executive_summary_present: (enriched.executive_summary?.trim().length ?? 0) > 0,
            top_risk_count: enriched.top_risks?.length ?? 0,
        };
        const enrichedBundle = { ...bundle, audit_findings: enriched };
        enrichedBundle.audit_report = renderAuditReportMarkdown(enriched, { scope: bundle.scope });
        enrichedBundle.synthesis_narrative = appliedRecord;
        return {
            updated: enrichedBundle,
            artifacts_written: [
                "audit-findings.json",
                "audit-report.md",
                "synthesis-narrative.json",
            ],
            progress_summary: `Synthesis narrative applied: ${appliedRecord.theme_count} theme(s), ${appliedRecord.top_risk_count} top risk(s).`,
        };
    }
    const omittedRecord = {
        status: "omitted",
        theme_count: 0,
        executive_summary_present: false,
        top_risk_count: 0,
    };
    // The findings file is listed only when this call had to build the base report.
    const artifactsWritten = ["synthesis-narrative.json"];
    if (needsBaseWrite) {
        artifactsWritten.unshift("audit-findings.json");
    }
    return {
        updated: {
            ...bundle,
            audit_findings: baseReport,
            synthesis_narrative: omittedRecord,
        },
        artifacts_written: artifactsWritten,
        progress_summary: "Synthesis narrative omitted; deterministic findings report retained.",
    };
}
|
package/docs/development.md
CHANGED
|
@@ -12,10 +12,10 @@
|
|
|
12
12
|
|
|
13
13
|
## Agent handoff
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
15
|
+
Keep long-term product direction in `docs/product.md` and archival context
|
|
16
|
+
(shipped sprints, field-trial lessons) in `docs/history.md`. There is no
|
|
17
|
+
standing per-sprint handoff file; sprint notes are folded into `docs/history.md`
|
|
18
|
+
once the work ships.
|
|
19
19
|
|
|
20
20
|
## Build and test
|
|
21
21
|
|
|
@@ -30,79 +30,6 @@ The test suite is intentionally contract-heavy. Update tests when changing
|
|
|
30
30
|
schema shape, prompt contracts, dispatch behavior, installer output, or release
|
|
31
31
|
workflow semantics.
|
|
32
32
|
|
|
33
|
-
## Production-readiness workflow
|
|
34
|
-
|
|
35
|
-
Use field trials to decide what to fix next. For each representative repository,
|
|
36
|
-
run to the local review handoff, validate the artifact bundle, and compare
|
|
37
|
-
`audit_plan_metrics.json` across runs. Track at least packet count, weak packet
|
|
38
|
-
count, average cohesion, `merge_edge_kind_counts`,
|
|
39
|
-
`boundary_edge_kind_counts`, and `weakly_explained_packet_samples`.
|
|
40
|
-
|
|
41
|
-
Only promote an extractor or planner change when those metrics expose a
|
|
42
|
-
deterministic gap. Prefer improving shared graph resolution or importing
|
|
43
|
-
generic analyzer ownership roots before adding another ecosystem-specific
|
|
44
|
-
manifest parser.
|
|
45
|
-
|
|
46
|
-
The latest remediator field trial closed the remaining mixed code/schema/test
|
|
47
|
-
weak packet by adding package script links, schema contract test links, bounded
|
|
48
|
-
TypeScript type contract suites, package-script-seeded script suite links, and
|
|
49
|
-
generated test artifact disposition. Keep future suite links similarly bounded
|
|
50
|
-
and evidence-led.
|
|
51
|
-
|
|
52
|
-
The Polar field trial added `conftest-link` (conftest.py → Python files in
|
|
53
|
-
scope) and `pyproject-testpaths-link` (pyproject.toml → conftest.py via
|
|
54
|
-
`[tool.pytest.ini_options] testpaths`). `conftest-link` fires only when the
|
|
55
|
-
conftest is inside a `isTestPath` directory to avoid O(n) fan-out from
|
|
56
|
-
root-level conftests. `pyproject.toml` was also added to `shouldReadForGraph`
|
|
57
|
-
so its content is available during the filesystem-backed build path. Together
|
|
58
|
-
these raised Polar's average cohesion from 0.625 to 0.857 and reduced weak
|
|
59
|
-
packets from 5 to 3.
|
|
60
|
-
|
|
61
|
-
A second Polar field trial added `yaml-path-reference-link` (YAML/YML files
|
|
62
|
-
→ other config files referenced by explicit relative path). Resolution tries
|
|
63
|
-
repo-root-relative first, then file-directory-relative. The extractor only
|
|
64
|
-
fires for string values ending in `.yaml`, `.yml`, `.json`, or `.toml` that
|
|
65
|
-
resolve to an existing repo file. In Polar, this produced 4 edges from
|
|
66
|
-
`configs/benchmark.yaml` to its template files and raised `internal_edge_count`
|
|
67
|
-
in the `experiments-domains` packet from 90 to 94.
|
|
68
|
-
|
|
69
|
-
A third Polar field trial added `python-test-util-suite-link`, which chains
|
|
70
|
-
`.py` files co-located in `utils/`, `helpers/`, or `support/` subdirectories
|
|
71
|
-
within `isTestPath` directories (same bounded-suite pattern as the TypeScript
|
|
72
|
-
type, JSON schema, and package-script suite links). `conftest.py` is excluded
|
|
73
|
-
from the predicate. In Polar, this produced 2 intra-unit edges within the
|
|
74
|
-
`tests-utils` packet, raising its `internal_edge_count` from 0 to 2 and
|
|
75
|
-
eliminating it as a weak packet. Polar metrics improved from 0.857 to 1.000
|
|
76
|
-
cohesion and 3 to 2 weak packets. The 2 remaining weak packets share genuinely
|
|
77
|
-
isolated files (`.auditorignore`, `experiments/domains/__init__.py`,
|
|
78
|
-
`experiments/summarize_results.py`) that cannot be linked without false
|
|
79
|
-
positives; treat as the current floor. Note that intra-unit suite edges do not
|
|
80
|
-
appear in `merge_edge_kind_counts` — their effect is visible in the packet's
|
|
81
|
-
`internal_edge_count` and `unexplained_file_count` fields instead.
|
|
82
|
-
|
|
83
|
-
Before treating a build as production-ready, verify the complete review loop in
|
|
84
|
-
one real host:
|
|
85
|
-
|
|
86
|
-
```text
|
|
87
|
-
audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
|
|
88
|
-
worker reviews each packet prompt
|
|
89
|
-
audit-code submit-packet ...
|
|
90
|
-
audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
|
|
91
|
-
audit-code validate
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
On Windows, runtime validation runs package-manager shim commands such as
|
|
95
|
-
`npm`, `npx`, `pnpm`, and `yarn` through the command shell so `.cmd` wrappers
|
|
96
|
-
execute reliably. Keep that behavior covered when changing runtime command
|
|
97
|
-
execution.
|
|
98
|
-
|
|
99
|
-
If the final `audit-report.md` cannot be copied into the target repository
|
|
100
|
-
because of local permissions, completion should remain successful and the
|
|
101
|
-
artifact copy remains authoritative. Run `audit-code validate` against the
|
|
102
|
-
artifact bundle before treating the run as complete.
|
|
103
|
-
|
|
104
|
-
Then run `npm run verify:release` from a clean checkout.
|
|
105
|
-
|
|
106
33
|
## Architecture
|
|
107
34
|
|
|
108
35
|
The system separates deterministic extraction from bounded LLM judgment:
|
|
@@ -122,6 +49,8 @@ Portability rules:
|
|
|
122
49
|
- review work is attributable to files, lenses, passes, and tasks
|
|
123
50
|
- coverage gaps are machine-detectable
|
|
124
51
|
|
|
52
|
+
`AuditTask` is the coverage identity; `AuditResult[]` is the ingestion contract.
|
|
53
|
+
|
|
125
54
|
## Adding language analyzers
|
|
126
55
|
|
|
127
56
|
Language support should be adapter-based. A new analyzer should enrich shared
|
|
@@ -137,74 +66,41 @@ Preferred outputs:
|
|
|
137
66
|
- graph edges with kind, direction, confidence, and reason
|
|
138
67
|
- entrypoints and surfaces
|
|
139
68
|
- test-to-source links
|
|
140
|
-
- package/module ownership hints, including analyzer-supplied
|
|
141
|
-
|
|
142
|
-
- contract-suite links for small JSON Schema, workflow, package
|
|
69
|
+
- package/module ownership hints, including analyzer-supplied `ownership_roots`
|
|
70
|
+
that become `analyzer-ownership-root-link` graph references
|
|
71
|
+
- contract-suite links for small JSON Schema, workflow, package-script, or
|
|
143
72
|
TypeScript type suites when planner metrics show otherwise weak packets
|
|
144
73
|
- external boundary hints
|
|
145
74
|
- line counts and anchor summaries for large files
|
|
146
75
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
- planner observability before additional ecosystem breadth
|
|
150
|
-
- exercising the generic ownership-root input from analyzers or imported
|
|
151
|
-
evidence
|
|
152
|
-
- continued behavior-preserving extraction of high-concentration graph helpers
|
|
153
|
-
- JS/TS compiler-backed resolution only after the current regex edges stay
|
|
154
|
-
stable
|
|
155
|
-
- Python deterministic support beyond the current local import, package/module,
|
|
156
|
-
and pytest/unittest adjacency edges only where planner metrics show gaps
|
|
157
|
-
- generic fallback from path patterns, ctags/tree-sitter, LSP output, or
|
|
158
|
-
external analyzer results when available
|
|
159
|
-
|
|
160
|
-
Keep deep analyzers optional. Repositories should still produce useful packets
|
|
76
|
+
Keep deep analyzers optional: a repository should still produce useful packets
|
|
161
77
|
from manifests, paths, tests, and external analyzer results when a language has
|
|
162
|
-
only fallback support.
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
repo-local config checks, such as `eslint.config.*`, `.eslintrc*`, or
|
|
166
|
-
`package.json` `eslintConfig`, over executing a globally installed tool and
|
|
78
|
+
only fallback support. Command-backed analyzers should prove project intent
|
|
79
|
+
before running — prefer repo-local config checks (`eslint.config.*`, `.eslintrc*`,
|
|
80
|
+
`package.json` `eslintConfig`) over executing a globally installed tool and
|
|
167
81
|
parsing its no-config failure.
|
|
168
82
|
|
|
169
|
-
Language-agnostic semantic affinity
|
|
170
|
-
|
|
171
|
-
|
|
83
|
+
Language-agnostic semantic affinity is useful for ranking adjacent context but
|
|
84
|
+
should stay low-authority: don't let shared token frequency alone force packet
|
|
85
|
+
merges; use it for `boundary_files` or candidate explanations unless a
|
|
172
86
|
deterministic edge corroborates the relationship.
|
|
173
87
|
|
|
174
|
-
##
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
fixture and real repositories before adding deeper Python framework handling
|
|
194
|
-
|
|
195
|
-
Keep `AuditTask` as the coverage identity and `AuditResult[]` as the ingestion
|
|
196
|
-
contract.
|
|
197
|
-
|
|
198
|
-
## File-splitting priorities
|
|
199
|
-
|
|
200
|
-
The largest implementation files should be split conservatively and
|
|
201
|
-
behavior-preservingly:
|
|
202
|
-
|
|
203
|
-
- move CLI command families out of `src/cli.ts`
|
|
204
|
-
- move language metadata tables out of file inventory logic
|
|
205
|
-
- move graph manifest/project-file parsers out of `src/extractors/graph.ts`
|
|
206
|
-
- split selective-deepening task builders by trigger type
|
|
207
|
-
- keep packetization, recovery, and schema changes easier to review
|
|
208
|
-
|
|
209
|
-
Run the focused tests for each area before and after a split, then run
|
|
210
|
-
`npm test`.
|
|
88
|
+
## Production readiness
|
|
89
|
+
|
|
90
|
+
Drive priorities from field trials, not speculation: run representative
|
|
91
|
+
repositories through planning, validate the bundle (`audit-code validate`), and
|
|
92
|
+
compare `audit_plan_metrics.json` (packet count, weak-packet count, cohesion,
|
|
93
|
+
merge/boundary edge kinds) across runs. Promote an extractor or planner change
|
|
94
|
+
only when those metrics expose a deterministic gap — and prefer improving shared
|
|
95
|
+
graph resolution or generic analyzer ownership roots before adding another
|
|
96
|
+
ecosystem-specific parser.
|
|
97
|
+
|
|
98
|
+
Before treating a build as production-ready, verify the full review loop in one
|
|
99
|
+
real host (`prepare-dispatch` → worker reviews each packet → `submit-packet` →
|
|
100
|
+
`merge-and-ingest` → `validate`), then run `npm run verify:release` from a clean
|
|
101
|
+
checkout. On Windows, runtime validation runs package-manager shims (`npm`,
|
|
102
|
+
`npx`, `pnpm`, `yarn`) through the command shell so `.cmd` wrappers execute
|
|
103
|
+
reliably — keep that covered when changing runtime command execution. If the
|
|
104
|
+
final `audit-report.md` cannot be copied into the target repo due to local
|
|
105
|
+
permissions, completion still succeeds and the artifact copy is authoritative.
|
|
106
|
+
</content>
|
package/docs/history.md
CHANGED
|
@@ -38,3 +38,29 @@ The old remediation baseline recorded fixes across:
|
|
|
38
38
|
|
|
39
39
|
Current readiness is tracked in `docs/product.md`, `docs/operator-guide.md`,
|
|
40
40
|
`docs/contracts.md`, `docs/release.md`, and `docs/development.md`.
|
|
41
|
+
|
|
42
|
+
## Monorepo migration & drift reconciliation (2026-05 → 2026-06)
|
|
43
|
+
|
|
44
|
+
The auditor and remediator began as standalone repos (`auditor-lambda`,
|
|
45
|
+
`remediator-lambda`) and were merged into this npm-workspaces monorepo on a
|
|
46
|
+
shared `@audit-tools/shared` foundation. `providers/` and `quota/` had been
|
|
47
|
+
copy-pasted into both tools and forked in place; the ten resulting drift bugs
|
|
48
|
+
were all fixed by centralizing the forked logic into `shared` (one source of
|
|
49
|
+
truth). Durable decisions from that work:
|
|
50
|
+
|
|
51
|
+
- **Access scoping is JSON, not MCP.** `AccessDeclaration` rides on the step
|
|
52
|
+
contract, so it works with any host; the MCP servers stay compatibility
|
|
53
|
+
adapters over the same contract.
|
|
54
|
+
- **`--dangerously-skip-permissions` defaults ON for the remediator, OFF for the
|
|
55
|
+
auditor.** The remediator applies changes unattended and cannot pause; the
|
|
56
|
+
auditor is read-only. The asymmetry is intentional and the flag is overridable.
|
|
57
|
+
- **The remediator's machine input is `audit-findings.json`, not the Markdown
|
|
58
|
+
report.** `audit-report.md` is human-facing; a Markdown file handed to the
|
|
59
|
+
remediator flows through the free-form LLM extractor, not a deterministic parse.
|
|
60
|
+
- **Prompts use one strict path** — no "or / unless / if-available" fallbacks.
|
|
61
|
+
|
|
62
|
+
Large files were then broken up as behaviour-preserving pure moves (`cli.ts` from
|
|
63
|
+
4072 lines to a thin dispatcher plus `src/cli/*` handlers; `graph.ts`,
|
|
64
|
+
`reviewPackets.ts`, `internalExecutors.ts`, and the generated language table all
|
|
65
|
+
split out). The sprint-by-sprint handoff docs that tracked this work were removed
|
|
66
|
+
once shipped; this section is their durable residue.
|
package/docs/product.md
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# Product
|
|
2
2
|
|
|
3
|
+
> Normative definition: [`spec/audit-goals.md`](../spec/audit-goals.md) — product
|
|
4
|
+
> identity, invariants, deterministic/LLM boundaries, and completion. This page is
|
|
5
|
+
> the product overview.
|
|
6
|
+
|
|
3
7
|
## Canonical surface
|
|
4
8
|
|
|
5
9
|
The primary product is `/audit-code` in conversation.
|
|
@@ -12,8 +16,8 @@ Normal product usage should:
|
|
|
12
16
|
- keep semantic review with the active conversation agent by default
|
|
13
17
|
- advance the audit automatically until it completes or no further automatic progress is possible
|
|
14
18
|
|
|
15
|
-
The CLI is backend infrastructure, a local development harness, and a
|
|
16
|
-
|
|
19
|
+
The CLI is backend infrastructure, a local development harness, and a repo-local
|
|
20
|
+
fallback. It is not the preferred end-user mental model.
|
|
17
21
|
|
|
18
22
|
## Supported surfaces
|
|
19
23
|
|
|
@@ -48,17 +52,14 @@ provider adapters such as `claude-code`, `opencode`, `subprocess-template`, and
|
|
|
48
52
|
|
|
49
53
|
## Language strategy
|
|
50
54
|
|
|
51
|
-
Packet quality should not depend on one language ecosystem.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
language-neutral graph and artifact contracts.
|
|
55
|
+
Packet quality should not depend on one language ecosystem. Every language
|
|
56
|
+
analyzer must write into the same language-neutral graph and artifact contracts;
|
|
57
|
+
JS/TS and Python get the richest early support only because they are common.
|
|
55
58
|
|
|
56
59
|
Do not keep expanding support by adding one bespoke parser per ecosystem unless
|
|
57
|
-
there is concrete repository demand or a high-value deterministic signal.
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
observable, maintainable, and extensible through generic ownership hints rather
|
|
61
|
-
than through an open-ended list of file-format handlers.
|
|
60
|
+
there is concrete repository demand or a high-value deterministic signal. Prefer
|
|
61
|
+
making graph planning observable and extensible through generic ownership hints
|
|
62
|
+
over an open-ended list of file-format handlers.
|
|
62
63
|
|
|
63
64
|
The shared graph should model:
|
|
64
65
|
|
|
@@ -71,111 +72,43 @@ The shared graph should model:
|
|
|
71
72
|
subprocesses
|
|
72
73
|
- edge confidence, direction, and reason
|
|
73
74
|
|
|
74
|
-
Graph evidence
|
|
75
|
+
Graph evidence is tiered, strongest first:
|
|
75
76
|
|
|
76
|
-
- deterministic directed edges
|
|
77
|
-
test/source links,
|
|
78
|
-
- deterministic ownership edges
|
|
79
|
-
roots
|
|
77
|
+
- deterministic directed edges (imports, entrypoints, route handlers,
|
|
78
|
+
test/source links, resolved analyzer references)
|
|
79
|
+
- deterministic ownership edges (package, module, project, or subsystem roots)
|
|
80
80
|
- analyzer-supplied ownership roots, normalized into graph reference edges
|
|
81
|
-
- language-agnostic semantic affinity
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
Semantic affinity can help rank `boundary_files`, explain possible context, and
|
|
85
|
-
highlight missing deterministic extraction. It should not merge packets on
|
|
86
|
-
frequency alone because common tokens like `user`, `request`, `client`,
|
|
87
|
-
`config`, and `error` often connect unrelated code.
|
|
88
|
-
|
|
89
|
-
Language-specific adapters should enrich the graph without changing packet or
|
|
90
|
-
result contracts:
|
|
81
|
+
- language-agnostic semantic affinity (shared unusual domain terms, nearby
|
|
82
|
+
paths, identifier overlap, embeddings)
|
|
91
83
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
- Other ecosystems: prefer analyzer-supplied ownership roots, ctags/tree-sitter,
|
|
97
|
-
LSP output, or existing external analyzer data before adding new bespoke
|
|
98
|
-
manifest parsers
|
|
84
|
+
Semantic affinity can rank `boundary_files`, explain possible context, and
|
|
85
|
+
highlight missing extraction — but it must not merge packets on frequency alone,
|
|
86
|
+
because common tokens (`user`, `request`, `client`, `config`, `error`) often connect
|
|
87
|
+
unrelated code.
|
|
99
88
|
|
|
100
|
-
The fallback
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
syntax-resolution should run only when the repository has repo-local ESLint
|
|
106
|
-
configuration, not merely because an ESLint binary is installed.
|
|
89
|
+
The fallback must stay useful even when a language has no deep analyzer:
|
|
90
|
+
manifests, path structure, tests, config, and external analyzer output can seed a
|
|
91
|
+
graph with lower-confidence edges. Deterministic tool runners should be
|
|
92
|
+
project-config aware — e.g. ESLint syntax-resolution runs only when the repo has
|
|
93
|
+
local ESLint configuration, not merely because the binary is installed.
|
|
107
94
|
|
|
108
95
|
## Packet planning
|
|
109
96
|
|
|
110
|
-
`AuditTask`
|
|
111
|
-
worker-facing unit of understanding.
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
- use planner observability to tune which edge kinds change grouping, which
|
|
116
|
-
files stay boundary-only, and which extractor gaps leave weakly explained
|
|
117
|
-
packets
|
|
118
|
-
- extend and exercise the generic ownership-root input so external analyzers
|
|
119
|
-
can say "these files belong to module root X" without a new parser for every
|
|
120
|
-
ecosystem
|
|
121
|
-
- keep graph and manifest parser code modular before broadening it further
|
|
122
|
-
- exercise deterministic Python import, package, and test/source graph support
|
|
123
|
-
on fixture and real repositories to find the next highest-value gaps
|
|
124
|
-
- use language-agnostic semantic affinity only as low-authority context unless
|
|
125
|
-
corroborated by deterministic graph evidence
|
|
97
|
+
`AuditTask` is the deterministic coverage identity; `ReviewPacket` is the
|
|
98
|
+
worker-facing unit of understanding. Packetization aims for packets that read as
|
|
99
|
+
coherent code-ownership or execution-flow units, not merely budget-sized bundles:
|
|
100
|
+
|
|
126
101
|
- build packets around coherent subsystems and execution flows
|
|
127
|
-
- keep shared fan-in files visible as context
|
|
128
|
-
|
|
129
|
-
- distinguish strong edges from weak or heuristic
|
|
130
|
-
- group tests with the code they verify when
|
|
131
|
-
-
|
|
132
|
-
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
or execution-flow units, not merely budget-sized bundles.
|
|
138
|
-
|
|
139
|
-
## Production readiness
|
|
140
|
-
|
|
141
|
-
The package publication path is operational. The release gate, packaged install
|
|
142
|
-
smoke tests, and GitHub Actions Trusted Publishing path are routine
|
|
143
|
-
maintenance. The remaining production work is product confidence rather than a
|
|
144
|
-
new contract shape.
|
|
145
|
-
|
|
146
|
-
Readiness should be judged through three checks:
|
|
147
|
-
|
|
148
|
-
- field-trial quality: run real repositories through planning, validate
|
|
149
|
-
artifacts, and use `audit_plan_metrics.json` to track packet count, weak
|
|
150
|
-
packet count, average cohesion, merge edge kinds, and weak-packet samples
|
|
151
|
-
- full-loop behavior: prove `next-step` capability routing, packet dispatch,
|
|
152
|
-
worker review, `submit-packet`, `merge-and-ingest`, selective deepening,
|
|
153
|
-
runtime validation, and final `audit-report.md` promotion in at least one
|
|
154
|
-
real host flow
|
|
155
|
-
- release hygiene: keep `npm run verify:release`, linked smoke, packaged
|
|
156
|
-
smoke, tarball preview, and Trusted Publishing green from a clean checkout
|
|
157
|
-
|
|
158
|
-
Extractor work should follow field-trial evidence. Fix deterministic graph gaps
|
|
159
|
-
when metrics show them, prefer analyzer-supplied ownership roots before new
|
|
160
|
-
manifest parsers, and keep semantic affinity as context unless deterministic
|
|
161
|
-
evidence corroborates it.
|
|
162
|
-
|
|
163
|
-
The current production-readiness focus is:
|
|
164
|
-
|
|
165
|
-
- use the remediator packet-dispatch loop and Polar runtime-confirmed loop as
|
|
166
|
-
regression evidence for Windows runtime execution, runtime follow-up, final
|
|
167
|
-
synthesis, and report-promotion behavior
|
|
168
|
-
- use the remediator contract-link field trial as regression evidence that
|
|
169
|
-
small schema, workflow, package script, and type contract suites can become
|
|
170
|
-
graph evidence without broad directory merges
|
|
171
|
-
- rerun `remediator-lambda` after its Windows `EBUSY` test cleanup issue is
|
|
172
|
-
fixed
|
|
173
|
-
- keep exercising analyzer ownership roots on real repositories before adding
|
|
174
|
-
ecosystem-specific manifest parsers
|
|
175
|
-
- keep host setup claims aligned with verified Codex, Claude Desktop, OpenCode,
|
|
176
|
-
VS Code, and Antigravity behavior
|
|
177
|
-
- split high-concentration implementation files only after the packetization
|
|
178
|
-
and schema contracts stay easy to review
|
|
102
|
+
- keep shared fan-in files visible as context rather than merging large parts of
|
|
103
|
+
the repo into one packet
|
|
104
|
+
- distinguish strong (deterministic) edges from weak or heuristic ones
|
|
105
|
+
- group tests with the code they verify when it aids review
|
|
106
|
+
- carry packet rationale, key edges, entrypoints, and boundary files
|
|
107
|
+
- prefer the generic ownership-root contract (analyzers naming module roots) over
|
|
108
|
+
a new parser per ecosystem, and keep graph/manifest parsing modular
|
|
109
|
+
|
|
110
|
+
Planner observability (`audit_plan_metrics.json`: cohesion, fan-in/out, boundary
|
|
111
|
+
crossings, weak-packet gaps) is how extraction gaps are found and prioritized.
|
|
179
112
|
|
|
180
113
|
## Non-goals
|
|
181
114
|
|