npm - auditor-lambda - Versions diffs - 0.1.0 → 0.2.2 - Mend

auditor-lambda 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/README.md +2 -1
package/audit-code-wrapper-lib.mjs +205 -187
package/dist/adapters/eslint.js +4 -2
package/dist/adapters/npmAudit.js +1 -1
package/dist/cli.js +296 -12
package/dist/coverage.d.ts +0 -1
package/dist/coverage.js +3 -34
package/dist/extractors/bucketing.js +14 -35
package/dist/extractors/disposition.js +8 -9
package/dist/extractors/flows.js +14 -23
package/dist/extractors/pathPatterns.d.ts +19 -0
package/dist/extractors/pathPatterns.js +87 -0
package/dist/extractors/surfaces.js +2 -7
package/dist/io/artifacts.d.ts +23 -1
package/dist/io/artifacts.js +3 -1
package/dist/io/runArtifacts.js +1 -1
package/dist/orchestrator/advance.js +1 -1
package/dist/orchestrator/flowPlanning.d.ts +1 -1
package/dist/orchestrator/flowPlanning.js +21 -28
package/dist/orchestrator/internalExecutors.js +4 -7
package/dist/orchestrator/planning.js +12 -20
package/dist/orchestrator/resultIngestion.js +3 -2
package/dist/orchestrator/runtimeValidation.js +5 -0
package/dist/orchestrator/syntaxResolutionExecutor.js +10 -2
package/dist/orchestrator/taskBuilder.d.ts +7 -2
package/dist/orchestrator/taskBuilder.js +47 -52
package/dist/prompts/renderWorkerPrompt.js +33 -0
package/dist/providers/claudeCodeProvider.js +5 -0
package/dist/providers/constants.d.ts +1 -0
package/dist/providers/constants.js +1 -0
package/dist/providers/index.js +9 -2
package/dist/providers/spawnLoggedCommand.js +4 -0
package/dist/reporting/mergeFindings.js +0 -7
package/dist/reporting/rootCause.d.ts +0 -1
package/dist/reporting/rootCause.js +0 -6
package/dist/reporting/synthesis.js +18 -0
package/dist/supervisor/operatorHandoff.d.ts +2 -0
package/dist/supervisor/operatorHandoff.js +21 -9
package/dist/supervisor/runLedger.js +6 -3
package/dist/supervisor/sessionConfig.js +1 -0
package/dist/types/flowCoverage.d.ts +1 -1
package/dist/types/runLedger.d.ts +1 -1
package/dist/types/runtimeValidation.d.ts +2 -1
package/dist/types/sessionConfig.d.ts +2 -0
package/dist/types/surfaces.d.ts +2 -1
package/dist/types/workerSession.d.ts +4 -0
package/dist/types.d.ts +0 -2
package/dist/validation/auditResults.d.ts +11 -0
package/dist/validation/auditResults.js +118 -0
package/docs/agent-integrations.md +61 -56
package/docs/agent-roles.md +69 -69
package/docs/architecture.md +90 -90
package/docs/artifacts.md +69 -69
package/docs/bootstrap-install.md +1 -1
package/docs/model-selection.md +86 -86
package/docs/next-steps.md +11 -9
package/docs/packaging.md +3 -3
package/docs/pipeline.md +152 -152
package/docs/production-readiness.md +6 -5
package/docs/repo-layout.md +18 -18
package/docs/run-flow.md +5 -5
package/docs/session-config.md +216 -210
package/docs/supervisor.md +70 -70
package/docs/windows-setup.md +139 -139
package/package.json +56 -56
package/schemas/audit-code-v1alpha1.schema.json +80 -76
package/schemas/audit_result.schema.json +54 -48
package/schemas/audit_state.schema.json +2 -2
package/schemas/audit_task.schema.json +60 -49
package/schemas/blind_spot_register.schema.json +13 -3
package/schemas/coverage_matrix.schema.json +14 -17
package/schemas/critical_flows.schema.json +6 -3
package/schemas/external_analyzer_results.schema.json +10 -4
package/schemas/file_disposition.schema.json +33 -33
package/schemas/finding.schema.json +86 -62
package/schemas/flow_coverage.schema.json +53 -44
package/schemas/graph_bundle.schema.json +12 -6
package/schemas/merged_findings.schema.json +7 -2
package/schemas/risk_register.schema.json +5 -1
package/schemas/root_cause_clusters.schema.json +2 -5
package/schemas/runtime_validation_report.schema.json +34 -34
package/schemas/runtime_validation_tasks.schema.json +4 -1
package/schemas/surface_manifest.schema.json +4 -1
package/schemas/synthesis_report.schema.json +61 -61
package/schemas/unit_manifest.schema.json +10 -3
package/skills/audit-code/SKILL.md +37 -37
package/skills/audit-code/audit-code.prompt.md +54 -54

package/dist/validation/auditResults.js ADDED Viewed

@@ -0,0 +1,118 @@
+const REQUIRED_FINDING_FIELDS = [ // Fields validateFinding requires to be present and non-blank on every finding.
+    "id",
+    "title",
+    "category",
+    "severity",
+    "confidence",
+    "lens",
+    "summary",
+];
+const VALID_SEVERITIES = new Set(["critical", "high", "medium", "low", "info"]); // Allowed values for finding.severity.
+const VALID_CONFIDENCES = new Set(["high", "medium", "low"]); // Allowed values for finding.confidence.
+function validateFinding(finding, label, taskId, resultIndex) { // Checks one finding; returns an array of issue objects (empty when nothing is wrong). label prefixes each issue's field path.
+    const issues = [];
+    for (const field of REQUIRED_FINDING_FIELDS) { // Report each required field that is undefined, null, or blank after String() coercion and trim.
+        const value = finding[field];
+        if (value === undefined || value === null || String(value).trim() === "") {
+            issues.push({
+                result_index: resultIndex,
+                task_id: taskId,
+                severity: "error",
+                field: `${label}.${field}`,
+                message: `Required field '${field}' is missing or empty.`,
+            });
+        }
+    }
+    if (finding.severity && !VALID_SEVERITIES.has(finding.severity)) { // Enum check is skipped for falsy values; undefined, null and "" are already reported by the loop above.
+        issues.push({
+            result_index: resultIndex,
+            task_id: taskId,
+            severity: "error",
+            field: `${label}.severity`,
+            message: `Invalid severity '${finding.severity}'. Must be one of: ${[...VALID_SEVERITIES].join(", ")}.`,
+        });
+    }
+    if (finding.confidence && !VALID_CONFIDENCES.has(finding.confidence)) { // Same pattern as severity: only truthy values are checked against the allowed set.
+        issues.push({
+            result_index: resultIndex,
+            task_id: taskId,
+            severity: "error",
+            field: `${label}.confidence`,
+            message: `Invalid confidence '${finding.confidence}'. Must be one of: ${[...VALID_CONFIDENCES].join(", ")}.`,
+        });
+    }
+    if (!finding.affected_files || finding.affected_files.length === 0) { // Also taken when affected_files is missing entirely, not only when it is an empty array.
+        issues.push({
+            result_index: resultIndex,
+            task_id: taskId,
+            severity: "error",
+            field: `${label}.affected_files`,
+            message: "affected_files is empty — at least one file location is required.",
+        });
+    }
+    else {
+        for (let k = 0; k < finding.affected_files.length; k++) { // One issue per entry whose path is missing or blank.
+            const af = finding.affected_files[k];
+            if (!af.path?.trim()) { // NOTE(review): throws a TypeError if af is null/undefined or af.path is a truthy non-string — confirm entries are shape-checked upstream.
+                issues.push({
+                    result_index: resultIndex,
+                    task_id: taskId,
+                    severity: "error",
+                    field: `${label}.affected_files[${k}].path`,
+                    message: "affected_files entry has an empty path.",
+                });
+            }
+        }
+    }
+    if (!finding.evidence || finding.evidence.length === 0) { // Also taken when evidence is missing entirely.
+        issues.push({
+            result_index: resultIndex,
+            task_id: taskId,
+            severity: "error",
+            field: `${label}.evidence`,
+            message: "evidence is empty — at least one quoted or referenced excerpt from the reviewed file is required for every finding.",
+        });
+    }
+    else {
+        const hasSubstantiveEntry = finding.evidence.some((e) => e.trim().length > 0); // NOTE(review): assumes evidence is an array of strings; a non-string entry can make e.trim() throw — confirm.
+        if (!hasSubstantiveEntry) {
+            issues.push({
+                result_index: resultIndex,
+                task_id: taskId,
+                severity: "error",
+                field: `${label}.evidence`,
+                message: "All evidence entries are empty strings.",
+            });
+        }
+    }
+    return issues;
+}
+export function validateAuditResults(results, tasks) { // Validates every result and each of its findings; returns one flat array of issue objects.
+    const issues = [];
+    const taskMap = new Map(tasks.map((t) => [t.task_id, t])); // NOTE(review): taskMap is built but never read in this function; tasks.map still throws if tasks is null/undefined.
+    for (let i = 0; i < results.length; i++) {
+        const result = results[i];
+        const taskId = result.task_id ?? `result[${i}]`; // Fall back to a positional label when the result carries no task_id.
+        if (!result.reviewed_ranges || result.reviewed_ranges.length === 0) { // Missing or empty reviewed_ranges is reported as an error-severity issue.
+            issues.push({
+                result_index: i,
+                task_id: taskId,
+                severity: "error",
+                field: "reviewed_ranges",
+                message: "reviewed_ranges is empty — no proof of file reading was recorded. " +
+                    "Each result must include the line ranges actually read.",
+            });
+        }
+        for (let j = 0; j < (result.findings ?? []).length; j++) { // A null/undefined findings list is treated as empty.
+            const finding = result.findings[j];
+            const label = `findings[${j}]`;
+            issues.push(...validateFinding(finding, label, taskId, i));
+        }
+    }
+    return issues;
+}
+export function formatAuditResultIssues(issues) { // Renders each issue as an indented "[severity] task_id / field: message" line and joins the lines with newlines.
+    return issues
+        .map((issue) => `  [${issue.severity}] ${issue.task_id} / ${issue.field}: ${issue.message}`)
+        .join("\n");
+}

package/docs/agent-integrations.md CHANGED Viewed

@@ -80,7 +80,7 @@ Run `audit-code install` from the target repository root, then open `.audit-code
 There is no verified project-local slash-command install surface for Claude Desktop in this repository today, so the intended path is:
-1. import `.audit-code/install/audit-code.prompt.md` into Claude Desktop's prompt or instruction surface
+1. import `.audit-code/install/audit-code.import.md` into Claude Desktop's prompt or instruction surface
 2. invoke `/audit-code` conversationally inside Claude Desktop
 ### Antigravity
@@ -89,7 +89,7 @@ Run `audit-code install` from the target repository root, then open `.audit-code
 There is no verified repo-local slash-command install surface for Antigravity in this repository today, so the intended path is:
-1. import `.audit-code/install/audit-code.prompt.md` into Antigravity's prompt or instruction surface when available
+1. import `.audit-code/install/audit-code.import.md` into Antigravity's prompt or instruction surface when available
 2. invoke `/audit-code` conversationally inside Antigravity
 3. fall back to `audit-code` from an Antigravity-managed terminal only when you intentionally need the repo-local backend wrapper
@@ -116,19 +116,19 @@ Use the backend wrapper only when you intentionally need the repo-local fallback
 It:
 - defaults artifacts to `<repo-root>/.audit-artifacts`
-- persists audit continuity there
-- calls `run-to-completion` by default
-- creates fresh worker runs behind the scenes
+- persists audit continuity there
+- calls `run-to-completion` by default
+- creates fresh worker runs behind the scenes
 - returns a stable top-level JSON contract with `contract_version: "audit-code/v1alpha1"`
 ## Minimal repo-local flow
 From the target repository root:
-```bash
-audit-code
-```
+```bash
+audit-code
+```
 Inspect the returned JSON and continue invoking the same entrypoint until either:
 - `next_likely_step === null`
@@ -136,8 +136,10 @@ Inspect the returned JSON and continue invoking the same entrypoint until either
 Terminal interpretation:
 - `audit_state.status === "complete"` means the audit finished end to end.
-- `audit_state.status === "blocked"` means the wrapper exhausted automatic work and the remaining review needs imported results or an interactive provider.
+- `audit_state.status === "blocked"` means the wrapper exhausted automatic work and the remaining review still needs imported results or a provider-capable continuation path.
+When `provider` is configured as `claude-code`, `opencode`, `subprocess-template`, or `vscode-task`, the wrapper can now continue through audit-task review in the same invocation as long as that provider can write structured `AuditResult[]` output and hand control back to the bounded worker command.
 When additional evidence exists, pass it into the same wrapper:
 ```bash
@@ -159,31 +161,34 @@ Use when you want the supervisor to stay entirely local.
 This requires no external agent CLI. The supervisor launches fresh worker subprocesses that call the bounded `worker-run` entrypoint for deterministic stages.
 When the remaining work is genuinely audit-task review, `local-subprocess` stops in a terminal blocked handoff instead of pretending more automatic progress is available.
-This is the safest default backend when the repository is already available locally.
-### claude-code
-Use when Claude Code is installed and authenticated on the machine.
-The built-in adapter launches a fresh Claude Code print-mode session for each worker run.
-Recommended when you want the audit supervisor to delegate bounded tasks into Claude Code without manually driving each step.
-### opencode
-Use when OpenCode is installed and authenticated on the machine.
-The built-in adapter launches a fresh `opencode run ...` session for each worker run.
-Recommended when OpenCode is the preferred local agent surface.
-### subprocess-template
-Use when you need a generic bridge.
-This is the escape hatch for editors, launchers, or agent shells that do not yet have a dedicated provider adapter. The supervisor renders a templated command and executes it as a fresh worker run.
+This is the safest default backend when the repository is already available locally.
+### claude-code
+Use when Claude Code is installed and authenticated on the machine.
+The built-in adapter launches a fresh Claude Code print-mode session for each worker run.
+When audit-task review is pending, the provider prompt now asks Claude Code to write structured audit results and then hand back to the bounded worker command so the same wrapper invocation can continue.
+Recommended when you want the audit supervisor to delegate bounded tasks into Claude Code without manually driving each step.
+### opencode
+Use when OpenCode is installed and authenticated on the machine.
+The built-in adapter launches a fresh `opencode run ...` session for each worker run.
+When audit-task review is pending, the provider prompt now asks OpenCode to write structured audit results and then hand back to the bounded worker command so the same wrapper invocation can continue.
+Recommended when OpenCode is the preferred local agent surface.
+### subprocess-template
+Use when you need a generic bridge.
+This is the escape hatch for editors, launchers, or agent shells that do not yet have a dedicated provider adapter. The supervisor renders a templated command and executes it as a fresh worker run.
+For provider-assisted review stages, that bridge should write `task.audit_results_path` and then execute `task.worker_command`.
 ### vscode-task
 Use when you already have a repository-local or machine-local task bridge and want the supervisor to call that bridge through a command template.
@@ -193,11 +198,11 @@ Treat this as an advanced backend adapter rather than the default path.
 ### Claude Code
 Use the repo-local `audit-code` wrapper from the target repository root, or set `provider` to `claude-code` in `.audit-artifacts/session-config.json` so the supervisor delegates bounded worker runs into Claude Code.
-### OpenCode
-Use the same repo-local `audit-code` wrapper, or set `provider` to `opencode` so the supervisor delegates bounded worker runs into OpenCode.
+### OpenCode
+Use the same repo-local `audit-code` wrapper, or set `provider` to `opencode` so the supervisor delegates bounded worker runs into OpenCode.
 ### VS Code
 Run `audit-code install` once from the target repository root, then use `/audit-code` from chat.
@@ -205,11 +210,11 @@ Run `audit-code install` once from the target repository root, then use `/audit-
 The backend fallback is still available from the integrated terminal and should keep `local-subprocess` unless you specifically need a task bridge.
 If you already have a launcher or task surface that should own fresh worker windows, use `vscode-task` or `subprocess-template`.
-### Google Antigravity
-No dedicated Antigravity provider adapter is shipped today.
+### Google Antigravity
+No dedicated Antigravity provider adapter is shipped today.
 Current recommended usage is one of these:
 - use the skill-first conversational contract as the primary surface
@@ -218,14 +223,14 @@ Current recommended usage is one of these:
 - use `subprocess-template` if you have a reliable Antigravity-side launcher bridge
 That keeps the product usable in Antigravity now without pretending that a native adapter already exists.
-## Model-selection rule
-The product direction remains skill-first:
-- in conversation, use the active conversation model by default
-- for backend CLI delegation, let the chosen provider own its own model-selection behavior unless explicitly configured otherwise
+## Model-selection rule
+The product direction remains skill-first:
+- in conversation, use the active conversation model by default
+- for backend CLI delegation, let the chosen provider own its own model-selection behavior unless explicitly configured otherwise
 ## Practical recommendation
 For a polished operator experience today:
@@ -233,5 +238,5 @@ For a polished operator experience today:
 1. treat `/audit-code` as the canonical user-facing contract
 2. use `audit-code install` first, and fall back to `audit-code prompt-path` only for hosts that still require manual prompt import
 3. use `audit-code` as the repo-local backend fallback
-4. prefer `local-subprocess` unless you need interactive review to continue through agent tasks
+4. prefer `local-subprocess` unless you want interactive review to continue automatically through agent tasks
 5. use `subprocess-template` only when integrating a non-native editor or launcher surface

package/docs/agent-roles.md CHANGED Viewed

@@ -1,69 +1,69 @@
-# Agent roles
-## Principles
-Each agent should consume bounded artifacts and return structured outputs. Agents should not invent process rules.
-## Roles
-### intake-normalizer
-- validates repository intake artifacts
-- flags suspicious exclusions
-- confirms stack profile
-### structural-mapper
-- reviews extracted units, surfaces, and graph artifacts
-- resolves ambiguous file classifications
-- flags missing boundaries
-### blind-spot-mapper
-- identifies repo-specific blind spots tools may miss
-- flags hidden operational or security-critical surfaces
-- proposes additional lenses or dynamic checks
-### correctness-auditor
-- checks whether code behavior appears to match intent
-- focuses on edge cases, defaults, assumptions, and branch handling
-### architecture-auditor
-- inspects layering, boundaries, coupling, abstraction fit, and dependency direction
-### security-auditor
-- inspects trust boundaries, auth/authz, validation, secret handling, risky sinks, and exploitability
-### reliability-auditor
-- inspects retries, timeouts, idempotency, partial failures, crash consistency, and concurrency risk
-### performance-auditor
-- inspects hot paths, repeated work, query inefficiency, algorithmic issues, memory pressure, and scalability risk
-### data-integrity-auditor
-- inspects invariants, migrations, transactional boundaries, schema drift, consistency, and race conditions
-### test-auditor
-- inspects test adequacy, missing negative-path coverage, brittle tests, and false confidence
-### operability-auditor
-- inspects logging, metrics, tracing, debuggability, startup validation, and runtime observability
-### cross-cutting-auditor
-- audits repo-wide themes such as auth, retries, migrations, config validation, feature flags, and secrets flow
-### synthesizer
-- merges duplicate findings
-- clusters root causes
-- prioritizes fixes
-- identifies quick wins vs structural work
+# Agent roles
+## Principles
+Each agent should consume bounded artifacts and return structured outputs. Agents should not invent process rules.
+## Roles
+### intake-normalizer
+- validates repository intake artifacts
+- flags suspicious exclusions
+- confirms stack profile
+### structural-mapper
+- reviews extracted units, surfaces, and graph artifacts
+- resolves ambiguous file classifications
+- flags missing boundaries
+### blind-spot-mapper
+- identifies repo-specific blind spots tools may miss
+- flags hidden operational or security-critical surfaces
+- proposes additional lenses or dynamic checks
+### correctness-auditor
+- checks whether code behavior appears to match intent
+- focuses on edge cases, defaults, assumptions, and branch handling
+### architecture-auditor
+- inspects layering, boundaries, coupling, abstraction fit, and dependency direction
+### security-auditor
+- inspects trust boundaries, auth/authz, validation, secret handling, risky sinks, and exploitability
+### reliability-auditor
+- inspects retries, timeouts, idempotency, partial failures, crash consistency, and concurrency risk
+### performance-auditor
+- inspects hot paths, repeated work, query inefficiency, algorithmic issues, memory pressure, and scalability risk
+### data-integrity-auditor
+- inspects invariants, migrations, transactional boundaries, schema drift, consistency, and race conditions
+### test-auditor
+- inspects test adequacy, missing negative-path coverage, brittle tests, and false confidence
+### operability-auditor
+- inspects logging, metrics, tracing, debuggability, startup validation, and runtime observability
+### cross-cutting-auditor
+- audits repo-wide themes such as auth, retries, migrations, config validation, feature flags, and secrets flow
+### synthesizer
+- merges duplicate findings
+- clusters root causes
+- prioritizes fixes
+- identifies quick wins vs structural work

package/docs/architecture.md CHANGED Viewed

@@ -1,90 +1,90 @@
-# Architecture
-## Objective
-`auditor-lambda` is a portable code-auditing framework for arbitrary repositories. It separates deterministic extraction from bounded LLM judgment so that large or mixed-language codebases can be audited systematically.
-## Design principles
-1. Tool outputs first
-2. Artifact-driven orchestration
-3. Bounded LLM tasks
-4. Explicit coverage accounting
-5. Multi-pass review for critical code
-6. Strict schemas for interoperability
-## System layers
-### 1. Intake
-- file discovery
-- stack detection
-- ignore handling
-- normalization
-### 2. Extractors
-- service and package detection
-- route, job, command, workflow extraction
-- file bucketing
-- graph extraction
-### 3. Mechanical analyzers
-- lint
-- typecheck
-- tests
-- test coverage
-- secret scanning
-- dependency scanning
-- static security scanning
-- complexity and duplication metrics
-### 4. Orchestrator
-- builds audit units
-- assigns passes and lenses
-- chunks large files
-- tracks line coverage and pass overlap
-- requeues uncovered ranges
-### 5. LLM agents
-- ambiguous classification
-- blind-spot mapping
-- per-lens audits
-- cross-cutting audits
-- synthesis
-### 6. Validation
-- targeted runtime checks
-- startup/config probes
-- adversarial repros
-### 7. Reporting
-- normalized findings
-- coverage matrices
-- root-cause clustering
-- remediation planning
-## Core pipeline
-1. Intake and normalize repository
-2. Extract structure and graph artifacts
-3. Run mechanical analyzers
-4. Build audit units and risk register
-5. Run blind-spot mapping
-6. Run lens-based unit audits
-7. Run cross-cutting audits
-8. Run dynamic validation on targeted cases
-9. Verify file and line coverage
-10. Synthesize findings and remediation plan
-## Portability rules
-- Tool-specific collectors should write into tool-agnostic JSON artifacts.
-- LLM prompts should consume artifacts, not raw repos by default.
-- All review work should be attributable to files, line ranges, lenses, and passes.
-- Coverage gaps should be machine-detectable.
+# Architecture
+## Objective
+`auditor-lambda` is a portable code-auditing framework for arbitrary repositories. It separates deterministic extraction from bounded LLM judgment so that large or mixed-language codebases can be audited systematically.
+## Design principles
+1. Tool outputs first
+2. Artifact-driven orchestration
+3. Bounded LLM tasks
+4. Explicit coverage accounting
+5. Multi-pass review for critical code
+6. Strict schemas for interoperability
+## System layers
+### 1. Intake
+- file discovery
+- stack detection
+- ignore handling
+- normalization
+### 2. Extractors
+- service and package detection
+- route, job, command, workflow extraction
+- file bucketing
+- graph extraction
+### 3. Mechanical analyzers
+- lint
+- typecheck
+- tests
+- test coverage
+- secret scanning
+- dependency scanning
+- static security scanning
+- complexity and duplication metrics
+### 4. Orchestrator
+- builds audit units
+- assigns passes and lenses
+- chunks large files
+- tracks line coverage and pass overlap
+- requeues uncovered ranges
+### 5. LLM agents
+- ambiguous classification
+- blind-spot mapping
+- per-lens audits
+- cross-cutting audits
+- synthesis
+### 6. Validation
+- targeted runtime checks
+- startup/config probes
+- adversarial repros
+### 7. Reporting
+- normalized findings
+- coverage matrices
+- root-cause clustering
+- remediation planning
+## Core pipeline
+1. Intake and normalize repository
+2. Extract structure and graph artifacts
+3. Run mechanical analyzers
+4. Build audit units and risk register
+5. Run blind-spot mapping
+6. Run lens-based unit audits
+7. Run cross-cutting audits
+8. Run dynamic validation on targeted cases
+9. Verify file and line coverage
+10. Synthesize findings and remediation plan
+## Portability rules
+- Tool-specific collectors should write into tool-agnostic JSON artifacts.
+- LLM prompts should consume artifacts, not raw repos by default.
+- All review work should be attributable to files, line ranges, lenses, and passes.
+- Coverage gaps should be machine-detectable.