npm - auditor-lambda - Versions diffs - 0.3.12 → 0.3.14 - Mend

auditor-lambda 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/README.md +20 -24
package/audit-code-wrapper-lib.mjs +52 -53
package/dist/cli.js +43 -6
package/dist/coverage.js +3 -1
package/dist/extractors/disposition.js +8 -1
package/dist/extractors/graph.d.ts +3 -1
package/dist/extractors/graph.js +1147 -67
package/dist/extractors/graphManifestEdges.d.ts +14 -0
package/dist/extractors/graphManifestEdges.js +1158 -0
package/dist/extractors/graphPathUtils.d.ts +5 -0
package/dist/extractors/graphPathUtils.js +75 -0
package/dist/extractors/pathPatterns.d.ts +1 -0
package/dist/extractors/pathPatterns.js +3 -0
package/dist/io/artifacts.d.ts +10 -1
package/dist/io/artifacts.js +23 -3
package/dist/orchestrator/internalExecutors.d.ts +4 -0
package/dist/orchestrator/internalExecutors.js +35 -6
package/dist/orchestrator/reviewPackets.js +1003 -31
package/dist/orchestrator/syntaxResolutionExecutor.js +34 -0
package/dist/types/externalAnalyzer.d.ts +9 -0
package/dist/types/graph.d.ts +3 -0
package/dist/types/reviewPlanning.d.ts +39 -0
package/docs/contracts.md +215 -0
package/docs/development.md +210 -0
package/docs/handoff.md +204 -0
package/docs/history.md +40 -0
package/docs/operator-guide.md +189 -0
package/docs/product.md +185 -0
package/docs/release.md +131 -0
package/package.json +1 -1
package/schemas/audit_plan_metrics.schema.json +347 -0
package/schemas/external_analyzer_results.schema.json +35 -0
package/schemas/graph_bundle.schema.json +47 -2
package/schemas/review_packets.schema.json +160 -0
package/skills/audit-code/SKILL.md +7 -3
package/skills/audit-code/audit-code.prompt.md +4 -1
package/docs/agent-integrations.md +0 -317
package/docs/agent-roles.md +0 -69
package/docs/architecture.md +0 -90
package/docs/artifacts.md +0 -36
package/docs/bootstrap-install.md +0 -139
package/docs/contract.md +0 -54
package/docs/dispatch-implementation-plan.md +0 -302
package/docs/field-trial-bug-report.md +0 -237
package/docs/github-copilot.md +0 -66
package/docs/model-selection.md +0 -97
package/docs/next-steps.md +0 -202
package/docs/packaging.md +0 -120
package/docs/pipeline.md +0 -152
package/docs/product-direction.md +0 -154
package/docs/production-launch-bar.md +0 -92
package/docs/production-readiness.md +0 -58
package/docs/releasing.md +0 -145
package/docs/remediation-baseline.md +0 -75
package/docs/repo-layout.md +0 -30
package/docs/run-flow.md +0 -56
package/docs/session-config.md +0 -319
package/docs/supervisor.md +0 -100
package/docs/usage.md +0 -215
package/docs/windows-setup.md +0 -146
package/docs/workflow-refactor-brief.md +0 -124

package/dist/orchestrator/syntaxResolutionExecutor.js CHANGED Viewed

@@ -1,5 +1,36 @@
+import { existsSync, readFileSync } from "node:fs";
 import { join } from "node:path";
 import { resolveNodeTool, runFirstAvailableCommand } from "./localCommands.js";
+const ESLINT_CONFIG_FILES = [
+    "eslint.config.js",
+    "eslint.config.mjs",
+    "eslint.config.cjs",
+    "eslint.config.ts",
+    "eslint.config.mts",
+    "eslint.config.cts",
+    ".eslintrc",
+    ".eslintrc.js",
+    ".eslintrc.cjs",
+    ".eslintrc.json",
+    ".eslintrc.yml",
+    ".eslintrc.yaml",
+];
+function hasEslintConfig(root) {
+    if (ESLINT_CONFIG_FILES.some((file) => existsSync(join(root, file)))) {
+        return true;
+    }
+    const packageJsonPath = join(root, "package.json");
+    if (!existsSync(packageJsonPath)) {
+        return false;
+    }
+    try {
+        const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8"));
+        return packageJson.eslintConfig !== undefined;
+    }
+    catch {
+        return false;
+    }
+}
 function runTsc(root) {
     const results = [];
     const command = runFirstAvailableCommand(root, [
@@ -35,6 +66,9 @@ function runTsc(root) {
 }
 function runEslint(root) {
     const results = [];
+    if (!hasEslintConfig(root)) {
+        return results;
+    }
     const command = runFirstAvailableCommand(root, [
         ...resolveNodeTool(root, join("node_modules", "eslint", "bin", "eslint.js"), [".", "--ext", ".ts,.js,.tsx,.jsx", "--format", "json"], "eslint . --ext .ts,.js,.tsx,.jsx --format json"),
         {

package/dist/types/externalAnalyzer.d.ts CHANGED Viewed

@@ -11,9 +11,18 @@ export interface ExternalAnalyzerResultItem {
     /** Preserves the analyzer-native payload when consumers need original detail. */
     raw?: unknown;
 }
+/** A normalized analyzer hint that a bounded set of files belongs to a root. */
+export interface ExternalAnalyzerOwnershipRoot {
+    root: string;
+    paths: string[];
+    kind?: string;
+    confidence?: number;
+    reason?: string;
+}
 /** Imported analyzer output captured at a single generation time. */
 export interface ExternalAnalyzerResults {
     tool: string;
     generated_at?: string;
+    ownership_roots?: ExternalAnalyzerOwnershipRoot[];
     results: ExternalAnalyzerResultItem[];
 }

package/dist/types/graph.d.ts CHANGED Viewed

@@ -2,6 +2,9 @@ export interface GraphEdge {
     from: string;
     to: string;
     kind?: string;
+    direction?: "directed" | "undirected";
+    confidence?: number;
+    reason?: string;
 }
 export interface RouteEdge {
     path: string;

package/dist/types/reviewPlanning.d.ts CHANGED Viewed

@@ -1,4 +1,23 @@
 import type { AuditTask, Lens } from "../types.js";
+import type { GraphEdge } from "./graph.js";
+export interface ReviewPacketGraphEdge extends Pick<GraphEdge, "from" | "to" | "kind" | "confidence" | "reason"> {
+}
+export interface ReviewPacketQuality {
+    cohesion_score: number;
+    internal_edge_count: number;
+    boundary_edge_count: number;
+    unexplained_file_count: number;
+}
+export interface WeaklyExplainedPacketSample {
+    packet_id: string;
+    primary_gap: "missing_internal_edges" | "unexplained_files" | "partial_cohesion";
+    file_count: number;
+    sample_file_paths: string[];
+    cohesion_score: number;
+    internal_edge_count: number;
+    boundary_edge_count: number;
+    unexplained_file_count: number;
+}
 export interface ReviewPacket {
     packet_id: string;
     task_ids: string[];
@@ -10,6 +29,10 @@ export interface ReviewPacket {
     total_lines: number;
     priority: NonNullable<AuditTask["priority"]>;
     tags?: string[];
+    entrypoints?: string[];
+    key_edges?: ReviewPacketGraphEdge[];
+    boundary_files?: string[];
+    quality: ReviewPacketQuality;
     rationale: string;
     estimated_tokens: number;
 }
@@ -32,6 +55,22 @@ export interface AuditPlanMetrics {
     largest_packet_id?: string;
     lens_task_counts: Partial<Record<Lens, number>>;
     priority_task_counts: Record<NonNullable<AuditTask["priority"]>, number>;
+    packet_quality: {
+        average_cohesion_score: number;
+        boundary_crossing_count: number;
+        merge_edge_kind_counts: Record<string, number>;
+        boundary_edge_kind_counts: Record<string, number>;
+        orphan_task_count: number;
+        high_fan_in_file_count: number;
+        high_fan_out_file_count: number;
+        weakly_explained_gap_counts: Record<WeaklyExplainedPacketSample["primary_gap"], number>;
+        weakly_explained_file_extension_counts: Record<string, number>;
+        weakly_explained_packet_count: number;
+        weakly_explained_packet_ids: string[];
+        weakly_explained_packet_samples: WeaklyExplainedPacketSample[];
+        largest_unexplained_packet_id?: string;
+        largest_unexplained_packet_files: number;
+    };
     packet_size: {
         single_task_packets: number;
         multi_task_packets: number;

package/docs/contracts.md ADDED Viewed

@@ -0,0 +1,215 @@
+# Contracts
+## Versioned surfaces
+The public contract is schema-first. Hosts, workers, prompts, and examples
+should point at schemas and validated examples instead of duplicating fragile
+field descriptions.
+Important schemas live under `schemas/` (validated examples live under `examples/`), including:
+- `audit-code-v1alpha1.schema.json`
+- `audit_result.schema.json`
+- `audit_task.schema.json`
+- `audit_plan_metrics.schema.json`
+- `graph_bundle.schema.json`
+- `review_packets.schema.json`
+- `runtime_validation_update.example.json` in `examples/`
+## Wrapper envelope
+Until completion, `audit-code` returns a JSON envelope with:
+- `contract_version`
+- `audit_state`
+- `selected_obligation`
+- `selected_executor`
+- `progress_made`
+- `artifacts_written`
+- `progress_summary`
+- `next_likely_step`
+- `handoff`
+On completion, the canonical output is repo-root `audit-report.md`. Intermediate
+`.audit-artifacts/` state is cleaned up when the completed report is promoted.
+## AuditResult
+Workers submit `AuditResult[]` shaped by
+`schemas/audit_result.schema.json`.
+Important rules:
+- `task_id`, `unit_id`, `pass_id`, and `lens` must match the assigned task
+- every finding lens must match the assigned task lens
+- `file_coverage` is required and must include assigned files only
+- `file_coverage[].total_lines` must match the current file line count
+- finding `affected_files` entries must be objects, not strings
+- finding `evidence` must be an array of plain strings
+- lens steward tasks emit `findings: []` plus `verification` metadata
+Validate before ingestion:
+```bash
+audit-code validate-results --results /path/to/results.json
+```
+## Artifact bundle
+The backend stores resumable artifacts under `.audit-artifacts/`, including:
+- `repo_manifest.json`
+- `file_disposition.json`
+- `surface_manifest.json`
+- `critical_flows.json`
+- `graph_bundle.json`
+- `unit_manifest.json`
+- `coverage_matrix.json`
+- `risk_register.json`
+- `audit_tasks.json`
+- `review_packets.json`
+- `audit_plan_metrics.json`
+- `audit_results.jsonl`
+- `runtime_validation_tasks.json`
+- `runtime_validation_report.json`
+- `synthesis_report.json`
+Consumers should treat these as versioned JSON artifacts and validate them with
+`audit-code validate` rather than inferring state from filenames alone.
+## Dispatch packets
+Packet dispatch preserves the existing `AuditTask` and `AuditResult`
+contracts. It changes the worker-facing unit of work.
+Planning artifacts are shaped by:
+- `schemas/review_packets.schema.json`
+- `schemas/audit_plan_metrics.schema.json`
+- `examples/review_packets.example.json`
+- `examples/audit_plan_metrics.example.json`
+Normal packet flow:
+```text
+audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
+conversation launches one worker per dispatch-plan entry
+worker reads entry.prompt_path
+worker submits AuditResult[] through submit-packet
+audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
+```
+Packet artifacts:
+- `<artifacts_dir>/runs/<run_id>/dispatch-plan.json`
+- `<artifacts_dir>/runs/<run_id>/dispatch-result-map.json`
+- `<artifacts_dir>/runs/<run_id>/task-results/*.prompt.md`
+- `<artifacts_dir>/runs/<run_id>/task-results/*.anchors.json` for isolated
+  large-file packets
+- `<artifacts_dir>/runs/<run_id>/task-results/*.json`
+- `<artifacts_dir>/runs/<run_id>/dispatch-warnings.json` when needed
+Workers should reply exactly:
+```text
+valid: <packet_id>, findings=<total finding count>
+```
+## Graph contract
+`graph_bundle.json` is language-neutral. Language-specific extractors may add
+metadata, but consumers should rely on shared edge concepts:
+- `from`
+- `to`
+- `kind`
+- optional `direction`
+- optional `confidence` from 0 to 1
+- optional `reason`
+Packet planning should use graph edges to explain why files belong together,
+not merely to merge every connected group. Weak or high-fan-in edges should
+become context hints rather than unlimited packet expansion.
+Current deterministic import edges include JS/TS import kinds and Python
+`python-import` / `python-from-import` edges when local modules resolve.
+Current deterministic reference edges also include package entrypoints, package
+script links, workspace/project module links, JSON Schema `$ref` links, schema
+contract test links, bounded JSON Schema suite links, bounded GitHub Actions
+workflow suite links, bounded package script suite links, bounded TypeScript
+type contract suite links, and deterministic test/source naming links.
+Consumers should treat graph evidence by authority:
+- deterministic directed edges may drive packet expansion when confidence,
+  budget, and fan-in/fan-out guards allow it
+- ownership edges may cluster small bounded groups, but should remain explainable
+  through `key_edges` and packet `quality`
+- semantic-affinity or NLP-style relationships, if added, should default to
+  low-authority context and candidate `boundary_files` unless corroborated by a
+  deterministic edge
+Bounded suite links are intentionally narrow: they connect small, same-directory
+contract suites such as `*.schema.json` files, `.github/workflows/*.yml`
+files, package-script-seeded `scripts/` files, or TypeScript files under
+`types/` directories without turning broad directory proximity into packet
+evidence.
+Analyzer-supplied ownership roots should use this same graph contract instead
+of requiring packet planners to understand a new language-specific artifact.
+Normalized `external_analyzer_results.json` may include `ownership_roots`;
+structure planning translates each bounded root/path membership into
+`analyzer-ownership-root-link` reference edges. Packet planning then consumes
+those edges through the same bounded `module-ownership-link` clustering path as
+project-file evidence.
+Planner metrics should make it possible to see which edge kinds changed packet
+grouping and which stayed context-only.
+`audit_plan_metrics.packet_quality` records that plan-level evidence through
+`merge_edge_kind_counts`, `boundary_edge_kind_counts`, and weak-packet
+diagnostics. Merge counts only include graph edges that joined distinct task
+groups in a final packet; boundary counts include concrete graph edges that
+remained adjacent context instead of internal packet evidence.
+`weakly_explained_gap_counts` summarizes the primary gap type across all weak
+packets, and `weakly_explained_file_extension_counts` summarizes the unique file
+extensions represented by those packets. `weakly_explained_packet_samples` adds
+a bounded snapshot of the weakest packet quality records, including sample file
+paths and the primary gap, so extractor work can be prioritized without scanning
+every packet first.
+Review packets may expose graph-derived context for workers:
+- `entrypoints` for route or handler context inside the packet
+- `key_edges` for the strongest internal file relationships
+- `boundary_files` for adjacent files that should only be checked when evidence
+  genuinely crosses the packet
+- `quality` metrics for cohesion, internal edges, boundary edges, and
+  unexplained files
+## MCP contract
+The local MCP server exposes:
+- `start_audit`
+- `get_status`
+- `continue_audit`
+- `explain_task`
+- `validate_artifacts`
+- `import_results`
+- `import_runtime_updates`
+It also exposes resources for current artifacts, operator handoff, install
+guidance, and the current report. MCP consumers should prefer the tool and
+resource contracts over reading internal files directly.
+## Guided recovery
+Failure responses should distinguish:
+- rerun the same command
+- import these result/update files
+- fix session config
+- retry a worker submission after schema validation errors
+- perform manual semantic review
+Malformed results, invalid config, stale artifacts, and provider failures
+should include field-level or action-level remediation whenever possible.

package/docs/development.md ADDED Viewed

@@ -0,0 +1,210 @@
+# Development
+## Repository layout
+- `src/`: implementation code
+- `schemas/`: JSON schemas for public and internal artifacts
+- `examples/`: validated artifact examples
+- `skills/audit-code/`: canonical prompt and skill-facing instructions
+- `dispatch/`: packet-dispatch support data
+- `tests/`: Node test suite and regression coverage
+- `dist/`: checked-in compiled runtime used by packaged installs
+## Agent handoff
+Use `docs/handoff.md` as the current pickup note for the next implementation
+agent. It should name the latest completed slice, verification status, files
+touched, and the most practical next steps. Keep long-term product direction in
+`docs/product.md`; keep transient implementation pickup notes in the handoff.
+## Build and test
+```bash
+npm install
+npm run check
+npm test
+npm run verify:release
+```
+The test suite is intentionally contract-heavy. Update tests when changing
+schema shape, prompt contracts, dispatch behavior, installer output, or release
+workflow semantics.
+## Production-readiness workflow
+Use field trials to decide what to fix next. For each representative repository,
+run to the local review handoff, validate the artifact bundle, and compare
+`audit_plan_metrics.json` across runs. Track at least packet count, weak packet
+count, average cohesion, `merge_edge_kind_counts`,
+`boundary_edge_kind_counts`, and `weakly_explained_packet_samples`.
+Only promote an extractor or planner change when those metrics expose a
+deterministic gap. Prefer improving shared graph resolution or importing
+generic analyzer ownership roots before adding another ecosystem-specific
+manifest parser.
+The latest remediator field trial closed the remaining mixed code/schema/test
+weak packet by adding package script links, schema contract test links, bounded
+TypeScript type contract suites, package-script-seeded script suite links, and
+generated test artifact disposition. Keep future suite links similarly bounded
+and evidence-led.
+The Polar field trial added `conftest-link` (conftest.py → Python files in
+scope) and `pyproject-testpaths-link` (pyproject.toml → conftest.py via
+`[tool.pytest.ini_options] testpaths`). `conftest-link` fires only when the
+conftest is inside an `isTestPath` directory to avoid O(n) fan-out from
+root-level conftests. `pyproject.toml` was also added to `shouldReadForGraph`
+so its content is available during the filesystem-backed build path. Together
+these raised Polar's average cohesion from 0.625 to 0.857 and reduced weak
+packets from 5 to 3.
+A second Polar field trial added `yaml-path-reference-link` (YAML/YML files
+→ other config files referenced by explicit relative path). Resolution tries
+repo-root-relative first, then file-directory-relative. The extractor only
+fires for string values ending in `.yaml`, `.yml`, `.json`, or `.toml` that
+resolve to an existing repo file. In Polar, this produced 4 edges from
+`configs/benchmark.yaml` to its template files and raised `internal_edge_count`
+in the `experiments-domains` packet from 90 to 94.
+A third Polar field trial added `python-test-util-suite-link`, which chains
+`.py` files co-located in `utils/`, `helpers/`, or `support/` subdirectories
+within `isTestPath` directories (same bounded-suite pattern as the TypeScript
+type, JSON schema, and package-script suite links). `conftest.py` is excluded
+from the predicate. In Polar, this produced 2 intra-unit edges within the
+`tests-utils` packet, raising its `internal_edge_count` from 0 to 2 and
+eliminating it as a weak packet. Polar's average cohesion improved from 0.857
+to 1.000, and its weak packets dropped from 3 to 2. The 2 remaining weak
+packets share genuinely
+isolated files (`.auditorignore`, `experiments/domains/__init__.py`,
+`experiments/summarize_results.py`) that cannot be linked without false
+positives; treat as the current floor. Note that intra-unit suite edges do not
+appear in `merge_edge_kind_counts` — their effect is visible in the packet's
+`internal_edge_count` and `unexplained_file_count` fields instead.
+Before treating a build as production-ready, verify the complete review loop in
+one real host:
+```text
+audit-code prepare-dispatch --run-id <run_id> --artifacts-dir <artifacts_dir>
+worker reviews each packet prompt
+audit-code submit-packet ...
+audit-code merge-and-ingest --run-id <run_id> --artifacts-dir <artifacts_dir>
+audit-code validate
+```
+On Windows, runtime validation runs package-manager shim commands such as
+`npm`, `npx`, `pnpm`, and `yarn` through the command shell so `.cmd` wrappers
+execute reliably. Keep that behavior covered when changing runtime command
+execution.
+If the final `audit-report.md` cannot be copied into the target repository
+because of local permissions, completion should remain successful and the
+artifact copy remains authoritative. Run `audit-code validate` against the
+artifact bundle before treating the run as complete.
+Then run `npm run verify:release` from a clean checkout.
+## Architecture
+The system separates deterministic extraction from bounded LLM judgment:
+1. intake and file disposition
+2. surface, flow, graph, unit, risk, and coverage artifacts
+3. audit task planning
+4. review packet construction
+5. semantic review through the active conversation or fallback provider bridge
+6. result ingestion, selective deepening, runtime validation, and synthesis
+7. final `audit-report.md`
+Portability rules:
+- tool-specific collectors write tool-agnostic JSON
+- prompts consume artifacts and bounded source context
+- review work is attributable to files, lenses, passes, and tasks
+- coverage gaps are machine-detectable
+## Adding language analyzers
+Language support should be adapter-based. A new analyzer should enrich shared
+artifacts instead of inventing a language-specific planning path.
+Avoid adding another bespoke manifest or project-file parser by default. First
+ask whether the signal is common in expected repositories, whether it provides
+direction or ownership that path heuristics cannot, and whether the same value
+could come from a generic analyzer-supplied ownership hint.
+Preferred outputs:
+- graph edges with kind, direction, confidence, and reason
+- entrypoints and surfaces
+- test-to-source links
+- package/module ownership hints, including analyzer-supplied
+  `ownership_roots` that become `analyzer-ownership-root-link` graph references
+- contract-suite links for small JSON Schema, workflow, package script, or
+  TypeScript type suites when planner metrics show otherwise weak packets
+- external boundary hints
+- line counts and anchor summaries for large files
+Current analyzer priorities:
+- planner observability before additional ecosystem breadth
+- exercising the generic ownership-root input from analyzers or imported
+  evidence
+- continued behavior-preserving extraction of high-concentration graph helpers
+- JS/TS compiler-backed resolution only after the current regex edges stay
+  stable
+- Python deterministic support beyond the current local import, package/module,
+  and pytest/unittest adjacency edges only where planner metrics show gaps
+- generic fallback from path patterns, ctags/tree-sitter, LSP output, or
+  external analyzer results when available
+Keep deep analyzers optional. Repositories should still produce useful packets
+from manifests, paths, tests, and external analyzer results when a language has
+only fallback support.
+Command-backed analyzers should prove project intent before running. Prefer
+repo-local config checks, such as `eslint.config.*`, `.eslintrc*`, or
+`package.json` `eslintConfig`, over executing a globally installed tool and
+parsing its no-config failure.
+Language-agnostic semantic affinity can be useful for ranking adjacent context,
+but it should be low authority. Do not let shared token frequency alone force
+packet merges; use it for `boundary_files` or candidate explanations unless a
+deterministic edge corroborates the relationship.
+## Packetization work
+The current packetizer groups tasks across lenses and merges graph-connected
+task groups within line budgets. Plan metrics now record which graph edge kinds
+caused packet merges, which candidate edge kinds stayed as boundary context,
+and which packets remain weakly explained. Weak-packet diagnostics aggregate
+primary gap counts and unique file-extension counts, while bounded samples
+include representative file paths. Together those metrics let real or fixture
+runs point at the next deterministic extractor or analyzer-ownership
+improvement. The next phase is consolidation and carefully chosen deterministic
+depth:
+- use packet-quality observations to prioritize extractor gaps
+- keep manifest/project-file edge extraction isolated from packet planning code
+- use the generic ownership-root contract before adding more ecosystem-specific
+  module formats
+- keep bounded suite edges as contract evidence, not as a generic
+  same-directory merge rule
+- exercise the Python import, package layout, and test/source edges against
+  fixture and real repositories before adding deeper Python framework handling
+Keep `AuditTask` as the coverage identity and `AuditResult[]` as the ingestion
+contract.
+## File-splitting priorities
+The largest implementation files should be split conservatively and without
+changing behavior:
+- move CLI command families out of `src/cli.ts`
+- move language metadata tables out of file inventory logic
+- move graph manifest/project-file parsers out of `src/extractors/graph.ts`
+- split selective-deepening task builders by trigger type
+- keep packetization, recovery, and schema changes easier to review
+Run the focused tests for each area before and after a split, then run
+`npm test`.