npm - sentinelayer-cli - Versions diffs - 0.8.0 → 0.8.1 - Mend

sentinelayer-cli 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

package/README.md +13 -0
package/package.json +4 -4
package/src/agents/ai-governance/index.js +12 -0
package/src/agents/ai-governance/tools/base.js +171 -0
package/src/agents/ai-governance/tools/eval-regression.js +47 -0
package/src/agents/ai-governance/tools/hitl-audit.js +81 -0
package/src/agents/ai-governance/tools/index.js +52 -0
package/src/agents/ai-governance/tools/prompt-drift.js +42 -0
package/src/agents/ai-governance/tools/provenance-check.js +69 -0
package/src/agents/backend/index.js +12 -0
package/src/agents/backend/tools/base.js +189 -0
package/src/agents/backend/tools/circuit-breaker-check.js +123 -0
package/src/agents/backend/tools/idempotency-audit.js +105 -0
package/src/agents/backend/tools/index.js +87 -0
package/src/agents/backend/tools/retry-audit.js +132 -0
package/src/agents/backend/tools/timeout-audit.js +144 -0
package/src/agents/code-quality/index.js +12 -0
package/src/agents/code-quality/tools/base.js +159 -0
package/src/agents/code-quality/tools/complexity-measure.js +197 -0
package/src/agents/code-quality/tools/coupling-analysis.js +81 -0
package/src/agents/code-quality/tools/cycle-detect.js +49 -0
package/src/agents/code-quality/tools/dep-graph.js +196 -0
package/src/agents/code-quality/tools/index.js +89 -0
package/src/agents/data-layer/index.js +12 -0
package/src/agents/data-layer/tools/base.js +181 -0
package/src/agents/data-layer/tools/index-audit.js +165 -0
package/src/agents/data-layer/tools/index.js +83 -0
package/src/agents/data-layer/tools/migration-scan.js +135 -0
package/src/agents/data-layer/tools/query-explain.js +120 -0
package/src/agents/data-layer/tools/tenancy-scan.js +166 -0
package/src/agents/documentation/index.js +12 -0
package/src/agents/documentation/tools/api-diff.js +91 -0
package/src/agents/documentation/tools/base.js +151 -0
package/src/agents/documentation/tools/dead-link-check.js +58 -0
package/src/agents/documentation/tools/docstring-coverage.js +78 -0
package/src/agents/documentation/tools/index.js +52 -0
package/src/agents/documentation/tools/readme-freshness.js +61 -0
package/src/agents/envelope/fix-cycle.js +45 -0
package/src/agents/envelope/index.js +31 -0
package/src/agents/envelope/loop.js +150 -0
package/src/agents/envelope/pulse.js +18 -0
package/src/agents/envelope/stream.js +40 -0
package/src/agents/infrastructure/index.js +12 -0
package/src/agents/infrastructure/tools/base.js +171 -0
package/src/agents/infrastructure/tools/checkov-run.js +32 -0
package/src/agents/infrastructure/tools/drift-detect.js +59 -0
package/src/agents/infrastructure/tools/iam-least-priv-check.js +78 -0
package/src/agents/infrastructure/tools/index.js +52 -0
package/src/agents/infrastructure/tools/tflint-run.js +31 -0
package/src/agents/jules/loop.js +7 -4
package/src/agents/jules/swarm/sub-agent.js +5 -1
package/src/agents/jules/tools/auth-audit.js +10 -1
package/src/agents/mode.js +113 -0
package/src/agents/observability/index.js +12 -0
package/src/agents/observability/tools/alert-audit.js +39 -0
package/src/agents/observability/tools/base.js +181 -0
package/src/agents/observability/tools/dashboard-gap.js +42 -0
package/src/agents/observability/tools/index.js +54 -0
package/src/agents/observability/tools/log-schema-check.js +74 -0
package/src/agents/observability/tools/span-coverage.js +74 -0
package/src/agents/persona-visuals.js +38 -0
package/src/agents/release/index.js +12 -0
package/src/agents/release/tools/base.js +181 -0
package/src/agents/release/tools/changelog-diff.js +86 -0
package/src/agents/release/tools/feature-flag-audit.js +126 -0
package/src/agents/release/tools/index.js +61 -0
package/src/agents/release/tools/rollback-verify.js +129 -0
package/src/agents/release/tools/semver-check.js +109 -0
package/src/agents/reliability/index.js +12 -0
package/src/agents/reliability/tools/backpressure-check.js +129 -0
package/src/agents/reliability/tools/base.js +181 -0
package/src/agents/reliability/tools/chaos-probe.js +109 -0
package/src/agents/reliability/tools/graceful-degradation-check.js +114 -0
package/src/agents/reliability/tools/health-check-audit.js +111 -0
package/src/agents/reliability/tools/index.js +87 -0
package/src/agents/run-persona.js +109 -0
package/src/agents/security/index.js +12 -0
package/src/agents/security/tools/authz-audit.js +134 -0
package/src/agents/security/tools/base.js +190 -0
package/src/agents/security/tools/crypto-review.js +175 -0
package/src/agents/security/tools/index.js +97 -0
package/src/agents/security/tools/sast-scan.js +175 -0
package/src/agents/security/tools/secrets-scan.js +216 -0
package/src/agents/supply-chain/index.js +12 -0
package/src/agents/supply-chain/tools/attestation-check.js +42 -0
package/src/agents/supply-chain/tools/base.js +151 -0
package/src/agents/supply-chain/tools/index.js +52 -0
package/src/agents/supply-chain/tools/lockfile-integrity.js +73 -0
package/src/agents/supply-chain/tools/package-verify.js +56 -0
package/src/agents/supply-chain/tools/sbom-diff.js +34 -0
package/src/agents/testing/index.js +12 -0
package/src/agents/testing/tools/base.js +202 -0
package/src/agents/testing/tools/coverage-gap.js +144 -0
package/src/agents/testing/tools/flake-detect.js +125 -0
package/src/agents/testing/tools/index.js +85 -0
package/src/agents/testing/tools/mutation-test.js +143 -0
package/src/agents/testing/tools/snapshot-diff.js +103 -0
package/src/auth/gate.js +65 -37
package/src/cli.js +1 -1
package/src/commands/chat.js +3 -10
package/src/commands/legacy-args.js +10 -0
package/src/commands/omargate.js +36 -2
package/src/commands/persona.js +46 -1
package/src/commands/scan.js +3 -10
package/src/commands/session.js +654 -6
package/src/commands/spec.js +3 -10
package/src/coord/events-log.js +141 -0
package/src/coord/handshake.js +719 -0
package/src/coord/index.js +35 -0
package/src/coord/paths.js +84 -0
package/src/coord/priority.js +62 -0
package/src/coord/tarjan.js +157 -0
package/src/cost/tokenizer.js +160 -0
package/src/cost/tracker.js +61 -0
package/src/daemon/artifact-lineage.js +362 -0
package/src/daemon/assignment-ledger.js +117 -0
package/src/daemon/ast-drift.js +496 -0
package/src/daemon/ingest-refresh.js +69 -2
package/src/ingest/engine.js +15 -0
package/src/ingest/ownership.js +380 -0
package/src/legacy-cli.js +68 -1
package/src/orchestrator/kai-chen.js +126 -0
package/src/review/ai-review.js +3 -10
package/src/review/compliance-pack.js +389 -0
package/src/review/investor-dd-config.js +54 -0
package/src/review/investor-dd-file-loop.js +303 -0
package/src/review/investor-dd-file-router.js +406 -0
package/src/review/investor-dd-html-report.js +233 -0
package/src/review/investor-dd-notification.js +120 -0
package/src/review/investor-dd-orchestrator.js +405 -0
package/src/review/investor-dd-persona-runner.js +275 -0
package/src/review/live-validator.js +253 -0
package/src/review/omargate-orchestrator.js +90 -2
package/src/review/persona-prompts.js +244 -56
package/src/review/reconciliation-rules.js +329 -0
package/src/review/reproducibility-chain.js +136 -0
package/src/review/scan-modes.js +102 -3
package/src/session/agent-registry.js +7 -0
package/src/session/analytics.js +479 -0
package/src/session/daemon.js +609 -14
package/src/session/file-locks.js +666 -0
package/src/session/paths.js +4 -0
package/src/session/recap.js +567 -0
package/src/session/redact.js +82 -0
package/src/session/runtime-bridge.js +24 -1
package/src/session/scoring.js +406 -0
package/src/session/setup-guides.js +304 -0
package/src/session/store.js +318 -2
package/src/session/stream.js +9 -1
package/src/session/sync.js +753 -0
package/src/session/tasks.js +1054 -0
package/src/session/templates.js +188 -0
package/src/swarm/runtime.js +1 -8

package/src/agents/testing/tools/coverage-gap.js ADDED Viewed

@@ -0,0 +1,144 @@
+// coverage-gap — find source files without a matching test file (#A15).
+//
+// Zero-dep static pass: we don't try to read c8 / istanbul coverage JSON
+// (that lives in a later PR). Instead we use filename-convention matching —
+// for every `src/foo/bar.ts`, check whether any of the standard test file
+// names exists. This heuristic catches the most valuable 80% of coverage gaps while
+// staying fast and self-contained.
+import path from "node:path";
+import { createFinding, isTestFile, toPosix, walkRepoFiles } from "./base.js";
+// File extensions handed to walkRepoFiles as its `extensions` filter when the
+// caller does not supply an explicit file list.
+const SOURCE_EXTENSIONS = new Set([
+  ".js",
+  ".jsx",
+  ".ts",
+  ".tsx",
+  ".mjs",
+  ".cjs",
+  ".py",
+]);
+// Generate plausible test-file locations for a source file. For
+// src/foo/bar.ts this yields src/foo/bar.test.ts, src/foo/bar.spec.ts,
+// src/foo/__tests__/bar.test.ts, tests/src/foo/bar.test.ts, … and for
+// src/foo/bar.py it yields src/foo/test_bar.py, tests/src/foo/bar_test.py, …
+//
+// @param {string} sourceRelativePath - repo-relative path of the source file.
+// @returns {Set<string>} POSIX-style candidate test paths, in generation order.
+function candidateTestPaths(sourceRelativePath) {
+  const posix = toPosix(sourceRelativePath);
+  const ext = path.extname(posix).toLowerCase();
+  const base = posix.slice(0, posix.length - ext.length);
+  const fileName = path.posix.basename(base);
+  const dir = path.posix.dirname(base);
+  // dirname() returns "." for root-level files; interpolating that verbatim
+  // would produce "./__tests__/x" or "tests/./x", which never equal a
+  // normalized relative test path. Use an empty prefix for the root instead.
+  const prefix = dir === "." ? "" : `${dir}/`;
+  if (ext === ".py") {
+    return new Set([
+      `${prefix}${fileName}_test.py`,
+      `${prefix}test_${fileName}.py`,
+      `tests/${prefix}${fileName}_test.py`,
+      `tests/${prefix}test_${fileName}.py`,
+    ]);
+  }
+  // The Set de-duplicates entries that coincide (e.g. the two tests/ forms).
+  return new Set([
+    `${base}.test${ext}`,
+    `${base}.spec${ext}`,
+    `${prefix}__tests__/${fileName}.test${ext}`,
+    `${prefix}__tests__/${fileName}${ext}`,
+    `tests/${base}.test${ext}`,
+    `tests/${prefix}${fileName}.test${ext}`,
+    `test/${prefix}${fileName}.test${ext}`,
+    `test/${prefix}${fileName}.spec${ext}`,
+    // mjs test convention: tests/unit.{name}.test.mjs
+    `tests/unit.${fileName}.test.mjs`,
+    `tests/unit.${fileName}.test.js`,
+  ]);
+}
+// Heuristic: true when the path names an index/main module, lives under a
+// bin/ or scripts/ directory, or is a cli.js / cli.ts file.
+function isLikelyEntryFile(relativePath) {
+  const normalized = toPosix(relativePath);
+  const entryPatterns = [
+    /(^|\/)(index|main)\.[jt]sx?$/,
+    /(^|\/)(bin|scripts)\//,
+    /(^|\/)cli\.[jt]s$/,
+  ];
+  return entryPatterns.some((pattern) => pattern.test(normalized));
+}
+// Heuristic: true when the final path segment is config / constants /
+// type(s) / schema (optionally with a .js/.jsx/.ts/.tsx extension) or when
+// the path is a TypeScript declaration file (*.d.ts).
+function isLikelyConfig(relativePath) {
+  const normalized = toPosix(relativePath);
+  if (/(^|\/)(config|constants|types?|schema|\.d\.ts)(\.[jt]sx?)?$/.test(normalized)) {
+    return true;
+  }
+  return /\.d\.ts$/.test(normalized);
+}
+// Report source files that have no test file under the naming conventions
+// generated by candidateTestPaths().
+//
+// @param {object} [options]
+// @param {string} [options.rootPath="."] - repository root (resolved to absolute).
+// @param {string[]|null} [options.files=null] - explicit files to inspect
+//   instead of walking the repo. Only files in this list are considered, so
+//   a source whose test file is not also listed is reported as uncovered.
+// @returns {Promise<Array>} one "testing.coverage-gap" finding per uncovered source.
+export async function runCoverageGap({ rootPath, files = null } = {}) {
+  const resolvedRoot = path.resolve(String(rootPath || "."));
+  const useExplicitFiles = Array.isArray(files) && files.length > 0;
+  const iterator = useExplicitFiles
+    ? iterateExplicitFiles(resolvedRoot, files)
+    : walkRepoFiles({ rootPath: resolvedRoot, extensions: SOURCE_EXTENSIONS });
+  // Pass 1: walk the repo once, collect source + test file lists.
+  const sourceFiles = [];
+  const testFiles = new Set();
+  // Basenames of test files that sit inside some directory. Lets pass 2 ask
+  // "does a test with this file name exist in any directory?" via a Set
+  // lookup instead of rescanning every test file for every source.
+  const nestedTestBaseNames = new Set();
+  for await (const { relativePath } of iterator) {
+    const rel = toPosix(relativePath);
+    // The explicit list is caller-supplied and not pre-filtered by extension;
+    // without this guard a README or snapshot would be reported as an
+    // untested source file.
+    if (useExplicitFiles && !SOURCE_EXTENSIONS.has(path.posix.extname(rel).toLowerCase())) {
+      continue;
+    }
+    if (isTestFile(relativePath)) {
+      testFiles.add(rel);
+      if (rel.includes("/")) {
+        nestedTestBaseNames.add(path.posix.basename(rel));
+      }
+      continue;
+    }
+    sourceFiles.push(rel);
+  }
+  // Pass 2: a source is covered when a candidate path exists verbatim, or
+  // when a test with the candidate's file name exists in any directory.
+  const findings = [];
+  for (const source of sourceFiles) {
+    if (isLikelyEntryFile(source) || isLikelyConfig(source)) {
+      continue;
+    }
+    let covered = false;
+    for (const candidate of candidateTestPaths(source)) {
+      if (
+        testFiles.has(candidate) ||
+        nestedTestBaseNames.has(path.posix.basename(candidate))
+      ) {
+        covered = true;
+        break;
+      }
+    }
+    if (covered) {
+      continue;
+    }
+    findings.push(
+      createFinding({
+        tool: "coverage-gap",
+        kind: "testing.coverage-gap",
+        severity: "P2",
+        file: source,
+        line: 1,
+        evidence: `No test file found for source: ${source}`,
+        rootCause:
+          "Source file has no corresponding test under standard naming conventions (`*.test.*`, `*.spec.*`, `test_*.py`, `__tests__/…`).",
+        recommendedFix:
+          "Add a unit test covering the file's exports, or add an explicit `.notest` marker / coverage-ignore annotation if this file is intentionally untested.",
+        confidence: 0.6,
+      })
+    );
+  }
+  return findings;
+}
+// Async generator over a caller-supplied file list. Blank entries are
+// skipped; every other entry is resolved against resolvedRoot and yielded as
+// { fullPath, relativePath } with forward slashes in relativePath.
+async function* iterateExplicitFiles(resolvedRoot, files) {
+  for (const entry of files) {
+    const candidate = String(entry || "").trim();
+    if (candidate) {
+      const fullPath = path.isAbsolute(candidate)
+        ? candidate
+        : path.join(resolvedRoot, candidate);
+      const relativePath = path
+        .relative(resolvedRoot, fullPath)
+        .split("\\")
+        .join("/");
+      yield { fullPath, relativePath };
+    }
+  }
+}

package/src/agents/testing/tools/flake-detect.js ADDED Viewed

@@ -0,0 +1,125 @@
+// flake-detect — flag flakiness smells in test files (#A15).
+//
+// The usual suspects: tests that sleep, rely on wall-clock arithmetic, hit
+// the real network, or seed randomness without a fixed seed. We scan test
+// files specifically (the coverage-gap heuristic for "is this a test") so
+// the tool doesn't flag production code that legitimately uses setTimeout.
+import fsp from "node:fs/promises";
+import path from "node:path";
+import { createFinding, findLineMatches, getLineContent, isTestFile, toPosix, walkRepoFiles } from "./base.js";
+// File extensions handed to walkRepoFiles as its `extensions` filter when the
+// caller does not supply an explicit file list.
+const TEST_EXTENSIONS = new Set([
+  ".js",
+  ".jsx",
+  ".ts",
+  ".tsx",
+  ".mjs",
+  ".cjs",
+  ".py",
+]);
+// Flakiness rules. Each rule carries the regex that is matched against the
+// test file content plus the metadata copied verbatim into every finding it
+// produces (id → finding kind, severity, rootCause, recommendedFix,
+// confidence).
+const RULES = [
+  {
+    id: "flake.sleep-in-test",
+    // Two shapes, because the units differ:
+    //   - setTimeout / setInterval / sleep with a literal of 3+ digits
+    //     (milliseconds), e.g. setTimeout(fn, 500);
+    //   - Python time.sleep / asyncio.sleep with any non-zero literal
+    //     (seconds), e.g. time.sleep(2) or asyncio.sleep(0.5).
+    // sleep(0) is excluded: it is the usual "yield to the event loop" idiom,
+    // not a fixed-duration wait.
+    pattern: /\b(?:(?:setTimeout|setInterval|sleep)\s*\(\s*(?:\w+\s*,\s*)?\d{3,}\s*[,)]|(?:time|asyncio)\.sleep\s*\(\s*(?!0+(?:\.0*)?\s*[,)])\d+(?:\.\d+)?\s*[,)])/,
+    severity: "P2",
+    rootCause:
+      "Test sleeps for a fixed wall-clock duration — slow on CI, flaky on loaded machines.",
+    recommendedFix:
+      "Use fake timers (jest.useFakeTimers, vi.useFakeTimers, freezegun) or event-based waits (await page.waitForSelector / waitForResponse).",
+    confidence: 0.7,
+  },
+  {
+    id: "flake.wall-clock-assertion",
+    // expect(Date.now()) / expect(new Date().getTime())
+    pattern: /expect\s*\(\s*(?:Date\.now\(\)|new\s+Date\(\)\.getTime\(\))\s*\)/,
+    severity: "P1",
+    rootCause:
+      "Assertion compares against the live wall clock — value drifts between runs.",
+    recommendedFix:
+      "Freeze time (jest.setSystemTime, vi.setSystemTime, freezegun) or pass a Date supplier the SUT reads from.",
+    confidence: 0.8,
+  },
+  {
+    id: "flake.unstubbed-network",
+    // fetch / axios / requests in a test file — likely reaching out to real
+    // network. Real-network hits are the #1 flake source.
+    pattern: /\b(?:fetch|axios(?:\.[a-z]+)?|got(?:\.[a-z]+)?|requests\.(?:get|post|put|patch|delete|request))\s*\(/,
+    severity: "P1",
+    rootCause:
+      "Test makes a live network call. Real-network tests flake on DNS / TLS / rate limits and make CI unreliable.",
+    recommendedFix:
+      "Mock the client with msw / nock / vcr-py, or inject an HTTP transport and pass a fake in tests.",
+    confidence: 0.65,
+  },
+  {
+    id: "flake.unseeded-random",
+    // Math.random() (JS) or random.random/uniform/shuffle/choice() (Python)
+    pattern: /\b(?:Math\.random|random\.(?:random|uniform|shuffle|choice))\s*\(/,
+    severity: "P2",
+    rootCause:
+      "Test uses unseeded randomness — two runs can take different branches and produce different results.",
+    recommendedFix:
+      "Seed the generator or pass a stub random() into the SUT via DI. For Jest / Vitest you can mock Math.random.",
+    confidence: 0.55,
+  },
+];
+// Scan test files for the flakiness smells described by RULES.
+//
+// @param {object} [options]
+// @param {string} [options.rootPath="."] - repository root (resolved to absolute).
+// @param {string[]|null} [options.files=null] - explicit files to scan instead
+//   of walking the repo; entries that are not test files are skipped.
+// @returns {Promise<Array>} one finding per rule match, in file/rule/match order.
+export async function runFlakeDetect({ rootPath, files = null } = {}) {
+  const resolvedRoot = path.resolve(String(rootPath || "."));
+  const hasExplicitFiles = Array.isArray(files) && files.length > 0;
+  const entries = hasExplicitFiles
+    ? iterateExplicitFiles(resolvedRoot, files)
+    : walkRepoFiles({ rootPath: resolvedRoot, extensions: TEST_EXTENSIONS });
+  const findings = [];
+  for await (const { fullPath, relativePath } of entries) {
+    const testPath = toPosix(relativePath);
+    if (isTestFile(testPath)) {
+      let content;
+      let readable = true;
+      try {
+        content = await fsp.readFile(fullPath, "utf-8");
+      } catch {
+        // Best effort: an unreadable file is skipped, not reported.
+        readable = false;
+      }
+      if (readable) {
+        for (const { id, pattern, severity, rootCause, recommendedFix, confidence } of RULES) {
+          for (const hit of findLineMatches(content, pattern)) {
+            const finding = createFinding({
+              tool: "flake-detect",
+              kind: id,
+              severity,
+              file: testPath,
+              line: hit.line,
+              evidence: getLineContent(content, hit.line),
+              rootCause,
+              recommendedFix,
+              confidence,
+            });
+            findings.push(finding);
+          }
+        }
+      }
+    }
+  }
+  return findings;
+}
+// Async generator over a caller-supplied file list: skips blank entries and
+// yields { fullPath, relativePath } for the rest, resolving relative entries
+// against resolvedRoot and using forward slashes in relativePath.
+async function* iterateExplicitFiles(resolvedRoot, files) {
+  for (const raw of files) {
+    const name = String(raw || "").trim();
+    if (name.length === 0) {
+      continue;
+    }
+    let fullPath = name;
+    if (!path.isAbsolute(name)) {
+      fullPath = path.join(resolvedRoot, name);
+    }
+    const fromRoot = path.relative(resolvedRoot, fullPath);
+    const relativePath = fromRoot.replace(/\\/g, "/");
+    yield { fullPath, relativePath };
+  }
+}
+export { RULES as FLAKE_RULES };

package/src/agents/testing/tools/index.js ADDED Viewed

@@ -0,0 +1,85 @@
+// Priya (testing persona) domain-tool registry (#A15).
+import { runCoverageGap } from "./coverage-gap.js";
+import { runFlakeDetect } from "./flake-detect.js";
+import { runMutationTest } from "./mutation-test.js";
+import { runSnapshotDiff } from "./snapshot-diff.js";
+// Registry of testing tools keyed by tool id. Each entry holds the id, a
+// human-readable description, a JSON-schema-style description of the accepted
+// arguments, and the handler function that runs the tool. Object.freeze is
+// applied to the outer object only; the nested entries are not frozen.
+export const TESTING_TOOLS = Object.freeze({
+  "coverage-gap": {
+    id: "coverage-gap",
+    description:
+      "Walk the repo and flag source files that have no matching test file under standard naming conventions (*.test.*, *.spec.*, test_*.py, __tests__/…).",
+    schema: {
+      type: "object",
+      properties: {
+        rootPath: { type: "string" },
+        files: { type: "array", items: { type: "string" } },
+      },
+    },
+    handler: runCoverageGap,
+  },
+  "flake-detect": {
+    id: "flake-detect",
+    description:
+      "Scan test files for flakiness smells: fixed-duration sleeps, wall-clock assertions, live network calls (fetch / axios / requests), unseeded randomness.",
+    schema: {
+      type: "object",
+      properties: {
+        rootPath: { type: "string" },
+        files: { type: "array", items: { type: "string" } },
+      },
+    },
+    handler: runFlakeDetect,
+  },
+  "snapshot-diff": {
+    id: "snapshot-diff",
+    description:
+      "Walk *.snap / *.ambr files and flag stale (> 90 days untouched) or oversized (> 64 KiB) snapshots.",
+    schema: {
+      type: "object",
+      properties: {
+        rootPath: { type: "string" },
+        staleDays: { type: "number" },
+        files: { type: "array", items: { type: "string" } },
+      },
+    },
+    handler: runSnapshotDiff,
+  },
+  "mutation-test": {
+    id: "mutation-test",
+    description:
+      "Configuration-check pass: verify Stryker / mutmut is wired up and the latest mutation report is fresh (< 30 days).",
+    schema: {
+      type: "object",
+      properties: { rootPath: { type: "string" } },
+    },
+    handler: runMutationTest,
+  },
+});
+// Tool ids in registry declaration order; runAllTestingTools iterates this list.
+export const TESTING_TOOL_IDS = Object.freeze(Object.keys(TESTING_TOOLS));
+// Run a single testing tool by id.
+//
+// @param {string} toolId - a key of TESTING_TOOLS.
+// @param {object} [args={}] - arguments forwarded to the tool handler.
+// @returns {Promise<*>} whatever the tool handler returns.
+// @throws {Error} "Unknown testing tool: <id>" when toolId is not registered.
+export async function dispatchTestingTool(toolId, args = {}) {
+  // Own-property check: a bare TESTING_TOOLS[toolId] lookup would also resolve
+  // inherited keys such as "constructor" or "toString" and then fail with an
+  // opaque "handler is not a function" TypeError.
+  const isRegistered = Object.prototype.hasOwnProperty.call(TESTING_TOOLS, toolId);
+  if (!isRegistered) {
+    throw new Error(`Unknown testing tool: ${toolId}`);
+  }
+  return TESTING_TOOLS[toolId].handler(args);
+}
+// Run every registered testing tool one after another, in TESTING_TOOL_IDS
+// order, and return all of their findings as a single flat array. Each tool
+// receives the same { rootPath, files } arguments.
+export async function runAllTestingTools({ rootPath, files = null } = {}) {
+  const collected = [];
+  for (const toolId of TESTING_TOOL_IDS) {
+    const toolFindings = await dispatchTestingTool(toolId, { rootPath, files });
+    for (const finding of toolFindings) {
+      collected.push(finding);
+    }
+  }
+  return collected;
+}
+export {
+  runCoverageGap,
+  runFlakeDetect,
+  runMutationTest,
+  runSnapshotDiff,
+};

package/src/agents/testing/tools/mutation-test.js ADDED Viewed

@@ -0,0 +1,143 @@
+// mutation-test — check for mutation-testing configuration (#A15).
+//
+// Priya wants mutation testing (Stryker / pitest / mutmut) as the ceiling
+// signal: do the tests actually assert anything, or is coverage a green
+// but empty number? True mutation runs are expensive — this tool ships as
+// a configuration check first (is Stryker wired up? is there an up-to-date
+// report?). The LLM / operator can dispatch a real run from the resulting
+// advisory.
+import fsp from "node:fs/promises";
+import path from "node:path";
+import { createFinding, toPosix } from "./base.js";
+// Root-relative paths whose presence counts as "mutation testing is
+// configured". setup.cfg and pyproject.toml only count when they contain a
+// mutmut section (checked by runMutationTest).
+const CONFIG_CANDIDATES = [
+  "stryker.conf.js",
+  "stryker.conf.cjs",
+  "stryker.conf.mjs",
+  "stryker.conf.json",
+  "stryker.config.js",
+  "stryker.config.cjs",
+  "stryker.config.mjs",
+  "stryker.config.json",
+  ".stryker-tmp",
+  "setup.cfg", // Python mutmut section
+  "mutmut_config.py",
+  "pyproject.toml", // check for [tool.mutmut]
+];
+// Root-relative paths checked for an existing mutation report.
+const REPORT_CANDIDATES = [
+  "reports/mutation/mutation.html",
+  "reports/mutation/mutation.json",
+  "mutmut_results.json",
+];
+// A report older than this many days is reported as stale.
+const REPORT_FRESH_DAYS = 30;
+// Stat a path without throwing. Resolves to { exists: true, mtimeMs } when
+// the stat succeeds and to { exists: false } when it fails for any reason.
+async function fileExists(fullPath) {
+  const info = await fsp.stat(fullPath).catch(() => null);
+  if (info === null) {
+    return { exists: false };
+  }
+  const mtimeMs = Number(info.mtimeMs || 0);
+  return { exists: true, mtimeMs };
+}
+// Read a file as UTF-8 text; resolves to "" when the read fails for any reason.
+async function readTextIfExists(fullPath) {
+  let text = "";
+  try {
+    text = await fsp.readFile(fullPath, "utf-8");
+  } catch {
+    // Unreadable or missing file: keep the empty-string fallback.
+  }
+  return text;
+}
+// Configuration-level mutation-testing check. Emits at most one finding:
+//   - testing.no-mutation-config    when no CONFIG_CANDIDATES entry qualifies;
+//   - testing.no-mutation-report    when a config exists but no report does;
+//   - testing.mutation-report-stale when the newest report is older than
+//     REPORT_FRESH_DAYS days.
+//
+// @param {object} [options]
+// @param {string} [options.rootPath="."] - repository root (resolved to absolute).
+// @returns {Promise<Array>} zero or one finding.
+export async function runMutationTest({ rootPath } = {}) {
+  const resolvedRoot = path.resolve(String(rootPath || "."));
+  const findings = [];
+  // Config presence check
+  let configFound = false;
+  for (const candidate of CONFIG_CANDIDATES) {
+    const fullPath = path.join(resolvedRoot, candidate);
+    const result = await fileExists(fullPath);
+    if (!result.exists) {
+      continue;
+    }
+    if (candidate === "pyproject.toml" || candidate === "setup.cfg") {
+      // These files exist in most Python projects; they only count as a
+      // mutation config when they carry a mutmut section.
+      const text = await readTextIfExists(fullPath);
+      if (!/\[tool\.mutmut\]|\[mutmut\]/.test(text)) {
+        continue;
+      }
+    }
+    configFound = true;
+    break;
+  }
+  if (!configFound) {
+    findings.push(
+      createFinding({
+        tool: "mutation-test",
+        kind: "testing.no-mutation-config",
+        severity: "P3",
+        // NOTE(review): attributed to pyproject.toml even for JS/TS repos —
+        // confirm whether consumers rely on this before changing it.
+        file: toPosix("pyproject.toml"),
+        line: 0,
+        evidence: "No Stryker / mutmut / pitest configuration file found.",
+        rootCause:
+          "Without mutation testing, the test suite's assertions could be vacuous — 90% line coverage means nothing if the tests don't fail when the code changes.",
+        recommendedFix:
+          "Wire up @stryker-mutator/core (JS/TS) or mutmut (Python). Start with a single critical module and let the score guide new tests.",
+        confidence: 0.5,
+      })
+    );
+    return findings;
+  }
+  // Report freshness check: remember the newest report and where it lives so
+  // a stale finding can point at the actual file.
+  let latestReport = 0;
+  let latestReportPath = null;
+  for (const candidate of REPORT_CANDIDATES) {
+    const result = await fileExists(path.join(resolvedRoot, candidate));
+    if (!result.exists) {
+      continue;
+    }
+    if (latestReportPath === null || result.mtimeMs > latestReport) {
+      latestReport = result.mtimeMs;
+      latestReportPath = candidate;
+    }
+  }
+  if (latestReportPath === null) {
+    findings.push(
+      createFinding({
+        tool: "mutation-test",
+        kind: "testing.no-mutation-report",
+        severity: "P3",
+        file: toPosix("reports/mutation/"),
+        line: 0,
+        evidence: "Stryker / mutmut config present but no mutation report on disk.",
+        rootCause:
+          "Config without a report suggests mutation testing is configured but not actually run.",
+        recommendedFix:
+          "Wire a mutation run into CI on a cadence (weekly is reasonable) so drift in assertion quality is visible.",
+        confidence: 0.55,
+      })
+    );
+    return findings;
+  }
+  const ageDays = Math.floor((Date.now() - latestReport) / (24 * 60 * 60 * 1000));
+  if (ageDays > REPORT_FRESH_DAYS) {
+    findings.push(
+      createFinding({
+        tool: "mutation-test",
+        kind: "testing.mutation-report-stale",
+        severity: "P3",
+        // Point at the report that was measured; it may be
+        // mutmut_results.json rather than anything under reports/mutation/.
+        file: toPosix(latestReportPath),
+        line: 0,
+        evidence: `Latest mutation report is ${ageDays} days old (threshold ${REPORT_FRESH_DAYS})`,
+        rootCause:
+          "Stale mutation reports mean we're not actually watching assertion quality — drift goes undetected until it matters.",
+        recommendedFix:
+          "Schedule a recurring mutation job (weekly) and file an issue auto-generated from the diff vs. the prior run.",
+        confidence: 0.55,
+      })
+    );
+  }
+  return findings;
+}
+export { CONFIG_CANDIDATES, REPORT_CANDIDATES, REPORT_FRESH_DAYS };

package/src/agents/testing/tools/snapshot-diff.js ADDED Viewed

@@ -0,0 +1,103 @@
+// snapshot-diff — flag stale / oversized / obsolete snapshots (#A15).
+//
+// We walk *.snap files (Jest) and *.raw.snap / *.ambr (Ariadne) and flag:
+//   1. Snapshots that haven't been touched in > STALE_DAYS days — stale
+//      values are a legitimate concern.
+//   2. Snapshots larger than LARGE_SIZE_BYTES — huge blobs are an anti-
+//      pattern (unreviewable diffs, hide regressions).
+//   3. Python doctest / pytest-snapshot *.ambr files that reference a
+//      stored block. Same staleness / size rules.
+//
+// We don't try to diff against the producing code — that's the job of the
+// test runner. We only flag maintenance smells.
+import path from "node:path";
+import { createFinding, toPosix, walkRepoFiles } from "./base.js";
+// File extensions handed to walkRepoFiles as its `extensions` filter when the
+// caller does not supply an explicit file list.
+const SNAPSHOT_EXTENSIONS = new Set([
+  ".snap",
+  ".ambr",
+]);
+// Default staleness threshold in days; used when runSnapshotDiff is called
+// without a staleDays argument.
+const STALE_DAYS = 90;
+// Snapshots strictly larger than this many bytes are reported as oversized.
+const LARGE_SIZE_BYTES = 64 * 1024; // 64 KiB
+// Flag snapshot files that are stale (older than staleDays) or oversized
+// (larger than LARGE_SIZE_BYTES). A file can yield both findings.
+//
+// @param {object} [options]
+// @param {string} [options.rootPath="."] - repository root (resolved to absolute).
+// @param {string[]|null} [options.files=null] - explicit files to inspect
+//   instead of walking the repo; entries without a snapshot extension are
+//   ignored.
+// @param {number} [options.staleDays=STALE_DAYS] - staleness threshold in days;
+//   values that are not a finite, non-negative number fall back to STALE_DAYS.
+// @returns {Promise<Array>} snapshot-stale / snapshot-oversized findings.
+export async function runSnapshotDiff({ rootPath, files = null, staleDays = STALE_DAYS } = {}) {
+  const resolvedRoot = path.resolve(String(rootPath || "."));
+  const now = Date.now();
+  const msPerDay = 24 * 60 * 60 * 1000;
+  // A NaN threshold would silently disable the staleness check and a negative
+  // one would flag every snapshot, so fall back to the default instead.
+  const requestedDays = Number(staleDays);
+  const effectiveStaleDays =
+    Number.isFinite(requestedDays) && requestedDays >= 0 ? requestedDays : STALE_DAYS;
+  const staleThreshold = now - effectiveStaleDays * msPerDay;
+  const useExplicitFiles = Array.isArray(files) && files.length > 0;
+  const iterator = useExplicitFiles
+    ? iterateExplicitFiles(resolvedRoot, files)
+    : walkRepoFiles({ rootPath: resolvedRoot, extensions: SNAPSHOT_EXTENSIONS });
+  const findings = [];
+  for await (const { relativePath, stat } of iterator) {
+    const rel = toPosix(relativePath);
+    // The explicit list is caller-supplied and may be a general file list
+    // shared with other tools; without this guard ordinary source files would
+    // be reported as stale or oversized snapshots.
+    if (useExplicitFiles && !SNAPSHOT_EXTENSIONS.has(path.posix.extname(rel).toLowerCase())) {
+      continue;
+    }
+    // A missing stat (e.g. the explicit file does not exist) yields 0 / 0 and
+    // therefore no findings.
+    const mtime = stat ? Number(stat.mtimeMs || 0) : 0;
+    const size = stat ? Number(stat.size || 0) : 0;
+    if (mtime && mtime < staleThreshold) {
+      const days = Math.floor((now - mtime) / msPerDay);
+      findings.push(
+        createFinding({
+          tool: "snapshot-diff",
+          kind: "testing.snapshot-stale",
+          severity: "P3",
+          file: rel,
+          line: 0,
+          evidence: `Last modified ${days} days ago (threshold ${effectiveStaleDays})`,
+          rootCause:
+            "Snapshot has been unchanged for longer than the staleness threshold — a stale snapshot can hide regressions silently.",
+          recommendedFix:
+            "Re-run the test suite with `--updateSnapshot` (or equivalent) after verifying the current output is actually correct. Delete if the underlying code has been removed.",
+          confidence: 0.5,
+        })
+      );
+    }
+    if (size > LARGE_SIZE_BYTES) {
+      findings.push(
+        createFinding({
+          tool: "snapshot-diff",
+          kind: "testing.snapshot-oversized",
+          severity: "P2",
+          file: rel,
+          line: 0,
+          evidence: `Snapshot is ${Math.round(size / 1024)} KiB (threshold ${Math.round(LARGE_SIZE_BYTES / 1024)} KiB)`,
+          rootCause:
+            "Oversized snapshots are unreviewable in PRs and hide meaningful regressions inside unrelated noise.",
+          recommendedFix:
+            "Split the snapshot into smaller focused tests, switch to a structural assertion, or mask non-essential fields (timestamps, IDs) before snapshotting.",
+          confidence: 0.7,
+        })
+      );
+    }
+  }
+  return findings;
+}
+// Async generator over a caller-supplied file list. Skips blank entries and
+// yields { fullPath, relativePath, stat } for the rest; stat is null when the
+// file cannot be stat'ed. node:fs/promises is loaded lazily on first use.
+async function* iterateExplicitFiles(resolvedRoot, files) {
+  const fsPromises = await import("node:fs/promises");
+  for (const entry of files) {
+    const name = String(entry || "").trim();
+    if (name) {
+      const fullPath = path.isAbsolute(name) ? name : path.join(resolvedRoot, name);
+      const relativePath = path.relative(resolvedRoot, fullPath).replace(/\\/g, "/");
+      const stat = await fsPromises.stat(fullPath).catch(() => null);
+      yield { fullPath, relativePath, stat };
+    }
+  }
+}
+export { LARGE_SIZE_BYTES, STALE_DAYS };