npm - atris - Versions diffs - 3.1.0 → 3.5.0 - Mend

atris 3.1.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/GETTING_STARTED.md +65 -131
package/README.md +29 -4
package/atris/GETTING_STARTED.md +65 -131
package/atris/PERSONA.md +5 -1
package/atris/atris.md +122 -153
package/atris/skills/aeo/SKILL.md +117 -0
package/atris/skills/atris/SKILL.md +49 -25
package/atris/skills/create-member/SKILL.md +29 -9
package/atris/skills/endgame/SKILL.md +9 -0
package/atris/skills/improve/SKILL.md +2 -2
package/atris/skills/research-search/SKILL.md +167 -0
package/atris/skills/research-search/arxiv_search.py +157 -0
package/atris/skills/research-search/program.md +48 -0
package/atris/skills/research-search/results.tsv +6 -0
package/atris/skills/research-search/scholar_search.py +154 -0
package/atris/skills/tidy/SKILL.md +36 -21
package/atris/team/_template/MEMBER.md +2 -0
package/atris/team/validator/MEMBER.md +35 -1
package/atris.md +118 -178
package/bin/atris.js +37 -6
package/cli/__pycache__/atris_code.cpython-314.pyc +0 -0
package/cli/__pycache__/runtime_guard.cpython-312.pyc +0 -0
package/cli/__pycache__/runtime_guard.cpython-314.pyc +0 -0
package/cli/atris_code.py +889 -0
package/cli/runtime_guard.py +693 -0
package/commands/align.js +15 -0
package/commands/app.js +316 -0
package/commands/autopilot.js +948 -42
package/commands/business.js +691 -11
package/commands/computer.js +1979 -43
package/commands/context-sync.js +5 -0
package/commands/experiments.js +1 -1
package/commands/lifecycle.js +12 -0
package/commands/plugin.js +24 -0
package/commands/pull.js +40 -1
package/commands/push.js +44 -0
package/commands/release.js +183 -0
package/commands/research.js +52 -0
package/commands/serve.js +1 -0
package/commands/sync.js +372 -87
package/commands/verify.js +53 -4
package/commands/wiki.js +71 -26
package/lib/file-ops.js +13 -1
package/lib/journal.js +23 -0
package/lib/reward-config.js +24 -0
package/lib/scorecard.js +58 -6
package/lib/sync-telemetry.js +59 -0
package/lib/todo.js +6 -0
package/lib/wiki.js +235 -60
package/package.json +4 -2
package/utils/api.js +19 -0
package/utils/auth.js +25 -1
package/utils/config.js +24 -0
package/utils/update-check.js +16 -0

package/commands/autopilot.js CHANGED Viewed

@@ -8,7 +8,7 @@
 const fs = require('fs');
 const path = require('path');
-const { execSync } = require('child_process');
+const { execSync, execFileSync, spawnSync } = require('child_process');
 const readline = require('readline');
 const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
 const { parseTodo } = require('../lib/todo');
@@ -19,6 +19,7 @@ const {
   writeScorecard,
   detectEndgameCompletion
 } = require('../lib/scorecard');
+const { REWARD_CONFIG, REWARD_CHECKSUM } = require('../lib/reward-config');
 const pkg = require('../package.json');
@@ -28,7 +29,7 @@ const PHASE_TIMEOUT = 600000; // 10 min per phase
  * Scan workspace for the next thing worth doing.
  * Returns { task, why, kind } or null.
  */
-async function suggestNextTask(cwd, skipped = new Set()) {
+async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {}) {
   const atrisDir = path.join(cwd, 'atris');
   const suggestions = [];
@@ -37,6 +38,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
   const todo = parseTodo(todoPath);
   for (const t of todo.backlog) {
+    if (t.tags && t.tags.includes('unverified')) continue;
     if (t.tag === 'endgame' && !skipped.has(t.title)) {
       suggestions.push({
         task: t.title,
@@ -51,7 +53,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
   // --- Resume interrupted work ---
   if (todo.inProgress.length > 0) {
     const t = todo.inProgress[0];
-    if (!skipped.has(t.title)) {
+    if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
       suggestions.push({
         task: t.title,
         why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
@@ -102,15 +104,17 @@ async function suggestNextTask(cwd, skipped = new Set()) {
   }
   // --- Backlog tasks ---
-  for (const t of todo.backlog.slice(0, 1)) {
+  for (const t of todo.backlog) {
+    if (t.tags && t.tags.includes('unverified')) continue;
     if (skipped.has(t.title)) continue;
-    const remaining = todo.backlog.length;
+    const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
     suggestions.push({
       task: t.title,
       why: `Next in the backlog${t.tag ? ` (${t.tag})` : ''}. ${remaining} task${remaining > 1 ? 's' : ''} waiting.`,
       kind: 'backlog',
       priority: 5
     });
+    break;
   }
   // --- Unprocessed inbox items ---
@@ -223,7 +227,64 @@ async function suggestNextTask(cwd, skipped = new Set()) {
   }
   suggestions.sort((a, b) => a.priority - b.priority);
-  return suggestions[0];
+  // Staleness gate: filter out unverified/stale suggestions
+  const staleSkipped = [];
+  const fresh = [];
+  for (const s of suggestions) {
+    const fakeTask = { title: s.task, tag: s.kind === 'endgame' ? 'endgame' : null, claimed: null };
+    if (s.kind === 'resume' && todo.inProgress.length > 0) {
+      fakeTask.claimed = todo.inProgress[0].claimed;
+    }
+    const age = getTaskAgeDays(fakeTask, todoPath);
+    const status = isStillTrue({ title: s.task, age, source: null }, cwd);
+    if (status === 'stale') {
+      staleSkipped.push({ task: s.task, status, reasoning: null });
+      continue;
+    }
+    if (status === 'unverified') {
+      if (auto) {
+        // Auto mode: use model check
+        const result = askModel({ title: s.task, age, source: null }, cwd);
+        if (!result.fresh) {
+          staleSkipped.push({ task: s.task, status: 'unverified (model: not fresh)', reasoning: result.reasoning });
+          continue;
+        }
+      } else {
+        // Interactive mode: ask the human
+        const result = await askHuman(s.task);
+        if (!result.fresh) {
+          staleSkipped.push({ task: s.task, status: 'unverified (human: not relevant)', reasoning: null });
+          continue;
+        }
+      }
+    }
+    fresh.push(s);
+  }
+  // Log skipped items to journal
+  if (staleSkipped.length > 0) {
+    try {
+      const { logFile } = getLogPath();
+      const now = new Date();
+      const hhmm = `${String(now.getHours()).padStart(2, '0')}:${String(now.getMinutes()).padStart(2, '0')}`;
+      const lines = staleSkipped.map(s => `- ${s.task} (${s.status})${s.reasoning ? ` — ${s.reasoning}` : ''}`);
+      const note = `\n### Staleness skip — ${hhmm}\n${lines.join('\n')}\n`;
+      if (fs.existsSync(logFile)) {
+        const content = fs.readFileSync(logFile, 'utf8');
+        const notesIdx = content.indexOf('## Notes');
+        if (notesIdx !== -1) {
+          const insertAt = content.indexOf('\n', notesIdx) + 1;
+          const updated = content.slice(0, insertAt) + note + content.slice(insertAt);
+          fs.writeFileSync(logFile, updated);
+        } else {
+          fs.appendFileSync(logFile, `\n## Notes\n${note}`);
+        }
+      }
+    } catch {}
+  }
+  return fresh[0] || null;
 }
 /**
@@ -242,6 +303,22 @@ function askApproval() {
   });
 }
+/**
+ * Ask the human whether an unverified task is still relevant.
+ * Interactive mode only — in auto mode the caller consults askModel() instead of prompting.
+ * Returns a Promise that resolves to { fresh: boolean }; fresh is true only for "y" or "yes".
+ */
+function askHuman(taskTitle) {
+  return new Promise((resolve) => {
+    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    rl.question(`  is "${taskTitle}" still relevant? y/n → `, (answer) => {
+      rl.close();
+      const a = (answer || '').trim().toLowerCase();
+      resolve({ fresh: a === 'y' || a === 'yes' });
+    });
+  });
+}
 /**
  * Run a phase via claude -p subprocess.
  */
@@ -555,35 +632,526 @@ function writeLesson(cwd, slug, status, explanation) {
   fs.writeFileSync(lessonsPath, content);
 }
+/**
+ * Record a tick's commit hash and verify command in atris/tick-registry.json.
+ * Each entry: { hash, verifyCmd, slug, timestamp }.
+ */
+function recordTickCommit(cwd, hash, verifyCmd, slug) {
+  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
+  let registry = [];
+  if (fs.existsSync(registryPath)) {
+    try { registry = JSON.parse(fs.readFileSync(registryPath, 'utf8')); } catch { registry = []; }
+  }
+  registry.push({ hash, verifyCmd, slug, timestamp: new Date().toISOString() });
+  fs.writeFileSync(registryPath, JSON.stringify(registry, null, 2) + '\n');
+}
+/**
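+ * Retroactive regression check. Reads the last 10 entries from tick-registry.json,
+ * re-runs each verify command at its original commit using a detached git worktree,
+ * and returns an array of { hash, slug, pass }. Entries with no hash or verify command,
+ * or whose worktree cannot be created, are reported as { pass: true, skipped: true }.
+ * On failure: writes a lesson with retroactive context.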
+ */
+function regressionCheck(cwd) {
+  const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
+  if (!fs.existsSync(registryPath)) return [];
+  let registry = [];
+  try { registry = JSON.parse(fs.readFileSync(registryPath, 'utf8')); } catch { return []; }
+  if (!Array.isArray(registry) || registry.length === 0) return [];
+  const entries = registry.slice(-10);
+  const results = [];
+  for (const entry of entries) {
+    if (!entry.hash || !entry.verifyCmd) {
+      results.push({ hash: entry.hash, slug: entry.slug, pass: true, skipped: true });
+      continue;
+    }
+    const worktreePath = path.join(cwd, '.regression-worktree-' + entry.hash.slice(0, 8));
+    let pass = false;
+    try {
+      // Create a worktree at the commit
+      execSync(`git worktree add "${worktreePath}" ${entry.hash} --detach 2>/dev/null`, { cwd, stdio: 'pipe' });
+      try {
+        execSync(entry.verifyCmd, { cwd: worktreePath, stdio: 'pipe', timeout: 60000 });
+        pass = true;
+      } catch {
+        pass = false;
+      }
+    } catch {
+      // If worktree creation fails (e.g., commit doesn't exist), skip
+      results.push({ hash: entry.hash, slug: entry.slug, pass: true, skipped: true });
+      continue;
+    } finally {
+      // Clean up worktree
+      try { execSync(`git worktree remove "${worktreePath}" --force 2>/dev/null`, { cwd, stdio: 'pipe' }); } catch {}
+    }
+    if (!pass) {
+      writeLesson(cwd, `regression-${entry.slug || 'unknown'}`, 'fail',
+        `Retroactive regression: verify command for tick ${entry.hash.slice(0, 7)} (${entry.slug}) now fails. -5 retroactive penalty applied.`);
+    }
+    results.push({ hash: entry.hash, slug: entry.slug, pass });
+  }
+  return results;
+}
 /**
  * Get the verify command for a task from TODO.md
  * Reads TODO.md, finds the task by title across active/completed sections,
  * and extracts the verify field.
- * Defaults to 'npm test' if no verify field found.
+ * Returns { cmd, explicit } — explicit is true only if the task has an explicit Verify field.
  */
 function getVerifyCommand(cwd, taskTitle) {
   const todoPath = path.join(cwd, 'atris', 'TODO.md');
-  if (!fs.existsSync(todoPath)) return 'npm test';
+  if (!fs.existsSync(todoPath)) return { cmd: null, explicit: false };
   const todo = parseTodo(todoPath);
   const task = [...todo.inProgress, ...todo.backlog, ...todo.completed]
     .find(t => t.title === taskTitle);
-  if (!task) return 'npm test';
-  if (task.verify) return task.verify;
-  return 'npm test';
+  if (!task || !task.verify) return { cmd: null, explicit: false };
+  return { cmd: task.verify, explicit: true };
+}
+/**
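+ * Verify that neither REWARD_CONFIG nor the source of computeTickReward has been
+ * modified since ship time, by comparing a SHA-256 digest of both against REWARD_CHECKSUM.
+ * Returns { ok, expected, actual }.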
+ */
+function verifyJudgeIntegrity() {
+  const crypto = require('crypto');
+  const h = crypto.createHash('sha256');
+  h.update(JSON.stringify(REWARD_CONFIG));
+  h.update(computeTickReward.toString());
+  const actual = h.digest('hex');
+  return { ok: actual === REWARD_CHECKSUM, expected: REWARD_CHECKSUM, actual };
+}
+/**
+ * Build the validator's plan-review prompt. Fresh context — the validator
+ * reads the plan output and the contract fields as if it has never seen them.
+ */
+function buildPlanReviewPrompt(context, planOutput) {
+  const files = Array.isArray(context.files) && context.files.length
+    ? context.files.join(', ')
+    : 'none declared in context';
+  return `You are the validator in plan-review mode. You have NOT seen the planning context — read everything fresh.
+Task: "${context.task}"
+Kind: ${context.kind || 'unknown'}
+Files declared in context: ${files}
+Plan output from the navigator:
+---
+${planOutput || '(no plan output captured)'}
+---
+Read from disk:
+- atris/atris.md (the workspace protocol — operating rules and task shape)
+- atris/TODO.md (find this task; inspect Files, Exit, Verify, After, Rollback)
+- atris/lessons.md (recent failures — last 20 lines)
+Decide if the plan is safe to execute. Check:
+1. Verify points at a falsifiable rubric or test (not \`true\`, \`echo ok\`, or similar).
+   Prefer \`atris verify <slug> --section <name>\`.
+2. Files are explicitly declared (not empty, not vague).
+3. Rollback is named (commit, checkpoint, or \`git revert\`).
+4. The plan's claims match the declared Task fields.
+5. Nothing in lessons.md contradicts this plan.
+Output EXACTLY one of these two formats as the LAST thing in your response. No preamble before the verdict line.
+SIGNOFF: <one sentence on why the plan is safe>
+or
+REJECT: <one sentence on what is wrong>
+FIX: <one sentence on what must change>
+PROPOSED:
+  Files: <concrete path list, or omit this line if original is fine>
+  Exit: <sharp observable done condition, or omit this line if original is fine>
+  Verify: <falsifiable shell command, or omit this line if original is fine>
+  Rollback: <git revert <sha> or concrete checkpoint, or omit this line if original is fine>
+Be a drafting partner, not just a critic. When you REJECT, write the PROPOSED block as a concrete draft the human can accept as-is, edit, or reject. Include each PROPOSED line only for fields that need changing; skip a line if the original is correct. Omit the entire PROPOSED block only if the rejection is about scope or intent rather than a draftable field.
+`;
+}
+/**
+ * Parse the validator's verdict line(s) from their output. Returns one of:
+ *   { verdict: 'SIGNOFF', reason, fix: '', proposed: null }
+ *   { verdict: 'REJECT', reason, fix, proposed }
+ * If neither format is present, treats it as a REJECT with a parse-fail reason.
+ */
+function parseVerdict(output) {
+  const text = String(output || '');
+  const rawLines = text.split('\n');
+  const lines = rawLines.map((l) => l.trim()).filter(Boolean);
+  // Scan from the end backwards — the verdict is supposed to be LAST.
+  for (let i = lines.length - 1; i >= 0; i--) {
+    const line = lines[i];
+    if (/^SIGNOFF\s*:/i.test(line)) {
+      return { verdict: 'SIGNOFF', reason: line.replace(/^SIGNOFF\s*:\s*/i, ''), fix: '', proposed: null };
+    }
+    if (/^REJECT\s*:/i.test(line)) {
+      const reason = line.replace(/^REJECT\s*:\s*/i, '');
+      // Fix line is usually immediately after REJECT.
+      const tail = lines.slice(i);
+      const fixLine = tail.find((l) => /^FIX\s*:/i.test(l));
+      const fix = fixLine ? fixLine.replace(/^FIX\s*:\s*/i, '') : '';
+      const proposed = parseProposedBlock(rawLines.slice(rawLines.findIndex((l) => /PROPOSED\s*:/i.test(l))));
+      return { verdict: 'REJECT', reason, fix, proposed };
+    }
+  }
+  return {
+    verdict: 'REJECT',
+    reason: 'validator output did not contain SIGNOFF or REJECT',
+    fix: 'ensure validator emits machine-parseable verdict as the last line',
+    proposed: null,
+  };
+}
+/**
+ * Parse the PROPOSED block: 4 optional indented fields (Files, Exit, Verify,
+ * Rollback). Returns null if no block, or an object with only the fields the
+ * validator chose to propose.
+ */
+function parseProposedBlock(lines) {
+  if (!lines || !lines.length || !/PROPOSED\s*:/i.test(lines[0] || '')) return null;
+  const proposed = {};
+  const fieldMatchers = {
+    files: /^\s*Files\s*:\s*(.+)$/i,
+    exit: /^\s*Exit\s*:\s*(.+)$/i,
+    verify: /^\s*Verify\s*:\s*(.+)$/i,
+    rollback: /^\s*Rollback\s*:\s*(.+)$/i,
+  };
+  for (let j = 1; j < lines.length; j++) {
+    const raw = lines[j];
+    // Stop at the first unindented line that is not one of the four field
+    // names. Blank lines and indented lines are scanned through, not stopped at.
+    if (/^\S/.test(raw) && !/^(Files|Exit|Verify|Rollback)\s*:/i.test(raw)) break;
+    for (const [key, matcher] of Object.entries(fieldMatchers)) {
+      const m = raw.match(matcher);
+      if (m) proposed[key] = m[1].trim();
+    }
+  }
+  return Object.keys(proposed).length ? proposed : null;
+}
+/**
+ * Default executor for plan-review: spawn a fresh claude -p call.
+ * Kept thin so tests can inject a stub via options.planReviewExec.
+ */
+function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
+  const tmpFile = path.join(cwd, '.autopilot-plan-review.tmp');
+  fs.writeFileSync(tmpFile, prompt);
+  try {
+    const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
+    const env = { ...process.env };
+    delete env.CLAUDECODE;
+    const output = execSync(cmd, {
+      cwd,
+      encoding: 'utf8',
+      timeout,
+      stdio: 'pipe',
+      maxBuffer: 10 * 1024 * 1024,
+      env,
+    });
+    return output || '';
+  } catch (err) {
+    if (err.stdout) return err.stdout;
+    throw err;
+  } finally {
+    try { fs.unlinkSync(tmpFile); } catch {}
+  }
+}
+/**
+ * Default executor for codex: spawn `codex` with the prompt passed as the `-p` argument.
+ * Users can override with ATRIS_CODEX_CMD env var; tests inject via options.codexExec.
+ */
+function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
+  const cmd = process.env.ATRIS_CODEX_CMD || 'codex';
+  const proc = spawnSync(cmd, ['-p', prompt], {
+    cwd,
+    encoding: 'utf8',
+    timeout,
+    stdio: 'pipe',
+    maxBuffer: 10 * 1024 * 1024,
+  });
+  if (proc.status !== 0 && !proc.stdout) {
+    throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
+  }
+  return proc.stdout || '';
+}
+/**
+ * Check if codex is available on PATH (or ATRIS_CODEX_CMD points to something runnable).
+ * Kept simple: `which` probe. Tests override via options.hasCodex.
+ */
+function hasCodex() {
+  const cmd = process.env.ATRIS_CODEX_CMD || 'codex';
+  try {
+    const r = spawnSync('which', [cmd], { stdio: 'pipe' });
+    return r.status === 0;
+  } catch {
+    return false;
+  }
+}
+/**
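+ * Run plan-review: the validator (and optionally codex) read the plan and
+ * decide if it is safe to execute. Returns { verdict, reason, fix, proposed, signers },
+ * plus `notes` when codex was skipped or failed and `split: true` when both signers ran without a joint SIGNOFF.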
+ *
+ * Codex is invoked only when the task explicitly opts in:
+ *   - env ATRIS_USE_CODEX=1, or
+ *   - context.tags includes 'codex', or
+ *   - context.tags includes 'gray' or 'high-risk' (for any context.kind)
+ *
+ * If codex is opted-in but not installed, we skip gracefully and surface a note.
+ * If both signers run and either one rejects, verdict is REJECT with both opinions in reason.
+ */
+function runPlanReview({ cwd, context, planOutput, options = {} }) {
+  const prompt = buildPlanReviewPrompt(context, planOutput);
+  const tags = Array.isArray(context.tags) ? context.tags : [];
+  // Primary signer: validator.
+  const validatorExec = options.planReviewExec || defaultPlanReviewExecutor;
+  const validatorOutput = validatorExec(prompt, { cwd, role: 'validator' });
+  const primary = parseVerdict(validatorOutput);
+  // Codex: opted in explicitly, not inferred.
+  const codexOptIn =
+    process.env.ATRIS_USE_CODEX === '1' ||
+    tags.includes('codex') ||
+    tags.includes('gray') ||
+    tags.includes('high-risk');
+  if (!codexOptIn) {
+    return { ...primary, signers: ['validator'], proposed: primary.proposed || null };
+  }
+  const codexCheck = options.hasCodex != null ? options.hasCodex : hasCodex();
+  if (!codexCheck) {
+    return {
+      ...primary,
+      signers: ['validator'],
+      proposed: primary.proposed || null,
+      notes: 'codex was requested but not on PATH; skipped gracefully',
+    };
+  }
+  const codexExec = options.codexExec || defaultCodexExecutor;
+  let codexOutput;
+  try {
+    codexOutput = codexExec(prompt, { cwd, role: 'codex' });
+  } catch (err) {
+    return {
+      ...primary,
+      signers: ['validator'],
+      notes: `codex invocation failed: ${err.message}; falling back to single signer`,
+    };
+  }
+  const codex = parseVerdict(codexOutput);
+  if (primary.verdict === 'SIGNOFF' && codex.verdict === 'SIGNOFF') {
+    return {
+      verdict: 'SIGNOFF',
+      reason: primary.reason,
+      fix: '',
+      proposed: null,
+      signers: ['validator', 'codex'],
+    };
+  }
+  // Any disagreement or joint reject → halt with both opinions surfaced.
+  // If either signer wrote a PROPOSED draft, surface the validator's first
+  // (or codex's if validator didn't propose one).
+  return {
+    verdict: 'REJECT',
+    reason: `Split verdict. validator=${primary.verdict} (${primary.reason || 'no reason'}); codex=${codex.verdict} (${codex.reason || 'no reason'}).`,
+    fix: primary.fix || codex.fix || 'reconcile the two signers before re-planning',
+    proposed: primary.proposed || codex.proposed || null,
+    signers: ['validator', 'codex'],
+    split: true,
+  };
+}
+/**
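+ * Insert a plan-review rejection into today's journal directly below the ## Notes heading
+ * (the heading is added if missing). Does nothing when today's journal file does not exist.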
+ * Intentionally does NOT write to lessons.md — rejections only become lessons
+ * if a human spots a reusable failure pattern.
+ */
+function appendPlanRejection(cwd, context, review) {
+  try {
+    // Compute the journal path from the passed cwd so tests and isolated
+    // workspaces both work. getLogPath() resolves against process.cwd()
+    // which isn't always the task's workspace.
+    const date = new Date();
+    const year = date.getFullYear();
+    const month = String(date.getMonth() + 1).padStart(2, '0');
+    const day = String(date.getDate()).padStart(2, '0');
+    const logFile = path.join(cwd, 'atris', 'logs', String(year), `${year}-${month}-${day}.md`);
+    if (!fs.existsSync(logFile)) return;
+    const now = new Date().toISOString().slice(0, 16).replace('T', ' ');
+    const signers = (review.signers || []).join(' + ');
+    const proposedBlock = review.proposed
+      ? `**Proposed draft:**\n` +
+        (review.proposed.files ? `- Files: ${review.proposed.files}\n` : '') +
+        (review.proposed.exit ? `- Exit: ${review.proposed.exit}\n` : '') +
+        (review.proposed.verify ? `- Verify: ${review.proposed.verify}\n` : '') +
+        (review.proposed.rollback ? `- Rollback: ${review.proposed.rollback}\n` : '')
+      : '';
+    const block =
+      `\n### Plan rejected — ${now}\n\n` +
+      `**Task:** ${context.task}\n` +
+      `**Signers:** ${signers}\n` +
+      `**Reason:** ${review.reason}\n` +
+      (review.fix ? `**Fix:** ${review.fix}\n` : '') +
+      (proposedBlock ? `${proposedBlock}` : '') +
+      (review.notes ? `**Notes:** ${review.notes}\n` : '');
+    let content = fs.readFileSync(logFile, 'utf8');
+    const notesIdx = content.indexOf('## Notes');
+    if (notesIdx === -1) {
+      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
+    } else {
+      const eol = content.indexOf('\n', notesIdx);
+      content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
+    }
+    fs.writeFileSync(logFile, content);
+  } catch {
+    // journaling must never crash the tick
+  }
 }
 function runTaskOnce(context, options = {}) {
   const { verbose = false, cwd = process.cwd() } = options;
+  // Judge integrity check — halt if computeTickReward was tampered with
+  const integrity = verifyJudgeIntegrity();
+  if (!integrity.ok) {
+    writeLesson(cwd, 'judge-corruption', 'fail',
+      `computeTickReward checksum mismatch. Expected ${integrity.expected}, got ${integrity.actual}. Tick halted.`);
+    return {
+      outcome: 'halted',
+      reason: 'judge-corruption',
+      phaseResults: {},
+      elapsedSeconds: 0,
+      verifyRan: false,
+      verifyPass: false,
+    };
+  }
   const phaseResults = {};
   const startedAt = Date.now();
-  const verifyCmd = getVerifyCommand(cwd, context.task);
+  const verifyResult = getVerifyCommand(cwd, context.task);
+  const verifyCmd = verifyResult.cmd;
+  // Guard: endgame tasks must have an explicit Verify field.
+  // Other kinds are not halted here, but they get no default: verifyCmd is null without an explicit Verify.
+  if (!verifyResult.explicit && context.kind === 'endgame') {
+    writeLesson(cwd, 'no-verify-field', 'fail',
+      `Task "${context.task}" has no explicit **Verify:** field in TODO.md. Tick halted — every endgame task must declare how to verify it.`);
+    return {
+      outcome: 'halted',
+      reason: 'no-verify-field',
+      phaseResults: {},
+      elapsedSeconds: 0,
+      verifyRan: false,
+      verifyPass: false,
+    };
+  }
+  // Falsifiability gate (endgame + explicit Verify only).
+  // Run Verify BEFORE the work. If it passes, the rubric is trivial or the
+  // task is already done — either way, halt. This is the keystone that makes
+  // Verify load-bearing. The cmd is captured here and reused post-execute so
+  // an agent cannot swap the rubric mid-tick.
+  const skipFalsifiability = options.skipFalsifiability === true;
+  if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
+    try {
+      execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 60000 });
+      writeLesson(cwd, 'verify-not-falsifiable', 'fail',
+        `Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
+      return {
+        outcome: 'halted',
+        reason: 'verify-not-falsifiable',
+        phaseResults: {},
+        elapsedSeconds: 0,
+        verifyRan: true,
+        verifyPass: false,
+      };
+    } catch {
+      // Pre-verify failed — good, the rubric is falsifiable. Proceed.
+    }
+  }
-  for (const phase of ['plan', 'do', 'review']) {
+  // Phase: plan
+  {
     const t0 = Date.now();
-    const result = executePhaseDetailed(phase, context, options);
-    phaseResults[phase] = {
+    const result = (options.phaseExec || executePhaseDetailed)('plan', context, options);
+    phaseResults.plan = {
+      prompt: result.prompt,
+      output: result.output || '',
+      elapsedSeconds: Math.round((Date.now() - t0) / 1000),
+    };
+  }
+  // Phase: plan-review — validator reads the plan fresh and signs off or rejects.
+  // Can be skipped via options.skipPlanReview (tests only). Codex is optional,
+  // opt-in via env var / tags. On REJECT, the tick halts and the rejection is
+  // journaled; lessons.md is NOT touched (only promoted lessons go there).
+  if (!options.skipPlanReview) {
+    const t0 = Date.now();
+    const review = runPlanReview({
+      cwd,
+      context,
+      planOutput: phaseResults.plan.output,
+      options,
+    });
+    const elapsed = Math.round((Date.now() - t0) / 1000);
+    phaseResults['plan-review'] = {
+      output:
+        `${review.verdict}: ${review.reason || ''}` +
+        (review.fix ? `\nFIX: ${review.fix}` : '') +
+        (review.notes ? `\n(${review.notes})` : ''),
+      signers: review.signers,
+      elapsedSeconds: elapsed,
+    };
+    if (review.verdict === 'REJECT') {
+      appendPlanRejection(cwd, context, review);
+      return {
+        outcome: 'halted',
+        reason: 'plan-rejected-at-review',
+        phaseResults,
+        elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
+        verifyRan: false,
+        verifyPass: false,
+      };
+    }
+  }
+  // Phase: do
+  {
+    const t0 = Date.now();
+    const result = (options.phaseExec || executePhaseDetailed)('do', context, options);
+    phaseResults.do = {
+      prompt: result.prompt,
+      output: result.output || '',
+      elapsedSeconds: Math.round((Date.now() - t0) / 1000),
+    };
+  }
+  // Phase: review
+  {
+    const t0 = Date.now();
+    const result = (options.phaseExec || executePhaseDetailed)('review', context, options);
+    phaseResults.review = {
       prompt: result.prompt,
       output: result.output || '',
       elapsedSeconds: Math.round((Date.now() - t0) / 1000),
@@ -595,7 +1163,7 @@ function runTaskOnce(context, options = {}) {
   // Run the verify command if one is set; the review output no longer gates it
   let verifyPass = false;
   let verifyRan = false;
-  if (!reviewOutput.includes('failed') && verifyCmd) {
+  if (verifyCmd) {
     verifyRan = true;
     let t0 = Date.now();
     try {
@@ -620,7 +1188,7 @@ function runTaskOnce(context, options = {}) {
   }
   return {
-    success: !reviewOutput.includes('failed') && (!verifyRan || verifyPass),
+    success: verifyRan && verifyPass,
     elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
     phaseResults,
     reviewOutput,
@@ -673,28 +1241,28 @@ function computeTickReward(execution, tickOutcome, verifyCmd) {
   // Validator clean: review passed without 'failed'
   if (!execution.reviewOutput || !execution.reviewOutput.includes('failed')) {
-    reward += 1;
+    reward += REWARD_CONFIG.REVIEW_CLEAN;
   }
-  // Verify passed: +3
+  // Verify passed
   if (execution.verifyRan && execution.verifyPass) {
-    reward += 3;
+    reward += REWARD_CONFIG.VERIFY_PASS;
   }
-  // npm test passed: +2
+  // npm test passed
   if (execution.verifyRan && execution.verifyPass && verifyCmd === 'npm test') {
-    reward += 2;
+    reward += REWARD_CONFIG.NPM_TEST_BONUS;
   }
   // Commit landed: check do phase output for git commit patterns
   const doOutput = execution.phaseResults.do.output || '';
   if (doOutput.match(/\[.*\s\d+\sfile.*changed/i) || doOutput.includes('git commit') || doOutput.includes('committed')) {
-    reward += 1;
+    reward += REWARD_CONFIG.COMMIT_LANDED;
   }
-  // Halt caught hallucination: -3
+  // Halt caught hallucination
   if (tickOutcome === 'halted') {
-    reward -= 3;
+    reward += REWARD_CONFIG.HALT_PENALTY;
   }
   return reward;
@@ -1070,7 +1638,15 @@ function getRecentSignals(cwd) {
 /**
  * Score endgame candidates by historical reward of similar horizon types.
  * Reads last 10 scorecards, infers type from slug prefix, calculates mean
- * reward per type, scores candidates by expected value, applies 80/20 exploit/explore.
+ * reward per type, scores candidates by expected value.
+ *
+ * Adaptive explore rate: if the last 5 endgames are all the same type,
+ * explore rate boosts to 50%. Otherwise scales between 20%-50% based on
+ * type repetition in the last 5.
+ *
+ * Difficulty floor: candidates whose inferred type has >80% success rate
+ * AND mean reward >5 are filtered out when harder candidates exist, so
+ * easy wins don't starve hard work.
  *
  * @param {string} cwd - Current working directory
  * @param {array} candidates - Array of { title, confidence, rationale }
@@ -1094,10 +1670,14 @@ function scoreEndgameCandidates(cwd, candidates) {
     // Infer type from slug/title by taking prefix before first dash
     const typeToRewards = {};
+    const typeToAttempts = {}; // track shipped/attempted per type
     for (const sc of scorecards) {
       const type = sc.slug.split('-')[0];
       if (!typeToRewards[type]) typeToRewards[type] = [];
       typeToRewards[type].push(sc.totalReward);
+      if (!typeToAttempts[type]) typeToAttempts[type] = { shipped: 0, attempted: 0 };
+      typeToAttempts[type].shipped += sc.tasksShipped;
+      typeToAttempts[type].attempted += sc.tasksAttempted;
     }
     // Calculate mean reward per type
@@ -1107,45 +1687,70 @@ function scoreEndgameCandidates(cwd, candidates) {
       typeMeans[type] = mean;
     }
+    // Calculate success rate per type
+    const typeSuccessRate = {};
+    for (const [type, counts] of Object.entries(typeToAttempts)) {
+      typeSuccessRate[type] = counts.attempted > 0 ? counts.shipped / counts.attempted : 0;
+    }
+    // Adaptive explore rate based on diversity of last 5 scorecards
+    const last5 = scorecards.slice(-5);
+    const last5Types = last5.map(sc => sc.slug.split('-')[0]);
+    const uniqueTypes = new Set(last5Types).size;
+    // All same type → exploreRate=0.5; all different → exploreRate=0.2
+    // Linear interpolation: exploreRate = 0.5 - (uniqueTypes - 1) * 0.3 / (maxTypes - 1); with 0 or 1 recent scorecards the 0.2 baseline is used
+    const maxTypes = last5Types.length;
+    const exploreRate = maxTypes <= 1
+      ? 0.2
+      : 0.5 - (uniqueTypes - 1) * 0.3 / (maxTypes - 1);
     // Score each candidate by expected value based on historical type mean
     const scored = candidates.map(c => {
       // Infer type from title keywords that match scorecard slug prefixes
       const titleLower = (c.title || '').toLowerCase();
       const cType = Object.keys(typeMeans).find(t => titleLower.includes(t)) || titleLower.split(/[\s\-]+/)[0];
       const historicalMean = typeMeans[cType] !== undefined ? typeMeans[cType] : 0;
+      const successRate = typeSuccessRate[cType] !== undefined ? typeSuccessRate[cType] : 0;
       const expectedValue = historicalMean * c.confidence;
       return {
         ...c,
         expectedValue,
         type: cType,
-        historicalMean
+        historicalMean,
+        successRate
       };
     });
+    // Difficulty floor: filter out easy-win candidates (>80% success rate AND
+    // mean reward >5) when harder candidates exist
+    const hardCandidates = scored.filter(c => !(c.successRate > 0.8 && c.historicalMean > 5));
+    const pool = hardCandidates.length > 0 ? hardCandidates : scored;
     // Sort by expected value (descending)
-    scored.sort((a, b) => b.expectedValue - a.expectedValue);
+    pool.sort((a, b) => b.expectedValue - a.expectedValue);
-    // 80/20 split: 80% exploit (best), 20% explore (random)
+    // Adaptive exploit/explore split
     const choice = Math.random();
     let selected;
-    if (choice < 0.8) {
+    if (choice < (1 - exploreRate)) {
       // Exploit: return highest expected value
-      selected = scored[0];
+      selected = pool[0];
     } else {
-      // Explore: return random candidate
+      // Explore: return random candidate from full scored list (not filtered)
       selected = scored[Math.floor(Math.random() * scored.length)];
     }
-    const reason = choice < 0.8
-      ? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)}`
-      : `explore: random-candidate type=${selected.type}`;
+    const reason = choice < (1 - exploreRate)
+      ? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)} explore-rate=${exploreRate.toFixed(2)}`
+      : `explore: random-candidate type=${selected.type} explore-rate=${exploreRate.toFixed(2)}`;
     return {
       title: selected.title,
       confidence: selected.confidence,
       rationale: selected.rationale,
       scored: true,
-      reason
+      reason,
+      exploreRate
     };
   } catch (err) {
     // If scoring fails, fall back to best by confidence
@@ -1154,14 +1759,71 @@ function scoreEndgameCandidates(cwd, candidates) {
   }
 }
+/**
+ * Check whether a lesson's bug pattern is still present in the named files.
+ * Parses the lesson line for file paths (e.g. `commands/autopilot.js:116`)
+ * and the slug (e.g. `inbox-parser-eats-hr-separator`). Greps the named
+ * files for slug keywords. If none match → lesson is resolved.
+ *
+ * Returns false (not resolved) before touching the filesystem when the line
+ * has no bold dated slug marker, names no backticked file reference that
+ * contains a slash or ends in .js, .md or .ts, or when the slug yields no
+ * keyword longer than two characters.
+ *
+ * Matching is one case-insensitive grep (-q -i) per keyword per file, and the
+ * first hit in any named file keeps the lesson open. Referenced files that no
+ * longer exist are skipped, so a lesson whose files are all gone is reported
+ * as resolved. Relative references are resolved against cwd.
+ *
+ * NOTE(review): the bare catch treats every execFileSync failure as "keyword
+ * not found". That covers grep being unavailable and the 5 second timeout as
+ * well as a genuine no-match exit, so such failures push the result toward
+ * "resolved". Confirm this is acceptable on every platform this runs on.
+ *
+ * @param {string} lessonLine - A single line from lessons.md
+ * @param {string} cwd - Current working directory
+ * @returns {boolean} true if the lesson's bug pattern is gone (resolved)
+ */
+function isLessonResolved(lessonLine, cwd) {
+  // Extract slug: bold text after date, e.g. **[2026-04-08] inbox-parser-eats-hr-separator**
+  const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
+  if (!slugMatch) return false;
+  const slug = slugMatch[1];
+  // Extract file paths: patterns like `commands/autopilot.js:116` or `commands/run.js:157`
+  const fileRefs = [];
+  const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
+  let m;
+  while ((m = filePattern.exec(lessonLine)) !== null) {
+    const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
+    if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
+      fileRefs.push(ref);
+    }
+  }
+  if (fileRefs.length === 0) return false;
+  // Derive keywords from slug (split on dashes, drop short words)
+  const keywords = slug.split('-').filter(w => w.length > 2);
+  if (keywords.length === 0) return false;
+  // Grep each named file for any keyword. If at least one file still matches → not resolved.
+  for (const ref of fileRefs) {
+    const absPath = path.isAbsolute(ref) ? ref : path.join(cwd, ref);
+    if (!fs.existsSync(absPath)) continue; // file deleted = pattern gone
+    for (const kw of keywords) {
+      try {
+        execFileSync('grep', ['-q', '-i', kw, absPath], {
+          cwd,
+          timeout: 5000,
+          stdio: ['ignore', 'ignore', 'ignore']
+        });
+        // grep exited 0 → keyword found → lesson still applies
+        return false;
+      } catch {
+        // grep exited non-zero → keyword not found in this file, continue
+      }
+    }
+  }
+  // No keyword matched in any named file → lesson is resolved
+  return true;
+}
 /**
  * Propose 3 candidate next horizons for the autopilot loop. Combines
  * `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
  * to imagine what to work on next, spawns `claude -p`, and parses the
  * JSON response into `[{ title, confidence, rationale }]`.
  *
- * Throws on subprocess failure or when fewer than 3 valid candidates
- * come back. Callers are responsible for catching and falling back.
+ * Filters out candidates derived from resolved lessons (bug pattern no
+ * longer present in named files). Resolved lessons get tagged `[resolved]`
+ * in lessons.md. Requires at least 1 valid candidate after filtering.
  */
 async function proposeCandidateHorizons(cwd) {
   const idleTicks = getIdleTickCount(cwd);
@@ -1257,11 +1919,49 @@ Reply with the JSON array and nothing else.`;
       c.rationale.length > 0
     );
-  if (candidates.length < 3) {
-    throw new Error(`proposeCandidateHorizons: expected 3 valid candidates, got ${candidates.length}`);
+  if (candidates.length < 1) {
+    throw new Error(`proposeCandidateHorizons: expected at least 1 valid candidate, got ${candidates.length}`);
+  }
+  // Filter out candidates derived from resolved lessons
+  const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
+  const filtered = [];
+  for (const c of candidates) {
+    const combinedText = `${c.title} ${c.rationale}`.toLowerCase();
+    let droppedByLesson = false;
+    for (const lessonLine of signals.recentLessons) {
+      const slugMatch = lessonLine.match(/\*\*\[\d{4}-\d{2}-\d{2}\]\s+([\w-]+)\*\*/);
+      if (!slugMatch) continue;
+      if (lessonLine.includes('[resolved]')) continue;
+      const slug = slugMatch[1];
+      // Fuzzy match: check if slug keywords appear in the candidate text
+      const slugWords = slug.split('-').filter(w => w.length > 2);
+      const matchCount = slugWords.filter(w => combinedText.includes(w)).length;
+      if (matchCount < Math.ceil(slugWords.length * 0.5)) continue;
+      // Candidate matches this lesson — check if the lesson is resolved
+      if (isLessonResolved(lessonLine, cwd)) {
+        // Tag lesson [resolved] in lessons.md
+        try {
+          let content = fs.readFileSync(lessonsPath, 'utf8');
+          const taggedLine = lessonLine.replace(
+            /\*\*\[(\d{4}-\d{2}-\d{2})\]\s+([\w-]+)\*\*/,
+            '**[$1] $2** [resolved]'
+          );
+          content = content.replace(lessonLine.trim(), taggedLine.trim());
+          fs.writeFileSync(lessonsPath, content);
+        } catch {}
+        droppedByLesson = true;
+        break;
+      }
+    }
+    if (!droppedByLesson) filtered.push(c);
   }
-  return candidates.slice(0, 3);
+  if (filtered.length < 1) {
+    throw new Error('proposeCandidateHorizons: all candidates were from resolved lessons');
+  }
+  return filtered.slice(0, 3);
 }
 async function autopilotAtris(description, options = {}) {
@@ -1361,7 +2061,7 @@ async function autopilotAtris(description, options = {}) {
       break;
     }
-    const suggestion = await suggestNextTask(cwd, skipped);
+    const suggestion = await suggestNextTask(cwd, skipped, { auto });
     if (!suggestion) {
       tickOutcome = 'idle';
@@ -1472,6 +2172,22 @@ async function autopilotAtris(description, options = {}) {
       const execution = runTaskOnce(context, { verbose, cwd });
       lastExecution = execution;
       lastVerifyCmd = execution.verifyCmd;
+      // Early halt — judge corruption or no verify field
+      if (execution.outcome === 'halted') {
+        tickOutcome = 'halted';
+        tickOutcomeText = `I halted before running "${lastTaskTitle}": ${execution.reason}.`;
+        tickNextStep = 'stop until a human looks at the error';
+        if (!verbose) {
+          printPlainBlock([
+            `I halted: ${execution.reason}.`,
+            '',
+            'Next I stopped the loop.'
+          ].join('\n'));
+        }
+        break;
+      }
       const planTime = execution.phaseResults.plan.elapsedSeconds;
       if (verbose) console.log(`  planned (${planTime}s)`);
@@ -1523,6 +2239,39 @@ async function autopilotAtris(description, options = {}) {
       tickOutcomeText = `I planned, built, and reviewed "${suggestion.task}".`;
       tickNextStep = 'pick the next endgame task';
       logCompletion(suggestion.task);
+      // Record commit hash + verify command for retroactive regression checks
+      try {
+        const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
+        const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
+        recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
+        // Every 10th tick, run retroactive regression check
+        const registryPath = path.join(cwd, 'atris', 'tick-registry.json');
+        if (fs.existsSync(registryPath)) {
+          try {
+            const registry = JSON.parse(fs.readFileSync(registryPath, 'utf8'));
+            if (Array.isArray(registry) && registry.length % 10 === 0) {
+              const regressionResults = regressionCheck(cwd);
+              const failures = regressionResults.filter(r => !r.pass && !r.skipped);
+              if (failures.length > 0) {
+                // Apply -5 retroactive penalty per failure via journal note
+                for (const f of failures) {
+                  appendTickSummary(cwd, {
+                    outcome: `Retroactive regression failure: tick ${f.hash.slice(0, 7)} (${f.slug}) verify now fails. -5 penalty.`,
+                    horizon: readHorizonSlug(cwd),
+                    nextStep: 'investigate regression',
+                    reward: -5,
+                  });
+                }
+                if (verbose) console.log(`  regression check: ${failures.length} failure(s) found`);
+              } else if (verbose) {
+                console.log(`  regression check: all ${regressionResults.length} entries pass`);
+              }
+            }
+          } catch { /* registry read failure must not crash */ }
+        }
+      } catch { /* commit recording failure must not crash the tick */ }
       if (maybeWriteCompletedEndgameScorecard(cwd, startingEndgame)) {
         tickNextStep = 'pick the next horizon';
       }
@@ -1602,6 +2351,152 @@ async function autopilotAtris(description, options = {}) {
   return { success: completed > 0, completed };
 }
+/**
+ * Compute age in days for a task.
+ * Endgame tasks use the Picked: date from TODO.md Endgame section.
+ * In-progress tasks parse timestamp from Claimed by: field.
+ * Fallback returns 0 (fresh).
+ *
+ * Lookup order: a YYYY-MM-DD date found inside task.claimed wins whenever it
+ * parses, regardless of task.tag. Only when that yields nothing and task.tag
+ * is 'endgame' is the file at todoPath read and the first bold "Picked:"
+ * label followed by a YYYY-MM-DD date used. The pattern is not anchored to a
+ * section heading, so the first such occurrence anywhere in the file is taken.
+ *
+ * The age is the floor of (now - date) expressed in 24-hour days, so a date
+ * in the future produces a negative number rather than 0.
+ *
+ * @param {{ claimed?: string, tag?: string }} task - task record; only the
+ *   claimed and tag fields are read here
+ * @param {string} [todoPath] - path of the TODO file holding the Picked: line;
+ *   skipped when falsy or when the file does not exist
+ * @returns {number} whole days since the matched date, or 0 when no usable
+ *   date is found
+ */
+function getTaskAgeDays(task, todoPath) {
+  if (task.claimed) {
+    const tsMatch = task.claimed.match(/\d{4}-\d{2}-\d{2}/);
+    if (tsMatch) {
+      const d = new Date(tsMatch[0]);
+      if (!isNaN(d)) return Math.floor((Date.now() - d.getTime()) / (1000 * 60 * 60 * 24));
+    }
+  }
+  if (task.tag === 'endgame' && todoPath && fs.existsSync(todoPath)) {
+    const content = fs.readFileSync(todoPath, 'utf8');
+    const m = content.match(/\*\*Picked:\*\*\s*(\d{4}-\d{2}-\d{2})/);
+    if (m) {
+      const d = new Date(m[1]);
+      if (!isNaN(d)) return Math.floor((Date.now() - d.getTime()) / (1000 * 60 * 60 * 24));
+    }
+  }
+  return 0;
+}
+/**
+ * Check whether a task/fact is still actionable.
+ *
+ * Decision order, first match wins:
+ *   1. age of 7 days or less: 'actionable', no further checks.
+ *   2. No usable keywords in the title: 'unverified'.
+ *   3. source given but absent on disk (relative paths resolve against cwd): 'stale'.
+ *   4. Recursive grep over .js and .md files under cwd finds none of the keywords: 'stale'.
+ *   5. git log over the last 30 days, all refs, finds a commit via --grep for
+ *      any of the first three keywords: 'actionable'.
+ *   6. Otherwise: 'unverified'.
+ *
+ * Keywords are taken from the title after removing backticks, brackets,
+ * parentheses and braces, then splitting on whitespace, slashes, backslashes,
+ * dots, colons, commas and semicolons. Only words longer than three
+ * characters are kept, and at most the first five are used.
+ *
+ * Any subprocess failure, including the 10 second timeout, is counted as
+ * "no hit" for that keyword.
+ *
+ * NOTE(review): keywords are handed to grep and to git --grep as patterns,
+ * not fixed strings, and grep receives them as a bare positional argument.
+ * A keyword containing regex metacharacters, or one starting with a dash,
+ * may therefore not behave as a literal search. Confirm against real titles.
+ *
+ * @param {{ title: string, age: number, source?: string }} fact
+ *   - title: the task or fact description
+ *   - age: age in days since the task was created/last verified
+ *   - source: optional file path or identifier where the fact originated
+ * @param {string} cwd - workspace root
+ * @returns {'actionable'|'unverified'|'stale'}
+ */
+function isStillTrue(fact, cwd) {
+  const { title, age, source } = fact;
+  // Fresh tasks are always actionable
+  if (age <= 7) return 'actionable';
+  // Extract searchable keywords from the title (skip short/common words)
+  const keywords = title
+    .replace(/[`\[\](){}]/g, '')
+    .split(/[\s/\\.:,;]+/)
+    .filter(w => w.length > 3)
+    .slice(0, 5);
+  if (keywords.length === 0) return 'unverified';
+  // Strategy 1: If source file is given, check it still exists
+  if (source) {
+    const sourcePath = path.isAbsolute(source) ? source : path.join(cwd, source);
+    if (!fs.existsSync(sourcePath)) return 'stale';
+  }
+  // Strategy 2: grep the codebase for key terms from the title
+  let grepHits = 0;
+  for (const kw of keywords) {
+    try {
+      execFileSync('grep', ['-r', '-l', '--include=*.js', '--include=*.md', '-m', '1', kw, '.'], {
+        cwd,
+        stdio: ['ignore', 'pipe', 'ignore'],
+        timeout: 10000
+      });
+      grepHits++;
+    } catch {
+      // grep returns non-zero when no match — that's fine
+    }
+  }
+  // If none of the keywords appear in the codebase, it's stale
+  if (grepHits === 0) return 'stale';
+  // Strategy 3: check git log for recent activity related to the keywords
+  let gitHits = 0;
+  for (const kw of keywords.slice(0, 3)) {
+    try {
+      const out = execFileSync(
+        'git', ['log', '--oneline', '--since=30 days ago', '--all', `--grep=${kw}`, '-1'],
+        { cwd, stdio: ['ignore', 'pipe', 'ignore'], timeout: 10000 }
+      ).toString().trim();
+      if (out.length > 0) gitHits++;
+    } catch {
+      // git-log failure is non-fatal
+    }
+  }
+  // Strong mechanical evidence: grep found terms AND recent git activity
+  if (gitHits > 0) return 'actionable';
+  // Grep found terms but no recent git activity — can't fully verify
+  return 'unverified';
+}
+/**
+ * Ask a model, through the claude CLI in print mode (claude -p), whether a
+ * task/fact is still relevant.
+ * Intended for the case where isStillTrue returns 'unverified': the
+ * mechanical check could not confirm or deny, so the CLI is asked to inspect
+ * the codebase with the Bash, Read, Glob and Grep tools allowed.
+ *
+ * How it runs:
+ *   - The prompt is written to .staleness-prompt.tmp inside cwd and fed to the
+ *     CLI through shell command substitution. The file is removed on both the
+ *     success and the failure path. The write happens before the try block,
+ *     so a failure to write the file propagates to the caller.
+ *   - The child gets a copy of process.env with CLAUDECODE removed
+ *     (presumably so the child does not think it is nested inside another
+ *     session; the reason is not shown here, confirm).
+ *   - The call is synchronous, with a 60 second timeout and a 2 MiB output cap.
+ *
+ * How the reply is read: the first output line that starts with YES or NO
+ * (case-insensitive) is used, falling back to the very first line. fresh is
+ * true only when that line starts with YES. reasoning is the rest of that
+ * line, or the first 200 characters of the output when the rest is empty.
+ *
+ * On any error (timeout, non-zero exit, missing CLI) the result is
+ * fresh: false with a reasoning of "Model check failed: " plus up to 100
+ * characters of the error message.
+ *
+ * NOTE(review): the temp file name is fixed, so two calls running at the same
+ * time in the same cwd would share it. Confirm callers never overlap.
+ *
+ * @param {{ title: string, age: number, source?: string }} fact
+ * @param {string} cwd - workspace root
+ * @returns {{ fresh: boolean, reasoning: string }}
+ */
+function askModel(fact, cwd) {
+  const { title, source } = fact;
+  const sourceHint = source ? `\nOriginal source file: ${source}` : '';
+  const prompt = `You are a staleness checker. Answer with exactly one line: YES or NO, followed by a short reason (under 30 words).
+Is this task still relevant to the codebase? Check for the mentioned files, functions, or patterns.
+Task: "${title}"${sourceHint}
+Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
+  const tmpFile = path.join(cwd, '.staleness-prompt.tmp');
+  fs.writeFileSync(tmpFile, prompt);
+  try {
+    const env = { ...process.env };
+    delete env.CLAUDECODE;
+    const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
+    const output = execSync(cmd, {
+      cwd,
+      encoding: 'utf8',
+      timeout: 60000,
+      stdio: 'pipe',
+      maxBuffer: 2 * 1024 * 1024,
+      env
+    }).trim();
+    try { fs.unlinkSync(tmpFile); } catch {}
+    // Parse YES/NO from the first line of output
+    const firstLine = output.split('\n').find(l => /^\s*(YES|NO)\b/i.test(l)) || output.split('\n')[0] || '';
+    const fresh = /^\s*YES\b/i.test(firstLine);
+    const reasoning = firstLine.replace(/^\s*(YES|NO)\s*/i, '').trim() || output.slice(0, 200);
+    return { fresh, reasoning };
+  } catch (err) {
+    try { fs.unlinkSync(tmpFile); } catch {}
+    // On timeout or crash, treat as unverifiable — conservative default
+    return { fresh: false, reasoning: `Model check failed: ${(err.message || '').slice(0, 100)}` };
+  }
+}
 /**
  * Entry point when called without a description.
  */
@@ -1611,19 +2506,30 @@ async function autopilotFromTodo(options = {}) {
 module.exports = {
   appendTickSummary,
+  askHuman,
+  askModel,
   autopilotAtris,
   autopilotFromTodo,
   buildPrompt,
+  isLessonResolved,
+  isStillTrue,
+  getTaskAgeDays,
   getIdleTickCount,
   getRecentSignals,
   getTickStatus,
   getVerifyCommand,
   computeTickReward,
+  verifyJudgeIntegrity,
   maybeWriteCompletedEndgameScorecard,
   renderHumanSuggestion,
   renderHumanTickIntro,
   proposeCandidateHorizons,
+  recordTickCommit,
+  regressionCheck,
+  runPlanReview,
   runTaskOnce,
+  buildPlanReviewPrompt,
+  parseVerdict,
   scoreEndgameCandidates,
   suggestNextTask,
   writeLesson