npm - atris - Versions diffs - 3.15.57 → 3.16.1 - Mend

atris 3.15.57 → 3.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/AGENTS.md +2 -2
package/GETTING_STARTED.md +1 -1
package/PERSONA.md +4 -4
package/README.md +12 -11
package/atris/skills/copy-editor/SKILL.md +30 -4
package/atris/skills/improve/SKILL.md +18 -20
package/atris/wiki/concepts/agent-activation-contract.md +5 -3
package/atris/wiki/concepts/workspace-initialization-contract.md +4 -4
package/atris/wiki/index.md +1 -0
package/ax +522 -73
package/bin/atris.js +78 -44
package/commands/align.js +0 -14
package/commands/apps.js +102 -1
package/commands/autopilot.js +628 -31
package/commands/brain.js +219 -34
package/commands/brainstorm.js +0 -829
package/commands/compile.js +569 -0
package/commands/computer.js +0 -60
package/commands/improve.js +501 -0
package/commands/integrations.js +233 -71
package/commands/lesson.js +44 -0
package/commands/member.js +4498 -226
package/commands/mission.js +302 -27
package/commands/now.js +89 -1
package/commands/probe.js +366 -0
package/commands/radar.js +181 -56
package/commands/recap.js +203 -0
package/commands/skill.js +6 -2
package/commands/soul.js +0 -4
package/commands/task.js +5587 -499
package/commands/terminal.js +14 -10
package/commands/wiki.js +87 -1
package/commands/workflow.js +288 -73
package/commands/worktree.js +52 -15
package/commands/xp.js +6 -65
package/lib/auto-accept-certified.js +294 -0
package/lib/file-ops.js +0 -184
package/lib/member-alive.js +232 -0
package/lib/policy-lessons.js +280 -0
package/lib/receipt-evidence.js +64 -0
package/lib/state-detection.js +75 -1
package/lib/task-db.js +568 -16
package/lib/task-proof.js +43 -0
package/package.json +1 -1
package/utils/auth.js +13 -4
package/commands/research.js +0 -52
package/lib/section-merge.js +0 -196

package/commands/autopilot.js CHANGED Viewed

@@ -25,6 +25,46 @@ const pkg = require('../package.json');
 const PHASE_TIMEOUT = 600000; // 10 min per phase
+function looksOwnerClaimed(claimed) {
+  const text = String(claimed || '').toLowerCase();
+  return /\bkeshav(?:rao)?\b/.test(text) || /\b(owner|human|operator)\b/.test(text);
+}
+function looksOwnerGatedTitle(title) {
+  const text = String(title || '').toLowerCase();
+  return (
+    /\bowner[- ](?:approval|input|gate|gated)\b/.test(text) ||
+    /\bhuman[- ](?:approval|input|gate|gated)\b/.test(text) ||
+    /\bmanual send\b/.test(text) ||
+    /\broute confirmation\b/.test(text) ||
+    /\bconfirm pallet destination\b/.test(text) ||
+    /\bconfirm .+ destination before .+ approval\b/.test(text) ||
+    /\bapprove and manually send\b/.test(text)
+  );
+}
+function shouldSkipAutoHumanGate(task) {
+  if (!task) return false;
+  return looksOwnerClaimed(task.claimed) || looksOwnerGatedTitle(task.title || task.task);
+}
+function repoMapAuditReportsClean(cwd) {
+  const auditPath = path.join(cwd, 'scripts', 'audit_map_refs.py');
+  if (!fs.existsSync(auditPath)) return false;
+  const result = spawnSync('python3', [auditPath], {
+    cwd,
+    encoding: 'utf8',
+    timeout: 120000,
+    maxBuffer: 1024 * 1024
+  });
+  if (result.status !== 0) return false;
+  const output = `${result.stdout || ''}\n${result.stderr || ''}`;
+  const match = output.match(/Total broken references:\s*(\d+)/i);
+  return Boolean(match && Number(match[1]) === 0);
+}
 /**
  * Scan workspace for the next thing worth doing.
  * Returns { task, why, kind } or null.
@@ -54,7 +94,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
   // --- Resume interrupted work ---
   if (todo.inProgress.length > 0) {
     const t = todo.inProgress[0];
-    if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title)) {
+    if (!(t.tags && t.tags.includes('unverified')) && !skipped.has(t.title) && !(auto && shouldSkipAutoHumanGate(t))) {
       suggestions.push({
         task: t.title,
         why: `This was already started${t.claimed ? ` by ${t.claimed}` : ''} but never finished.`,
@@ -75,6 +115,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
       why: `"${sp.staleSource}" changed on ${sp.sourceDate} but the page was last compiled ${sp.compiledDate}. The content may be wrong.`,
       kind: 'staleness',
       priority: 2,
+      files: [pageName, sp.staleSource],
       skipKey: key
     });
     break;
@@ -95,7 +136,9 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
   }
   // --- Broken MAP.md references ---
-  const { unhealable } = healBrokenMapRefs(cwd, atrisDir, true); // dry-run
+  const { unhealable } = repoMapAuditReportsClean(cwd)
+    ? { unhealable: [] }
+    : healBrokenMapRefs(cwd, atrisDir, true); // dry-run
   if (unhealable.length > 0 && !skipped.has('fix-map-refs')) {
     const sample = unhealable.slice(0, 3).map(r => `${r.file}:${r.line}`).join(', ');
     suggestions.push({
@@ -127,6 +170,7 @@ async function suggestNextTask(cwd, skipped = new Set(), { auto = false } = {})
   for (const t of todo.backlog) {
     if (t.tags && t.tags.includes('unverified')) continue;
     if (shouldSkipEndgameAtPicker(cwd, t)) continue;
+    if (auto && shouldSkipAutoHumanGate(t)) continue;
     if (skipped.has(t.title)) continue;
     const remaining = todo.backlog.filter(b => !(b.tags && b.tags.includes('unverified'))).length;
     suggestions.push({
@@ -348,6 +392,41 @@ function askHuman(taskTitle) {
   });
 }
+/**
+ * Type-check a child_process error as a timeout/kill. Node's execSync attaches
+ * `code: 'ETIMEDOUT'` and `signal` on timeout — it does NOT set `killed`, so a
+ * `killed`-only guard is dead code on the exact error it was written for
+ * (lesson: etimedout-error-shape, 2026-06-10).
+ */
+function isPhaseTimeoutError(err) {
+  return Boolean(err && (err.killed || err.code === 'ETIMEDOUT' || err.signal));
+}
+/**
+ * execSync with the phase-timeout orphan fix. Node's sync-exec timeout signals
+ * only the direct child pid — the `/bin/sh -c` wrapper — so the `claude` it
+ * spawned kept committing 160–296s past the 600s wall (lesson:
+ * etimedout-error-shape, 2026-06-10). `detached: true` makes the wrapper a
+ * process-group leader; on timeout we sweep the whole group via
+ * `process.kill(-pid, 'SIGKILL')`. ESRCH on the sweep means the group already
+ * died — fine. The original error is rethrown untouched so every call site
+ * keeps its existing catch contract (err.stdout passthrough included).
+ */
+function execPhaseCommandSync(cmd, opts = {}) {
+  try {
+    return execSync(cmd, { ...opts, detached: true });
+  } catch (err) {
+    if (isPhaseTimeoutError(err) && err.pid) {
+      try {
+        process.kill(-err.pid, 'SIGKILL');
+      } catch (sweepErr) {
+        if (sweepErr.code !== 'ESRCH') throw sweepErr;
+      }
+    }
+    throw err;
+  }
+}
 /**
  * Run a phase via claude -p subprocess.
  */
@@ -359,10 +438,11 @@ function executePhaseDetailed(phase, context, options = {}) {
   fs.writeFileSync(tmpFile, prompt);
   try {
-    const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
+    const cmd = options.cmdOverride
+      || `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Write,Edit,Glob,Grep"`;
     const env = { ...process.env };
     delete env.CLAUDECODE;
-    const output = execSync(cmd, {
+    const output = execPhaseCommandSync(cmd, {
       cwd: process.cwd(),
       encoding: 'utf8',
       timeout,
@@ -375,7 +455,9 @@ function executePhaseDetailed(phase, context, options = {}) {
     return { prompt, output: output || '' };
   } catch (err) {
     try { fs.unlinkSync(tmpFile); } catch {}
-    if (err.killed) throw new Error(`${phase} timed out after ${timeout / 1000}s`);
+    if (isPhaseTimeoutError(err)) {
+      throw new Error(`${phase} phase timed out after ${timeout / 1000}s (claude -p hit the wall; any work it committed survives — reconcile from pre-tick HEADs)`);
+    }
     if (err.stdout) {
       return { prompt, output: err.stdout };
     }
@@ -383,10 +465,6 @@ function executePhaseDetailed(phase, context, options = {}) {
   }
 }
-function executePhase(phase, context, options = {}) {
-  return executePhaseDetailed(phase, context, options).output;
-}
 /**
  * Build context-aware file list for prompts.
  */
@@ -412,6 +490,16 @@ function getContextFiles(phase, options = {}) {
   return [...new Set(files.filter(Boolean))].map((f) => `- ${f}`).join('\n');
 }
+// T35a (endgame loop-self-repair): shared-checkout git-safety contract.
+// Lesson 39: a concurrent tick's `git reset` destroyed a sibling repo's
+// uncommitted work. Sibling-repo edits ride per-tick worktrees (the same
+// ../repo siblings snapshotRepoHeads tracks); destructive git on the shared
+// checkout is forbidden (COORDINATION.md Rule 4). Interpolated into the
+// default and self-heal do prompts — never the benchmark prompt (it never
+// commits).
+const SHARED_CHECKOUT_GIT_CONTRACT = `- Shared-checkout git safety (COORDINATION.md Rule 4): edits to any repo OTHER than this tick's cwd (../atrisos-backend-style sibling repos) go through a per-tick worktree — start with \`atris worktree start --member <member> --task "<task>"\`, land with \`atris worktree ship --message "<msg>" --verify "<cmd>"\`. Never edit a sibling repo's shared checkout directly.
+- On a shared checkout, \`git reset\`, \`git checkout --\`, \`git clean\`, and stashing other agents' work are FORBIDDEN — concurrent ticks' uncommitted work lives there.`;
 /**
  * Build the right prompt for each phase, adapting to the kind of work.
  */
@@ -422,7 +510,13 @@ function buildPrompt(phase, context, options = {}) {
     contextNote = '',
     runnerName = '',
   } = options;
-  const readFiles = getContextFiles(phase, options);
+  const readFiles = getContextFiles(phase, {
+    ...options,
+    extraReadFiles: [
+      ...(options.extraReadFiles || []),
+      ...(Array.isArray(context.files) ? context.files : []),
+    ],
+  });
   const benchmarkProtocol = benchmarkStrategy === 'stack'
     ? 'coordinated stack run'
     : (benchmarkStrategy === 'single' ? 'pinned single-model baseline run' : '');
@@ -478,12 +572,24 @@ When done, reply: done.`;
     }
     if (kind === 'staleness' || kind === 'docs' || kind === 'review') {
+      const fileList = Array.isArray(context.files) && context.files.length
+        ? context.files.map((file) => `- ${file}`).join('\n')
+        : '- target page or MAP entry from the task title\n- source file(s) that changed';
       return `${baseRules}
 Maintenance task: ${task}
-Figure out what needs to change and why. Create focused tasks in atris/TODO.md.
+Relevant files:
+${fileList}
+Figure out what needs to change and why. Create exactly one focused task in atris/TODO.md unless the drift truly requires separate commits.
 For stale pages, read both the page and its sources to understand the drift.
+The task row must include these fields so plan-review can prove it is executable:
+- **Files:** concrete target page plus source file paths
+- **Exit:** the observable post-update state
+- **Verify:** one raw shell command that checks concrete facts and rejects stale phrases; use shell operators like \`&&\`, \`grep -q\`, or \`test\`, not Markdown backticks or English like "returns 1" / "shows today's date"
+- **Rollback:** git checkout -- <changed-files> before commit, or git revert HEAD --no-edit after commit
+Do not write tasks without Verify and Rollback. Do not use \`true\`, \`echo ok\`, or vague "review manually" verification.
 When done, reply: done.`;
     }
@@ -590,6 +696,7 @@ Rules:
 - Execute ONE step at a time. Verify each step before moving on.
 - Check MAP.md for file locations before grepping.
 - Stay in scope. Only fix the bug described in the lesson — no side quests.
+${SHARED_CHECKOUT_GIT_CONTRACT}
 Read these files first:
 ${readFiles}
@@ -614,6 +721,7 @@ Rules:
 - Check MAP.md for file locations before grepping.
 - If you hit two errors on the same step, stop and flag for re-scope.
 - Stay in scope. Don't touch files outside the task boundary.
+${SHARED_CHECKOUT_GIT_CONTRACT}
 Read these files first:
 ${readFiles}
@@ -684,6 +792,27 @@ If broken beyond quick fix, reply: failed — [reason].`;
   return '';
 }
+/**
+ * Build a clean kebab-case lesson slug from free text. Strips non-alphanumerics
+ * (em-dashes were leaking into slugs verbatim) and truncates at a word boundary
+ * instead of mid-word (e.g. the old `.slice(0, 40)` produced
+ * `verify-fail-per-member-model-selection-—-the-member-`).
+ */
+function lessonSlug(text, maxLen = 40) {
+  const base = String(text || 'unknown')
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '');
+  if (!base) return 'unknown';
+  if (base.length <= maxLen) return base;
+  const cut = base.slice(0, maxLen);
+  const lastDash = cut.lastIndexOf('-');
+  // base[maxLen] continues a word — back up to the last full word.
+  const atBoundary = base[maxLen] === '-';
+  const trimmed = atBoundary ? cut : (lastDash > 0 ? cut.slice(0, lastDash) : cut);
+  return trimmed.replace(/-+$/g, '') || 'unknown';
+}
 /**
  * Write a lesson to atris/lessons.md
  * Appends a line in format: - **[YYYY-MM-DD] slug** — pass/fail — explanation
@@ -802,6 +931,90 @@ function getVerifyCommand(cwd, taskTitle) {
   return { cmd: detectDefaultVerify(cwd), explicit: false };
 }
+function collectExplicitVerifyTasks(cwd) {
+  const todoPath = path.join(cwd, 'atris', 'TODO.md');
+  if (!fs.existsSync(todoPath)) return [];
+  const todo = parseTodo(todoPath);
+  return [...todo.inProgress, ...(todo.review || []), ...todo.backlog, ...todo.completed]
+    .filter((task) => task && task.verify)
+    .map((task) => ({
+      title: task.title,
+      verify: task.verify,
+      key: `${task.title}\0${task.verify}`,
+    }));
+}
+function findNewExplicitVerifyCommand(cwd, beforeKeys) {
+  const prior = beforeKeys instanceof Set ? beforeKeys : new Set(beforeKeys || []);
+  const added = collectExplicitVerifyTasks(cwd).filter((task) => !prior.has(task.key));
+  if (added.length !== 1) return null;
+  return { cmd: added[0].verify, explicit: true, task: added[0].title };
+}
+function shouldAdoptPlannedVerify(kind) {
+  return ['staleness', 'docs', 'review', 'inbox', 'cleanup', 'feature', 'lessons', 'imagined'].includes(kind);
+}
+// Task-plane status vocabulary lint. `atris task list/queue/current --status <s>`
+// only matches raw stored statuses (commands/task.js); `ready` is a TRANSITION
+// (`atris task ready` moves a task to review), so `--status ready` always
+// returns "(no tasks)" — a verify built on it is an unreachable gate; the
+// matching listable form is --status review (lessons.md
+// verify-status-vocabulary, 3rd occurrence 2026-06-10).
+const LISTABLE_TASK_STATUSES = ['open', 'claimed', 'review', 'done', 'failed'];
+const STATUS_CORRECTIONS = { ready: 'review' };
+function lintVerifyTaskStatusVocabulary(text) {
+  // Scan every `atris task list|queue|current` segment (compound verifies
+  // chain with && / || / ;), then pull its --status value if present.
+  const segmentRe = /\batris\s+task\s+(?:list|queue|current)\b([^|&;]*)/g;
+  let segment;
+  while ((segment = segmentRe.exec(text)) !== null) {
+    const statusMatch = /--status[=\s]+["']?([A-Za-z0-9_-]+)["']?/.exec(segment[1]);
+    if (!statusMatch) continue;
+    const status = statusMatch[1];
+    if (LISTABLE_TASK_STATUSES.includes(status)) continue;
+    const vocabulary = LISTABLE_TASK_STATUSES.join('|');
+    const corrected = STATUS_CORRECTIONS[status];
+    const suggestion = corrected
+      ? `use --status ${corrected} instead (atris task ${status} is a transition that lands tasks in ${corrected}, so --status ${status} never matches)`
+      : `use one of --status ${vocabulary}`;
+    return {
+      ok: false,
+      reason: `Verify uses unlistable task status "--status ${status}" — the listable vocabulary is ${vocabulary}; ${suggestion}`,
+    };
+  }
+  return null;
+}
+function validateVerifyCommandShape(cmd) {
+  const text = String(cmd || '').trim();
+  if (!text) return { ok: true };
+  if (text.includes('`')) {
+    return { ok: false, reason: 'Verify contains markdown backticks instead of a raw shell command' };
+  }
+  if (/\b(returns?|shows?|equals?|should|must)\b/i.test(text)) {
+    return { ok: false, reason: 'Verify contains prose expectations instead of shell operators/assertions' };
+  }
+  const statusLint = lintVerifyTaskStatusVocabulary(text);
+  if (statusLint) return statusLint;
+  return { ok: true };
+}
+function haltInvalidVerify(cwd, context, verifyCmd, reason, startedAt, phaseResults = {}) {
+  writeLesson(cwd, 'verify-not-runnable', 'fail',
+    `Verify \`${verifyCmd}\` for "${context.task}" is not a runnable shell command: ${reason}. Tick halted.`);
+  return {
+    outcome: 'halted',
+    reason: 'verify-not-runnable',
+    phaseResults,
+    elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
+    verifyRan: false,
+    verifyPass: false,
+    verifyCmd,
+  };
+}
 /**
  * Infer a default verify command from the repo shape. Order matters:
  * package.json with a non-stub test script → `npm test`; then pytest/python;
@@ -878,7 +1091,7 @@ Read from disk:
 - atris/lessons.md (recent failures — last 20 lines)
 Decide if the plan is safe to execute. Check:
-1. Verify points at a falsifiable rubric or test (not \`true\`, \`echo ok\`, or similar).
+1. Verify points at a falsifiable raw shell command or rubric (not \`true\`, \`echo ok\`, Markdown backticks, or English like "returns 1" / "shows today's date").
    Prefer \`atris verify <slug> --section <name>\`.
 2. Files are explicitly declared (not empty, not vague).
 3. Rollback is named (commit, checkpoint, or \`git revert\`).
@@ -975,7 +1188,7 @@ function defaultPlanReviewExecutor(prompt, { cwd, timeout = 180000 } = {}) {
     const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Grep,Glob"`;
     const env = { ...process.env };
     delete env.CLAUDECODE;
-    const output = execSync(cmd, {
+    const output = execPhaseCommandSync(cmd, {
       cwd,
       encoding: 'utf8',
       timeout,
@@ -1004,7 +1217,18 @@ function defaultCodexExecutor(prompt, { cwd, timeout = 180000 } = {}) {
     timeout,
     stdio: 'pipe',
     maxBuffer: 10 * 1024 * 1024,
+    detached: true,
   });
+  // No sh wrapper here, but codex spawns its own children — sweep the group
+  // on timeout so they cannot outlive the wall (same orphan class as the
+  // claude sites; ESRCH means the tree is already dead).
+  if (proc.pid && ((proc.error && proc.error.code === 'ETIMEDOUT') || proc.signal)) {
+    try {
+      process.kill(-proc.pid, 'SIGKILL');
+    } catch (sweepErr) {
+      if (sweepErr.code !== 'ESRCH') throw sweepErr;
+    }
+  }
   if (proc.status !== 0 && !proc.stdout) {
     throw new Error(`codex exited with status ${proc.status}: ${proc.stderr || 'no output'}`);
   }
@@ -1150,6 +1374,216 @@ function appendPlanRejection(cwd, context, review) {
   }
 }
+// ── Timeout reconciliation (T33, endgame loop-self-repair) ─────────────────
+// A do-phase wall-clock timeout kills the reporter, not the work: 12 of 13
+// ETIMEDOUT halts in the 2026-06-10 RSI audit had real commits landed with no
+// receipt, no checked bullet, and a human halt (lessons: executor-timeout-wall,
+// tick-must-mark-own-bullet). These helpers let the tick reconcile from
+// pre-tick HEADs instead of halting when work provably landed.
+function todayJournalPath(cwd) {
+  const now = new Date();
+  const yyyy = now.getFullYear();
+  const mm = String(now.getMonth() + 1).padStart(2, '0');
+  const dd = String(now.getDate()).padStart(2, '0');
+  return {
+    logFile: path.join(cwd, 'atris', 'logs', String(yyyy), `${yyyy}-${mm}-${dd}.md`),
+    dateFormatted: `${yyyy}-${mm}-${dd}`,
+  };
+}
+/**
+ * Normalize text for fuzzy task-title matching: lowercase, strip code spans,
+ * tags, and markdown punctuation down to single-spaced words.
+ */
+function normalizeForMatch(text) {
+  return String(text || '')
+    .toLowerCase()
+    .replace(/`[^`]*`/g, ' ')
+    .replace(/\[[\w-]+\]/g, ' ')
+    .replace(/[^a-z0-9]+/g, ' ')
+    .trim()
+    .replace(/\s+/g, ' ');
+}
+/**
+ * A word-boundary-truncated normalized prefix of the task title, used to find
+ * the task's TODO bullet and journal receipts without exact-string fragility.
+ */
+function taskMatchNeedle(taskTitle, maxLen = 60) {
+  const norm = normalizeForMatch(taskTitle);
+  if (!norm) return '';
+  if (norm.length <= maxLen) return norm;
+  return norm.slice(0, maxLen).replace(/\s+\S*$/, '');
+}
+function gitHeadAt(dir) {
+  try {
+    return execSync('git rev-parse HEAD', { cwd: dir, stdio: ['ignore', 'pipe', 'pipe'], encoding: 'utf8' }).trim();
+  } catch {
+    return null;
+  }
+}
+/**
+ * Snapshot HEAD of the workspace repo plus any sibling repos named in the
+ * task text — both explicit `../atris-cli`-style refs (the journal convention)
+ * and bare sibling-directory names like `atris-cli` that resolve to a git
+ * repo next to cwd. Returns [{ label, dir, head }].
+ */
+function snapshotRepoHeads(cwd, taskText = '') {
+  const root = path.resolve(cwd);
+  const repos = new Map([[root, '.']]);
+  const text = String(taskText || '');
+  for (const ref of text.match(/\.\.\/[A-Za-z0-9._-]+/g) || []) {
+    const dir = path.resolve(cwd, ref);
+    if (dir !== root && fs.existsSync(path.join(dir, '.git'))) repos.set(dir, ref);
+  }
+  for (const tok of text.match(/[A-Za-z][A-Za-z0-9._-]{2,}/g) || []) {
+    const dir = path.resolve(cwd, '..', tok);
+    if (dir !== root && !repos.has(dir) && fs.existsSync(path.join(dir, '.git'))) {
+      repos.set(dir, `../${tok}`);
+    }
+  }
+  return [...repos].map(([dir, label]) => ({ label, dir, head: gitHeadAt(dir) }));
+}
+/**
+ * Re-read HEADs for a prior snapshot; return the repos whose HEAD advanced
+ * as [{ label, dir, before, after }].
+ */
+function diffAdvancedRepoHeads(snapshot) {
+  const advanced = [];
+  for (const repo of snapshot || []) {
+    if (!repo || !repo.head) continue;
+    const after = gitHeadAt(repo.dir);
+    if (after && after !== repo.head) {
+      advanced.push({ label: repo.label, dir: repo.dir, before: repo.head, after });
+    }
+  }
+  return advanced;
+}
+/**
+ * The T31-typed do-phase timeout message thrown by executePhaseDetailed.
+ * Plan/review timeouts stay human halts — only the do phase commits work
+ * worth reconciling.
+ */
+function isDoPhaseTimeoutMessage(message) {
+  return /\bdo phase timed out after\b/.test(String(message || ''));
+}
+/**
+ * Mark the task's TODO bullet `[x]`. Matches the first un-checked,
+ * un-struck bullet whose normalized text contains the normalized title
+ * prefix; `- **T33:** …` becomes `- [x] **T33:** …`, `- [ ]` becomes `- [x]`.
+ * Returns true if a bullet was marked.
+ */
+function markTodoBulletDone(cwd, taskTitle) {
+  const needle = taskMatchNeedle(taskTitle);
+  if (!needle) return false;
+  for (const name of ['TODO.md', 'todo.md']) {
+    const todoPath = path.join(cwd, 'atris', name);
+    if (!fs.existsSync(todoPath)) continue;
+    const lines = fs.readFileSync(todoPath, 'utf8').split('\n');
+    for (let i = 0; i < lines.length; i++) {
+      const bullet = lines[i].match(/^(\s*)- (?:\[( |x)\]\s+)?(.*)$/);
+      if (!bullet) continue;
+      if (bullet[2] === 'x') continue;
+      if (bullet[3].startsWith('~~')) continue;
+      if (!normalizeForMatch(lines[i]).includes(needle)) continue;
+      lines[i] = `${bullet[1]}- [x] ${bullet[3]}`;
+      fs.writeFileSync(todoPath, lines.join('\n'));
+      return true;
+    }
+    return false;
+  }
+  return false;
+}
+/**
+ * Append a block under today's journal `## Notes`, creating the journal file
+ * if the tick dies before any other writer got to it. Never throws.
+ */
+function appendUnderNotes(cwd, block) {
+  try {
+    const { logFile, dateFormatted } = todayJournalPath(cwd);
+    if (!fs.existsSync(logFile)) {
+      const dir = path.dirname(logFile);
+      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+      createLogFile(logFile, dateFormatted);
+    }
+    let content = fs.readFileSync(logFile, 'utf8');
+    const notesIdx = content.indexOf('## Notes');
+    if (notesIdx === -1) {
+      content = content.replace(/\s*$/, '') + `\n\n## Notes\n${block}\n`;
+    } else {
+      const eol = content.indexOf('\n', notesIdx);
+      content = content.slice(0, eol + 1) + block + content.slice(eol + 1);
+    }
+    fs.writeFileSync(logFile, content);
+    return true;
+  } catch {
+    return false;
+  }
+}
+function appendTimeoutReconciliation(cwd, { task, advanced }) {
+  const now = new Date().toISOString().slice(0, 16).replace('T', ' ');
+  const repoLines = (advanced || [])
+    .map((r) => `- ${r.label}: ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
+    .join('\n');
+  const block =
+    `\n### Timeout reconciliation — ${now} — work-landed-receipt-died\n\n` +
+    `**Task:** ${task}\n` +
+    `**What happened:** the do-phase wall killed the reporter, but commits landed:\n` +
+    `${repoLines}\n` +
+    `Receipt auto-written and the TODO bullet marked; no human halt required.\n`;
+  return appendUnderNotes(cwd, block);
+}
+function appendCheckAndAdvance(cwd, task, receiptLine) {
+  const now = new Date().toISOString().slice(0, 16).replace('T', ' ');
+  const block =
+    `\n### Check-and-advance — ${now} — advanced-already-done\n\n` +
+    `**Task:** ${task}\n` +
+    `**What happened:** verify passed before work started AND today's journal already carries a completion receipt — the work shipped on a prior tick whose reporter died before bookkeeping. Bullet marked, picker advanced.\n` +
+    `**Receipt:** ${receiptLine}\n`;
+  return appendUnderNotes(cwd, block);
+}
+/**
+ * Scan today's journal for a completion receipt naming the task: a `C#`
+ * completed line, a timeout-reconciliation entry, or a `**Task:**` line.
+ * Returns the matching line, or null.
+ */
+function findCompletionReceipt(cwd, taskTitle) {
+  const { logFile } = todayJournalPath(cwd);
+  if (!fs.existsSync(logFile)) return null;
+  const needle = taskMatchNeedle(taskTitle);
+  if (!needle) return null;
+  for (const line of fs.readFileSync(logFile, 'utf8').split('\n')) {
+    const receiptShaped =
+      /\*\*C\d+:\*\*/.test(line) || /\*\*Task:\*\*/.test(line) || /reconciliation/i.test(line);
+    if (receiptShaped && normalizeForMatch(line).includes(needle)) return line.trim();
+  }
+  return null;
+}
+/**
+ * After a do-phase timeout: diff the pre-tick HEAD snapshot. If commits
+ * landed, write the journal reconciliation receipt, mark the TODO bullet, and
+ * report outcome `work-landed-receipt-died`. If nothing landed, the caller
+ * halts exactly as before.
+ */
+function reconcileTimedOutTick(cwd, snapshot, taskTitle) {
+  const advanced = diffAdvancedRepoHeads(snapshot);
+  if (advanced.length === 0) return { reconciled: false, advanced: [] };
+  appendTimeoutReconciliation(cwd, { task: taskTitle, advanced });
+  const bulletMarked = markTodoBulletDone(cwd, taskTitle);
+  return { reconciled: true, outcome: 'work-landed-receipt-died', advanced, bulletMarked };
+}
 function runTaskOnce(context, options = {}) {
   const { verbose = false, cwd = process.cwd() } = options;
@@ -1170,8 +1604,15 @@ function runTaskOnce(context, options = {}) {
   const phaseResults = {};
   const startedAt = Date.now();
-  const verifyResult = getVerifyCommand(cwd, context.task);
-  const verifyCmd = verifyResult.cmd;
+  let verifyResult = getVerifyCommand(cwd, context.task);
+  let verifyCmd = verifyResult.cmd;
+  const explicitVerifyBefore = new Set(
+    collectExplicitVerifyTasks(cwd).map((task) => task.key)
+  );
+  const initialVerifyShape = validateVerifyCommandShape(verifyCmd);
+  if (!initialVerifyShape.ok) {
+    return haltInvalidVerify(cwd, context, verifyCmd, initialVerifyShape.reason, startedAt, phaseResults);
+  }
   // Guard: endgame tasks must have an explicit Verify field.
   // Reactive signals (inbox, staleness, imagined) use npm test as default.
@@ -1203,6 +1644,25 @@ function runTaskOnce(context, options = {}) {
   if (!skipFalsifiability && verifyResult.explicit && context.kind === 'endgame' && verifyCmd) {
     try {
       execSync(verifyCmd, { cwd, stdio: 'pipe', timeout: 300000 });
+      // T33b (lesson: tick-must-mark-own-bullet): a pre-work verify pass WITH
+      // a completion receipt already in today's journal means the work shipped
+      // but the reporter died before bookkeeping. Check the bullet and advance
+      // instead of wedging the picker on verify-not-falsifiable.
+      const receipt = findCompletionReceipt(cwd, context.task);
+      if (receipt) {
+        const bulletMarked = markTodoBulletDone(cwd, context.task);
+        appendCheckAndAdvance(cwd, context.task, receipt);
+        return {
+          outcome: 'advanced-already-done',
+          reason: 'advanced-already-done',
+          receipt,
+          bulletMarked,
+          phaseResults: {},
+          elapsedSeconds: 0,
+          verifyRan: true,
+          verifyPass: true,
+        };
+      }
       writeLesson(cwd, 'verify-not-falsifiable', 'fail',
         `Verify \`${verifyCmd}\` passed before work started on "${context.task}". Either the rubric is trivial or the task is already done. Tick halted.`);
       return {
@@ -1264,6 +1724,18 @@ function runTaskOnce(context, options = {}) {
     }
   }
+  if (!verifyResult.explicit && shouldAdoptPlannedVerify(context.kind)) {
+    const plannedVerify = findNewExplicitVerifyCommand(cwd, explicitVerifyBefore);
+    if (plannedVerify) {
+      verifyResult = plannedVerify;
+      verifyCmd = plannedVerify.cmd;
+    }
+  }
+  const plannedVerifyShape = validateVerifyCommandShape(verifyCmd);
+  if (!plannedVerifyShape.ok) {
+    return haltInvalidVerify(cwd, context, verifyCmd, plannedVerifyShape.reason, startedAt, phaseResults);
+  }
   // Phase: do
   {
     const t0 = Date.now();
@@ -1309,7 +1781,7 @@ function runTaskOnce(context, options = {}) {
         elapsedSeconds: verifyTime,
       };
       try {
-        const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
+        const slug = lessonSlug(context.task);
         writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
       } catch { /* lesson write must not crash the tick */ }
     }
@@ -1975,12 +2447,13 @@ function findCodeTodos(cwd) {
   try {
     const out = execFileSync('git', [
       'grep', '-n', '-I', '-E', '(TODO|FIXME)',
-      '--', ':!test/', ':!node_modules/', ':!atris/', ':!**/*.md'
+      '--', ':!test/', ':!node_modules/', ':!atris/', ':!**/_archive/**', ':!**/*.md'
     ], { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
     const results = [];
     for (const raw of out.split('\n').filter(Boolean)) {
       const m = raw.match(/^([^:]+):(\d+):(.*)$/);
       if (!m) continue;
+      if (m[1].split(/[\\/]/).includes('_archive')) continue;
       const line = m[3];
       // A real TODO is a comment marker at the start of the line (allowing
       // leading indent) followed by TODO/FIXME and at least one word. This
@@ -2160,16 +2633,63 @@ function isLessonResolved(lessonLine, cwd, options = {}) {
   if (!slugMatch) return false;
   const slug = slugMatch[1];
+  if (isCleanMapBrokenRefFailLesson(lessonLine, cwd)) return true;
   // Detector-backed check (typed lesson sidecar)
   const meta = options.meta || loadLessonMetadata(cwd)[slug];
   if (meta && meta.detector) {
     return runLessonDetector(meta.detector, cwd, options.detectorTimeout);
   }
+  if (inlinePythonVerifyFailureNowPasses(lessonLine, cwd, options.detectorTimeout)) return true;
   // Legacy fallback: keyword grep against referenced files.
   return isLessonResolvedLegacy(lessonLine, cwd);
 }
+function isCleanMapBrokenRefFailLesson(lessonLine, cwd) {
+  const text = String(lessonLine || '').toLowerCase();
+  if (!/fix \d+ broken references? in map\.md/.test(text)) return false;
+  return repoMapAuditReportsClean(cwd);
+}
+function extractInlinePythonVerifyFailure(lessonLine) {
+  const commandMatch = String(lessonLine || '').match(/Verify command\s+``([\s\S]*?)``\s+failed/i);
+  if (!commandMatch) return null;
+  const matches = [...commandMatch[1].matchAll(/\b(python3?)\s+-c\s+(["'])([\s\S]*?)\2/g)];
+  const match = matches[matches.length - 1];
+  if (!match) return null;
+  return {
+    executable: match[1],
+    code: match[3].replace(/\\"/g, '"').replace(/\\'/g, "'")
+  };
+}
+function inlinePythonVerifyFailureNowPasses(lessonLine, cwd, timeout = 10000) {
+  const parsed = extractInlinePythonVerifyFailure(lessonLine);
+  if (!parsed) return false;
+  const result = spawnSync(parsed.executable, ['-c', parsed.code], {
+    cwd,
+    encoding: 'utf8',
+    timeout,
+    stdio: ['ignore', 'ignore', 'ignore']
+  });
+  return result.status === 0;
+}
+function legacyLessonFileRefs(lessonLine) {
+  const fileRefs = [];
+  const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
+  let m;
+  while ((m = filePattern.exec(lessonLine)) !== null) {
+    const ref = m[1].replace(/:\d+(-\d+)?$/, '');
+    if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
+      fileRefs.push(ref);
+    }
+  }
+  return fileRefs;
+}
 /**
  * The pre-v3.8 resolver — kept as an internal fallback for prose-only lessons
  * that don't have detector metadata yet. Never auto-promotes a prose lesson to
@@ -2182,16 +2702,7 @@ function isLessonResolvedLegacy(lessonLine, cwd) {
   if (!slugMatch) return false;
   const slug = slugMatch[1];
-  // Extract file paths: patterns like `commands/autopilot.js:116` or `commands/run.js:157`
-  const fileRefs = [];
-  const filePattern = /`([a-zA-Z0-9_/./-]+\.[a-zA-Z]+(?::\d+(?:-\d+)?)?)`/g;
-  let m;
-  while ((m = filePattern.exec(lessonLine)) !== null) {
-    const ref = m[1].replace(/:\d+(-\d+)?$/, ''); // strip line numbers
-    if (ref.includes('/') || ref.endsWith('.js') || ref.endsWith('.md') || ref.endsWith('.ts')) {
-      fileRefs.push(ref);
-    }
-  }
+  const fileRefs = legacyLessonFileRefs(lessonLine);
   if (fileRefs.length === 0) return false;
@@ -2274,6 +2785,9 @@ function pickUnresolvedFailLesson(cwd) {
   const candidates = [];
   for (const lesson of lessons) {
     if (lesson.verdict !== 'fail') continue;
+    if (lesson.id === 'verify-not-falsifiable') continue;
+    if (lesson.id === 'no-verify-field') continue;
+    if (lesson.id === 'verify-failed' && lesson.legacy) continue;
     if (lesson.resolvedTag) continue;
     // Typed lesson with explicit status wins — respect the sidecar.
     // `resolved` = done. `observed` = process rule, not a fixable code state.
@@ -2284,6 +2798,7 @@ function pickUnresolvedFailLesson(cwd) {
       if (s === 'resolved' || s === 'observed') continue;
       if (s === 'attempted' && (lesson.meta.attempts || 0) >= MAX_ATTEMPTS) continue;
     }
+    if (lesson.legacy && legacyLessonFileRefs(lesson.line).length === 0) continue;
     // Detector-backed or legacy grep check.
     if (isLessonResolved(lesson.line, cwd, { meta: lesson.meta })) continue;
@@ -2370,7 +2885,7 @@ Reply with the JSON array and nothing else.`;
     const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')"`;
     const env = { ...process.env };
     delete env.CLAUDECODE;
-    output = execSync(cmd, {
+    output = execPhaseCommandSync(cmd, {
       cwd,
       encoding: 'utf8',
       timeout: PHASE_TIMEOUT,
@@ -2378,6 +2893,11 @@ Reply with the JSON array and nothing else.`;
       maxBuffer: 10 * 1024 * 1024,
       env
     }).toString();
+  } catch (err) {
+    if (isPhaseTimeoutError(err)) {
+      throw new Error(`horizon-proposal phase timed out after ${PHASE_TIMEOUT / 1000}s`);
+    }
+    throw err;
   } finally {
     try { fs.unlinkSync(tmpFile); } catch {}
   }
@@ -2658,12 +3178,24 @@ async function autopilotAtris(description, options = {}) {
     const context = {
       task: suggestion.task,
       kind: suggestion.kind,
+      ...(suggestion.files ? { files: suggestion.files } : {}),
       ...(suggestion.lessonLine ? { lessonLine: suggestion.lessonLine } : {}),
       ...(suggestion.lessonSlug ? { lessonSlug: suggestion.lessonSlug } : {}),
       ...(suggestion.lessonDate ? { lessonDate: suggestion.lessonDate } : {})
     };
     const startingEndgame = readEndgameState(cwd);
+    // T33a: snapshot pre-tick HEADs (cwd + sibling repos named in the task)
+    // so a do-phase timeout can be reconciled against what actually landed.
+    let preTickHeads = null;
+    try {
+      const verifyHint = getVerifyCommand(cwd, suggestion.task).cmd || '';
+      preTickHeads = snapshotRepoHeads(
+        cwd,
+        [suggestion.task, ...(suggestion.files || []), verifyHint].join(' ')
+      );
+    } catch { /* snapshot failure must not block the tick */ }
     try {
       if (verbose) {
         console.log('');
@@ -2697,6 +3229,26 @@ async function autopilotAtris(description, options = {}) {
         break;
       }
+      // T33b: the falsifiability gate found a completion receipt — the work
+      // already shipped, the bullet is checked, move straight to the next pick.
+      if (execution.outcome === 'advanced-already-done') {
+        completed++;
+        tickOutcome = 'built';
+        tickOutcomeText = `"${lastTaskTitle}" was already done — verify passed pre-work and today's journal carries its completion receipt, so I checked the bullet and advanced.`;
+        tickNextStep = 'pick the next endgame task';
+        if (verbose) {
+          console.log('  already done (journal receipt found). bullet checked, advancing.');
+        } else {
+          printPlainBlock([
+            'That task was already done — verify passed before work and a completion receipt exists in today\'s journal.',
+            'I checked the bullet and advanced.',
+            '',
+            'Next I will look for the next task.'
+          ].join('\n'));
+        }
+        continue;
+      }
       const planTime = execution.phaseResults.plan.elapsedSeconds;
       if (verbose) console.log(`  planned (${planTime}s)`);
@@ -2758,7 +3310,7 @@ async function autopilotAtris(description, options = {}) {
       // Record commit hash + verify command for retroactive regression checks
       try {
         const commitHash = execSync('git rev-parse HEAD', { cwd, encoding: 'utf8' }).trim();
-        const taskSlug = (suggestion.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
+        const taskSlug = lessonSlug(suggestion.task);
         recordTickCommit(cwd, commitHash, execution.verifyCmd || '', taskSlug);
         // Every 10th tick, run retroactive regression check
@@ -2805,6 +3357,36 @@ async function autopilotAtris(description, options = {}) {
       }
     } catch (err) {
+      // T33a: a do-phase timeout with commits landed is a dead reporter, not
+      // dead work — write the reconciliation receipt, mark the bullet, and
+      // record work-landed-receipt-died instead of halting for a human.
+      let reconciliation = null;
+      if (isDoPhaseTimeoutMessage(err.message)) {
+        try {
+          reconciliation = reconcileTimedOutTick(cwd, preTickHeads, lastTaskTitle || suggestion.task);
+        } catch { reconciliation = null; }
+      }
+      if (reconciliation && reconciliation.reconciled) {
+        completed++;
+        const landed = reconciliation.advanced
+          .map((r) => `${r.label} ${String(r.before).slice(0, 7)} → ${String(r.after).slice(0, 7)}`)
+          .join(', ');
+        tickOutcome = 'work-landed-receipt-died';
+        tickOutcomeText = `"${lastTaskTitle}" hit the do-phase wall but commits landed (${landed}). I wrote the reconciliation receipt and marked the bullet — work-landed-receipt-died, no human halt.`;
+        tickNextStep = 'pick the next task';
+        if (verbose) {
+          console.log(`  do phase timed out, but work landed (${landed}). reconciled — no human halt.`);
+        } else {
+          printPlainBlock([
+            'The do phase timed out, but commits landed before the wall.',
+            `Landed: ${landed}.`,
+            'I wrote the reconciliation receipt and marked the task bullet.',
+            '',
+            'Next tick will pick the next task.'
+          ].join('\n'));
+        }
+        break;
+      }
       tickOutcome = 'halted';
       tickOutcomeText = `I hit an error while running "${lastTaskTitle || 'a task'}": ${err.message}`;
       tickNextStep = 'stop until a human looks at the error';
@@ -2988,7 +3570,7 @@ Search the codebase to verify. Reply: YES <reason> or NO <reason>`;
     const env = { ...process.env };
     delete env.CLAUDECODE;
     const cmd = `claude -p "$(cat '${tmpFile.replace(/'/g, "'\\''")}')" --allowedTools "Bash,Read,Glob,Grep"`;
-    const output = execSync(cmd, {
+    const output = execPhaseCommandSync(cmd, {
       cwd,
       encoding: 'utf8',
       timeout: 60000,
@@ -3021,6 +3603,13 @@ async function autopilotFromTodo(options = {}) {
 module.exports = {
   appendTickSummary,
+  snapshotRepoHeads,
+  diffAdvancedRepoHeads,
+  reconcileTimedOutTick,
+  markTodoBulletDone,
+  findCompletionReceipt,
+  isDoPhaseTimeoutMessage,
+  validateVerifyCommandShape,
   askHuman,
   askModel,
   autopilotAtris,
@@ -3052,11 +3641,19 @@ module.exports = {
   proposeCandidateHorizons,
   recordTickCommit,
   regressionCheck,
+  repoMapAuditReportsClean,
+  isCleanMapBrokenRefFailLesson,
+  inlinePythonVerifyFailureNowPasses,
   runPlanReview,
   runTaskOnce,
   buildPlanReviewPrompt,
   parseVerdict,
   scoreEndgameCandidates,
   suggestNextTask,
-  writeLesson
+  shouldSkipAutoHumanGate,
+  writeLesson,
+  isPhaseTimeoutError,
+  execPhaseCommandSync,
+  executePhaseDetailed,
+  lessonSlug
 };