npm - atris - Versions diffs - 3.0.0 → 3.1.0 - Mend

atris 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +22 -0
package/atris/skills/endgame/SKILL.md +19 -1
package/atris/skills/improve/SKILL.md +65 -62
package/atris/skills/launch/SKILL.md +62 -0
package/atris/skills/tidy/SKILL.md +84 -0
package/bin/atris.js +2 -1
package/commands/autopilot.js +312 -31
package/commands/business.js +149 -32
package/commands/sync.js +9 -5
package/lib/scorecard.js +287 -0
package/lib/todo.js +12 -2
package/package.json +2 -2

package/commands/autopilot.js CHANGED Viewed

@@ -13,6 +13,12 @@ const readline = require('readline');
 const { getLogPath, ensureLogDirectory, createLogFile } = require('../lib/journal');
 const { parseTodo } = require('../lib/todo');
 const { findStalePages, findStaleTasks, healBrokenMapRefs } = require('./clean');
+const {
+  buildScorecardData,
+  readScorecards,
+  writeScorecard,
+  detectEndgameCompletion
+} = require('../lib/scorecard');
 const pkg = require('../package.json');
@@ -204,7 +210,7 @@ async function suggestNextTask(cwd, skipped = new Set()) {
   if (suggestions.length === 0) {
     try {
       const candidates = await proposeCandidateHorizons(cwd);
-      const top = candidates.reduce((best, c) => (c.confidence > best.confidence ? c : best), candidates[0]);
+      const top = scoreEndgameCandidates(cwd, candidates);
       return {
         task: top.title,
         why: top.rationale,
@@ -525,10 +531,54 @@ If broken beyond quick fix, reply: failed — [reason].`;
   return '';
 }
+/**
+ * Record one lesson line in atris/lessons.md.
+ * Line format: - **[YYYY-MM-DD] slug** — status — explanation
+ * (the date is the UTC day taken from toISOString()).
+ *
+ * When the file is missing it is created with a header that ends in a `---`
+ * separator, followed by the lesson. Otherwise the lesson is inserted directly
+ * after the first `---` separator (so the newest entry sits on top), or added
+ * at the end of the file when no separator is present.
+ *
+ * @param {string} cwd - Project root that contains the atris/ folder
+ * @param {string} slug - Short identifier for the lesson
+ * @param {string} status - Outcome label, e.g. 'pass' or 'fail'
+ * @param {string} explanation - What was learned
+ */
+function writeLesson(cwd, slug, status, explanation) {
+  const lessonsPath = path.join(cwd, 'atris', 'lessons.md');
+  const today = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
+  const lessonLine = `- **[${today}] ${slug}** — ${status} — ${explanation}`;
+  if (!fs.existsSync(lessonsPath)) {
+    fs.writeFileSync(lessonsPath, `# lessons.md — What We Learned\n\n> Append-only. One line per lesson.\n\n---\n\n${lessonLine}\n`);
+    return;
+  }
+  const content = fs.readFileSync(lessonsPath, 'utf8');
+  // Use a function replacer: a plain replacement string would expand `$&`,
+  // `$1`, "$`" and `$'`, corrupting lessons that quote shell errors or prices.
+  const updated = content.includes('---\n')
+    ? content.replace(/---\n/, () => `---\n\n${lessonLine}\n`)
+    : `${content}\n${lessonLine}\n`;
+  fs.writeFileSync(lessonsPath, updated);
+}
+/**
+ * Resolve the shell command used to verify a task.
+ * Looks the task up by exact title among the in-progress, backlog and
+ * completed entries parsed from atris/TODO.md and returns its `verify` field.
+ * Falls back to 'npm test' when TODO.md is missing, the task is not found,
+ * or the task carries no verify value.
+ */
+function getVerifyCommand(cwd, taskTitle) {
+  const fallbackCommand = 'npm test';
+  const todoFile = path.join(cwd, 'atris', 'TODO.md');
+  if (!fs.existsSync(todoFile)) {
+    return fallbackCommand;
+  }
+  const { inProgress, backlog, completed } = parseTodo(todoFile);
+  const everyTask = [...inProgress, ...backlog, ...completed];
+  const match = everyTask.find((entry) => entry.title === taskTitle);
+  return (match && match.verify) || fallbackCommand;
+}
 function runTaskOnce(context, options = {}) {
-  const { verbose = false } = options;
+  const { verbose = false, cwd = process.cwd() } = options;
   const phaseResults = {};
   const startedAt = Date.now();
+  const verifyCmd = getVerifyCommand(cwd, context.task);
   for (const phase of ['plan', 'do', 'review']) {
     const t0 = Date.now();
@@ -542,11 +592,41 @@ function runTaskOnce(context, options = {}) {
   const reviewOutput = phaseResults.review.output || '';
+  // After review succeeds, run verify command if present
+  let verifyPass = false;
+  let verifyRan = false;
+  if (!reviewOutput.includes('failed') && verifyCmd) {
+    verifyRan = true;
+    let t0 = Date.now();
+    try {
+      execSync(verifyCmd, { cwd, stdio: 'pipe' });
+      verifyPass = true;
+      const verifyTime = Math.round((Date.now() - t0) / 1000);
+      phaseResults.verify = {
+        output: `Verify passed (${verifyTime}s)`,
+        elapsedSeconds: verifyTime,
+      };
+    } catch (e) {
+      const verifyTime = Math.round((Date.now() - t0) / 1000);
+      phaseResults.verify = {
+        output: `Verify failed: ${e.message}`,
+        elapsedSeconds: verifyTime,
+      };
+      try {
+        const slug = (context.task || 'unknown').replace(/\s+/g, '-').toLowerCase().slice(0, 40);
+        writeLesson(cwd, `verify-fail-${slug}`, 'fail', `Verify command \`${verifyCmd}\` failed: ${e.message.split('\n')[0]}`);
+      } catch { /* lesson write must not crash the tick */ }
+    }
+  }
   return {
-    success: !reviewOutput.includes('failed'),
+    success: !reviewOutput.includes('failed') && (!verifyRan || verifyPass),
     elapsedSeconds: Math.round((Date.now() - startedAt) / 1000),
     phaseResults,
     reviewOutput,
+    verifyCmd,
+    verifyPass,
+    verifyRan,
   };
 }
@@ -579,6 +659,47 @@ function logCompletion(description) {
   fs.writeFileSync(logFile, content);
 }
+/**
+ * Compute the per-tick reward score from execution signals.
+ * Contributions (summed):
+ *   - review output does not contain 'failed' (or is empty): +1
+ *   - verify ran and passed: +3
+ *   - verify ran and passed AND the verify command is exactly 'npm test':
+ *     an extra +2 (so a passing `npm test` is worth +5 in total)
+ *   - do-phase output looks like a commit landed: +1
+ *   - tick outcome is 'halted': -3
+ *
+ * @param {object} execution - Result of runTaskOnce (reviewOutput, verifyRan,
+ *   verifyPass, phaseResults). Missing fields are treated as empty.
+ * @param {string} tickOutcome - Outcome label of the tick, e.g. 'halted'
+ * @param {string} verifyCmd - Verify command that was used for the task
+ * @returns {number} The summed reward
+ */
+function computeTickReward(execution, tickOutcome, verifyCmd) {
+  const run = execution || {};
+  const reviewOutput = run.reviewOutput || '';
+  const verified = Boolean(run.verifyRan && run.verifyPass);
+  // The do phase may be absent (e.g. the run stopped early). Reading
+  // `.do.output` unguarded would throw and, because the caller swallows
+  // journal errors, silently drop the whole tick summary.
+  const doPhase = (run.phaseResults && run.phaseResults.do) || {};
+  const doOutput = typeof doPhase.output === 'string' ? doPhase.output : '';
+  const commitLanded = /\[.*\s\d+\sfile.*changed/i.test(doOutput)
+    || doOutput.includes('git commit')
+    || doOutput.includes('committed');
+  let reward = 0;
+  if (!reviewOutput.includes('failed')) reward += 1;
+  if (verified) reward += 3;
+  if (verified && verifyCmd === 'npm test') reward += 2;
+  if (commitLanded) reward += 1;
+  if (tickOutcome === 'halted') reward -= 3;
+  return reward;
+}
 /**
  * Append a plain-language tick summary block to today's journal `## Notes`.
  * Fields:
@@ -588,9 +709,10 @@ function logCompletion(description) {
  *   - nextStep: what the next tick will do
  *   - idle:     when true, block must contain literal "0 tasks in 0s"
  *               so getIdleTickCount still works.
+ *   - reward:   optional tick reward score (from computeTickReward)
  * Safe to call inside a try/catch — a write failure must never crash a tick.
  */
-function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {}) {
+function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle, reward } = {}) {
   const now = new Date();
   const yyyy = now.getFullYear();
   const mm = String(now.getMonth() + 1).padStart(2, '0');
@@ -623,6 +745,10 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
     `  ${horizonLine}`,
     `  ${nextLine}`,
   ];
+  // Add reward score if present
+  if (reward !== undefined && reward !== null) {
+    blockLines.push(`  Reward: ${reward}`);
+  }
   // Idle marker must be the last non-empty line so getIdleTickCount, which
   // scans bottom-up, counts this block when idle=true.
   if (idleLine) blockLines.push(`  ${idleLine}`);
@@ -644,20 +770,62 @@ function appendTickSummary(cwd, { time, outcome, horizon, nextStep, idle } = {})
 }
 /**
- * Read the current endgame slug from atris/TODO.md. Returns 'unset' on miss.
+ * Read the current endgame state from atris/TODO.md.
+ * Returns { slug, pickedAt, horizon, remaining, completed }:
+ *   - slug / pickedAt / horizon come from the **Slug:**, **Picked:** and
+ *     **Horizon:** fields inside the `## Endgame` section (slug is 'unset',
+ *     pickedAt is null and horizon is '' when the field is not found).
+ *   - remaining counts backlog plus in-progress tasks tagged 'endgame'.
+ *   - completed counts completed tasks tagged 'endgame'.
+ * When TODO.md is missing or anything throws, the "unset" state with zero
+ * counts is returned instead of propagating the error.
  */
-function readHorizonSlug(cwd) {
+function readEndgameState(cwd) {
   try {
     const todoPath = path.join(cwd, 'atris', 'TODO.md');
-    if (!fs.existsSync(todoPath)) return 'unset';
+    if (!fs.existsSync(todoPath)) {
+      return { slug: 'unset', pickedAt: null, horizon: '', remaining: 0, completed: 0 };
+    }
+    const todo = parseTodo(todoPath);
     const content = fs.readFileSync(todoPath, 'utf8');
-    const match = content.match(/\*\*Slug:\*\*\s*(\S+)/);
-    return match ? match[1].trim() : 'unset';
+    const endgameMatch = content.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
+    const section = endgameMatch ? endgameMatch[1] : '';
+    const slugMatch = section.match(/\*\*Slug:\*\*\s*(\S+)/);
+    const pickedMatch = section.match(/\*\*Picked:\*\*\s*(.+)/);
+    const horizonMatch = section.match(/\*\*Horizon:\*\*\s*(.+)/);
+    return {
+      slug: slugMatch ? slugMatch[1].trim() : 'unset',
+      pickedAt: pickedMatch ? pickedMatch[1].trim() : null,
+      horizon: horizonMatch ? horizonMatch[1].trim() : '',
+      remaining: todo.backlog.filter(t => t.tag === 'endgame').length
+        + todo.inProgress.filter(t => t.tag === 'endgame').length,
+      completed: todo.completed.filter(t => t.tag === 'endgame').length,
+    };
   } catch {
-    return 'unset';
+    return { slug: 'unset', pickedAt: null, horizon: '', remaining: 0, completed: 0 };
   }
 }
+/**
+ * Convenience accessor: returns only the `slug` field of
+ * readEndgameState(cwd).
+ */
+function readHorizonSlug(cwd) {
+  const { slug } = readEndgameState(cwd);
+  return slug;
+}
+/**
+ * Write a scorecard when the endgame that was active before a task ran is
+ * now reported complete and has no scorecard yet.
+ * Returns true only when a scorecard was written. Returns false when the
+ * starting state is missing, its slug is 'unset' or it had nothing remaining,
+ * when the atris/ folder does not exist, when completion is not detected for
+ * the same slug, or when a scorecard with that slug already exists.
+ */
+function maybeWriteCompletedEndgameScorecard(cwd, startingEndgame) {
+  const hadOpenEndgame = Boolean(startingEndgame)
+    && startingEndgame.slug !== 'unset'
+    && startingEndgame.remaining !== 0;
+  if (!hadOpenEndgame) {
+    return false;
+  }
+  const atrisDir = path.join(cwd, 'atris');
+  if (!fs.existsSync(atrisDir)) {
+    return false;
+  }
+  const { complete, endgameSlug } = detectEndgameCompletion(atrisDir);
+  if (!complete) {
+    return false;
+  }
+  if (endgameSlug !== startingEndgame.slug) {
+    return false;
+  }
+  const hasScorecard = readScorecards(atrisDir).some((card) => card.slug === endgameSlug);
+  if (hasScorecard) {
+    return false;
+  }
+  const scorecard = buildScorecardData(atrisDir, {
+    slug: endgameSlug,
+    pickedAt: startingEndgame.pickedAt,
+  });
+  writeScorecard(atrisDir, scorecard);
+  return true;
+}
 /**
  * Main loop. Suggest → justify → approve → execute, one at a time.
  */
@@ -737,24 +905,11 @@ function getTickStatus(cwd) {
     }
   }
-  let slug = '(no endgame active — feed inbox or /endgame)';
-  let horizon = '';
-  const todoPath = path.join(atrisDir, 'TODO.md');
-  let remaining = 0;
-  let completedEndgame = 0;
-  if (fs.existsSync(todoPath)) {
-    const todoContent = fs.readFileSync(todoPath, 'utf8');
-    const endgameMatch = todoContent.match(/##\s+Endgame\s*\n([\s\S]*?)(?=\n##|$)/);
-    if (endgameMatch) {
-      const slugMatch = endgameMatch[1].match(/\*\*Slug:\*\*\s*(.+)/);
-      const horizonMatch = endgameMatch[1].match(/\*\*Horizon:\*\*\s*(.+)/);
-      if (slugMatch) slug = slugMatch[1].trim();
-      if (horizonMatch) horizon = horizonMatch[1].trim();
-    }
-    const todo = parseTodo(todoPath);
-    remaining = todo.backlog.filter(t => t.tag === 'endgame').length;
-    completedEndgame = todo.completed.filter(t => /^[A-Z]\d+[a-z]?[:\s]/.test((t.title || '').trim())).length;
-  }
+  const endgame = readEndgameState(cwd);
+  const slug = endgame.slug === 'unset' ? '(no endgame active — feed inbox or /endgame)' : endgame.slug;
+  const horizon = endgame.horizon;
+  const remaining = endgame.remaining;
+  const completedEndgame = endgame.completed;
   const total = remaining + completedEndgame;
   const done = completedEndgame;
@@ -912,6 +1067,93 @@ function getRecentSignals(cwd) {
   return { recentCommits, wikiHealth, recentLessons };
 }
+/**
+ * Pick one endgame candidate, weighting by how well similar horizons scored
+ * in the past.
+ * Reads the last 10 scorecards, takes the slug prefix before the first dash
+ * as the horizon "type", averages `totalReward` per type, and gives each
+ * candidate an expected value of (type mean * confidence). A candidate's type
+ * is the first known type found in its lowercased title, else the title's
+ * first word; unknown types get a mean of 0. Ties on expected value are broken
+ * by confidence.
+ * Selection is 80% exploit (highest expected value) and 20% explore (uniform
+ * random candidate, which may also be the top one).
+ * Falls back to the most confident candidate (scored: false) when there is no
+ * atris folder, no usable scorecard, or scoring throws.
+ *
+ * @param {string} cwd - Current working directory
+ * @param {array} candidates - Array of { title, confidence, rationale }
+ * @returns {object} - Single candidate: { title, confidence, rationale, scored, reason }.
+ *   With no candidates at all, only { scored: false, reason } is returned.
+ */
+function scoreEndgameCandidates(cwd, candidates) {
+  const pool = Array.isArray(candidates) ? candidates.filter(Boolean) : [];
+  if (pool.length === 0) {
+    return { scored: false, reason: 'no candidates' };
+  }
+  // A missing or non-numeric confidence counts as 0 so it can never turn an
+  // expected value (and therefore the sort comparator) into NaN.
+  const confidenceOf = (c) => (Number.isFinite(c.confidence) ? c.confidence : 0);
+  const fallback = (reason) => {
+    const best = pool.reduce((a, b) => (confidenceOf(a) > confidenceOf(b) ? a : b), pool[0]);
+    return { ...best, scored: false, reason };
+  };
+  const atrisDir = path.join(cwd, 'atris');
+  if (!fs.existsSync(atrisDir)) {
+    return fallback('no atris folder');
+  }
+  try {
+    const scorecards = readScorecards(atrisDir).slice(-10); // Last 10
+    if (scorecards.length === 0) {
+      return fallback('no scorecards');
+    }
+    // Group rewards by slug prefix. Entries without a usable slug or reward
+    // are skipped: an empty type would match every title via includes(''),
+    // and a non-numeric reward would poison the mean.
+    const rewardsByType = new Map();
+    for (const sc of scorecards) {
+      const type = sc && typeof sc.slug === 'string' ? sc.slug.split('-')[0].toLowerCase() : '';
+      if (!type || !Number.isFinite(sc.totalReward)) continue;
+      if (!rewardsByType.has(type)) rewardsByType.set(type, []);
+      rewardsByType.get(type).push(sc.totalReward);
+    }
+    if (rewardsByType.size === 0) {
+      return fallback('no usable scorecards');
+    }
+    const typeMeans = new Map();
+    for (const [type, rewards] of rewardsByType) {
+      typeMeans.set(type, rewards.reduce((a, b) => a + b, 0) / rewards.length);
+    }
+    const knownTypes = [...typeMeans.keys()];
+    const scored = pool.map((c) => {
+      const titleLower = String(c.title || '').toLowerCase();
+      const type = knownTypes.find((t) => titleLower.includes(t)) || titleLower.split(/[\s-]+/)[0];
+      const historicalMean = typeMeans.has(type) ? typeMeans.get(type) : 0;
+      return {
+        ...c,
+        type,
+        historicalMean,
+        expectedValue: historicalMean * confidenceOf(c),
+      };
+    });
+    // Highest expected value first; equal values fall back to confidence so
+    // candidates of unknown type (all 0) are not ordered arbitrarily.
+    scored.sort((a, b) => (b.expectedValue - a.expectedValue) || (confidenceOf(b) - confidenceOf(a)));
+    // 80/20 split: 80% exploit (best), 20% explore (random)
+    const exploit = Math.random() < 0.8;
+    const selected = exploit ? scored[0] : scored[Math.floor(Math.random() * scored.length)];
+    const reason = exploit
+      ? `exploit: type=${selected.type} mean-reward=${selected.historicalMean.toFixed(1)} expected-value=${selected.expectedValue.toFixed(1)}`
+      : `explore: random-candidate type=${selected.type}`;
+    return {
+      title: selected.title,
+      confidence: selected.confidence,
+      rationale: selected.rationale,
+      scored: true,
+      reason
+    };
+  } catch (err) {
+    // If scoring fails, fall back to best by confidence
+    return fallback(`scoring error: ${err.message}`);
+  }
+}
 /**
  * Propose 3 candidate next horizons for the autopilot loop. Combines
  * `getIdleTickCount` + `getRecentSignals` into a prompt asking the LLM
@@ -1100,6 +1342,8 @@ async function autopilotAtris(description, options = {}) {
   let tickOutcomeText = 'I stopped for a manual check.';
   let tickNextStep = 'look for new work';
   let lastTaskTitle = null;
+  let lastExecution = null;
+  let lastVerifyCmd = null;
   for (let i = 0; i < maxIterations; i++) {
     // Check time budget
@@ -1212,6 +1456,7 @@ async function autopilotAtris(description, options = {}) {
     // Execute: plan → do → review
     lastTaskTitle = suggestion.task;
     const context = { task: suggestion.task, kind: suggestion.kind };
+    const startingEndgame = readEndgameState(cwd);
     try {
       if (verbose) {
@@ -1224,7 +1469,9 @@ async function autopilotAtris(description, options = {}) {
           'Next I will report what happened and whether review passed.'
         ].join('\n'));
       }
-      const execution = runTaskOnce(context, { verbose });
+      const execution = runTaskOnce(context, { verbose, cwd });
+      lastExecution = execution;
+      lastVerifyCmd = execution.verifyCmd;
       const planTime = execution.phaseResults.plan.elapsedSeconds;
       if (verbose) console.log(`  planned (${planTime}s)`);
@@ -1253,11 +1500,32 @@ async function autopilotAtris(description, options = {}) {
       }
       if (verbose) console.log(`  reviewed (${reviewTime}s)`);
+      // Handle verify failure
+      if (!execution.verifyPass) {
+        tickOutcome = 'halted';
+        tickOutcomeText = `I planned, built, and reviewed "${lastTaskTitle}" but verify failed.`;
+        tickNextStep = 'verify failed, halting';
+        writeLesson(cwd, 'verify-failed', 'fail', `Task "${lastTaskTitle}" passed review but failed verify command.`);
+        if (verbose) {
+          console.log(`  verify failed. stopping for manual check.`);
+        } else {
+          printPlainBlock([
+            `I planned, built, and reviewed the task, but the verify check failed.`,
+            '',
+            'Next I stopped for a manual check.'
+          ].join('\n'));
+        }
+        break;
+      }
       completed++;
       tickOutcome = 'built';
       tickOutcomeText = `I planned, built, and reviewed "${suggestion.task}".`;
       tickNextStep = 'pick the next endgame task';
       logCompletion(suggestion.task);
+      if (maybeWriteCompletedEndgameScorecard(cwd, startingEndgame)) {
+        tickNextStep = 'pick the next horizon';
+      }
       if (verbose) {
         console.log(`  done. ${completed} task${completed > 1 ? 's' : ''} completed.`);
         console.log('');
@@ -1301,12 +1569,20 @@ async function autopilotAtris(description, options = {}) {
       minute: '2-digit'
     }).toLowerCase();
     const idle = tickOutcome === 'idle' || (completed === 0 && tickOutcome !== 'halted');
+    // Compute reward score if we had an execution
+    let tickReward = undefined;
+    if (lastExecution && lastVerifyCmd) {
+      tickReward = computeTickReward(lastExecution, tickOutcome, lastVerifyCmd);
+    }
     appendTickSummary(cwd, {
       time,
       outcome: tickOutcomeText,
       horizon: horizonSlug === 'unset' ? null : horizonSlug,
       nextStep: tickNextStep,
-      idle
+      idle,
+      reward: tickReward
     });
   } catch {
     /* journal write failure must not crash the tick */
@@ -1341,9 +1617,14 @@ module.exports = {
   getIdleTickCount,
   getRecentSignals,
   getTickStatus,
+  getVerifyCommand,
+  computeTickReward,
+  maybeWriteCompletedEndgameScorecard,
   renderHumanSuggestion,
   renderHumanTickIntro,
   proposeCandidateHorizons,
   runTaskOnce,
-  suggestNextTask
+  scoreEndgameCandidates,
+  suggestNextTask,
+  writeLesson
 };