npm - kairn-cli - Versions diffs - 2.2.6 → 2.2.8 - Mend

kairn-cli 2.2.6 → 2.2.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.js CHANGED Viewed

@@ -4748,6 +4748,13 @@ the agent lacks a tool it needs, or has tools that add noise without benefit.
 - Consider both additions AND removals. Remove sections that add noise without improving task performance.
 - Bloated harnesses hurt performance \u2014 trim what isn't earning its keep.
+## Anti-Gaming (CRITICAL)
+- Mutations must improve GENERAL-PURPOSE development quality, not target specific eval criteria.
+- You do NOT have access to scoring rubrics or expected outcomes. Diagnose problems from traces only.
+- Do NOT add over-specified rules that restate existing conventions with stronger emphasis (e.g., changing "use chalk.green for success" to "MUST use chalk.green, no exceptions"). If a convention already exists, trust it.
+- Do NOT add rules that only apply to a narrow eval scenario (e.g., write permissions for a specific directory just because one task needed it).
+- Ask: "Would this mutation help a developer working on ANY task in this project?" If not, don't propose it.
 Return ONLY valid JSON.`;
 var STDOUT_TRUNCATION_LIMIT = 1e3;
 var MAX_CONTEXT_CHARS = 1e5;
@@ -4806,8 +4813,6 @@ ${content}
         `### Task: ${task.id}
 - Template: ${task.template}
 - Description: ${task.description}
-- Expected outcome: ${Array.isArray(task.expected_outcome) ? task.expected_outcome.join("; ") : task.expected_outcome}
-- Scoring: ${task.scoring}
 `
       );
     }
@@ -5125,8 +5130,31 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
       break;
     }
     onProgress?.({ type: "iteration-start", iteration: iter });
-    const { results, aggregate } = await evaluateAll(
-      tasks,
+    const isFirstIter = iter === 0;
+    const isLastIter = iter === evolveConfig.maxIterations - 1;
+    const prevLog = history.length > 0 ? history[history.length - 1] : null;
+    let tasksToRun = tasks;
+    const carriedScores = {};
+    if (!isFirstIter && !isLastIter && prevLog) {
+      tasksToRun = [];
+      for (const task of tasks) {
+        const prevScore = prevLog.taskResults[task.id];
+        const prevValue = prevScore ? prevScore.score ?? (prevScore.pass ? 100 : 0) : 0;
+        if (prevValue >= 100) {
+          carriedScores[task.id] = { pass: true, score: 100 };
+          onProgress?.({
+            type: "task-skipped",
+            iteration: iter,
+            taskId: task.id,
+            message: `Skipped ${task.id} (scored 100% last iteration)`
+          });
+        } else {
+          tasksToRun.push(task);
+        }
+      }
+    }
+    const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
+      tasksToRun,
       harnessPath,
       workspacePath,
       iter,
@@ -5135,6 +5163,13 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
       evolveConfig.runsPerTask,
       evolveConfig.parallelTasks
     );
+    const results = { ...carriedScores, ...evalResults };
+    const allScores = Object.values(results);
+    const total = allScores.reduce(
+      (sum, s) => sum + (s.score ?? (s.pass ? 100 : 0)),
+      0
+    );
+    const aggregate = allScores.length > 0 ? total / allScores.length : 0;
     onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
     if (iter === 0) baselineScore = aggregate;
     if (iter > 0 && aggregate < bestScore) {
@@ -5841,6 +5876,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
           case "task-run":
             console.log(chalk14.dim(`      ${event.message ?? ""}`));
             break;
+          case "task-skipped":
+            console.log(chalk14.dim(`    SKIP  ${event.taskId ?? "unknown"} (100% last iteration)`));
+            break;
           case "task-scored": {
             const taskScore = event.score ?? 0;
             const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");