kairn-cli 2.2.5 → 2.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1245,9 +1245,6 @@ async function callLLM(config, userMessage, options) {
1245
1245
  const messages = [
1246
1246
  { role: "user", content: userMessage }
1247
1247
  ];
1248
- if (jsonMode) {
1249
- messages.push({ role: "assistant", content: "{" });
1250
- }
1251
1248
  try {
1252
1249
  const response = await client2.messages.create({
1253
1250
  model: config.model,
@@ -1259,7 +1256,7 @@ async function callLLM(config, userMessage, options) {
1259
1256
  if (!textBlock || textBlock.type !== "text") {
1260
1257
  throw new Error("No text response from compiler LLM");
1261
1258
  }
1262
- return jsonMode ? `{${textBlock.text}` : textBlock.text;
1259
+ return textBlock.text;
1263
1260
  } catch (err) {
1264
1261
  throw new Error(classifyError(err, providerName));
1265
1262
  }
@@ -5128,8 +5125,31 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5128
5125
  break;
5129
5126
  }
5130
5127
  onProgress?.({ type: "iteration-start", iteration: iter });
5131
- const { results, aggregate } = await evaluateAll(
5132
- tasks,
5128
+ const isFirstIter = iter === 0;
5129
+ const isLastIter = iter === evolveConfig.maxIterations - 1;
5130
+ const prevLog = history.length > 0 ? history[history.length - 1] : null;
5131
+ let tasksToRun = tasks;
5132
+ const carriedScores = {};
5133
+ if (!isFirstIter && !isLastIter && prevLog) {
5134
+ tasksToRun = [];
5135
+ for (const task of tasks) {
5136
+ const prevScore = prevLog.taskResults[task.id];
5137
+ const prevValue = prevScore ? prevScore.score ?? (prevScore.pass ? 100 : 0) : 0;
5138
+ if (prevValue >= 100) {
5139
+ carriedScores[task.id] = { pass: true, score: 100 };
5140
+ onProgress?.({
5141
+ type: "task-skipped",
5142
+ iteration: iter,
5143
+ taskId: task.id,
5144
+ message: `Skipped ${task.id} (scored 100% last iteration)`
5145
+ });
5146
+ } else {
5147
+ tasksToRun.push(task);
5148
+ }
5149
+ }
5150
+ }
5151
+ const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
5152
+ tasksToRun,
5133
5153
  harnessPath,
5134
5154
  workspacePath,
5135
5155
  iter,
@@ -5138,6 +5158,13 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5138
5158
  evolveConfig.runsPerTask,
5139
5159
  evolveConfig.parallelTasks
5140
5160
  );
5161
+ const results = { ...carriedScores, ...evalResults };
5162
+ const allScores = Object.values(results);
5163
+ const total = allScores.reduce(
5164
+ (sum, s) => sum + (s.score ?? (s.pass ? 100 : 0)),
5165
+ 0
5166
+ );
5167
+ const aggregate = allScores.length > 0 ? total / allScores.length : 0;
5141
5168
  onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
5142
5169
  if (iter === 0) baselineScore = aggregate;
5143
5170
  if (iter > 0 && aggregate < bestScore) {
@@ -5844,6 +5871,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5844
5871
  case "task-run":
5845
5872
  console.log(chalk14.dim(` ${event.message ?? ""}`));
5846
5873
  break;
5874
+ case "task-skipped":
5875
+ console.log(chalk14.dim(` SKIP ${event.taskId ?? "unknown"} (100% last iteration)`));
5876
+ break;
5847
5877
  case "task-scored": {
5848
5878
  const taskScore = event.score ?? 0;
5849
5879
  const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");