kairn-cli 2.2.5 → 2.2.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/dist/cli.js +36 -6
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1245,9 +1245,6 @@ async function callLLM(config, userMessage, options) {
|
|
|
1245
1245
|
const messages = [
|
|
1246
1246
|
{ role: "user", content: userMessage }
|
|
1247
1247
|
];
|
|
1248
|
-
if (jsonMode) {
|
|
1249
|
-
messages.push({ role: "assistant", content: "{" });
|
|
1250
|
-
}
|
|
1251
1248
|
try {
|
|
1252
1249
|
const response = await client2.messages.create({
|
|
1253
1250
|
model: config.model,
|
|
@@ -1259,7 +1256,7 @@ async function callLLM(config, userMessage, options) {
|
|
|
1259
1256
|
if (!textBlock || textBlock.type !== "text") {
|
|
1260
1257
|
throw new Error("No text response from compiler LLM");
|
|
1261
1258
|
}
|
|
1262
|
-
return
|
|
1259
|
+
return textBlock.text;
|
|
1263
1260
|
} catch (err) {
|
|
1264
1261
|
throw new Error(classifyError(err, providerName));
|
|
1265
1262
|
}
|
|
@@ -5128,8 +5125,31 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5128
5125
|
break;
|
|
5129
5126
|
}
|
|
5130
5127
|
onProgress?.({ type: "iteration-start", iteration: iter });
|
|
5131
|
-
const
|
|
5132
|
-
|
|
5128
|
+
const isFirstIter = iter === 0;
|
|
5129
|
+
const isLastIter = iter === evolveConfig.maxIterations - 1;
|
|
5130
|
+
const prevLog = history.length > 0 ? history[history.length - 1] : null;
|
|
5131
|
+
let tasksToRun = tasks;
|
|
5132
|
+
const carriedScores = {};
|
|
5133
|
+
if (!isFirstIter && !isLastIter && prevLog) {
|
|
5134
|
+
tasksToRun = [];
|
|
5135
|
+
for (const task of tasks) {
|
|
5136
|
+
const prevScore = prevLog.taskResults[task.id];
|
|
5137
|
+
const prevValue = prevScore ? prevScore.score ?? (prevScore.pass ? 100 : 0) : 0;
|
|
5138
|
+
if (prevValue >= 100) {
|
|
5139
|
+
carriedScores[task.id] = { pass: true, score: 100 };
|
|
5140
|
+
onProgress?.({
|
|
5141
|
+
type: "task-skipped",
|
|
5142
|
+
iteration: iter,
|
|
5143
|
+
taskId: task.id,
|
|
5144
|
+
message: `Skipped ${task.id} (scored 100% last iteration)`
|
|
5145
|
+
});
|
|
5146
|
+
} else {
|
|
5147
|
+
tasksToRun.push(task);
|
|
5148
|
+
}
|
|
5149
|
+
}
|
|
5150
|
+
}
|
|
5151
|
+
const { results: evalResults, aggregate: evalAggregate } = await evaluateAll(
|
|
5152
|
+
tasksToRun,
|
|
5133
5153
|
harnessPath,
|
|
5134
5154
|
workspacePath,
|
|
5135
5155
|
iter,
|
|
@@ -5138,6 +5158,13 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5138
5158
|
evolveConfig.runsPerTask,
|
|
5139
5159
|
evolveConfig.parallelTasks
|
|
5140
5160
|
);
|
|
5161
|
+
const results = { ...carriedScores, ...evalResults };
|
|
5162
|
+
const allScores = Object.values(results);
|
|
5163
|
+
const total = allScores.reduce(
|
|
5164
|
+
(sum, s) => sum + (s.score ?? (s.pass ? 100 : 0)),
|
|
5165
|
+
0
|
|
5166
|
+
);
|
|
5167
|
+
const aggregate = allScores.length > 0 ? total / allScores.length : 0;
|
|
5141
5168
|
onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
|
|
5142
5169
|
if (iter === 0) baselineScore = aggregate;
|
|
5143
5170
|
if (iter > 0 && aggregate < bestScore) {
|
|
@@ -5844,6 +5871,9 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5844
5871
|
case "task-run":
|
|
5845
5872
|
console.log(chalk14.dim(` ${event.message ?? ""}`));
|
|
5846
5873
|
break;
|
|
5874
|
+
case "task-skipped":
|
|
5875
|
+
console.log(chalk14.dim(` SKIP ${event.taskId ?? "unknown"} (100% last iteration)`));
|
|
5876
|
+
break;
|
|
5847
5877
|
case "task-scored": {
|
|
5848
5878
|
const taskScore = event.score ?? 0;
|
|
5849
5879
|
const taskStatus = taskScore >= 100 ? chalk14.green("PASS") : taskScore >= 60 ? chalk14.yellow("PARTIAL") : chalk14.red("FAIL");
|