kairn-cli 2.2.4 → 2.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +54 -14
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1245,9 +1245,6 @@ async function callLLM(config, userMessage, options) {
|
|
|
1245
1245
|
const messages = [
|
|
1246
1246
|
{ role: "user", content: userMessage }
|
|
1247
1247
|
];
|
|
1248
|
-
if (jsonMode) {
|
|
1249
|
-
messages.push({ role: "assistant", content: "{" });
|
|
1250
|
-
}
|
|
1251
1248
|
try {
|
|
1252
1249
|
const response = await client2.messages.create({
|
|
1253
1250
|
model: config.model,
|
|
@@ -1259,7 +1256,7 @@ async function callLLM(config, userMessage, options) {
|
|
|
1259
1256
|
if (!textBlock || textBlock.type !== "text") {
|
|
1260
1257
|
throw new Error("No text response from compiler LLM");
|
|
1261
1258
|
}
|
|
1262
|
-
return
|
|
1259
|
+
return textBlock.text;
|
|
1263
1260
|
} catch (err) {
|
|
1264
1261
|
throw new Error(classifyError(err, providerName));
|
|
1265
1262
|
}
|
|
@@ -4569,17 +4566,47 @@ function parseToolCalls(stdout) {
|
|
|
4569
4566
|
return [];
|
|
4570
4567
|
}
|
|
4571
4568
|
}
|
|
4569
|
+
/**
 * Run a list of task factories with a cap on how many are in flight at once.
 *
 * Every task is started, even if an earlier one failed; failures are
 * collected and the first one recorded is thrown once all tasks have settled.
 *
 * @param {Array<() => unknown>} tasks - Zero-argument functions; each may
 *   return a promise or a plain value.
 * @param {number} limit - Maximum number of tasks in flight. Values below 1
 *   are raised to 1; a value that does not coerce to a number is treated as 1.
 * @returns {Promise<Array<unknown>>} Results in the same order as `tasks`.
 * @throws The first error recorded from a failing task, after all tasks settle.
 */
async function runWithConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  const executing = new Set();
  const errors = [];
  // Math.max(1, NaN) is NaN and `size >= NaN` is always false, which would
  // silently remove the cap. Fall back to sequential execution instead.
  const numericLimit = Number(limit);
  const effectiveLimit = Number.isNaN(numericLimit) ? 1 : Math.max(1, numericLimit);
  for (let i = 0; i < tasks.length; i++) {
    // The async wrapper still starts the task synchronously, but turns a
    // synchronous throw into a rejection and a plain return value into a
    // promise, so every outcome goes through the same bookkeeping below.
    const p = (async () => tasks[i]())().then(
      (result) => {
        results[i] = result;
      },
      (err) => {
        errors.push(err);
      }
    );
    // `p` never rejects (both outcomes are handled above), so `tracked`
    // always resolves and removes itself from the in-flight set.
    const tracked = p.then(() => {
      executing.delete(tracked);
    });
    executing.add(tracked);
    if (executing.size >= effectiveLimit) {
      // Wait for a free slot before starting the next task.
      await Promise.race(executing);
    }
  }
  await Promise.all(executing);
  if (errors.length > 0) {
    throw errors[0];
  }
  return results;
}
|
|
4572
4597
|
/**
 * Population standard deviation of `values` around a caller-supplied mean.
 *
 * @param {number[]} values - Samples to measure.
 * @param {number} mean - Centre the deviations are measured from.
 * @returns {number} Square root of the mean squared deviation, or 0 when
 *   there are fewer than two samples.
 */
function computeStddev(values, mean) {
  const count = values.length;
  if (count <= 1) {
    return 0;
  }
  let squaredDeviationTotal = 0;
  // forEach accumulates in index order and skips holes, like the reduce it replaces.
  values.forEach((value) => {
    squaredDeviationTotal = squaredDeviationTotal + (value - mean) ** 2;
  });
  return Math.sqrt(squaredDeviationTotal / count);
}
|
|
4577
|
-
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1) {
|
|
4602
|
+
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1, parallelTasks = 1) {
|
|
4578
4603
|
const results = {};
|
|
4579
4604
|
const projectRoot = path18.resolve(workspacePath, "..");
|
|
4580
4605
|
const effectiveRuns = Math.max(1, runsPerTask);
|
|
4581
|
-
|
|
4606
|
+
const concurrency = Math.max(1, parallelTasks);
|
|
4607
|
+
const evaluateTask = async (task) => {
|
|
4582
4608
|
onProgress?.({ type: "task-start", iteration, taskId: task.id });
|
|
4609
|
+
let finalScore;
|
|
4583
4610
|
if (effectiveRuns > 1 && config) {
|
|
4584
4611
|
const runScores = [];
|
|
4585
4612
|
let passCount = 0;
|
|
@@ -4606,7 +4633,7 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
|
|
|
4606
4633
|
}
|
|
4607
4634
|
const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
|
|
4608
4635
|
const stddev = computeStddev(runScores, mean);
|
|
4609
|
-
|
|
4636
|
+
finalScore = {
|
|
4610
4637
|
pass: passCount > effectiveRuns / 2,
|
|
4611
4638
|
score: mean,
|
|
4612
4639
|
details: `Mean of ${effectiveRuns} runs`,
|
|
@@ -4625,22 +4652,28 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
|
|
|
4625
4652
|
task.id
|
|
4626
4653
|
);
|
|
4627
4654
|
const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4628
|
-
|
|
4655
|
+
finalScore = taskResult.score;
|
|
4629
4656
|
if (config) {
|
|
4630
4657
|
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4631
4658
|
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4632
|
-
|
|
4633
|
-
await writeScore(traceDir,
|
|
4659
|
+
finalScore = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4660
|
+
await writeScore(traceDir, finalScore);
|
|
4634
4661
|
}
|
|
4635
|
-
results[task.id] = score;
|
|
4636
4662
|
}
|
|
4637
|
-
const finalScore = results[task.id];
|
|
4638
4663
|
onProgress?.({
|
|
4639
4664
|
type: "task-scored",
|
|
4640
4665
|
iteration,
|
|
4641
4666
|
taskId: task.id,
|
|
4642
4667
|
score: finalScore.score ?? (finalScore.pass ? 100 : 0)
|
|
4643
4668
|
});
|
|
4669
|
+
return { id: task.id, score: finalScore };
|
|
4670
|
+
};
|
|
4671
|
+
const taskResults = await runWithConcurrency(
|
|
4672
|
+
tasks.map((task) => () => evaluateTask(task)),
|
|
4673
|
+
concurrency
|
|
4674
|
+
);
|
|
4675
|
+
for (const { id, score } of taskResults) {
|
|
4676
|
+
results[id] = score;
|
|
4644
4677
|
}
|
|
4645
4678
|
const scores = Object.values(results);
|
|
4646
4679
|
const total = scores.reduce(
|
|
@@ -5099,7 +5132,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5099
5132
|
iter,
|
|
5100
5133
|
kairnConfig,
|
|
5101
5134
|
onProgress,
|
|
5102
|
-
evolveConfig.runsPerTask
|
|
5135
|
+
evolveConfig.runsPerTask,
|
|
5136
|
+
evolveConfig.parallelTasks
|
|
5103
5137
|
);
|
|
5104
5138
|
onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
|
|
5105
5139
|
if (iter === 0) baselineScore = aggregate;
|
|
@@ -5689,7 +5723,7 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5689
5723
|
process.exit(1);
|
|
5690
5724
|
}
|
|
5691
5725
|
});
|
|
5692
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").action(async (options) => {
|
|
5726
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").action(async (options) => {
|
|
5693
5727
|
try {
|
|
5694
5728
|
const projectRoot = process.cwd();
|
|
5695
5729
|
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
@@ -5764,6 +5798,12 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5764
5798
|
process.exit(1);
|
|
5765
5799
|
}
|
|
5766
5800
|
evolveConfig.runsPerTask = runs;
|
|
5801
|
+
const parallel = parseInt(options.parallel ?? "1", 10);
|
|
5802
|
+
if (isNaN(parallel) || parallel < 1) {
|
|
5803
|
+
console.log(ui.error("--parallel must be a positive integer"));
|
|
5804
|
+
process.exit(1);
|
|
5805
|
+
}
|
|
5806
|
+
evolveConfig.parallelTasks = parallel;
|
|
5767
5807
|
try {
|
|
5768
5808
|
await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
|
|
5769
5809
|
} catch {
|