kairn-cli 2.2.4 → 2.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +53 -10
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -4569,17 +4569,47 @@ function parseToolCalls(stdout) {
|
|
|
4569
4569
|
return [];
|
|
4570
4570
|
}
|
|
4571
4571
|
}
|
|
4572
|
+
/**
 * Run task factories with a cap on how many are in flight at the same time.
 *
 * Every task is started even when an earlier one has failed. Failures are
 * collected while the pool drains, and the first one recorded is rethrown
 * once all tasks have settled.
 *
 * @param {Array<() => (Promise<*>|*)>} tasks - Zero-argument factories. Each
 *   may return a promise or a plain value; a synchronous throw is treated the
 *   same way as a rejected promise.
 * @param {number} limit - Maximum number of tasks in flight. Values below 1
 *   and values that coerce to NaN (e.g. `undefined`) fall back to 1.
 * @returns {Promise<Array<*>>} Results stored at the index of the task that
 *   produced them; slots of failed tasks are left unset.
 * @throws The first error recorded among the failed tasks.
 */
async function runWithConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  const executing = /* @__PURE__ */ new Set();
  const errors = [];
  // Math.max(1, NaN) is NaN and `size >= NaN` is always false, which would
  // silently remove the cap. Treat an unusable limit as "one at a time".
  const requestedLimit = Number(limit);
  const effectiveLimit = Number.isNaN(requestedLimit) ? 1 : Math.max(1, requestedLimit);
  for (let i = 0; i < tasks.length; i++) {
    let started;
    try {
      // Promise.resolve lets a factory hand back a plain value.
      started = Promise.resolve(tasks[i]());
    } catch (err) {
      // A synchronous throw must not abandon the tasks already in flight.
      started = Promise.reject(err);
    }
    const settled = started.then(
      (result) => {
        results[i] = result;
      },
      (err) => {
        errors.push(err);
      }
    );
    // `settled` never rejects, so `tracked` is safe to race and to await.
    const tracked = settled.then(() => {
      executing.delete(tracked);
    });
    executing.add(tracked);
    if (executing.size >= effectiveLimit) {
      // Wait for a free slot before starting the next task.
      await Promise.race(executing);
    }
  }
  await Promise.all(executing);
  if (errors.length > 0) {
    throw errors[0];
  }
  return results;
}
|
|
4572
4600
|
/**
 * Compute the population standard deviation of a list of numbers.
 *
 * The sum of squared deviations is divided by the number of values (not by
 * n - 1). Lists with zero or one element yield 0.
 *
 * @param {number[]} values - Samples to measure.
 * @param {number} mean - Mean of `values`, supplied by the caller.
 * @returns {number} The standard deviation around `mean`.
 */
function computeStddev(values, mean) {
  const count = values.length;
  if (count <= 1) {
    return 0;
  }
  let squaredDeviationTotal = 0;
  values.forEach((value) => {
    squaredDeviationTotal = squaredDeviationTotal + (value - mean) ** 2;
  });
  const variance = squaredDeviationTotal / count;
  return Math.sqrt(variance);
}
|
|
4577
|
-
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1) {
|
|
4605
|
+
async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1, parallelTasks = 1) {
|
|
4578
4606
|
const results = {};
|
|
4579
4607
|
const projectRoot = path18.resolve(workspacePath, "..");
|
|
4580
4608
|
const effectiveRuns = Math.max(1, runsPerTask);
|
|
4581
|
-
|
|
4609
|
+
const concurrency = Math.max(1, parallelTasks);
|
|
4610
|
+
const evaluateTask = async (task) => {
|
|
4582
4611
|
onProgress?.({ type: "task-start", iteration, taskId: task.id });
|
|
4612
|
+
let finalScore;
|
|
4583
4613
|
if (effectiveRuns > 1 && config) {
|
|
4584
4614
|
const runScores = [];
|
|
4585
4615
|
let passCount = 0;
|
|
@@ -4606,7 +4636,7 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
|
|
|
4606
4636
|
}
|
|
4607
4637
|
const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
|
|
4608
4638
|
const stddev = computeStddev(runScores, mean);
|
|
4609
|
-
|
|
4639
|
+
finalScore = {
|
|
4610
4640
|
pass: passCount > effectiveRuns / 2,
|
|
4611
4641
|
score: mean,
|
|
4612
4642
|
details: `Mean of ${effectiveRuns} runs`,
|
|
@@ -4625,22 +4655,28 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
|
|
|
4625
4655
|
task.id
|
|
4626
4656
|
);
|
|
4627
4657
|
const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
|
|
4628
|
-
|
|
4658
|
+
finalScore = taskResult.score;
|
|
4629
4659
|
if (config) {
|
|
4630
4660
|
const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
|
|
4631
4661
|
const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
|
|
4632
|
-
|
|
4633
|
-
await writeScore(traceDir,
|
|
4662
|
+
finalScore = await scoreTask(task, traceDir, stdout, stderr, config);
|
|
4663
|
+
await writeScore(traceDir, finalScore);
|
|
4634
4664
|
}
|
|
4635
|
-
results[task.id] = score;
|
|
4636
4665
|
}
|
|
4637
|
-
const finalScore = results[task.id];
|
|
4638
4666
|
onProgress?.({
|
|
4639
4667
|
type: "task-scored",
|
|
4640
4668
|
iteration,
|
|
4641
4669
|
taskId: task.id,
|
|
4642
4670
|
score: finalScore.score ?? (finalScore.pass ? 100 : 0)
|
|
4643
4671
|
});
|
|
4672
|
+
return { id: task.id, score: finalScore };
|
|
4673
|
+
};
|
|
4674
|
+
const taskResults = await runWithConcurrency(
|
|
4675
|
+
tasks.map((task) => () => evaluateTask(task)),
|
|
4676
|
+
concurrency
|
|
4677
|
+
);
|
|
4678
|
+
for (const { id, score } of taskResults) {
|
|
4679
|
+
results[id] = score;
|
|
4644
4680
|
}
|
|
4645
4681
|
const scores = Object.values(results);
|
|
4646
4682
|
const total = scores.reduce(
|
|
@@ -5099,7 +5135,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
|
|
|
5099
5135
|
iter,
|
|
5100
5136
|
kairnConfig,
|
|
5101
5137
|
onProgress,
|
|
5102
|
-
evolveConfig.runsPerTask
|
|
5138
|
+
evolveConfig.runsPerTask,
|
|
5139
|
+
evolveConfig.parallelTasks
|
|
5103
5140
|
);
|
|
5104
5141
|
onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
|
|
5105
5142
|
if (iter === 0) baselineScore = aggregate;
|
|
@@ -5689,7 +5726,7 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
|
|
|
5689
5726
|
process.exit(1);
|
|
5690
5727
|
}
|
|
5691
5728
|
});
|
|
5692
|
-
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").action(async (options) => {
|
|
5729
|
+
evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").action(async (options) => {
|
|
5693
5730
|
try {
|
|
5694
5731
|
const projectRoot = process.cwd();
|
|
5695
5732
|
const workspace = path24.join(projectRoot, ".kairn-evolve");
|
|
@@ -5764,6 +5801,12 @@ evolveCommand.command("run").description("Run tasks against the current harness"
|
|
|
5764
5801
|
process.exit(1);
|
|
5765
5802
|
}
|
|
5766
5803
|
evolveConfig.runsPerTask = runs;
|
|
5804
|
+
const parallel = parseInt(options.parallel ?? "1", 10);
|
|
5805
|
+
if (isNaN(parallel) || parallel < 1) {
|
|
5806
|
+
console.log(ui.error("--parallel must be a positive integer"));
|
|
5807
|
+
process.exit(1);
|
|
5808
|
+
}
|
|
5809
|
+
evolveConfig.parallelTasks = parallel;
|
|
5767
5810
|
try {
|
|
5768
5811
|
await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
|
|
5769
5812
|
} catch {
|