kairn-cli 2.2.4 → 2.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1245,9 +1245,6 @@ async function callLLM(config, userMessage, options) {
1245
1245
  const messages = [
1246
1246
  { role: "user", content: userMessage }
1247
1247
  ];
1248
- if (jsonMode) {
1249
- messages.push({ role: "assistant", content: "{" });
1250
- }
1251
1248
  try {
1252
1249
  const response = await client2.messages.create({
1253
1250
  model: config.model,
@@ -1259,7 +1256,7 @@ async function callLLM(config, userMessage, options) {
1259
1256
  if (!textBlock || textBlock.type !== "text") {
1260
1257
  throw new Error("No text response from compiler LLM");
1261
1258
  }
1262
- return jsonMode ? `{${textBlock.text}` : textBlock.text;
1259
+ return textBlock.text;
1263
1260
  } catch (err) {
1264
1261
  throw new Error(classifyError(err, providerName));
1265
1262
  }
@@ -4569,17 +4566,47 @@ function parseToolCalls(stdout) {
4569
4566
  return [];
4570
4567
  }
4571
4568
  }
4569
/**
 * Run an array of task functions with at most `limit` of them in flight.
 *
 * Every task is started and allowed to settle, even after another task has
 * failed. Once all have settled, the first failure observed (in completion
 * order, not index order) is rethrown; otherwise the results are returned.
 *
 * @param {Array<() => unknown>} tasks - Zero-argument functions. Each may
 *   return a promise or a plain value, and may throw synchronously.
 * @param {number} limit - Maximum number of tasks in flight. Values below 1
 *   are raised to 1; a missing or non-numeric value falls back to 1.
 * @returns {Promise<unknown[]>} Results positioned by the index of the task
 *   that produced them.
 * @throws The first error recorded from any task.
 */
async function runWithConcurrency(tasks, limit) {
  const results = new Array(tasks.length);
  const executing = new Set();
  const errors = [];

  // Math.max(1, NaN) is NaN, and `size >= NaN` is always false, which would
  // silently disable throttling. Treat an unusable limit as "one at a time".
  const requestedLimit = Number(limit);
  const effectiveLimit = Number.isNaN(requestedLimit) ? 1 : Math.max(1, requestedLimit);

  // An async wrapper starts the task synchronously but converts a synchronous
  // throw into a handled failure and accepts non-promise return values.
  // It never rejects: failures are collected in `errors` instead.
  const runOne = async (index) => {
    try {
      results[index] = await tasks[index]();
    } catch (err) {
      errors.push(err);
    }
  };

  for (let i = 0; i < tasks.length; i++) {
    const tracked = runOne(i).finally(() => {
      executing.delete(tracked);
    });
    executing.add(tracked);
    if (executing.size >= effectiveLimit) {
      // Wait for any in-flight task to settle before starting the next one.
      await Promise.race(executing);
    }
  }

  await Promise.all(executing);
  if (errors.length > 0) {
    throw errors[0];
  }
  return results;
}
4572
4597
  function computeStddev(values, mean) {
4573
4598
  if (values.length <= 1) return 0;
4574
4599
  const sumSqDiffs = values.reduce((sum, v) => sum + (v - mean) ** 2, 0);
4575
4600
  return Math.sqrt(sumSqDiffs / values.length);
4576
4601
  }
4577
- async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1) {
4602
+ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config, onProgress, runsPerTask = 1, parallelTasks = 1) {
4578
4603
  const results = {};
4579
4604
  const projectRoot = path18.resolve(workspacePath, "..");
4580
4605
  const effectiveRuns = Math.max(1, runsPerTask);
4581
- for (const task of tasks) {
4606
+ const concurrency = Math.max(1, parallelTasks);
4607
+ const evaluateTask = async (task) => {
4582
4608
  onProgress?.({ type: "task-start", iteration, taskId: task.id });
4609
+ let finalScore;
4583
4610
  if (effectiveRuns > 1 && config) {
4584
4611
  const runScores = [];
4585
4612
  let passCount = 0;
@@ -4606,7 +4633,7 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
4606
4633
  }
4607
4634
  const mean = runScores.reduce((a, b) => a + b, 0) / runScores.length;
4608
4635
  const stddev = computeStddev(runScores, mean);
4609
- results[task.id] = {
4636
+ finalScore = {
4610
4637
  pass: passCount > effectiveRuns / 2,
4611
4638
  score: mean,
4612
4639
  details: `Mean of ${effectiveRuns} runs`,
@@ -4625,22 +4652,28 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
4625
4652
  task.id
4626
4653
  );
4627
4654
  const taskResult = await runTask(task, harnessPath, traceDir, iteration, projectRoot);
4628
- let score = taskResult.score;
4655
+ finalScore = taskResult.score;
4629
4656
  if (config) {
4630
4657
  const stdout = await fs18.readFile(path18.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
4631
4658
  const stderr = await fs18.readFile(path18.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
4632
- score = await scoreTask(task, traceDir, stdout, stderr, config);
4633
- await writeScore(traceDir, score);
4659
+ finalScore = await scoreTask(task, traceDir, stdout, stderr, config);
4660
+ await writeScore(traceDir, finalScore);
4634
4661
  }
4635
- results[task.id] = score;
4636
4662
  }
4637
- const finalScore = results[task.id];
4638
4663
  onProgress?.({
4639
4664
  type: "task-scored",
4640
4665
  iteration,
4641
4666
  taskId: task.id,
4642
4667
  score: finalScore.score ?? (finalScore.pass ? 100 : 0)
4643
4668
  });
4669
+ return { id: task.id, score: finalScore };
4670
+ };
4671
+ const taskResults = await runWithConcurrency(
4672
+ tasks.map((task) => () => evaluateTask(task)),
4673
+ concurrency
4674
+ );
4675
+ for (const { id, score } of taskResults) {
4676
+ results[id] = score;
4644
4677
  }
4645
4678
  const scores = Object.values(results);
4646
4679
  const total = scores.reduce(
@@ -5099,7 +5132,8 @@ async function evolve(workspacePath, tasks, kairnConfig, evolveConfig, onProgres
5099
5132
  iter,
5100
5133
  kairnConfig,
5101
5134
  onProgress,
5102
- evolveConfig.runsPerTask
5135
+ evolveConfig.runsPerTask,
5136
+ evolveConfig.parallelTasks
5103
5137
  );
5104
5138
  onProgress?.({ type: "iteration-scored", iteration: iter, score: aggregate });
5105
5139
  if (iter === 0) baselineScore = aggregate;
@@ -5689,7 +5723,7 @@ evolveCommand.command("baseline").description("Snapshot current .claude/ directo
5689
5723
  process.exit(1);
5690
5724
  }
5691
5725
  });
5692
- evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").action(async (options) => {
5726
+ evolveCommand.command("run").description("Run tasks against the current harness").option("--task <id>", "Run a specific task by ID").option("--iterations <n>", "Number of evolution iterations", "5").option("--runs <n>", "Run each task N times for variance measurement", "1").option("--parallel <n>", "Run up to N tasks concurrently", "1").action(async (options) => {
5693
5727
  try {
5694
5728
  const projectRoot = process.cwd();
5695
5729
  const workspace = path24.join(projectRoot, ".kairn-evolve");
@@ -5764,6 +5798,12 @@ evolveCommand.command("run").description("Run tasks against the current harness"
5764
5798
  process.exit(1);
5765
5799
  }
5766
5800
  evolveConfig.runsPerTask = runs;
5801
+ const parallel = parseInt(options.parallel ?? "1", 10);
5802
+ if (isNaN(parallel) || parallel < 1) {
5803
+ console.log(ui.error("--parallel must be a positive integer"));
5804
+ process.exit(1);
5805
+ }
5806
+ evolveConfig.parallelTasks = parallel;
5767
5807
  try {
5768
5808
  await fs24.access(path24.join(workspace, "iterations", "0", "harness"));
5769
5809
  } catch {