@ljoukov/llm 3.0.8 → 3.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -101,6 +101,7 @@ __export(index_exports, {
101
101
  resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
102
102
  resolveFireworksModelId: () => resolveFireworksModelId,
103
103
  runAgentLoop: () => runAgentLoop,
104
+ runCandidateEvolution: () => runCandidateEvolution,
104
105
  runToolLoop: () => runToolLoop,
105
106
  sanitisePartForLogging: () => sanitisePartForLogging,
106
107
  streamAgentLoop: () => streamAgentLoop,
@@ -2212,7 +2213,10 @@ var GEMINI_TEXT_MODEL_IDS = [
2212
2213
  "gemini-flash-latest",
2213
2214
  "gemini-flash-lite-latest"
2214
2215
  ];
2215
- var GEMINI_IMAGE_MODEL_IDS = ["gemini-3-pro-image-preview"];
2216
+ var GEMINI_IMAGE_MODEL_IDS = [
2217
+ "gemini-3-pro-image-preview",
2218
+ "gemini-3.1-flash-image-preview"
2219
+ ];
2216
2220
  var GEMINI_MODEL_IDS = [...GEMINI_TEXT_MODEL_IDS, ...GEMINI_IMAGE_MODEL_IDS];
2217
2221
  function isGeminiModelId(value) {
2218
2222
  return GEMINI_MODEL_IDS.includes(value);
@@ -2685,12 +2689,7 @@ async function runOpenAiCall(fn, modelId, runOptions) {
2685
2689
  }
2686
2690
 
2687
2691
  // src/openai/models.ts
2688
- var OPENAI_MODEL_IDS = [
2689
- "gpt-5.3-codex",
2690
- "gpt-5.3-codex-spark",
2691
- "gpt-5.2",
2692
- "gpt-5.1-codex-mini"
2693
- ];
2692
+ var OPENAI_MODEL_IDS = ["gpt-5.3-codex", "gpt-5.2", "gpt-5.1-codex-mini"];
2694
2693
  function isOpenAiModelId(value) {
2695
2694
  return OPENAI_MODEL_IDS.includes(value);
2696
2695
  }
@@ -4362,6 +4361,9 @@ function extractFireworksToolCalls(message) {
4362
4361
  return calls;
4363
4362
  }
4364
4363
  function resolveGeminiThinkingConfig(modelId) {
4364
+ if (isGeminiImageModelId(modelId)) {
4365
+ return void 0;
4366
+ }
4365
4367
  switch (modelId) {
4366
4368
  case "gemini-3-pro-preview":
4367
4369
  case "gemini-3.1-pro-preview":
@@ -4617,9 +4619,10 @@ async function runTextCall(params) {
4617
4619
  }, modelForProvider);
4618
4620
  } else {
4619
4621
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
4622
+ const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider);
4620
4623
  const config = {
4621
4624
  maxOutputTokens: 32e3,
4622
- thinkingConfig: resolveGeminiThinkingConfig(modelForProvider),
4625
+ ...thinkingConfig ? { thinkingConfig } : {},
4623
4626
  ...request.responseMimeType ? { responseMimeType: request.responseMimeType } : {},
4624
4627
  ...request.responseJsonSchema ? { responseJsonSchema: request.responseJsonSchema } : {},
4625
4628
  ...request.responseModalities ? { responseModalities: Array.from(request.responseModalities) } : {},
@@ -5847,6 +5850,7 @@ async function runToolLoop(request) {
5847
5850
  firstModelEventAtMs = Date.now();
5848
5851
  }
5849
5852
  };
5853
+ const thinkingConfig = resolveGeminiThinkingConfig(request.model);
5850
5854
  const config = {
5851
5855
  maxOutputTokens: 32e3,
5852
5856
  tools: geminiTools,
@@ -5855,7 +5859,7 @@ async function runToolLoop(request) {
5855
5859
  mode: import_genai2.FunctionCallingConfigMode.VALIDATED
5856
5860
  }
5857
5861
  },
5858
- thinkingConfig: resolveGeminiThinkingConfig(request.model)
5862
+ ...thinkingConfig ? { thinkingConfig } : {}
5859
5863
  };
5860
5864
  const onEvent = request.onEvent;
5861
5865
  const response = await runGeminiCall(
@@ -9447,6 +9451,659 @@ function createAgentTelemetryEmitter(params) {
9447
9451
  });
9448
9452
  };
9449
9453
  }
9454
+
9455
+ // src/agent/candidateEvolution.ts
9456
+ var import_node_crypto4 = require("crypto");
9457
// Fallback for options.batchSize in runCandidateEvolution (issues sampled per parent).
+ var DEFAULT_BATCH_SIZE = 1;
9458
// Fallback for options.generationConcurrency (parallel generator calls).
+ var DEFAULT_GENERATION_CONCURRENCY = 8;
9459
// Fallback for options.assessmentConcurrency (parallel verify/assess calls).
+ var DEFAULT_ASSESSMENT_CONCURRENCY = 8;
9460
// Sigmoid steepness used by parent selection when parentSelection.sharpness is undefined.
+ var DEFAULT_SHARPNESS = 10;
9461
// Novelty penalty factor used when parentSelection.noveltyWeight is undefined.
+ var DEFAULT_NOVELTY_WEIGHT = 1;
9462
// Sigmoid midpoint used when parentSelection.midpoint is absent: the 75th percentile of archive scores.
+ var DEFAULT_MIDPOINT = { mode: "percentile", percentile: 75 };
9463
// Feedback visibility used when options.feedbackScope is absent: walk the parent's ancestor chain.
+ var DEFAULT_FEEDBACK_SCOPE = { mode: "ancestors" };
9464
// Percentiles reported in each snapshot when options.scorePercentiles is missing or empty.
+ var DEFAULT_SCORE_PERCENTILES = [0, 25, 50, 75, 90, 95, 100];
9465
/**
 * Creates a statistics accumulator with every counter initialised to zero.
 *
 * @returns {object} A fresh object holding the seven evolution counters.
 */
function createEmptyStats() {
  const counterNames = [
    "generationCalls",
    "issuesSupplied",
    "proposalsGenerated",
    "proposalsAfterPostCheck",
    "assessmentCalls",
    "postCheckCalls",
    "feedbackEntriesSupplied"
  ];
  return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
9476
/**
 * Adds two statistics objects counter by counter without mutating either.
 *
 * @param {object} left - First set of counters.
 * @param {object} right - Second set of counters.
 * @returns {object} A new object whose counters are the pairwise sums.
 */
function addStats(left, right) {
  const sumOf = (counter) => left[counter] + right[counter];
  return {
    generationCalls: sumOf("generationCalls"),
    issuesSupplied: sumOf("issuesSupplied"),
    proposalsGenerated: sumOf("proposalsGenerated"),
    proposalsAfterPostCheck: sumOf("proposalsAfterPostCheck"),
    assessmentCalls: sumOf("assessmentCalls"),
    postCheckCalls: sumOf("postCheckCalls"),
    feedbackEntriesSupplied: sumOf("feedbackEntriesSupplied")
  };
}
9487
/**
 * Produces an identifier of the form `<prefix>_<16 hex characters>`.
 *
 * @param {string} prefix - Label placed before the random suffix.
 * @returns {string} The prefixed identifier, built from 8 random bytes.
 */
function randomId(prefix) {
  const suffix = (0, import_node_crypto4.randomBytes)(8).toString("hex");
  return `${prefix}_${suffix}`;
}
9490
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * @param {*} value - Candidate value; non-numbers count as non-finite.
 * @param {*} fallback - Value returned when `value` is not finite.
 * @returns {*} `value` or `fallback`.
 */
function toFiniteNumber(value, fallback) {
  return Number.isFinite(value) ? value : fallback;
}
9496
/**
 * Wraps an optional random source so its output always lies in [0, 1).
 *
 * @param {(() => number)|undefined} random - Caller-supplied generator, if any.
 * @returns {() => number} `Math.random` when no generator is given; otherwise
 *   a wrapper that maps non-finite or non-positive samples to 0 and samples
 *   of 1 or more to 0.999999999999.
 */
function normalizeRandom(random) {
  if (!random) {
    return () => Math.random();
  }
  return () => {
    const sample = random();
    // Non-finite samples are treated as 0, the same as non-positive ones.
    if (!Number.isFinite(sample) || sample <= 0) {
      return 0;
    }
    return sample >= 1 ? 0.999999999999 : sample;
  };
}
9511
/**
 * Logistic transform of a score, centred on `midpoint`.
 *
 * @param {number} score - Raw score.
 * @param {number} midpoint - Score that maps to 0.5.
 * @param {number} sharpness - Steepness of the curve.
 * @returns {number} A value between 0 and 1.
 */
function sigmoidScore(score, midpoint, sharpness) {
  const exponent = -sharpness * (score - midpoint);
  return 1 / (1 + Math.exp(exponent));
}
9514
/**
 * Computes a percentile of an ascending-sorted array by linear interpolation
 * between the two nearest ranks.
 *
 * @param {number[]} sortedValues - Values already sorted in ascending order.
 * @param {number} percentile - Requested percentile; clamped to [0, 100].
 * @returns {number} The interpolated value, 0 for an empty array, or the sole
 *   element of a one-element array.
 */
function computePercentile(sortedValues, percentile) {
  const count = sortedValues.length;
  if (count <= 1) {
    return sortedValues[0] ?? 0;
  }
  const clamped = Math.min(100, Math.max(0, percentile));
  const rank = (count - 1) * (clamped / 100);
  const floorIndex = Math.floor(rank);
  const ceilIndex = Math.ceil(rank);
  const below = sortedValues[floorIndex] ?? 0;
  if (floorIndex === ceilIndex) {
    return below;
  }
  const above = sortedValues[ceilIndex] ?? below;
  const weight = rank - floorIndex;
  return below * (1 - weight) + above * weight;
}
9533
/**
 * Summarises the score distribution of a set of records.
 *
 * @param {object[]} records - Records exposing `assessment.score`.
 * @param {number[]} percentiles - Percentiles to report.
 * @returns {object} Map from each requested percentile to its score value;
 *   non-finite scores are ignored.
 */
function computeScorePercentiles(records, percentiles) {
  const finiteScores = [];
  for (const record of records) {
    const { score } = record.assessment;
    if (Number.isFinite(score)) {
      finiteScores.push(score);
    }
  }
  finiteScores.sort((a, b) => a - b);
  return Object.fromEntries(
    Array.from(percentiles, (percentile) => [
      percentile,
      computePercentile(finiteScores, percentile)
    ])
  );
}
9541
/**
 * Picks one value at random, with probability proportional to its weight.
 *
 * Weights that are non-finite, zero or negative count as zero. Such entries
 * are never selected while at least one positive weight exists. When no
 * weight is positive, the pick is uniform over all values.
 *
 * @param {Array} values - Candidates to choose from; must be non-empty.
 * @param {number[]} weights - One weight per value, in the same order.
 * @param {() => number} random - Source of samples in [0, 1).
 * @returns {*} The selected value.
 * @throws {Error} If `values` is empty, the lengths differ, or the selected
 *   slot holds `undefined`.
 */
function pickByWeights(values, weights, random) {
  if (values.length === 0) {
    throw new Error("Cannot pick from an empty set.");
  }
  if (values.length !== weights.length) {
    throw new Error("values and weights must have the same length.");
  }
  const usableWeight = (index) => {
    const weight = weights[index];
    return Number.isFinite(weight) && weight > 0 ? weight : 0;
  };
  let totalWeight = 0;
  for (let index = 0; index < weights.length; index += 1) {
    totalWeight += usableWeight(index);
  }
  if (totalWeight <= 0) {
    const index = Math.min(values.length - 1, Math.floor(random() * values.length));
    const fallbackValue = values[index];
    if (fallbackValue === void 0) {
      throw new Error("Unexpected empty value during uniform fallback pick.");
    }
    return fallbackValue;
  }
  let threshold = random() * totalWeight;
  let lastWeightedIndex = -1;
  for (let index = 0; index < values.length; index += 1) {
    const weight = usableWeight(index);
    if (weight <= 0) {
      // Without this skip, a sample of exactly 0 would select a leading
      // zero-weight entry, because `threshold <= 0` already holds.
      continue;
    }
    lastWeightedIndex = index;
    threshold -= weight;
    if (threshold <= 0) {
      const value = values[index];
      if (value === void 0) {
        break;
      }
      return value;
    }
  }
  // Reached only through floating-point rounding (or an undefined slot):
  // fall back to the last entry that carried positive weight.
  const last = values[lastWeightedIndex];
  if (last === void 0) {
    throw new Error("Unexpected missing final value during weighted pick.");
  }
  return last;
}
9580
/**
 * Draws up to `k` distinct elements from `values` in random order.
 *
 * @param {Array} values - Source elements; not mutated.
 * @param {number} k - Number of elements requested.
 * @param {() => number} random - Source of samples in [0, 1).
 * @returns {Array} An empty array when `k <= 0` or `values` is empty, a copy
 *   of `values` when `k` covers the whole array, otherwise `k` random picks.
 */
function sampleWithoutReplacement(values, k, random) {
  const available = values.length;
  if (available === 0 || k <= 0) {
    return [];
  }
  const remaining = Array.from(values);
  if (k >= available) {
    return remaining;
  }
  const chosen = [];
  for (let drawn = 0; drawn < k; drawn += 1) {
    // Clamp so a sample of exactly 1 cannot index past the end.
    const slot = Math.min(remaining.length - 1, Math.floor(random() * remaining.length));
    const picked = remaining.splice(slot, 1)[0];
    if (picked === undefined) {
      break;
    }
    chosen.push(picked);
  }
  return chosen;
}
9599
/**
 * Decides whether a record may serve as a parent.
 *
 * @param {object} record - Record exposing `assessment.isViable` and
 *   `assessment.trainableIssues`.
 * @returns {boolean} True unless the record is explicitly non-viable or has
 *   no trainable issues.
 */
function isEligibleRecord(record) {
  const { assessment } = record;
  if (assessment.isViable === false) {
    return false;
  }
  return assessment.trainableIssues.length > 0;
}
9602
/**
 * Returns the trimmed issue type, or "default" when it is missing or blank.
 *
 * @param {object} issue - Issue with an optional `issueType` string.
 * @returns {string} The resolved type label.
 */
function resolveIssueType(issue) {
  const trimmed = issue.issueType?.trim();
  return trimmed || "default";
}
9609
/**
 * Returns the issue's own trimmed id, or a positional id derived from the
 * parent when none is supplied.
 *
 * @param {object} issue - Issue with an optional `issueId` string.
 * @param {string} parentId - Id of the record the issue belongs to.
 * @param {number} index - Position of the issue within that record.
 * @returns {string} The trimmed id, or `<parentId>:issue:<index>`.
 */
function resolveIssueId(issue, parentId, index) {
  const trimmed = issue.issueId?.trim();
  return trimmed ? trimmed : `${parentId}:issue:${index}`;
}
9616
/**
 * Wraps each issue of a record in an envelope carrying a resolved id and
 * issue type alongside the original issue object.
 *
 * @param {string} parentId - Id of the record that owns the issues.
 * @param {object[]} issues - Raw issues.
 * @returns {object[]} Envelopes of shape `{ id, issueType, issue }`.
 */
function normalizeIssuesForRecord(parentId, issues) {
  const wrap = (issue, position) => {
    const id = resolveIssueId(issue, parentId, position);
    const issueType = resolveIssueType(issue);
    return { id, issueType, issue };
  };
  return issues.map(wrap);
}
9623
/**
 * Chooses one issue type at random, weighted by how many issues have that
 * type times an optional per-type multiplier, then samples up to `batchSize`
 * issues of that type.
 *
 * @param {object[]} issues - Issue envelopes exposing `issueType`.
 * @param {number} batchSize - Maximum number of issues to return.
 * @param {object|undefined} typeWeights - Optional multiplier per issue type;
 *   missing, non-finite or non-positive multipliers count as 1.
 * @param {() => number} random - Source of samples in [0, 1).
 * @returns {object[]} Sampled envelopes, all sharing one issue type.
 */
function sampleIssuesByType(issues, batchSize, typeWeights, random) {
  if (batchSize <= 0 || issues.length === 0) {
    return [];
  }
  const countsByType = /* @__PURE__ */ new Map();
  issues.forEach(({ issueType }) => {
    countsByType.set(issueType, (countsByType.get(issueType) ?? 0) + 1);
  });
  const candidateTypes = [];
  const candidateWeights = [];
  for (const [type, count] of countsByType) {
    const configured = typeWeights?.[type] ?? 1;
    const usable = Number.isFinite(configured) && configured > 0;
    candidateTypes.push(type);
    candidateWeights.push(count * (usable ? configured : 1));
  }
  const chosenType = pickByWeights(candidateTypes, candidateWeights, random);
  const matching = issues.filter((issue) => issue.issueType === chosenType);
  return sampleWithoutReplacement(matching, Math.min(batchSize, matching.length), random);
}
9643
/**
 * Resolves the sigmoid midpoint used for parent selection.
 *
 * @param {object} midpoint - Either `{ mode: "fixed", value }` or a
 *   percentile spec exposing `percentile`.
 * @param {object[]} archive - Records exposing `assessment.score`.
 * @returns {number} The fixed value, or the requested percentile of the
 *   finite archive scores.
 */
function resolveMidpoint(midpoint, archive) {
  if (midpoint.mode === "fixed") {
    return midpoint.value;
  }
  const finiteScores = [];
  for (const record of archive) {
    const { score } = record.assessment;
    if (Number.isFinite(score)) {
      finiteScores.push(score);
    }
  }
  finiteScores.sort((a, b) => a - b);
  return computePercentile(finiteScores, midpoint.percentile);
}
9650
/**
 * Selects parent records for the next iteration.
 *
 * Each eligible record is weighted by a sigmoid of its score (centred on the
 * resolved midpoint) multiplied by a novelty factor
 * `1 / (1 + noveltyWeight * childCount)`.
 *
 * @param {object} input - Selection parameters.
 * @param {object[]} input.eligible - Records that may be chosen.
 * @param {object[]} input.archive - All records; used to resolve the midpoint.
 * @param {number} input.parentsPerIteration - Number of parents to draw.
 * @param {number} input.sharpness - Sigmoid steepness.
 * @param {object} input.midpoint - Midpoint specification.
 * @param {number} input.noveltyWeight - Penalty per existing child.
 * @param {boolean} input.replace - Whether a record may be drawn repeatedly.
 * @param {Map} input.childCountByParentId - Child count per record id.
 * @param {() => number} input.random - Source of samples in [0, 1).
 * @returns {object[]} The selected records.
 */
function selectParents(input) {
  const {
    eligible,
    archive,
    parentsPerIteration,
    sharpness,
    midpoint,
    noveltyWeight,
    replace,
    childCountByParentId,
    random
  } = input;
  if (parentsPerIteration <= 0 || eligible.length === 0) {
    return [];
  }
  const centre = resolveMidpoint(midpoint, archive);
  const scored = eligible.map((record) => {
    const offspring = childCountByParentId.get(record.id) ?? 0;
    const performance = sigmoidScore(record.assessment.score, centre, sharpness);
    const novelty = 1 / (1 + noveltyWeight * offspring);
    return { record, weight: performance * novelty };
  });
  const recordsOf = (entries) => entries.map((entry) => entry.record);
  const weightsOf = (entries) => entries.map((entry) => entry.weight);
  if (replace) {
    // The candidate set never changes here, so both arrays are built once.
    const records = recordsOf(scored);
    const weights = weightsOf(scored);
    const drawn = [];
    for (let draw = 0; draw < parentsPerIteration; draw += 1) {
      drawn.push(pickByWeights(records, weights, random));
    }
    return drawn;
  }
  if (parentsPerIteration >= scored.length) {
    return recordsOf(scored);
  }
  const remaining = scored.slice();
  const picks = [];
  while (picks.length < parentsPerIteration) {
    const winner = pickByWeights(recordsOf(remaining), weightsOf(remaining), random);
    picks.push(winner);
    // Remove the winner so it cannot be drawn a second time.
    const position = remaining.findIndex((entry) => entry.record.id === winner.id);
    if (position >= 0) {
      remaining.splice(position, 1);
    }
  }
  return picks;
}
9707
/**
 * Builds the default human-readable outcome text for a feedback entry.
 *
 * @param {object} input - Outcome context.
 * @param {object} input.assessment - Assessment of the new candidate.
 * @param {object|null|undefined} input.parentAssessment - Assessment of the
 *   parent, if there is one.
 * @returns {string} A sentence reporting the candidate score and, when a
 *   parent assessment exists, how it compares with the parent score.
 */
function defaultObservedOutcome(input) {
  const { assessment, parentAssessment } = input;
  if (assessment.isViable === false) {
    return "Inconclusive - resulting candidate was marked non-viable.";
  }
  const formatScore = (score) => (Number.isFinite(score) ? score.toFixed(3) : "n/a");
  const headline = `Candidate score: ${formatScore(assessment.score)}.`;
  if (!parentAssessment) {
    return headline;
  }
  let verdict = "Same as";
  if (assessment.score > parentAssessment.score) {
    verdict = "Improved over";
  } else if (assessment.score < parentAssessment.score) {
    verdict = "Worse than";
  }
  return `${headline} ${verdict} parent score ${formatScore(parentAssessment.score)}.`;
}
9725
/**
 * Collects the feedback entries a generator may see for a given parent.
 *
 * - `scope.mode === "none"`: nothing.
 * - `scope.mode === "ancestors"`: entries of the parent and its ancestors,
 *   nearest first, stopping once the depth exceeds `scope.maxDepth` if set.
 * - any other mode: breadth-first walk over parent and child links, out to
 *   `scope.maxDistance` hops from the parent.
 *
 * @param {object} input - Lookup context.
 * @param {object} input.scope - Visibility rule.
 * @param {object} input.parent - Record whose id starts the walk.
 * @param {Map} input.candidateById - Record lookup by id (read for `parentId`).
 * @param {Map} input.feedbackByCandidateId - Feedback entry per record id.
 * @param {Map} input.childrenByParentId - Child id list per record id.
 * @returns {object[]} Visible feedback entries in visit order.
 */
function resolveFeedbackEntries(input) {
  const { scope, parent, candidateById, feedbackByCandidateId, childrenByParentId } = input;
  switch (scope.mode) {
    case "none":
      return [];
    case "ancestors": {
      const lineage = [];
      for (
        let id = parent.id, depth = 0;
        id;
        id = candidateById.get(id)?.parentId, depth += 1
      ) {
        if (scope.maxDepth !== undefined && depth > scope.maxDepth) {
          break;
        }
        const feedback = feedbackByCandidateId.get(id);
        if (feedback) {
          lineage.push(feedback);
        }
      }
      return lineage;
    }
    default:
      break;
  }
  const radius = scope.maxDistance;
  if (radius < 0) {
    return [];
  }
  const collected = [];
  const seen = /* @__PURE__ */ new Set();
  // The frontier only grows; a cursor reads it in first-in-first-out order.
  const frontier = [{ id: parent.id, distance: 0 }];
  for (let cursor = 0; cursor < frontier.length; cursor += 1) {
    const { id, distance } = frontier[cursor];
    if (seen.has(id)) {
      continue;
    }
    seen.add(id);
    const feedback = feedbackByCandidateId.get(id);
    if (feedback) {
      collected.push(feedback);
    }
    if (distance >= radius) {
      continue;
    }
    const nextDistance = distance + 1;
    const upId = candidateById.get(id)?.parentId;
    if (upId && !seen.has(upId)) {
      frontier.push({ id: upId, distance: nextDistance });
    }
    for (const childId of childrenByParentId.get(id) ?? []) {
      if (!seen.has(childId)) {
        frontier.push({ id: childId, distance: nextDistance });
      }
    }
  }
  return collected;
}
9785
/**
 * Maps `items` through an async worker with at most `maxConcurrency` calls
 * in flight, preserving input order in the result.
 *
 * @param {Array} items - Inputs; `undefined` slots are skipped and leave a
 *   hole at the same index of the result.
 * @param {number} maxConcurrency - Desired parallelism. It is floored and
 *   raised to at least 1. A value that is not a number (NaN, undefined) falls
 *   back to 1, so every item is still processed.
 * @param {(item: *, index: number) => Promise<*>} worker - Async mapper.
 * @returns {Promise<Array>} Results positioned by input index. Rejects with
 *   the first worker error.
 */
async function mapWithConcurrency(items, maxConcurrency, worker) {
  if (items.length === 0) {
    return [];
  }
  const requested = Math.floor(maxConcurrency);
  // Math.max(1, NaN) is NaN, which would create zero runners and silently
  // return an array of holes, so NaN is handled explicitly.
  const concurrency = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const output = new Array(items.length);
  let nextIndex = 0;
  // Each runner repeatedly claims the next unprocessed index. The claim is
  // synchronous, so two runners can never take the same index.
  const runNext = async () => {
    while (nextIndex < items.length) {
      const index = nextIndex;
      nextIndex += 1;
      const item = items[index];
      if (item === void 0) {
        continue;
      }
      output[index] = await worker(item, index);
    }
  };
  const runnerCount = Math.min(concurrency, items.length);
  await Promise.all(Array.from({ length: runnerCount }, () => runNext()));
  return output;
}
9809
/**
 * Runs an iterative evolve-and-assess loop over candidates.
 *
 * The seed candidate is assessed first and recorded as iteration 0. Each
 * later iteration selects parents from the eligible archive records, samples
 * issues per parent, asks every generator for proposals, optionally verifies
 * each proposal, assesses the survivors, and appends them to the archive.
 * Proposals with a non-blank `changeSummary` also yield a feedback entry. A
 * snapshot is recorded, and passed to `options.onSnapshot` when provided,
 * after the seed assessment and after every iteration.
 *
 * The loop stops early (`stoppedEarly: true`) when no archive record is
 * eligible or when no generation task could be formed.
 *
 * @param {object} options - Run configuration.
 * @param {number} options.iterations - Maximum iterations (floored, min 0).
 * @param {number} options.parentsPerIteration - Parents drawn per iteration.
 * @param {number} [options.batchSize] - Issues sampled per parent.
 * @param {number} [options.generationConcurrency] - Parallel generator calls.
 * @param {number} [options.assessmentConcurrency] - Parallel verify/assess calls.
 * @param {object[]} options.generators - Generators exposing `name`,
 *   `generate(...)` and optionally `supportsIssueBatch`.
 * @param {*} options.seedCandidate - Starting candidate.
 * @param {Function} options.assessCandidate - Async assessor for a candidate.
 * @param {Function} [options.verifyGeneratedCandidate] - Async post-check; a
 *   falsy result rejects the proposal before assessment.
 * @param {Function} [options.random] - Random source, normalised into [0, 1).
 * @param {object} [options.parentSelection] - Optional `sharpness`,
 *   `midpoint`, `noveltyWeight` and `replace` overrides.
 * @param {object} [options.feedbackScope] - Feedback visibility rule.
 * @param {Function} [options.describeObservedOutcome] - Outcome formatter.
 * @param {number[]} [options.scorePercentiles] - Percentiles per snapshot.
 * @param {Function} [options.onSnapshot] - Called with each snapshot.
 * @returns {Promise<object>} `{ archive, feedbackEntries, postCheckRejections,
 *   snapshots, bestCandidate, totalStats, stoppedEarly }`.
 * @throws {Error} If there are no generators, `parentsPerIteration` is not a
 *   positive number, or a generator name is blank or duplicated.
 */
async function runCandidateEvolution(options) {
  const iterations = Math.max(0, Math.floor(options.iterations));
  const parentsPerIteration = Math.max(0, Math.floor(options.parentsPerIteration));
  const batchSize = Math.max(1, Math.floor(options.batchSize ?? DEFAULT_BATCH_SIZE));
  const generationConcurrency = Math.max(
    1,
    Math.floor(options.generationConcurrency ?? DEFAULT_GENERATION_CONCURRENCY)
  );
  const assessmentConcurrency = Math.max(
    1,
    Math.floor(options.assessmentConcurrency ?? DEFAULT_ASSESSMENT_CONCURRENCY)
  );
  if (options.generators.length === 0) {
    throw new Error("runCandidateEvolution requires at least one generator subagent.");
  }
  // Written as a negated `>` so that NaN (from a missing or non-numeric
  // option) is rejected too, instead of slipping past a `<= 0` comparison.
  if (!(parentsPerIteration > 0)) {
    throw new Error("parentsPerIteration must be positive.");
  }
  const generationNames = /* @__PURE__ */ new Set();
  for (const generator of options.generators) {
    if (!generator.name.trim()) {
      throw new Error("Generator names must be non-empty.");
    }
    if (generationNames.has(generator.name)) {
      throw new Error(`Duplicate generator name "${generator.name}".`);
    }
    generationNames.add(generator.name);
  }
  const random = normalizeRandom(options.random);
  const parentSelection = options.parentSelection;
  const selectionSharpness = parentSelection?.sharpness !== void 0 ? Math.max(1e-4, parentSelection.sharpness) : DEFAULT_SHARPNESS;
  const selectionMidpoint = parentSelection?.midpoint ?? DEFAULT_MIDPOINT;
  const noveltyWeight = parentSelection?.noveltyWeight !== void 0 ? Math.max(0, parentSelection.noveltyWeight) : DEFAULT_NOVELTY_WEIGHT;
  const selectionReplace = parentSelection?.replace ?? true;
  const feedbackScope = options.feedbackScope ?? DEFAULT_FEEDBACK_SCOPE;
  const describeObservedOutcome = options.describeObservedOutcome ?? defaultObservedOutcome;
  const scorePercentiles = options.scorePercentiles && options.scorePercentiles.length > 0 ? options.scorePercentiles : DEFAULT_SCORE_PERCENTILES;
  const archive = [];
  const feedbackEntries = [];
  const postCheckRejections = [];
  const snapshots = [];
  const candidateById = /* @__PURE__ */ new Map();
  const feedbackByCandidateId = /* @__PURE__ */ new Map();
  const childCountByParentId = /* @__PURE__ */ new Map();
  const childrenByParentId = /* @__PURE__ */ new Map();
  let totalStats = createEmptyStats();
  let stoppedEarly = false;

  // Highest-scoring archive record; ties keep the earliest record. The
  // archive always holds at least the seed by the time this is called.
  const findBestRecord = () => archive.reduce(
    (best, current) => current.assessment.score > best.assessment.score ? current : best
  );
  // Records a snapshot of the current archive and notifies the listener.
  const publishSnapshot = async (iteration, stats) => {
    const best = findBestRecord();
    const snapshot = {
      iteration,
      archiveSize: archive.length,
      bestCandidateId: best.id,
      bestScore: best.assessment.score,
      scorePercentiles: computeScorePercentiles(archive, scorePercentiles),
      stats,
      bestCandidate: best
    };
    snapshots.push(snapshot);
    await options.onSnapshot?.(snapshot);
  };

  const seedAssessment = await options.assessCandidate({
    candidate: options.seedCandidate,
    iteration: 0
  });
  const seedRecord = {
    id: randomId("candidate"),
    candidate: options.seedCandidate,
    assessment: seedAssessment,
    createdAtIteration: 0
  };
  archive.push(seedRecord);
  candidateById.set(seedRecord.id, seedRecord);
  const initialStats = createEmptyStats();
  initialStats.assessmentCalls += 1;
  totalStats = addStats(totalStats, initialStats);
  await publishSnapshot(0, initialStats);

  for (let iteration = 1; iteration <= iterations; iteration += 1) {
    const iterationStats = createEmptyStats();
    const eligible = archive.filter((record) => isEligibleRecord(record));
    if (eligible.length === 0) {
      stoppedEarly = true;
      await publishSnapshot(iteration, iterationStats);
      break;
    }
    const selectedParents = selectParents({
      eligible,
      archive,
      parentsPerIteration,
      sharpness: selectionSharpness,
      midpoint: selectionMidpoint,
      noveltyWeight,
      replace: selectionReplace,
      childCountByParentId,
      random
    });
    const generationTasks = [];
    for (const parent of selectedParents) {
      const issueEnvelopes = normalizeIssuesForRecord(parent.id, parent.assessment.trainableIssues);
      const sampledIssueEnvelopes = sampleIssuesByType(
        issueEnvelopes,
        batchSize,
        parent.assessment.issueTypeWeights,
        random
      );
      if (sampledIssueEnvelopes.length === 0) {
        continue;
      }
      const visibleFeedbackEntries = resolveFeedbackEntries({
        scope: feedbackScope,
        parent,
        candidateById,
        feedbackByCandidateId,
        childrenByParentId
      });
      for (const generator of options.generators) {
        // Generators without batch support receive only the first sampled issue.
        const issuesForGenerator = generator.supportsIssueBatch ? sampledIssueEnvelopes : sampledIssueEnvelopes.slice(0, 1);
        if (issuesForGenerator.length === 0) {
          continue;
        }
        generationTasks.push({
          parent,
          generator,
          sampledIssueEnvelopes: issuesForGenerator,
          feedbackEntries: visibleFeedbackEntries
        });
        iterationStats.generationCalls += 1;
        iterationStats.issuesSupplied += issuesForGenerator.length;
        iterationStats.feedbackEntriesSupplied += visibleFeedbackEntries.length;
      }
    }
    if (generationTasks.length === 0) {
      stoppedEarly = true;
      await publishSnapshot(iteration, iterationStats);
      break;
    }
    const generatedOutputs = await mapWithConcurrency(
      generationTasks,
      generationConcurrency,
      async (task) => {
        const proposals = await task.generator.generate({
          parent: task.parent,
          sampledIssues: task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          feedbackEntries: task.feedbackEntries,
          iteration
        });
        return { task, proposals };
      }
    );
    const pendingProposals = [];
    for (const output of generatedOutputs) {
      iterationStats.proposalsGenerated += output.proposals.length;
      for (const proposal of output.proposals) {
        pendingProposals.push({
          proposal,
          parent: output.task.parent,
          generatorName: output.task.generator.name,
          sampledIssues: output.task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          sampledIssueIds: output.task.sampledIssueEnvelopes.map((envelope) => envelope.id),
          sampledFeedbackEntryIds: output.task.feedbackEntries.map((entry) => entry.id)
        });
      }
    }
    const evaluatedProposals = await mapWithConcurrency(
      pendingProposals,
      assessmentConcurrency,
      async (pending) => {
        if (options.verifyGeneratedCandidate) {
          iterationStats.postCheckCalls += 1;
          const passes = await options.verifyGeneratedCandidate({
            proposal: pending.proposal,
            parent: pending.parent,
            generatorName: pending.generatorName,
            sampledIssues: pending.sampledIssues,
            iteration
          });
          if (!passes) {
            postCheckRejections.push({
              id: randomId("rejected"),
              candidate: pending.proposal.candidate,
              parentId: pending.parent.id,
              generatorName: pending.generatorName,
              iteration,
              sampledIssueIds: pending.sampledIssueIds,
              changeSummary: pending.proposal.changeSummary
            });
            // A null result marks the proposal as rejected; it is skipped below.
            return null;
          }
        }
        iterationStats.proposalsAfterPostCheck += 1;
        iterationStats.assessmentCalls += 1;
        const assessment = await options.assessCandidate({
          candidate: pending.proposal.candidate,
          iteration,
          parent: pending.parent,
          generatorName: pending.generatorName,
          sampledIssues: pending.sampledIssues
        });
        return {
          pending,
          assessment
        };
      }
    );
    const acceptedRecords = [];
    for (const evaluated of evaluatedProposals) {
      if (!evaluated) {
        continue;
      }
      const { pending, assessment } = evaluated;
      acceptedRecords.push({
        id: randomId("candidate"),
        candidate: pending.proposal.candidate,
        assessment,
        createdAtIteration: iteration,
        parentId: pending.parent.id,
        generatorName: pending.generatorName,
        sampledIssueIds: pending.sampledIssueIds,
        sampledFeedbackEntryIds: pending.sampledFeedbackEntryIds,
        changeSummary: pending.proposal.changeSummary
      });
    }
    for (const record of acceptedRecords) {
      archive.push(record);
      candidateById.set(record.id, record);
      if (record.parentId) {
        const nextCount = (childCountByParentId.get(record.parentId) ?? 0) + 1;
        childCountByParentId.set(record.parentId, nextCount);
        const existingChildren = childrenByParentId.get(record.parentId) ?? [];
        existingChildren.push(record.id);
        childrenByParentId.set(record.parentId, existingChildren);
      }
      if (record.changeSummary && record.changeSummary.trim().length > 0) {
        const parentAssessment = record.parentId ? candidateById.get(record.parentId)?.assessment ?? null : null;
        const feedbackEntry = {
          id: randomId("feedback"),
          candidateId: record.id,
          attemptedChange: record.changeSummary,
          observedOutcome: describeObservedOutcome({
            assessment: record.assessment,
            parentAssessment
          })
        };
        feedbackEntries.push(feedbackEntry);
        feedbackByCandidateId.set(record.id, feedbackEntry);
      }
    }
    totalStats = addStats(totalStats, iterationStats);
    await publishSnapshot(iteration, iterationStats);
  }
  const bestCandidate = findBestRecord();
  return {
    archive,
    feedbackEntries,
    postCheckRejections,
    snapshots,
    bestCandidate,
    totalStats,
    stoppedEarly
  };
}
9450
10107
  // Annotate the CommonJS export names for ESM import in node:
9451
10108
  0 && (module.exports = {
9452
10109
  CHATGPT_MODEL_IDS,
@@ -9520,6 +10177,7 @@ function createAgentTelemetryEmitter(params) {
9520
10177
  resolveFilesystemToolProfile,
9521
10178
  resolveFireworksModelId,
9522
10179
  runAgentLoop,
10180
+ runCandidateEvolution,
9523
10181
  runToolLoop,
9524
10182
  sanitisePartForLogging,
9525
10183
  streamAgentLoop,