@ljoukov/llm 3.0.8 → 3.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -121,6 +121,15 @@ var GEMINI_2_5_PRO_PRICING = {
121
121
  outputRateLow: 10 / 1e6,
122
122
  outputRateHigh: 15 / 1e6
123
123
  };
124
/**
 * Pricing table for Gemini 2.5 Flash models.
 *
 * Every rate is written as an amount divided by 1e6 (presumably a price per
 * million tokens converted to a per-token rate; confirm the currency/unit
 * against the pricing consumer). The "low" and "high" tiers on either side of
 * `threshold` carry identical rates for this model.
 */
var GEMINI_2_5_FLASH_PRICING = (() => {
  const perMillion = (amount) => amount / 1e6;
  const inputRate = perMillion(0.3);
  const cachedRate = perMillion(0.03);
  const outputRate = perMillion(2.5);
  return {
    threshold: 200000,
    inputRateLow: inputRate,
    inputRateHigh: inputRate,
    cachedRateLow: cachedRate,
    cachedRateHigh: cachedRate,
    outputRateLow: outputRate,
    outputRateHigh: outputRate
  };
})();
124
133
  var GEMINI_IMAGE_PREVIEW_PRICING = {
125
134
  inputRate: 2 / 1e6,
126
135
  cachedRate: 0.2 / 1e6,
@@ -136,6 +145,9 @@ function getGeminiProPricing(modelId) {
136
145
  if (modelId.includes("gemini-2.5-pro")) {
137
146
  return GEMINI_2_5_PRO_PRICING;
138
147
  }
148
+ if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest")) {
149
+ return GEMINI_2_5_FLASH_PRICING;
150
+ }
139
151
  if (modelId.includes("gemini-3-pro") || modelId.includes("gemini-3.1-pro")) {
140
152
  return GEMINI_3_PRO_PREVIEW_PRICING;
141
153
  }
@@ -2100,7 +2112,10 @@ var GEMINI_TEXT_MODEL_IDS = [
2100
2112
  "gemini-flash-latest",
2101
2113
  "gemini-flash-lite-latest"
2102
2114
  ];
2103
- var GEMINI_IMAGE_MODEL_IDS = ["gemini-3-pro-image-preview"];
2115
+ var GEMINI_IMAGE_MODEL_IDS = [
2116
+ "gemini-3-pro-image-preview",
2117
+ "gemini-3.1-flash-image-preview"
2118
+ ];
2104
2119
  var GEMINI_MODEL_IDS = [...GEMINI_TEXT_MODEL_IDS, ...GEMINI_IMAGE_MODEL_IDS];
2105
2120
  function isGeminiModelId(value) {
2106
2121
  return GEMINI_MODEL_IDS.includes(value);
@@ -2573,12 +2588,7 @@ async function runOpenAiCall(fn, modelId, runOptions) {
2573
2588
  }
2574
2589
 
2575
2590
  // src/openai/models.ts
2576
- var OPENAI_MODEL_IDS = [
2577
- "gpt-5.3-codex",
2578
- "gpt-5.3-codex-spark",
2579
- "gpt-5.2",
2580
- "gpt-5.1-codex-mini"
2581
- ];
2591
+ var OPENAI_MODEL_IDS = ["gpt-5.3-codex", "gpt-5.2", "gpt-5.1-codex-mini"];
2582
2592
  function isOpenAiModelId(value) {
2583
2593
  return OPENAI_MODEL_IDS.includes(value);
2584
2594
  }
@@ -4250,6 +4260,9 @@ function extractFireworksToolCalls(message) {
4250
4260
  return calls;
4251
4261
  }
4252
4262
  function resolveGeminiThinkingConfig(modelId) {
4263
+ if (isGeminiImageModelId(modelId)) {
4264
+ return void 0;
4265
+ }
4253
4266
  switch (modelId) {
4254
4267
  case "gemini-3-pro-preview":
4255
4268
  case "gemini-3.1-pro-preview":
@@ -4505,9 +4518,10 @@ async function runTextCall(params) {
4505
4518
  }, modelForProvider);
4506
4519
  } else {
4507
4520
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
4521
+ const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider);
4508
4522
  const config = {
4509
4523
  maxOutputTokens: 32e3,
4510
- thinkingConfig: resolveGeminiThinkingConfig(modelForProvider),
4524
+ ...thinkingConfig ? { thinkingConfig } : {},
4511
4525
  ...request.responseMimeType ? { responseMimeType: request.responseMimeType } : {},
4512
4526
  ...request.responseJsonSchema ? { responseJsonSchema: request.responseJsonSchema } : {},
4513
4527
  ...request.responseModalities ? { responseModalities: Array.from(request.responseModalities) } : {},
@@ -5735,6 +5749,7 @@ async function runToolLoop(request) {
5735
5749
  firstModelEventAtMs = Date.now();
5736
5750
  }
5737
5751
  };
5752
+ const thinkingConfig = resolveGeminiThinkingConfig(request.model);
5738
5753
  const config = {
5739
5754
  maxOutputTokens: 32e3,
5740
5755
  tools: geminiTools,
@@ -5743,7 +5758,7 @@ async function runToolLoop(request) {
5743
5758
  mode: FunctionCallingConfigMode.VALIDATED
5744
5759
  }
5745
5760
  },
5746
- thinkingConfig: resolveGeminiThinkingConfig(request.model)
5761
+ ...thinkingConfig ? { thinkingConfig } : {}
5747
5762
  };
5748
5763
  const onEvent = request.onEvent;
5749
5764
  const response = await runGeminiCall(
@@ -9335,6 +9350,659 @@ function createAgentTelemetryEmitter(params) {
9335
9350
  });
9336
9351
  };
9337
9352
  }
9353
+
9354
+ // src/agent/candidateEvolution.ts
9355
+ import { randomBytes as randomBytes4 } from "crypto";
9356
// Fallbacks used by runCandidateEvolution when the caller omits the matching option.

// Issues sampled per selected parent (options.batchSize).
var DEFAULT_BATCH_SIZE = 1;
// Upper bound on concurrent generator calls (options.generationConcurrency).
var DEFAULT_GENERATION_CONCURRENCY = 8;
// Upper bound on concurrent verify/assess calls (options.assessmentConcurrency).
var DEFAULT_ASSESSMENT_CONCURRENCY = 8;
// Steepness of the sigmoid used to weight parents (parentSelection.sharpness).
var DEFAULT_SHARPNESS = 10;
// Penalty factor per existing child when weighting parents (parentSelection.noveltyWeight).
var DEFAULT_NOVELTY_WEIGHT = 1;
// Sigmoid midpoint: the 75th percentile of archive scores (parentSelection.midpoint).
var DEFAULT_MIDPOINT = {
  mode: "percentile",
  percentile: 75
};
// Which feedback entries a generator sees by default (options.feedbackScope).
var DEFAULT_FEEDBACK_SCOPE = {
  mode: "ancestors"
};
// Percentiles reported in every snapshot (options.scorePercentiles).
var DEFAULT_SCORE_PERCENTILES = [
  0,
  25,
  50,
  75,
  90,
  95,
  100
];
9364
/**
 * Creates a new stats record with every counter set to zero.
 *
 * @returns {object} A fresh object; callers may mutate it freely.
 */
function createEmptyStats() {
  const counterNames = [
    "generationCalls",
    "issuesSupplied",
    "proposalsGenerated",
    "proposalsAfterPostCheck",
    "assessmentCalls",
    "postCheckCalls",
    "feedbackEntriesSupplied"
  ];
  const stats = {};
  for (const name of counterNames) {
    stats[name] = 0;
  }
  return stats;
}
9375
/**
 * Adds two stats records counter by counter.
 *
 * @param {object} left - First stats record.
 * @param {object} right - Second stats record.
 * @returns {object} A new record holding the per-counter sums; inputs are not modified.
 */
function addStats(left, right) {
  const total = (counter) => left[counter] + right[counter];
  return {
    generationCalls: total("generationCalls"),
    issuesSupplied: total("issuesSupplied"),
    proposalsGenerated: total("proposalsGenerated"),
    proposalsAfterPostCheck: total("proposalsAfterPostCheck"),
    assessmentCalls: total("assessmentCalls"),
    postCheckCalls: total("postCheckCalls"),
    feedbackEntriesSupplied: total("feedbackEntriesSupplied")
  };
}
9386
/**
 * Builds an identifier of the form `<prefix>_<16 hex chars>` from 8 random bytes.
 *
 * @param {string} prefix - Label placed before the underscore.
 * @returns {string} The generated identifier.
 */
function randomId(prefix) {
  const suffix = randomBytes4(8).toString("hex");
  return `${prefix}_${suffix}`;
}
9389
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 * No coercion is performed: numeric strings, NaN and ±Infinity all yield `fallback`.
 *
 * @param {unknown} value - Value to check.
 * @param {*} fallback - Returned when `value` is not a finite number.
 * @returns {*} `value` or `fallback`.
 */
function toFiniteNumber(value, fallback) {
  return Number.isFinite(value) ? value : fallback;
}
9395
/**
 * Wraps an optional random source so it always yields a number in [0, 1).
 *
 * Without a source, Math.random is used. With one, non-finite results become 0,
 * values at or below 0 become 0, and values at or above 1 become 0.999999999999.
 *
 * @param {(() => number) | undefined} random - Optional caller-supplied source.
 * @returns {() => number} The sanitized random function.
 */
function normalizeRandom(random) {
  if (!random) {
    return () => Math.random();
  }
  const upperBound = 0.999999999999;
  return () => {
    const sample = toFiniteNumber(random(), 0);
    if (sample <= 0) {
      return 0;
    }
    return sample >= 1 ? upperBound : sample;
  };
}
9410
/**
 * Logistic transform of `score` centred on `midpoint`.
 *
 * @param {number} score - Raw score.
 * @param {number} midpoint - Score that maps to exactly 0.5.
 * @param {number} sharpness - Multiplier applied to the distance from the midpoint.
 * @returns {number} A value between 0 and 1.
 */
function sigmoidScore(score, midpoint, sharpness) {
  const exponent = -sharpness * (score - midpoint);
  return 1 / (1 + Math.exp(exponent));
}
9413
/**
 * Computes a percentile of an ascending-sorted list using linear interpolation
 * between the two nearest ranks.
 *
 * @param {number[]} sortedValues - Values already sorted ascending.
 * @param {number} percentile - Requested percentile; clamped to [0, 100].
 * @returns {number} The interpolated value, or 0 for an empty list.
 */
function computePercentile(sortedValues, percentile) {
  const count = sortedValues.length;
  if (count === 0) {
    return 0;
  }
  if (count === 1) {
    return sortedValues[0] ?? 0;
  }
  const clamped = Math.min(100, Math.max(0, percentile));
  // Fractional rank within [0, count - 1].
  const rank = (count - 1) * (clamped / 100);
  const lowerIndex = Math.floor(rank);
  const upperIndex = Math.ceil(rank);
  const lowerValue = sortedValues[lowerIndex] ?? 0;
  if (lowerIndex === upperIndex) {
    return lowerValue;
  }
  const upperValue = sortedValues[upperIndex] ?? lowerValue;
  const fraction = rank - lowerIndex;
  return lowerValue * (1 - fraction) + upperValue * fraction;
}
9432
/**
 * Maps each requested percentile to the corresponding value of the records'
 * finite assessment scores.
 *
 * @param {Array<{assessment: {score: number}}>} records - Records to summarise.
 * @param {Iterable<number>} percentiles - Percentiles to compute.
 * @returns {Object<number, number>} Percentile → score value, keyed by the percentile.
 */
function computeScorePercentiles(records, percentiles) {
  const ascending = (a, b) => a - b;
  const allScores = records.map((record) => record.assessment.score);
  // Non-finite scores (NaN, ±Infinity, non-numbers) are excluded from the distribution.
  const finiteScores = allScores.filter((score) => Number.isFinite(score));
  finiteScores.sort(ascending);
  const byPercentile = {};
  for (const percentile of percentiles) {
    byPercentile[percentile] = computePercentile(finiteScores, percentile);
  }
  return byPercentile;
}
9440
/**
 * Picks one element of `values` at random, with probability proportional to
 * the matching entry of `weights`.
 *
 * Weights that are non-finite, zero or negative count as zero. If no weight is
 * usable, the pick is uniform over all values. Entries with zero weight are
 * never returned while at least one usable weight exists, including when
 * `random()` yields exactly 0 or when floating-point rounding leaves the
 * threshold positive after the last entry.
 *
 * @param {Array} values - Candidates to pick from; must be non-empty.
 * @param {number[]} weights - One weight per value.
 * @param {() => number} random - Source of numbers in [0, 1).
 * @returns {*} The selected value.
 * @throws {Error} When `values` is empty, lengths differ, or the selected slot is undefined.
 */
function pickByWeights(values, weights, random) {
  if (values.length === 0) {
    throw new Error("Cannot pick from an empty set.");
  }
  if (values.length !== weights.length) {
    throw new Error("values and weights must have the same length.");
  }
  // Array.from visits holes as undefined, so sparse weight arrays normalise to 0.
  const usableWeights = Array.from(
    weights,
    (weight) => Number.isFinite(weight) && weight > 0 ? weight : 0
  );
  let totalWeight = 0;
  for (const weight of usableWeights) {
    totalWeight += weight;
  }
  if (totalWeight <= 0) {
    const index = Math.min(values.length - 1, Math.floor(random() * values.length));
    const fallbackValue = values[index];
    if (fallbackValue === void 0) {
      throw new Error("Unexpected empty value during uniform fallback pick.");
    }
    return fallbackValue;
  }
  let threshold = random() * totalWeight;
  let lastWeightedIndex = -1;
  for (let index = 0; index < values.length; index += 1) {
    const weight = usableWeights[index];
    if (weight === 0) {
      // Skip zero-weight entries so a threshold of exactly 0 cannot select them.
      continue;
    }
    lastWeightedIndex = index;
    threshold -= weight;
    if (threshold <= 0) {
      const value = values[index];
      if (value === void 0) {
        break;
      }
      return value;
    }
  }
  // Rounding fallback: prefer the last entry that actually carried weight.
  const lastWeighted = lastWeightedIndex >= 0 ? values[lastWeightedIndex] : void 0;
  if (lastWeighted !== void 0) {
    return lastWeighted;
  }
  const last = values[values.length - 1];
  if (last === void 0) {
    throw new Error("Unexpected missing final value during weighted pick.");
  }
  return last;
}
9479
/**
 * Draws up to `k` distinct elements from `values` uniformly at random.
 *
 * When `k` is at least the number of values, a shallow copy in the original
 * order is returned (no shuffling). The input array is never modified.
 *
 * @param {Array} values - Source elements.
 * @param {number} k - Number of elements wanted.
 * @param {() => number} random - Source of numbers in [0, 1).
 * @returns {Array} The sampled elements in draw order.
 */
function sampleWithoutReplacement(values, k, random) {
  if (values.length === 0 || k <= 0) {
    return [];
  }
  const remaining = [...values];
  if (k >= values.length) {
    return remaining;
  }
  const drawn = [];
  while (drawn.length < k) {
    const at = Math.min(remaining.length - 1, Math.floor(random() * remaining.length));
    const [item] = remaining.splice(at, 1);
    if (item === void 0) {
      break;
    }
    drawn.push(item);
  }
  return drawn;
}
9498
/**
 * A record can serve as a parent when it is not explicitly marked non-viable
 * and its assessment lists at least one trainable issue.
 *
 * @param {{assessment: {isViable?: boolean, trainableIssues: Array}}} record
 * @returns {boolean} True when the record is eligible.
 */
function isEligibleRecord(record) {
  const { assessment } = record;
  if (assessment.isViable === false) {
    return false;
  }
  return assessment.trainableIssues.length > 0;
}
9501
/**
 * Returns the issue's trimmed `issueType`, or "default" when it is missing or blank.
 *
 * @param {{issueType?: string}} issue
 * @returns {string} The resolved issue type.
 */
function resolveIssueType(issue) {
  const trimmed = issue.issueType?.trim();
  return trimmed ? trimmed : "default";
}
9508
/**
 * Returns the issue's trimmed `issueId`, or a synthetic id of the form
 * `<parentId>:issue:<index>` when none is provided.
 *
 * @param {{issueId?: string}} issue
 * @param {string} parentId - Id of the record that owns the issue.
 * @param {number} index - Position of the issue within that record.
 * @returns {string} The resolved issue id.
 */
function resolveIssueId(issue, parentId, index) {
  const trimmed = issue.issueId?.trim();
  const hasExplicitId = Boolean(trimmed) && trimmed.length > 0;
  return hasExplicitId ? trimmed : `${parentId}:issue:${index}`;
}
9515
/**
 * Wraps each issue in an envelope carrying a resolved id and issue type.
 *
 * @param {string} parentId - Id of the record that owns the issues.
 * @param {Array<object>} issues - Raw issues from the record's assessment.
 * @returns {Array<{id: string, issueType: string, issue: object}>} One envelope per issue, in order.
 */
function normalizeIssuesForRecord(parentId, issues) {
  return issues.map((issue, position) => {
    const id = resolveIssueId(issue, parentId, position);
    const issueType = resolveIssueType(issue);
    return { id, issueType, issue };
  });
}
9522
/**
 * Chooses one issue type at random, weighted by how many issues have that type
 * times an optional per-type multiplier, then samples up to `batchSize` issues
 * of that type without replacement.
 *
 * @param {Array<{issueType: string}>} issues - Issue envelopes to sample from.
 * @param {number} batchSize - Maximum number of issues to return.
 * @param {Object<string, number> | undefined} typeWeights - Optional multipliers; values that
 *   are not finite and positive are treated as 1.
 * @param {() => number} random - Source of numbers in [0, 1).
 * @returns {Array} Sampled envelopes, all sharing one issue type; empty when nothing can be sampled.
 */
function sampleIssuesByType(issues, batchSize, typeWeights, random) {
  if (batchSize <= 0 || issues.length === 0) {
    return [];
  }
  const countByType = new Map();
  for (const { issueType } of issues) {
    countByType.set(issueType, (countByType.get(issueType) ?? 0) + 1);
  }
  const issueTypes = Array.from(countByType.keys());
  const typeScores = issueTypes.map((type) => {
    const configured = typeWeights?.[type] ?? 1;
    const isUsable = Number.isFinite(configured) && configured > 0;
    return (countByType.get(type) ?? 0) * (isUsable ? configured : 1);
  });
  const chosenType = pickByWeights(issueTypes, typeScores, random);
  const matching = issues.filter((issue) => issue.issueType === chosenType);
  return sampleWithoutReplacement(matching, Math.min(batchSize, matching.length), random);
}
9542
/**
 * Resolves the sigmoid midpoint used for parent weighting.
 *
 * In "fixed" mode the configured value is returned as is. In any other mode the
 * midpoint is the configured percentile of the archive's finite scores.
 *
 * @param {{mode: string, value?: number, percentile?: number}} midpoint - Midpoint configuration.
 * @param {Array<{assessment: {score: number}}>} archive - All records seen so far.
 * @returns {number} The midpoint score.
 */
function resolveMidpoint(midpoint, archive) {
  if (midpoint.mode === "fixed") {
    return midpoint.value;
  }
  const ascending = (a, b) => a - b;
  const allScores = archive.map((record) => record.assessment.score);
  const finiteScores = allScores.filter((score) => Number.isFinite(score));
  finiteScores.sort(ascending);
  return computePercentile(finiteScores, midpoint.percentile);
}
9549
/**
 * Selects parent records for the next iteration.
 *
 * Each eligible record is weighted by a sigmoid of its score (centred on the
 * resolved midpoint) multiplied by a novelty factor `1 / (1 + noveltyWeight * childCount)`,
 * so records that already have many children are picked less often.
 *
 * @param {object} input
 * @param {Array} input.eligible - Records that may be chosen.
 * @param {Array} input.archive - All records; used to resolve a percentile midpoint.
 * @param {number} input.parentsPerIteration - Number of parents wanted.
 * @param {number} input.sharpness - Sigmoid sharpness.
 * @param {object} input.midpoint - Midpoint configuration.
 * @param {number} input.noveltyWeight - Penalty per existing child.
 * @param {boolean} input.replace - When true, the same record may be drawn more than once.
 * @param {Map<string, number>} input.childCountByParentId - Children recorded per parent id.
 * @param {() => number} input.random - Source of numbers in [0, 1).
 * @returns {Array} The selected records. Without replacement, asking for at least as many
 *   parents as there are eligible records returns all of them in their original order.
 */
function selectParents(input) {
  const {
    eligible,
    archive,
    parentsPerIteration: wanted,
    sharpness,
    midpoint,
    noveltyWeight,
    replace,
    childCountByParentId,
    random
  } = input;
  if (wanted <= 0 || eligible.length === 0) {
    return [];
  }
  const midpointScore = resolveMidpoint(midpoint, archive);
  const candidates = eligible.map((record) => {
    const performance = sigmoidScore(record.assessment.score, midpointScore, sharpness);
    const existingChildren = childCountByParentId.get(record.id) ?? 0;
    const novelty = 1 / (1 + noveltyWeight * existingChildren);
    return { record, weight: performance * novelty };
  });
  const draw = (entries) => pickByWeights(
    entries.map((entry) => entry.record),
    entries.map((entry) => entry.weight),
    random
  );
  if (replace) {
    const picks = [];
    for (let n = 0; n < wanted; n += 1) {
      picks.push(draw(candidates));
    }
    return picks;
  }
  if (wanted >= candidates.length) {
    return candidates.map((entry) => entry.record);
  }
  const remaining = [...candidates];
  const picks = [];
  for (let n = 0; n < wanted; n += 1) {
    const chosen = draw(remaining);
    picks.push(chosen);
    // Remove the chosen record so it cannot be drawn again.
    const at = remaining.findIndex((entry) => entry.record.id === chosen.id);
    if (at >= 0) {
      remaining.splice(at, 1);
    }
  }
  return picks;
}
9606
/**
 * Default textual summary of how a candidate fared, optionally relative to its parent.
 *
 * Scores are shown with three decimals, or "n/a" when not finite. The
 * improved/worse/same verdict compares the raw (unrounded) scores.
 *
 * @param {{assessment: {score: number, isViable?: boolean}, parentAssessment?: {score: number} | null}} input
 * @returns {string} A one-line outcome description.
 */
function defaultObservedOutcome(input) {
  const { assessment, parentAssessment } = input;
  if (assessment.isViable === false) {
    return "Inconclusive - resulting candidate was marked non-viable.";
  }
  const formatScore = (score) => Number.isFinite(score) ? score.toFixed(3) : "n/a";
  const roundedScore = formatScore(assessment.score);
  if (!parentAssessment) {
    return `Candidate score: ${roundedScore}.`;
  }
  const parentScore = formatScore(parentAssessment.score);
  const improved = assessment.score > parentAssessment.score;
  const regressed = assessment.score < parentAssessment.score;
  if (improved) {
    return `Candidate score: ${roundedScore}. Improved over parent score ${parentScore}.`;
  }
  return regressed
    ? `Candidate score: ${roundedScore}. Worse than parent score ${parentScore}.`
    : `Candidate score: ${roundedScore}. Same as parent score ${parentScore}.`;
}
9624
/**
 * Collects the feedback entries that should be visible when generating from `parent`.
 *
 * - scope.mode "none": nothing.
 * - scope.mode "ancestors": the parent's own entry followed by those of its
 *   ancestors, walking `parentId` links; stops once the depth exceeds
 *   `scope.maxDepth` when that is set (depth 0 is the parent itself).
 * - any other mode: breadth-first walk over both parent and child links, up to
 *   `scope.maxDistance` hops from the parent.
 *
 * @param {object} input
 * @param {{mode: string, maxDepth?: number, maxDistance?: number}} input.scope
 * @param {{id: string}} input.parent - Record being used as the generation parent.
 * @param {Map<string, {parentId?: string}>} input.candidateById
 * @param {Map<string, object>} input.feedbackByCandidateId
 * @param {Map<string, string[]>} input.childrenByParentId
 * @returns {Array<object>} Feedback entries in visit order.
 */
function resolveFeedbackEntries(input) {
  const { scope, parent, candidateById, feedbackByCandidateId, childrenByParentId } = input;
  if (scope.mode === "none") {
    return [];
  }
  if (scope.mode === "ancestors") {
    const lineage = [];
    for (let id = parent.id, depth = 0; id; depth += 1) {
      if (scope.maxDepth !== void 0 && depth > scope.maxDepth) {
        break;
      }
      const entry = feedbackByCandidateId.get(id);
      if (entry) {
        lineage.push(entry);
      }
      id = candidateById.get(id)?.parentId;
    }
    return lineage;
  }
  const maxDistance = scope.maxDistance;
  if (maxDistance < 0) {
    return [];
  }
  const found = [];
  const seen = new Set();
  // The frontier is consumed front to back; new nodes are appended while iterating.
  const frontier = [{ id: parent.id, distance: 0 }];
  for (let head = 0; head < frontier.length; head += 1) {
    const { id, distance } = frontier[head];
    if (seen.has(id)) {
      continue;
    }
    seen.add(id);
    const entry = feedbackByCandidateId.get(id);
    if (entry) {
      found.push(entry);
    }
    if (distance >= maxDistance) {
      continue;
    }
    const nextDistance = distance + 1;
    const ancestorId = candidateById.get(id)?.parentId;
    if (ancestorId && !seen.has(ancestorId)) {
      frontier.push({ id: ancestorId, distance: nextDistance });
    }
    for (const childId of childrenByParentId.get(id) ?? []) {
      if (!seen.has(childId)) {
        frontier.push({ id: childId, distance: nextDistance });
      }
    }
  }
  return found;
}
9684
/**
 * Maps `items` through an async `worker`, running at most `maxConcurrency`
 * calls at once, and returns the results in input order.
 *
 * A NaN `maxConcurrency` is treated as 1; previously it produced zero runners
 * and the function resolved to an array of holes without calling the worker.
 * Values below 1 are raised to 1 and fractional values are floored.
 *
 * Items that are `undefined` are skipped: the worker is not called for them and
 * their output slot is left unset.
 *
 * @param {Array} items - Inputs to process.
 * @param {number} maxConcurrency - Maximum number of worker calls in flight.
 * @param {(item: *, index: number) => Promise<*>} worker - Async mapper.
 * @returns {Promise<Array>} Results positioned by input index. Rejects with the first
 *   worker error (Promise.all semantics).
 */
async function mapWithConcurrency(items, maxConcurrency, worker) {
  if (items.length === 0) {
    return [];
  }
  const requested = Math.floor(maxConcurrency);
  // Math.max(1, NaN) is NaN, so guard explicitly before clamping.
  const concurrency = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const output = new Array(items.length);
  let nextIndex = 0;
  const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
    while (true) {
      // Claiming an index is synchronous, so runners never process the same item.
      const index = nextIndex;
      nextIndex += 1;
      if (index >= items.length) {
        break;
      }
      const item = items[index];
      if (item === void 0) {
        continue;
      }
      output[index] = await worker(item, index);
    }
  });
  await Promise.all(runners);
  return output;
}
9708
/**
 * Runs an iterative candidate-improvement loop.
 *
 * The seed candidate is assessed first. Each iteration then:
 *   1. selects parents among archive records that are viable and still have trainable issues,
 *   2. samples issues from each parent and asks every generator for proposals,
 *   3. optionally filters proposals through `verifyGeneratedCandidate`,
 *   4. assesses the surviving proposals and appends them to the archive,
 *   5. records a feedback entry for every accepted record that has a non-blank change summary,
 *   6. emits a snapshot (also passed to `onSnapshot` when provided).
 * The loop stops early when no record is eligible or no generation task can be built.
 *
 * Numeric options are floored and clamped to a minimum. A NaN value collapses to
 * that minimum: iterations → 0, parentsPerIteration → 0 (rejected by validation),
 * batchSize and both concurrency limits → 1.
 *
 * @param {object} options
 * @param {*} options.seedCandidate - Starting candidate.
 * @param {Function} options.assessCandidate - Async scorer; its result is read for `score`,
 *   `isViable`, `trainableIssues` and `issueTypeWeights`.
 * @param {Array<{name: string, supportsIssueBatch?: boolean, generate: Function}>} options.generators
 *   Generators with unique, non-blank names. A generator without `supportsIssueBatch` receives
 *   only the first sampled issue.
 * @param {number} options.iterations - Maximum number of iterations.
 * @param {number} options.parentsPerIteration - Parents selected per iteration; must be positive.
 * @param {number} [options.batchSize] - Issues sampled per parent.
 * @param {number} [options.generationConcurrency] - Concurrent generator calls.
 * @param {number} [options.assessmentConcurrency] - Concurrent verify/assess calls.
 * @param {() => number} [options.random] - Random source; sanitized to [0, 1).
 * @param {object} [options.parentSelection] - `sharpness`, `midpoint`, `noveltyWeight`, `replace`.
 * @param {object} [options.feedbackScope] - Which feedback entries generators see.
 * @param {Function} [options.describeObservedOutcome] - Builds the outcome text for feedback entries.
 * @param {number[]} [options.scorePercentiles] - Percentiles reported in snapshots.
 * @param {Function} [options.verifyGeneratedCandidate] - Async post-check; falsy results reject the proposal.
 * @param {Function} [options.onSnapshot] - Called (and awaited) with every snapshot.
 * @returns {Promise<{archive: Array, feedbackEntries: Array, postCheckRejections: Array,
 *   snapshots: Array, bestCandidate: object, totalStats: object, stoppedEarly: boolean}>}
 * @throws {Error} When no generator is supplied, a generator name is blank or duplicated,
 *   or parentsPerIteration is not a positive number.
 */
async function runCandidateEvolution(options) {
  // Floors and clamps a numeric option; NaN (which Math.max would propagate) becomes the minimum.
  const toCount = (value, minimum) => {
    const floored = Math.floor(value);
    return Number.isNaN(floored) ? minimum : Math.max(minimum, floored);
  };
  const iterations = toCount(options.iterations, 0);
  const parentsPerIteration = toCount(options.parentsPerIteration, 0);
  const batchSize = toCount(options.batchSize ?? DEFAULT_BATCH_SIZE, 1);
  const generationConcurrency = toCount(
    options.generationConcurrency ?? DEFAULT_GENERATION_CONCURRENCY,
    1
  );
  const assessmentConcurrency = toCount(
    options.assessmentConcurrency ?? DEFAULT_ASSESSMENT_CONCURRENCY,
    1
  );
  if (options.generators.length === 0) {
    throw new Error("runCandidateEvolution requires at least one generator subagent.");
  }
  if (parentsPerIteration <= 0) {
    throw new Error("parentsPerIteration must be positive.");
  }
  const generationNames = new Set();
  for (const generator of options.generators) {
    if (!generator.name.trim()) {
      throw new Error("Generator names must be non-empty.");
    }
    if (generationNames.has(generator.name)) {
      throw new Error(`Duplicate generator name "${generator.name}".`);
    }
    generationNames.add(generator.name);
  }
  const random = normalizeRandom(options.random);
  const parentSelection = options.parentSelection;
  const selectionSharpness = parentSelection?.sharpness !== void 0 ? Math.max(1e-4, parentSelection.sharpness) : DEFAULT_SHARPNESS;
  const selectionMidpoint = parentSelection?.midpoint ?? DEFAULT_MIDPOINT;
  const noveltyWeight = parentSelection?.noveltyWeight !== void 0 ? Math.max(0, parentSelection.noveltyWeight) : DEFAULT_NOVELTY_WEIGHT;
  const selectionReplace = parentSelection?.replace ?? true;
  const feedbackScope = options.feedbackScope ?? DEFAULT_FEEDBACK_SCOPE;
  const describeObservedOutcome = options.describeObservedOutcome ?? defaultObservedOutcome;
  const scorePercentiles = options.scorePercentiles && options.scorePercentiles.length > 0 ? options.scorePercentiles : DEFAULT_SCORE_PERCENTILES;
  const archive = [];
  const feedbackEntries = [];
  const postCheckRejections = [];
  const snapshots = [];
  const candidateById = new Map();
  const feedbackByCandidateId = new Map();
  const childCountByParentId = new Map();
  const childrenByParentId = new Map();
  let totalStats = createEmptyStats();
  let stoppedEarly = false;

  // Highest-scoring record in the archive; on ties the earliest record wins.
  const findBestRecord = () => archive.reduce(
    (best, current) => current.assessment.score > best.assessment.score ? current : best
  );

  // Builds a snapshot of the current archive, stores it and notifies the caller.
  const publishSnapshot = async (iteration, stats) => {
    const best = findBestRecord();
    const snapshot = {
      iteration,
      archiveSize: archive.length,
      bestCandidateId: best.id,
      bestScore: best.assessment.score,
      scorePercentiles: computeScorePercentiles(archive, scorePercentiles),
      stats,
      bestCandidate: best
    };
    snapshots.push(snapshot);
    await options.onSnapshot?.(snapshot);
  };

  const seedAssessment = await options.assessCandidate({
    candidate: options.seedCandidate,
    iteration: 0
  });
  const seedRecord = {
    id: randomId("candidate"),
    candidate: options.seedCandidate,
    assessment: seedAssessment,
    createdAtIteration: 0
  };
  archive.push(seedRecord);
  candidateById.set(seedRecord.id, seedRecord);
  const initialStats = createEmptyStats();
  initialStats.assessmentCalls += 1;
  totalStats = addStats(totalStats, initialStats);
  await publishSnapshot(0, initialStats);

  for (let iteration = 1; iteration <= iterations; iteration += 1) {
    const iterationStats = createEmptyStats();
    const eligible = archive.filter((record) => isEligibleRecord(record));
    if (eligible.length === 0) {
      stoppedEarly = true;
      await publishSnapshot(iteration, iterationStats);
      break;
    }
    const selectedParents = selectParents({
      eligible,
      archive,
      parentsPerIteration,
      sharpness: selectionSharpness,
      midpoint: selectionMidpoint,
      noveltyWeight,
      replace: selectionReplace,
      childCountByParentId,
      random
    });
    const generationTasks = [];
    for (const parent of selectedParents) {
      const issueEnvelopes = normalizeIssuesForRecord(parent.id, parent.assessment.trainableIssues);
      const sampledIssueEnvelopes = sampleIssuesByType(
        issueEnvelopes,
        batchSize,
        parent.assessment.issueTypeWeights,
        random
      );
      if (sampledIssueEnvelopes.length === 0) {
        continue;
      }
      const visibleFeedbackEntries = resolveFeedbackEntries({
        scope: feedbackScope,
        parent,
        candidateById,
        feedbackByCandidateId,
        childrenByParentId
      });
      for (const generator of options.generators) {
        // Generators that cannot take a batch only see the first sampled issue.
        const issuesForGenerator = generator.supportsIssueBatch ? sampledIssueEnvelopes : sampledIssueEnvelopes.slice(0, 1);
        if (issuesForGenerator.length === 0) {
          continue;
        }
        generationTasks.push({
          parent,
          generator,
          sampledIssueEnvelopes: issuesForGenerator,
          feedbackEntries: visibleFeedbackEntries
        });
        iterationStats.generationCalls += 1;
        iterationStats.issuesSupplied += issuesForGenerator.length;
        iterationStats.feedbackEntriesSupplied += visibleFeedbackEntries.length;
      }
    }
    if (generationTasks.length === 0) {
      stoppedEarly = true;
      await publishSnapshot(iteration, iterationStats);
      break;
    }
    const generatedOutputs = await mapWithConcurrency(
      generationTasks,
      generationConcurrency,
      async (task) => {
        const proposals = await task.generator.generate({
          parent: task.parent,
          sampledIssues: task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          feedbackEntries: task.feedbackEntries,
          iteration
        });
        return { task, proposals };
      }
    );
    const pendingProposals = [];
    for (const output of generatedOutputs) {
      iterationStats.proposalsGenerated += output.proposals.length;
      for (const proposal of output.proposals) {
        pendingProposals.push({
          proposal,
          parent: output.task.parent,
          generatorName: output.task.generator.name,
          sampledIssues: output.task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          sampledIssueIds: output.task.sampledIssueEnvelopes.map((envelope) => envelope.id),
          sampledFeedbackEntryIds: output.task.feedbackEntries.map((entry) => entry.id)
        });
      }
    }
    const evaluatedProposals = await mapWithConcurrency(
      pendingProposals,
      assessmentConcurrency,
      async (pending) => {
        if (options.verifyGeneratedCandidate) {
          iterationStats.postCheckCalls += 1;
          const passes = await options.verifyGeneratedCandidate({
            proposal: pending.proposal,
            parent: pending.parent,
            generatorName: pending.generatorName,
            sampledIssues: pending.sampledIssues,
            iteration
          });
          if (!passes) {
            postCheckRejections.push({
              id: randomId("rejected"),
              candidate: pending.proposal.candidate,
              parentId: pending.parent.id,
              generatorName: pending.generatorName,
              iteration,
              sampledIssueIds: pending.sampledIssueIds,
              changeSummary: pending.proposal.changeSummary
            });
            // Rejected proposals are not assessed and never enter the archive.
            return null;
          }
        }
        iterationStats.proposalsAfterPostCheck += 1;
        iterationStats.assessmentCalls += 1;
        const assessment = await options.assessCandidate({
          candidate: pending.proposal.candidate,
          iteration,
          parent: pending.parent,
          generatorName: pending.generatorName,
          sampledIssues: pending.sampledIssues
        });
        return {
          pending,
          assessment
        };
      }
    );
    const acceptedRecords = [];
    for (const evaluated of evaluatedProposals) {
      if (!evaluated) {
        continue;
      }
      const { pending, assessment } = evaluated;
      acceptedRecords.push({
        id: randomId("candidate"),
        candidate: pending.proposal.candidate,
        assessment,
        createdAtIteration: iteration,
        parentId: pending.parent.id,
        generatorName: pending.generatorName,
        sampledIssueIds: pending.sampledIssueIds,
        sampledFeedbackEntryIds: pending.sampledFeedbackEntryIds,
        changeSummary: pending.proposal.changeSummary
      });
    }
    for (const record of acceptedRecords) {
      archive.push(record);
      candidateById.set(record.id, record);
      if (record.parentId) {
        const nextCount = (childCountByParentId.get(record.parentId) ?? 0) + 1;
        childCountByParentId.set(record.parentId, nextCount);
        const existingChildren = childrenByParentId.get(record.parentId) ?? [];
        existingChildren.push(record.id);
        childrenByParentId.set(record.parentId, existingChildren);
      }
      if (record.changeSummary && record.changeSummary.trim().length > 0) {
        const parentAssessment = record.parentId ? candidateById.get(record.parentId)?.assessment ?? null : null;
        const feedbackEntry = {
          id: randomId("feedback"),
          candidateId: record.id,
          attemptedChange: record.changeSummary,
          observedOutcome: describeObservedOutcome({
            assessment: record.assessment,
            parentAssessment
          })
        };
        feedbackEntries.push(feedbackEntry);
        feedbackByCandidateId.set(record.id, feedbackEntry);
      }
    }
    totalStats = addStats(totalStats, iterationStats);
    await publishSnapshot(iteration, iterationStats);
  }
  return {
    archive,
    feedbackEntries,
    postCheckRejections,
    snapshots,
    bestCandidate: findBestRecord(),
    totalStats,
    stoppedEarly
  };
}
9338
10006
  export {
9339
10007
  CHATGPT_MODEL_IDS,
9340
10008
  CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION,
@@ -9407,6 +10075,7 @@ export {
9407
10075
  resolveFilesystemToolProfile,
9408
10076
  resolveFireworksModelId,
9409
10077
  runAgentLoop,
10078
+ runCandidateEvolution,
9410
10079
  runToolLoop,
9411
10080
  sanitisePartForLogging,
9412
10081
  streamAgentLoop,