@ljoukov/llm 3.0.8 → 3.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +679 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +145 -7
- package/dist/index.d.ts +145 -7
- package/dist/index.js +678 -9
- package/dist/index.js.map +1 -1
- package/package.json +5 -1
package/dist/index.cjs
CHANGED
|
@@ -101,6 +101,7 @@ __export(index_exports, {
|
|
|
101
101
|
resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
|
|
102
102
|
resolveFireworksModelId: () => resolveFireworksModelId,
|
|
103
103
|
runAgentLoop: () => runAgentLoop,
|
|
104
|
+
runCandidateEvolution: () => runCandidateEvolution,
|
|
104
105
|
runToolLoop: () => runToolLoop,
|
|
105
106
|
sanitisePartForLogging: () => sanitisePartForLogging,
|
|
106
107
|
streamAgentLoop: () => streamAgentLoop,
|
|
@@ -233,6 +234,15 @@ var GEMINI_2_5_PRO_PRICING = {
|
|
|
233
234
|
outputRateLow: 10 / 1e6,
|
|
234
235
|
outputRateHigh: 15 / 1e6
|
|
235
236
|
};
|
|
237
|
+
// Rate table that getGeminiProPricing returns for model IDs containing
// "gemini-2.5-flash" or "gemini-flash-latest". The "low" and "high" rates are
// identical here, so the threshold does not change the rate that applies.
// NOTE(review): the `/ 1e6` form suggests prices quoted per million tokens and
// stored per token, with `threshold` a token count - confirm against the
// pricing consumer.
var GEMINI_2_5_FLASH_PRICING = {
  threshold: 2e5,
  // Input rates.
  inputRateLow: 0.3 / 1e6,
  inputRateHigh: 0.3 / 1e6,
  // Cached-input rates.
  cachedRateLow: 0.03 / 1e6,
  cachedRateHigh: 0.03 / 1e6,
  // Output rates.
  outputRateLow: 2.5 / 1e6,
  outputRateHigh: 2.5 / 1e6
};
|
|
236
246
|
var GEMINI_IMAGE_PREVIEW_PRICING = {
|
|
237
247
|
inputRate: 2 / 1e6,
|
|
238
248
|
cachedRate: 0.2 / 1e6,
|
|
@@ -248,6 +258,9 @@ function getGeminiProPricing(modelId) {
|
|
|
248
258
|
if (modelId.includes("gemini-2.5-pro")) {
|
|
249
259
|
return GEMINI_2_5_PRO_PRICING;
|
|
250
260
|
}
|
|
261
|
+
if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest")) {
|
|
262
|
+
return GEMINI_2_5_FLASH_PRICING;
|
|
263
|
+
}
|
|
251
264
|
if (modelId.includes("gemini-3-pro") || modelId.includes("gemini-3.1-pro")) {
|
|
252
265
|
return GEMINI_3_PRO_PREVIEW_PRICING;
|
|
253
266
|
}
|
|
@@ -2212,7 +2225,10 @@ var GEMINI_TEXT_MODEL_IDS = [
|
|
|
2212
2225
|
"gemini-flash-latest",
|
|
2213
2226
|
"gemini-flash-lite-latest"
|
|
2214
2227
|
];
|
|
2215
|
-
var GEMINI_IMAGE_MODEL_IDS = [
|
|
2228
|
+
// Gemini model IDs classified as image models. They are concatenated with
// GEMINI_TEXT_MODEL_IDS to form GEMINI_MODEL_IDS.
var GEMINI_IMAGE_MODEL_IDS = ["gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"];
|
|
2216
2232
|
var GEMINI_MODEL_IDS = [...GEMINI_TEXT_MODEL_IDS, ...GEMINI_IMAGE_MODEL_IDS];
|
|
2217
2233
|
function isGeminiModelId(value) {
|
|
2218
2234
|
return GEMINI_MODEL_IDS.includes(value);
|
|
@@ -2685,12 +2701,7 @@ async function runOpenAiCall(fn, modelId, runOptions) {
|
|
|
2685
2701
|
}
|
|
2686
2702
|
|
|
2687
2703
|
// src/openai/models.ts
|
|
2688
|
-
var OPENAI_MODEL_IDS = [
|
|
2689
|
-
"gpt-5.3-codex",
|
|
2690
|
-
"gpt-5.3-codex-spark",
|
|
2691
|
-
"gpt-5.2",
|
|
2692
|
-
"gpt-5.1-codex-mini"
|
|
2693
|
-
];
|
|
2704
|
+
// OpenAI model IDs accepted by isOpenAiModelId (membership test via includes).
var OPENAI_MODEL_IDS = [
  "gpt-5.3-codex",
  "gpt-5.2",
  "gpt-5.1-codex-mini"
];
|
|
2694
2705
|
function isOpenAiModelId(value) {
|
|
2695
2706
|
return OPENAI_MODEL_IDS.includes(value);
|
|
2696
2707
|
}
|
|
@@ -4362,6 +4373,9 @@ function extractFireworksToolCalls(message) {
|
|
|
4362
4373
|
return calls;
|
|
4363
4374
|
}
|
|
4364
4375
|
function resolveGeminiThinkingConfig(modelId) {
|
|
4376
|
+
if (isGeminiImageModelId(modelId)) {
|
|
4377
|
+
return void 0;
|
|
4378
|
+
}
|
|
4365
4379
|
switch (modelId) {
|
|
4366
4380
|
case "gemini-3-pro-preview":
|
|
4367
4381
|
case "gemini-3.1-pro-preview":
|
|
@@ -4617,9 +4631,10 @@ async function runTextCall(params) {
|
|
|
4617
4631
|
}, modelForProvider);
|
|
4618
4632
|
} else {
|
|
4619
4633
|
const geminiContents = contents.map(convertLlmContentToGeminiContent);
|
|
4634
|
+
const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider);
|
|
4620
4635
|
const config = {
|
|
4621
4636
|
maxOutputTokens: 32e3,
|
|
4622
|
-
thinkingConfig:
|
|
4637
|
+
...thinkingConfig ? { thinkingConfig } : {},
|
|
4623
4638
|
...request.responseMimeType ? { responseMimeType: request.responseMimeType } : {},
|
|
4624
4639
|
...request.responseJsonSchema ? { responseJsonSchema: request.responseJsonSchema } : {},
|
|
4625
4640
|
...request.responseModalities ? { responseModalities: Array.from(request.responseModalities) } : {},
|
|
@@ -5847,6 +5862,7 @@ async function runToolLoop(request) {
|
|
|
5847
5862
|
firstModelEventAtMs = Date.now();
|
|
5848
5863
|
}
|
|
5849
5864
|
};
|
|
5865
|
+
const thinkingConfig = resolveGeminiThinkingConfig(request.model);
|
|
5850
5866
|
const config = {
|
|
5851
5867
|
maxOutputTokens: 32e3,
|
|
5852
5868
|
tools: geminiTools,
|
|
@@ -5855,7 +5871,7 @@ async function runToolLoop(request) {
|
|
|
5855
5871
|
mode: import_genai2.FunctionCallingConfigMode.VALIDATED
|
|
5856
5872
|
}
|
|
5857
5873
|
},
|
|
5858
|
-
thinkingConfig:
|
|
5874
|
+
...thinkingConfig ? { thinkingConfig } : {}
|
|
5859
5875
|
};
|
|
5860
5876
|
const onEvent = request.onEvent;
|
|
5861
5877
|
const response = await runGeminiCall(
|
|
@@ -9447,6 +9463,659 @@ function createAgentTelemetryEmitter(params) {
|
|
|
9447
9463
|
});
|
|
9448
9464
|
};
|
|
9449
9465
|
}
|
|
9466
|
+
|
|
9467
|
+
// src/agent/candidateEvolution.ts
|
|
9468
|
+
var import_node_crypto4 = require("crypto");
|
|
9469
|
+
// Fallbacks used by runCandidateEvolution when the matching option is omitted.

// Issues sampled per selected parent (options.batchSize).
var DEFAULT_BATCH_SIZE = 1;

// Upper bounds on concurrently running generator / assessment workers.
var DEFAULT_GENERATION_CONCURRENCY = 8;
var DEFAULT_ASSESSMENT_CONCURRENCY = 8;

// Parent-selection tuning: sigmoid steepness, penalty per existing child, and
// the score treated as the sigmoid midpoint (75th percentile of the archive).
var DEFAULT_SHARPNESS = 10;
var DEFAULT_NOVELTY_WEIGHT = 1;
var DEFAULT_MIDPOINT = { mode: "percentile", percentile: 75 };

// Which earlier feedback entries a generator is shown.
var DEFAULT_FEEDBACK_SCOPE = { mode: "ancestors" };

// Percentiles reported in every snapshot's scorePercentiles map.
var DEFAULT_SCORE_PERCENTILES = [0, 25, 50, 75, 90, 95, 100];
|
|
9477
|
+
/**
 * Creates a fresh statistics object with every counter set to zero.
 *
 * @returns {object} A new object holding the seven evolution counters.
 */
function createEmptyStats() {
  const counterNames = [
    "generationCalls",
    "issuesSupplied",
    "proposalsGenerated",
    "proposalsAfterPostCheck",
    "assessmentCalls",
    "postCheckCalls",
    "feedbackEntriesSupplied"
  ];
  const stats = {};
  for (const counterName of counterNames) {
    stats[counterName] = 0;
  }
  return stats;
}
|
|
9488
|
+
/**
 * Adds two statistics objects counter by counter.
 *
 * @param {object} left - First statistics object (not modified).
 * @param {object} right - Second statistics object (not modified).
 * @returns {object} A new statistics object holding the per-counter sums.
 */
function addStats(left, right) {
  const sumOf = (counterName) => left[counterName] + right[counterName];
  return {
    generationCalls: sumOf("generationCalls"),
    issuesSupplied: sumOf("issuesSupplied"),
    proposalsGenerated: sumOf("proposalsGenerated"),
    proposalsAfterPostCheck: sumOf("proposalsAfterPostCheck"),
    assessmentCalls: sumOf("assessmentCalls"),
    postCheckCalls: sumOf("postCheckCalls"),
    feedbackEntriesSupplied: sumOf("feedbackEntriesSupplied")
  };
}
|
|
9499
|
+
/**
 * Builds an identifier of the form `<prefix>_<16 hex characters>`.
 *
 * @param {string} prefix - Text placed before the underscore.
 * @returns {string} The prefix, an underscore, and 8 random bytes rendered as hex.
 */
function randomId(prefix) {
  const hexSuffix = (0, import_node_crypto4.randomBytes)(8).toString("hex");
  return `${prefix}_${hexSuffix}`;
}
|
|
9502
|
+
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * Uses Number.isFinite, so non-number inputs (including numeric strings) are
 * never coerced and always yield the fallback.
 *
 * @param {*} value - Candidate value.
 * @param {*} fallback - Returned when `value` is not a finite number.
 * @returns {*} `value` or `fallback`.
 */
function toFiniteNumber(value, fallback) {
  return Number.isFinite(value) ? value : fallback;
}
|
|
9508
|
+
/**
 * Wraps an optional random source so every sample lands in [0, 1).
 *
 * @param {(() => number)|undefined} random - Caller-supplied generator; when
 *   falsy, Math.random is used.
 * @returns {() => number} Generator whose results are clamped: non-finite or
 *   non-positive samples become 0, samples >= 1 become 0.999999999999.
 */
function normalizeRandom(random) {
  if (!random) {
    return () => Math.random();
  }
  // Largest value handed out when the supplied source reaches or exceeds 1.
  const justBelowOne = 0.999999999999;
  return () => {
    const sample = toFiniteNumber(random(), 0);
    if (sample <= 0) {
      return 0;
    }
    return sample >= 1 ? justBelowOne : sample;
  };
}
|
|
9523
|
+
/**
 * Logistic curve centred on `midpoint`.
 *
 * @param {number} score - Value being mapped.
 * @param {number} midpoint - Score that maps to exactly 0.5.
 * @param {number} sharpness - Steepness multiplier applied to the distance
 *   from the midpoint.
 * @returns {number} 1 / (1 + e^(-sharpness * (score - midpoint))).
 */
function sigmoidScore(score, midpoint, sharpness) {
  const exponent = -sharpness * (score - midpoint);
  return 1 / (1 + Math.exp(exponent));
}
|
|
9526
|
+
/**
 * Linearly interpolated percentile of an ascending-sorted numeric array.
 *
 * @param {number[]} sortedValues - Values already sorted ascending.
 * @param {number} percentile - Requested percentile; clamped to [0, 100].
 * @returns {number} 0 for an empty array, the sole element for a single-item
 *   array, otherwise the interpolated value at rank (n - 1) * percentile / 100.
 */
function computePercentile(sortedValues, percentile) {
  const count = sortedValues.length;
  if (count === 0) {
    return 0;
  }
  if (count === 1) {
    return sortedValues[0] ?? 0;
  }
  const clamped = Math.max(0, Math.min(100, percentile));
  const rank = (count - 1) * (clamped / 100);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);
  const belowValue = sortedValues[below] ?? 0;
  if (below === above) {
    // The rank lands exactly on an element; nothing to interpolate.
    return belowValue;
  }
  const aboveValue = sortedValues[above] ?? belowValue;
  const share = rank - below;
  return belowValue * (1 - share) + aboveValue * share;
}
|
|
9545
|
+
/**
 * Computes the requested percentiles over the finite assessment scores of a
 * set of records.
 *
 * @param {Array<{assessment: {score: number}}>} records - Records to read scores from.
 * @param {number[]} percentiles - Percentiles to evaluate.
 * @returns {Object<number, number>} Map from each requested percentile to its value.
 */
function computeScorePercentiles(records, percentiles) {
  const finiteScores = [];
  for (const record of records) {
    const score = record.assessment.score;
    if (Number.isFinite(score)) {
      finiteScores.push(score);
    }
  }
  finiteScores.sort((a, b) => a - b);
  const byPercentile = {};
  for (const requested of percentiles) {
    byPercentile[requested] = computePercentile(finiteScores, requested);
  }
  return byPercentile;
}
|
|
9553
|
+
/**
 * Picks one entry of `values` with probability proportional to its weight.
 *
 * Weights that are non-finite or not strictly positive count as zero. When no
 * weight is usable the pick is uniform over all values. An entry whose usable
 * weight is zero is never returned while another entry has positive weight,
 * even when `random()` yields exactly 0 or floating-point residue leaves the
 * threshold positive after the last entry.
 *
 * @param {Array} values - Candidates to choose from.
 * @param {number[]} weights - One weight per value.
 * @param {() => number} random - Source of numbers in [0, 1); called exactly once.
 * @returns {*} The selected value.
 * @throws {Error} If `values` is empty, the lengths differ, or the selected
 *   slot unexpectedly holds `undefined`.
 */
function pickByWeights(values, weights, random) {
  if (values.length === 0) {
    throw new Error("Cannot pick from an empty set.");
  }
  if (values.length !== weights.length) {
    throw new Error("values and weights must have the same length.");
  }
  const usableWeights = weights.map(
    (weight) => Number.isFinite(weight) && weight > 0 ? weight : 0
  );
  let totalWeight = 0;
  for (const weight of usableWeights) {
    totalWeight += weight;
  }
  if (totalWeight <= 0) {
    // No usable weight at all: fall back to a uniform pick.
    const index = Math.min(values.length - 1, Math.floor(random() * values.length));
    const fallbackValue = values[index];
    if (fallbackValue === void 0) {
      throw new Error("Unexpected empty value during uniform fallback pick.");
    }
    return fallbackValue;
  }
  let threshold = random() * totalWeight;
  let lastWeightedIndex = -1;
  for (let index = 0; index < values.length; index += 1) {
    const weight = usableWeights[index];
    if (weight <= 0) {
      // Zero-weight entries must stay unselectable, including when threshold is 0.
      continue;
    }
    lastWeightedIndex = index;
    threshold -= weight;
    if (threshold <= 0) {
      const value = values[index];
      if (value === void 0) {
        break;
      }
      return value;
    }
  }
  // Rounding can leave a tiny positive threshold after the final subtraction;
  // resolve it to the last entry that actually carries weight.
  const weighted = lastWeightedIndex >= 0 ? values[lastWeightedIndex] : void 0;
  const last = weighted !== void 0 ? weighted : values[values.length - 1];
  if (last === void 0) {
    throw new Error("Unexpected missing final value during weighted pick.");
  }
  return last;
}
|
|
9592
|
+
/**
 * Draws up to `k` distinct entries from `values` in random order.
 *
 * @param {Array} values - Source entries (never modified).
 * @param {number} k - Number of entries wanted.
 * @param {() => number} random - Source of numbers in [0, 1).
 * @returns {Array} An empty array when `k <= 0` or `values` is empty, a
 *   shallow copy in original order when `k >= values.length`, otherwise the
 *   drawn entries in draw order.
 */
function sampleWithoutReplacement(values, k, random) {
  if (k <= 0 || values.length === 0) {
    return [];
  }
  const remaining = [...values];
  if (k >= values.length) {
    return remaining;
  }
  const drawn = [];
  while (drawn.length < k) {
    const slot = Math.min(remaining.length - 1, Math.floor(random() * remaining.length));
    const [entry] = remaining.splice(slot, 1);
    if (entry === void 0) {
      break;
    }
    drawn.push(entry);
  }
  return drawn;
}
|
|
9611
|
+
/**
 * Tells whether a record may be used as a parent.
 *
 * @param {{assessment: {isViable?: boolean, trainableIssues: Array}}} record
 * @returns {boolean} True unless the assessment is explicitly non-viable
 *   (`isViable === false`) or lists no trainable issues.
 */
function isEligibleRecord(record) {
  const { assessment } = record;
  if (assessment.isViable === false) {
    return false;
  }
  return assessment.trainableIssues.length > 0;
}
|
|
9614
|
+
/**
 * Returns the trimmed `issueType` of an issue, or "default" when it is
 * missing or blank.
 *
 * @param {{issueType?: string}} issue
 * @returns {string} The issue type label.
 */
function resolveIssueType(issue) {
  const trimmed = issue.issueType?.trim();
  return trimmed ? trimmed : "default";
}
|
|
9621
|
+
/**
 * Returns the trimmed `issueId` of an issue, or a synthetic
 * `<parentId>:issue:<index>` identifier when it is missing or blank.
 *
 * @param {{issueId?: string}} issue
 * @param {string} parentId - Id of the record that owns the issue.
 * @param {number} index - Position of the issue within that record.
 * @returns {string} The issue identifier.
 */
function resolveIssueId(issue, parentId, index) {
  const trimmed = issue.issueId?.trim();
  const hasExplicitId = Boolean(trimmed) && trimmed.length > 0;
  if (hasExplicitId) {
    return trimmed;
  }
  return `${parentId}:issue:${index}`;
}
|
|
9628
|
+
/**
 * Wraps each issue of a record in an envelope carrying a resolved id and type.
 *
 * @param {string} parentId - Id of the record owning the issues.
 * @param {Array<object>} issues - Raw issues.
 * @returns {Array<{id: string, issueType: string, issue: object}>} One
 *   envelope per issue, in the original order.
 */
function normalizeIssuesForRecord(parentId, issues) {
  return issues.map((issue, position) => {
    const id = resolveIssueId(issue, parentId, position);
    const issueType = resolveIssueType(issue);
    return { id, issueType, issue };
  });
}
|
|
9635
|
+
/**
 * Chooses one issue type, then samples up to `batchSize` issues of that type.
 *
 * The type is drawn with weight `(number of issues of the type) * multiplier`,
 * where the multiplier comes from `typeWeights[type]` and falls back to 1 when
 * absent, non-finite or not positive.
 *
 * @param {Array<{issueType: string}>} issues - Issue envelopes.
 * @param {number} batchSize - Maximum number of issues to return.
 * @param {Object<string, number>|undefined} typeWeights - Optional per-type multipliers.
 * @param {() => number} random - Source of numbers in [0, 1).
 * @returns {Array} Sampled envelopes, all sharing one issue type; empty when
 *   there are no issues or `batchSize <= 0`.
 */
function sampleIssuesByType(issues, batchSize, typeWeights, random) {
  if (issues.length === 0 || batchSize <= 0) {
    return [];
  }
  const countByType = /* @__PURE__ */ new Map();
  for (const { issueType } of issues) {
    countByType.set(issueType, (countByType.get(issueType) ?? 0) + 1);
  }
  const types = [];
  const typeScores = [];
  for (const [type, count] of countByType) {
    const configured = typeWeights?.[type] ?? 1;
    const multiplier = Number.isFinite(configured) && configured > 0 ? configured : 1;
    types.push(type);
    typeScores.push(count * multiplier);
  }
  const chosenType = pickByWeights(types, typeScores, random);
  const matching = issues.filter((issue) => issue.issueType === chosenType);
  const take = Math.min(batchSize, matching.length);
  return sampleWithoutReplacement(matching, take, random);
}
|
|
9655
|
+
/**
 * Resolves the sigmoid midpoint used for parent selection.
 *
 * @param {{mode: string, value?: number, percentile?: number}} midpoint -
 *   With mode "fixed", `value` is returned as is; any other mode takes
 *   `percentile` of the archive's finite scores.
 * @param {Array<{assessment: {score: number}}>} archive - All recorded candidates.
 * @returns {number} The midpoint score.
 */
function resolveMidpoint(midpoint, archive) {
  if (midpoint.mode === "fixed") {
    return midpoint.value;
  }
  const finiteScores = [];
  for (const record of archive) {
    const score = record.assessment.score;
    if (Number.isFinite(score)) {
      finiteScores.push(score);
    }
  }
  finiteScores.sort((a, b) => a - b);
  return computePercentile(finiteScores, midpoint.percentile);
}
|
|
9662
|
+
/**
 * Selects the parents for one iteration.
 *
 * Each eligible record is weighted by
 * `sigmoid(score; midpoint, sharpness) * 1 / (1 + noveltyWeight * childCount)`,
 * so records with more existing children are picked less often.
 *
 * @param {object} input
 * @param {Array} input.eligible - Records that may become parents.
 * @param {Array} input.archive - All records; used to resolve a percentile midpoint.
 * @param {number} input.parentsPerIteration - Number of picks wanted.
 * @param {number} input.sharpness - Sigmoid steepness.
 * @param {object} input.midpoint - Midpoint configuration passed to resolveMidpoint.
 * @param {number} input.noveltyWeight - Penalty factor per existing child.
 * @param {boolean} input.replace - When true the same record may be picked repeatedly.
 * @param {Map<string, number>} input.childCountByParentId - Children produced so far per record id.
 * @param {() => number} input.random - Source of numbers in [0, 1).
 * @returns {Array} The selected records. Without replacement, all eligible
 *   records are returned in their original order when at least as many picks
 *   as records are requested.
 */
function selectParents(input) {
  const {
    eligible,
    archive,
    parentsPerIteration,
    sharpness,
    midpoint,
    noveltyWeight,
    replace,
    childCountByParentId,
    random
  } = input;
  if (eligible.length === 0 || parentsPerIteration <= 0) {
    return [];
  }
  const midpointScore = resolveMidpoint(midpoint, archive);
  const weighted = eligible.map((record) => {
    const performance = sigmoidScore(record.assessment.score, midpointScore, sharpness);
    const priorChildren = childCountByParentId.get(record.id) ?? 0;
    const novelty = 1 / (1 + noveltyWeight * priorChildren);
    return { record, weight: performance * novelty };
  });
  const recordsOf = (entries) => entries.map((entry) => entry.record);
  const weightsOf = (entries) => entries.map((entry) => entry.weight);
  if (replace) {
    // The candidate set never changes, so its arrays can be built once.
    const records = recordsOf(weighted);
    const weights = weightsOf(weighted);
    const picks = [];
    while (picks.length < parentsPerIteration) {
      picks.push(pickByWeights(records, weights, random));
    }
    return picks;
  }
  if (parentsPerIteration >= weighted.length) {
    return recordsOf(weighted);
  }
  const remaining = [...weighted];
  const chosenRecords = [];
  while (chosenRecords.length < parentsPerIteration) {
    const chosen = pickByWeights(recordsOf(remaining), weightsOf(remaining), random);
    chosenRecords.push(chosen);
    // Drop the winner so it cannot be drawn again.
    const position = remaining.findIndex((entry) => entry.record.id === chosen.id);
    if (position >= 0) {
      remaining.splice(position, 1);
    }
  }
  return chosenRecords;
}
|
|
9719
|
+
/**
 * Default textual description of how a candidate fared against its parent.
 *
 * @param {object} input
 * @param {{score: number, isViable?: boolean}} input.assessment - Assessment
 *   of the new candidate.
 * @param {{score: number}|null|undefined} input.parentAssessment - Assessment
 *   of the parent, when known.
 * @returns {string} A sentence naming the candidate score (three decimals, or
 *   "n/a" when not finite) and, if a parent assessment is present, whether the
 *   score improved, worsened or stayed the same.
 */
function defaultObservedOutcome(input) {
  const { assessment, parentAssessment } = input;
  if (assessment.isViable === false) {
    return "Inconclusive - resulting candidate was marked non-viable.";
  }
  const formatScore = (score) => Number.isFinite(score) ? score.toFixed(3) : "n/a";
  const roundedScore = formatScore(assessment.score);
  if (!parentAssessment) {
    return `Candidate score: ${roundedScore}.`;
  }
  const parentScore = formatScore(parentAssessment.score);
  const improved = assessment.score > parentAssessment.score;
  const worsened = assessment.score < parentAssessment.score;
  if (improved) {
    return `Candidate score: ${roundedScore}. Improved over parent score ${parentScore}.`;
  }
  if (worsened) {
    return `Candidate score: ${roundedScore}. Worse than parent score ${parentScore}.`;
  }
  return `Candidate score: ${roundedScore}. Same as parent score ${parentScore}.`;
}
|
|
9737
|
+
/**
 * Collects the feedback entries visible from a parent under a feedback scope.
 *
 * - mode "none": nothing is visible.
 * - mode "ancestors": walks `parentId` links upward starting at the parent
 *   itself (depth 0) and stops once the depth exceeds `scope.maxDepth`, when set.
 * - any other mode: breadth-first walk over both parent and child links, up to
 *   `scope.maxDistance` hops from the parent; a negative distance yields nothing.
 *
 * @param {object} input
 * @param {object} input.scope - Scope configuration (`mode`, `maxDepth`, `maxDistance`).
 * @param {{id: string}} input.parent - Record the walk starts from.
 * @param {Map<string, {parentId?: string}>} input.candidateById - Record lookup.
 * @param {Map<string, object>} input.feedbackByCandidateId - Feedback entry per record id.
 * @param {Map<string, string[]>} input.childrenByParentId - Child ids per record id.
 * @returns {Array<object>} Feedback entries in visiting order.
 */
function resolveFeedbackEntries(input) {
  const { scope, parent, candidateById, feedbackByCandidateId, childrenByParentId } = input;
  if (scope.mode === "none") {
    return [];
  }
  const collected = [];
  if (scope.mode === "ancestors") {
    for (
      let id = parent.id, depth = 0;
      id;
      id = candidateById.get(id)?.parentId, depth += 1
    ) {
      if (scope.maxDepth !== void 0 && depth > scope.maxDepth) {
        break;
      }
      const entry = feedbackByCandidateId.get(id);
      if (entry) {
        collected.push(entry);
      }
    }
    return collected;
  }
  const { maxDistance } = scope;
  if (maxDistance < 0) {
    return [];
  }
  const seen = /* @__PURE__ */ new Set();
  // The frontier is consumed front to back through `head`; entries are only appended.
  const frontier = [{ id: parent.id, distance: 0 }];
  for (let head = 0; head < frontier.length; head += 1) {
    const { id, distance } = frontier[head];
    if (seen.has(id)) {
      continue;
    }
    seen.add(id);
    const entry = feedbackByCandidateId.get(id);
    if (entry) {
      collected.push(entry);
    }
    if (distance >= maxDistance) {
      continue;
    }
    const ancestorId = candidateById.get(id)?.parentId;
    if (ancestorId && !seen.has(ancestorId)) {
      frontier.push({ id: ancestorId, distance: distance + 1 });
    }
    for (const childId of childrenByParentId.get(id) ?? []) {
      if (!seen.has(childId)) {
        frontier.push({ id: childId, distance: distance + 1 });
      }
    }
  }
  return collected;
}
|
|
9797
|
+
/**
 * Maps `items` through an async `worker`, running at most `maxConcurrency`
 * workers at a time and keeping results in input order.
 *
 * A non-finite or sub-1 `maxConcurrency` is treated as 1, so the work always
 * runs. Once any worker rejects, no further items are started; calls already
 * in flight are left to settle on their own.
 *
 * @param {Array} items - Inputs. Entries that are `undefined` are skipped and
 *   leave a hole at their index in the result.
 * @param {number} maxConcurrency - Upper bound on simultaneous workers.
 * @param {(item: *, index: number) => *} worker - Invoked as `worker(item, index)`.
 * @returns {Promise<Array>} Results aligned with `items`.
 * @throws Rejects with the first worker error.
 */
async function mapWithConcurrency(items, maxConcurrency, worker) {
  if (items.length === 0) {
    return [];
  }
  // NaN would otherwise produce zero runners and silently skip every item.
  const requested = Math.floor(maxConcurrency);
  const concurrency = requested >= 1 ? requested : 1;
  const output = new Array(items.length);
  let nextIndex = 0;
  let failed = false;
  const runner = async () => {
    while (!failed) {
      const index = nextIndex;
      nextIndex += 1;
      if (index >= items.length) {
        break;
      }
      const item = items[index];
      if (item === void 0) {
        continue;
      }
      try {
        output[index] = await worker(item, index);
      } catch (error) {
        // Tell the sibling runners to stop taking new items, then propagate.
        failed = true;
        throw error;
      }
    }
  };
  const runnerCount = Math.min(concurrency, items.length);
  const runners = [];
  for (let slot = 0; slot < runnerCount; slot += 1) {
    runners.push(runner());
  }
  await Promise.all(runners);
  return output;
}
|
|
9821
|
+
/**
 * Runs an iterative generate -> verify -> assess loop that grows an archive of
 * candidates starting from `options.seedCandidate`.
 *
 * Per iteration: eligible archive records are selected as parents, issues are
 * sampled from each parent's assessment, every generator produces proposals,
 * proposals optionally pass `options.verifyGeneratedCandidate`, survivors are
 * assessed with `options.assessCandidate` and appended to the archive, and a
 * snapshot is recorded and passed to `options.onSnapshot`. The loop stops
 * early when no record is eligible or no generation task could be built.
 *
 * Options read: iterations, parentsPerIteration, batchSize,
 * generationConcurrency, assessmentConcurrency, generators (name,
 * supportsIssueBatch, generate), random, parentSelection (sharpness, midpoint,
 * noveltyWeight, replace), feedbackScope, describeObservedOutcome,
 * scorePercentiles, seedCandidate, assessCandidate, verifyGeneratedCandidate,
 * onSnapshot.
 *
 * @param {object} options - Evolution configuration and callbacks.
 * @returns {Promise<object>} `{ archive, feedbackEntries, postCheckRejections,
 *   snapshots, bestCandidate, totalStats, stoppedEarly }`.
 * @throws {Error} If there are no generators, `parentsPerIteration` is not
 *   positive, or a generator name is blank or duplicated.
 */
async function runCandidateEvolution(options) {
  const iterations = Math.max(0, Math.floor(options.iterations));
  const parentsPerIteration = Math.max(0, Math.floor(options.parentsPerIteration));
  const batchSize = Math.max(1, Math.floor(options.batchSize ?? DEFAULT_BATCH_SIZE));
  const generationConcurrency = Math.max(
    1,
    Math.floor(options.generationConcurrency ?? DEFAULT_GENERATION_CONCURRENCY)
  );
  const assessmentConcurrency = Math.max(
    1,
    Math.floor(options.assessmentConcurrency ?? DEFAULT_ASSESSMENT_CONCURRENCY)
  );

  // ---- Validation -------------------------------------------------------
  if (options.generators.length === 0) {
    throw new Error("runCandidateEvolution requires at least one generator subagent.");
  }
  if (parentsPerIteration <= 0) {
    throw new Error("parentsPerIteration must be positive.");
  }
  const generationNames = /* @__PURE__ */ new Set();
  for (const generator of options.generators) {
    if (!generator.name.trim()) {
      throw new Error("Generator names must be non-empty.");
    }
    if (generationNames.has(generator.name)) {
      throw new Error(`Duplicate generator name "${generator.name}".`);
    }
    generationNames.add(generator.name);
  }

  // ---- Resolved settings ------------------------------------------------
  const random = normalizeRandom(options.random);
  const parentSelection = options.parentSelection;
  const selectionSharpness = parentSelection?.sharpness !== void 0 ? Math.max(1e-4, parentSelection.sharpness) : DEFAULT_SHARPNESS;
  const selectionMidpoint = parentSelection?.midpoint ?? DEFAULT_MIDPOINT;
  const noveltyWeight = parentSelection?.noveltyWeight !== void 0 ? Math.max(0, parentSelection.noveltyWeight) : DEFAULT_NOVELTY_WEIGHT;
  const selectionReplace = parentSelection?.replace ?? true;
  const feedbackScope = options.feedbackScope ?? DEFAULT_FEEDBACK_SCOPE;
  const describeObservedOutcome = options.describeObservedOutcome ?? defaultObservedOutcome;
  const scorePercentiles = options.scorePercentiles && options.scorePercentiles.length > 0 ? options.scorePercentiles : DEFAULT_SCORE_PERCENTILES;

  // ---- Run state --------------------------------------------------------
  const archive = [];
  const feedbackEntries = [];
  const postCheckRejections = [];
  const snapshots = [];
  const candidateById = /* @__PURE__ */ new Map();
  const feedbackByCandidateId = /* @__PURE__ */ new Map();
  const childCountByParentId = /* @__PURE__ */ new Map();
  const childrenByParentId = /* @__PURE__ */ new Map();
  let totalStats = createEmptyStats();
  let stoppedEarly = false;

  // Highest-scoring record in the archive; on ties the earliest one wins.
  const findBestRecord = () => archive.reduce(
    (best, current) => current.assessment.score > best.assessment.score ? current : best
  );
  // Builds, stores and publishes the snapshot describing the current archive.
  const recordSnapshot = async (iteration, stats) => {
    const best = findBestRecord();
    const snapshot = {
      iteration,
      archiveSize: archive.length,
      bestCandidateId: best.id,
      bestScore: best.assessment.score,
      scorePercentiles: computeScorePercentiles(archive, scorePercentiles),
      stats,
      bestCandidate: best
    };
    snapshots.push(snapshot);
    await options.onSnapshot?.(snapshot);
  };

  // ---- Seed -------------------------------------------------------------
  const seedAssessment = await options.assessCandidate({
    candidate: options.seedCandidate,
    iteration: 0
  });
  const seedRecord = {
    id: randomId("candidate"),
    candidate: options.seedCandidate,
    assessment: seedAssessment,
    createdAtIteration: 0
  };
  archive.push(seedRecord);
  candidateById.set(seedRecord.id, seedRecord);
  const initialStats = createEmptyStats();
  initialStats.assessmentCalls += 1;
  totalStats = addStats(totalStats, initialStats);
  await recordSnapshot(0, initialStats);

  // ---- Iterations -------------------------------------------------------
  for (let iteration = 1; iteration <= iterations; iteration += 1) {
    const iterationStats = createEmptyStats();
    const eligible = archive.filter((record) => isEligibleRecord(record));
    if (eligible.length === 0) {
      stoppedEarly = true;
      await recordSnapshot(iteration, iterationStats);
      break;
    }
    const selectedParents = selectParents({
      eligible,
      archive,
      parentsPerIteration,
      sharpness: selectionSharpness,
      midpoint: selectionMidpoint,
      noveltyWeight,
      replace: selectionReplace,
      childCountByParentId,
      random
    });

    // One generation task per (selected parent, generator) pair.
    const generationTasks = [];
    for (const parent of selectedParents) {
      const issueEnvelopes = normalizeIssuesForRecord(parent.id, parent.assessment.trainableIssues);
      const sampledIssueEnvelopes = sampleIssuesByType(
        issueEnvelopes,
        batchSize,
        parent.assessment.issueTypeWeights,
        random
      );
      if (sampledIssueEnvelopes.length === 0) {
        continue;
      }
      const visibleFeedbackEntries = resolveFeedbackEntries({
        scope: feedbackScope,
        parent,
        candidateById,
        feedbackByCandidateId,
        childrenByParentId
      });
      for (const generator of options.generators) {
        // Generators without batch support only ever see the first sampled issue.
        const issuesForGenerator = generator.supportsIssueBatch ? sampledIssueEnvelopes : sampledIssueEnvelopes.slice(0, 1);
        if (issuesForGenerator.length === 0) {
          continue;
        }
        generationTasks.push({
          parent,
          generator,
          sampledIssueEnvelopes: issuesForGenerator,
          feedbackEntries: visibleFeedbackEntries
        });
        iterationStats.generationCalls += 1;
        iterationStats.issuesSupplied += issuesForGenerator.length;
        iterationStats.feedbackEntriesSupplied += visibleFeedbackEntries.length;
      }
    }
    if (generationTasks.length === 0) {
      stoppedEarly = true;
      await recordSnapshot(iteration, iterationStats);
      break;
    }

    // Generation phase.
    const generatedOutputs = await mapWithConcurrency(
      generationTasks,
      generationConcurrency,
      async (task) => {
        const proposals = await task.generator.generate({
          parent: task.parent,
          sampledIssues: task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          feedbackEntries: task.feedbackEntries,
          iteration
        });
        return { task, proposals };
      }
    );
    const pendingProposals = [];
    for (const { task, proposals } of generatedOutputs) {
      iterationStats.proposalsGenerated += proposals.length;
      for (const proposal of proposals) {
        pendingProposals.push({
          proposal,
          parent: task.parent,
          generatorName: task.generator.name,
          sampledIssues: task.sampledIssueEnvelopes.map((envelope) => envelope.issue),
          sampledIssueIds: task.sampledIssueEnvelopes.map((envelope) => envelope.id),
          sampledFeedbackEntryIds: task.feedbackEntries.map((entry) => entry.id)
        });
      }
    }

    // Post-check and assessment phase; rejected proposals resolve to null.
    const evaluatedProposals = await mapWithConcurrency(
      pendingProposals,
      assessmentConcurrency,
      async (pending) => {
        if (options.verifyGeneratedCandidate) {
          iterationStats.postCheckCalls += 1;
          const passes = await options.verifyGeneratedCandidate({
            proposal: pending.proposal,
            parent: pending.parent,
            generatorName: pending.generatorName,
            sampledIssues: pending.sampledIssues,
            iteration
          });
          if (!passes) {
            postCheckRejections.push({
              id: randomId("rejected"),
              candidate: pending.proposal.candidate,
              parentId: pending.parent.id,
              generatorName: pending.generatorName,
              iteration,
              sampledIssueIds: pending.sampledIssueIds,
              changeSummary: pending.proposal.changeSummary
            });
            return null;
          }
        }
        iterationStats.proposalsAfterPostCheck += 1;
        iterationStats.assessmentCalls += 1;
        const assessment = await options.assessCandidate({
          candidate: pending.proposal.candidate,
          iteration,
          parent: pending.parent,
          generatorName: pending.generatorName,
          sampledIssues: pending.sampledIssues
        });
        return { pending, assessment };
      }
    );

    // Turn every surviving proposal into an archive record.
    const acceptedRecords = [];
    for (const evaluated of evaluatedProposals) {
      if (!evaluated) {
        continue;
      }
      const { pending, assessment } = evaluated;
      acceptedRecords.push({
        id: randomId("candidate"),
        candidate: pending.proposal.candidate,
        assessment,
        createdAtIteration: iteration,
        parentId: pending.parent.id,
        generatorName: pending.generatorName,
        sampledIssueIds: pending.sampledIssueIds,
        sampledFeedbackEntryIds: pending.sampledFeedbackEntryIds,
        changeSummary: pending.proposal.changeSummary
      });
    }

    // Register records, lineage links and feedback.
    for (const record of acceptedRecords) {
      archive.push(record);
      candidateById.set(record.id, record);
      const { parentId } = record;
      if (parentId) {
        childCountByParentId.set(parentId, (childCountByParentId.get(parentId) ?? 0) + 1);
        const siblings = childrenByParentId.get(parentId) ?? [];
        siblings.push(record.id);
        childrenByParentId.set(parentId, siblings);
      }
      // Only proposals that described their change produce a feedback entry.
      if (record.changeSummary && record.changeSummary.trim().length > 0) {
        const parentAssessment = parentId ? candidateById.get(parentId)?.assessment ?? null : null;
        const feedbackEntry = {
          id: randomId("feedback"),
          candidateId: record.id,
          attemptedChange: record.changeSummary,
          observedOutcome: describeObservedOutcome({
            assessment: record.assessment,
            parentAssessment
          })
        };
        feedbackEntries.push(feedbackEntry);
        feedbackByCandidateId.set(record.id, feedbackEntry);
      }
    }
    totalStats = addStats(totalStats, iterationStats);
    await recordSnapshot(iteration, iterationStats);
  }

  return {
    archive,
    feedbackEntries,
    postCheckRejections,
    snapshots,
    bestCandidate: findBestRecord(),
    totalStats,
    stoppedEarly
  };
}
|
|
9450
10119
|
// Annotate the CommonJS export names for ESM import in node:
|
|
9451
10120
|
0 && (module.exports = {
|
|
9452
10121
|
CHATGPT_MODEL_IDS,
|
|
@@ -9520,6 +10189,7 @@ function createAgentTelemetryEmitter(params) {
|
|
|
9520
10189
|
resolveFilesystemToolProfile,
|
|
9521
10190
|
resolveFireworksModelId,
|
|
9522
10191
|
runAgentLoop,
|
|
10192
|
+
runCandidateEvolution,
|
|
9523
10193
|
runToolLoop,
|
|
9524
10194
|
sanitisePartForLogging,
|
|
9525
10195
|
streamAgentLoop,
|