@deepthonk/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runner.js CHANGED
@@ -1,15 +1,19 @@
1
1
  import pLimit from "p-limit";
2
+ import { readFile } from "node:fs/promises";
3
+ import { join } from "node:path";
2
4
  import { z } from "zod";
3
5
  import { fitBradleyTerry } from "./bradleyTerry.js";
4
6
  import { BudgetTracker } from "./budgetTracker.js";
5
7
  import { planBudget, validateProfile } from "./budget.js";
6
8
  import { BudgetExceededError, CancelledError, ConfigError, DeepThonkError } from "./errors.js";
9
+ import { runArtifactFiles } from "./artifacts.js";
7
10
  import { parseJsonObject } from "./json.js";
8
- import { emptyUsage } from "./lifecycle.js";
11
+ import { claimRunLock, emptyUsage, releaseRunLock } from "./lifecycle.js";
9
12
  import { makeKRegularPairs } from "./pairScheduler.js";
10
13
  import { comparePrompt, finalizePrompt, generatePrompt, mutatePrompt } from "./prompts.js";
11
14
  import { createRng } from "./rng.js";
12
- import { runConfigSchema } from "./schemas.js";
15
+ import { assertNoPruneInProgress, buildPopulationMap, buildResumePlan, groupComparisons, persistPrunedTrace, pruneTraceToPlan, readResumeTrace, reconstructScores, replayBudgetUsage, resolveResumeRunId, resumeConfigError, toResumePlanStatus } from "./resume.js";
16
+ import { runConfigSchema, builtInProfiles, phaseCompletedEventSchema } from "./schemas.js";
13
17
  import { aggregateCritiques } from "./critique.js";
14
18
  import { TraceStore } from "./traceStore.js";
15
19
  const compareOutputSchema = z.object({
@@ -21,16 +25,27 @@ const compareOutputSchema = z.object({
21
25
  feedback_b: z.string().optional(),
22
26
  selection_reason: z.string().default("")
23
27
  });
24
- export async function runDeepThonk(configInput, driver, control = {}) {
28
+ export async function runDeepThonk(configInput, driver, control = {}, resumeState) {
25
29
  const config = runConfigSchema.parse(configInput);
26
30
  validateProfile(config.profile);
27
31
  enforceBudget(config);
28
- const runId = `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
32
+ const runId = resumeState?.runId ?? `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
29
33
  const trace = new TraceStore(config.runDir);
30
34
  const rng = createRng(config.seed);
31
- const tracker = new BudgetTracker(config);
32
- const startedAt = new Date().toISOString();
33
- await trace.init(config, runId);
35
+ const tracker = resumeState?.tracker ?? new BudgetTracker(config);
36
+ const startedAt = resumeState?.startedAt ?? new Date().toISOString();
37
+ if (!resumeState) {
38
+ await trace.init({ ...config, version: await currentPackageVersion() }, runId);
39
+ }
40
+ else {
41
+ await trace.event({
42
+ type: "run.resumed",
43
+ run_id: runId,
44
+ resumed_at: new Date().toISOString(),
45
+ next_phase: resumeState.nextPhase.phase,
46
+ generation: resumeState.nextPhase.generation
47
+ });
48
+ }
34
49
  let stopped = false;
35
50
  const writeStatus = async (state, phase, extra = {}) => {
36
51
  await trace.writeStatus({
@@ -51,7 +66,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
51
66
  throw new CancelledError(`Run cancelled before ${phase}.`, {
52
67
  code: "run.cancelled",
53
68
  retryable: false,
54
- fix: "Start a new run with a fresh output directory. Automatic replay is not implemented yet."
69
+ fix: "Resume the run with deepthonk resume --continue after the worker has stopped."
55
70
  });
56
71
  }
57
72
  };
@@ -80,31 +95,52 @@ export async function runDeepThonk(configInput, driver, control = {}) {
80
95
  }
81
96
  };
82
97
  try {
83
- await writeStatus("running", "initialized");
84
- await assertNotCancelled("initial population");
85
- let population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
86
- await trace.writePopulation(0, population);
87
- await assertBudget("initial population");
88
- await writeStatus("running", "population_completed", { generation: 0 });
98
+ await writeStatus("running", resumeState ? "resume_replay" : "initialized");
99
+ let population;
100
+ if (isResumePhaseCompleted(resumeState, "initial_generation")) {
101
+ population = resumePopulation(resumeState, 0);
102
+ }
103
+ else {
104
+ await assertNotCancelled("initial population");
105
+ population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
106
+ await trace.writePopulation(0, population);
107
+ await assertBudget("initial population");
108
+ await markPhaseCompleted(trace, "initial_generation");
109
+ await writeStatus("running", "population_completed", { generation: 0 });
110
+ }
89
111
  for (let gen = 1; gen <= config.profile.t; gen += 1) {
90
- await assertNotCancelled(`generation ${gen} comparisons`);
91
- await writeStatus("running", "generation_comparisons", { generation: gen });
92
- const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, rng);
93
- const comparisons = await judgePairs({
94
- config,
95
- driver,
96
- trace,
97
- rng,
98
- runId,
99
- generation: gen,
100
- pairs,
101
- population,
102
- tracker,
103
- guards
104
- });
105
- const scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
106
- await trace.writeScores(gen, scores);
107
- await assertBudget(`generation ${gen} comparisons`);
112
+ if (isResumePhaseCompleted(resumeState, "generation_mutation", gen)) {
113
+ population = resumePopulation(resumeState, gen);
114
+ continue;
115
+ }
116
+ let comparisons;
117
+ let scores;
118
+ if (isResumePhaseCompleted(resumeState, "generation_judging", gen)) {
119
+ comparisons = resumeComparisons(resumeState, gen);
120
+ scores = resumeScores(resumeState, gen, population, comparisons, config.profile.lambda);
121
+ }
122
+ else {
123
+ await assertNotCancelled(`generation ${gen} comparisons`);
124
+ await writeStatus("running", "generation_comparisons", { generation: gen });
125
+ const judgingRng = phaseRng(config.seed, "generation_judging", gen);
126
+ const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, judgingRng);
127
+ comparisons = await judgePairs({
128
+ config,
129
+ driver,
130
+ trace,
131
+ rng: judgingRng,
132
+ runId,
133
+ generation: gen,
134
+ pairs,
135
+ population,
136
+ tracker,
137
+ guards
138
+ });
139
+ scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
140
+ await trace.writeScores(gen, scores);
141
+ await assertBudget(`generation ${gen} comparisons`);
142
+ await markPhaseCompleted(trace, "generation_judging", gen);
143
+ }
108
144
  const ranked = rankPopulation(population, scores);
109
145
  const elites = topQuartile(ranked);
110
146
  const discarded = bottomQuartile(ranked);
@@ -113,6 +149,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
113
149
  const critiquesByCandidate = aggregateCritiques(population, comparisons);
114
150
  await assertNotCancelled(`generation ${gen} mutation`);
115
151
  await writeStatus("running", "generation_mutation", { generation: gen });
152
+ await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
116
153
  const mutants = await mutateSurvivors({
117
154
  config,
118
155
  driver,
@@ -131,26 +168,37 @@ export async function runDeepThonk(configInput, driver, control = {}) {
131
168
  population = keepPopulationSize([...eliteCopies, ...mutants], config.profile.n);
132
169
  await trace.writePopulation(gen, population);
133
170
  await assertBudget(`generation ${gen} mutation`);
171
+ await markPhaseCompleted(trace, "generation_mutation", gen);
134
172
  await writeStatus("running", "generation_completed", { generation: gen });
135
173
  }
136
- await assertNotCancelled("final ranking");
137
- await writeStatus("running", "final_ranking", { generation: "final" });
138
- const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, rng);
139
- const finalComparisons = await judgePairs({
140
- config,
141
- driver,
142
- trace,
143
- rng,
144
- runId,
145
- generation: "final",
146
- pairs: finalPairs,
147
- population,
148
- tracker,
149
- guards
150
- });
151
- const finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
152
- await trace.writeScores("final", finalScores);
153
- await assertBudget("final ranking");
174
+ let finalComparisons;
175
+ let finalScores;
176
+ if (isResumePhaseCompleted(resumeState, "final_judging")) {
177
+ finalComparisons = resumeComparisons(resumeState, "final");
178
+ finalScores = resumeScores(resumeState, "final", population, finalComparisons, config.profile.lambda);
179
+ }
180
+ else {
181
+ await assertNotCancelled("final ranking");
182
+ await writeStatus("running", "final_ranking", { generation: "final" });
183
+ const finalRng = phaseRng(config.seed, "final_judging");
184
+ const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, finalRng);
185
+ finalComparisons = await judgePairs({
186
+ config,
187
+ driver,
188
+ trace,
189
+ rng: finalRng,
190
+ runId,
191
+ generation: "final",
192
+ pairs: finalPairs,
193
+ population,
194
+ tracker,
195
+ guards
196
+ });
197
+ finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
198
+ await trace.writeScores("final", finalScores);
199
+ await assertBudget("final ranking");
200
+ await markPhaseCompleted(trace, "final_judging");
201
+ }
154
202
  const winner = rankPopulation(population, finalScores)[0];
155
203
  if (!winner)
156
204
  throw new ConfigError("Run produced no winner.");
@@ -162,6 +210,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
162
210
  const summary = {
163
211
  run_id: runId,
164
212
  winner_id: winner.id,
213
+ profile: summaryProfile(config.profile),
214
+ profile_name: summaryProfileName(config.profile),
215
+ prompt_style: config.promptStyle,
216
+ models: summaryModels(config),
165
217
  calls: tracker.usage.calls,
166
218
  usage: cloneUsage(tracker.usage),
167
219
  ranked_winner_answer_path: "artifacts/winner.txt",
@@ -171,7 +223,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
171
223
  completed_at: completedAt
172
224
  };
173
225
  await trace.writeSummary(summary, finalAnswer, winner.content);
226
+ await markPhaseCompleted(trace, "finalizing");
174
227
  await trace.event({ type: "run.completed", winner_id: winner.id, completed_at: completedAt });
228
+ if (resumeState)
229
+ await trace.event({ type: "run.resumed_completed", winner_id: winner.id, completed_at: completedAt });
175
230
  await writeStatus("completed", "summary", { generation: "final", completed_at: completedAt });
176
231
  return {
177
232
  runId,
@@ -190,6 +245,280 @@ export async function runDeepThonk(configInput, driver, control = {}) {
190
245
  throw error;
191
246
  }
192
247
  }
248
+ export async function resumeDeepThonk(runDir, driver, options = {}) {
249
+ const existingSummary = await readOptionalJson(runDir, runArtifactFiles.summary);
250
+ if (existingSummary) {
251
+ resumeConfigError("Run already has summary.json; nothing to resume.", "resume.already_complete", "Use deepthonk inspect/result to read the completed run.");
252
+ }
253
+ const rawConfig = await readRequiredConfig(runDir);
254
+ const currentVersion = await currentPackageVersion();
255
+ if (!sameMajorMinor(typeof rawConfig.version === "string" ? rawConfig.version : undefined, currentVersion)) {
256
+ resumeConfigError(`Cannot resume run from version ${String(rawConfig.version ?? "missing")}; current package version is ${currentVersion}. Resume requires matching major.minor.`, "resume.version_mismatch", "Start a fresh run with the current DeepThonk version, or resume with a package version whose major.minor matches the trace.");
257
+ }
258
+ assertStoredOutputConfigComplete(rawConfig);
259
+ const parsedConfig = runConfigSchema.parse(rawConfig);
260
+ const config = { ...parsedConfig, runDir };
261
+ validateProfile(config.profile);
262
+ enforceBudget(config);
263
+ assertResumeProviderMatches(rawConfig, config, driver, options);
264
+ assertNoPruneInProgress(runDir);
265
+ const lockRunId = resolveResumeRunId(config, undefined, []);
266
+ const claimed = await claimRunLock(runDir, lockRunId);
267
+ if (!claimed) {
268
+ throw new ConfigError(`Run directory is already claimed: ${runDir}`, {
269
+ code: "run.directory_locked",
270
+ retryable: false,
271
+ fix: "Wait for the active run to finish, or inspect the existing run.lock file."
272
+ });
273
+ }
274
+ try {
275
+ const status = await readOptionalJson(runDir, runArtifactFiles.status);
276
+ if ((status?.state === "running" || status?.state === "pending") && isLiveWorker(status.worker_pid)) {
277
+ resumeConfigError(`Run is still in flight at phase ${status.phase}.`, "resume.in_flight", "Wait for the worker to finish, or cancel/stop it before resuming.");
278
+ }
279
+ const trace = await readResumeTrace(runDir);
280
+ const runId = resolveResumeRunId(config, status, trace.events);
281
+ const plan = buildResumePlan(config, trace.events);
282
+ if (plan.nextPhase.phase === "summary") {
283
+ resumeConfigError("Trace says finalizing completed, but summary.json is missing.", "resume.inconsistent_trace", "Inspect the run directory and restore summary.json, or start a fresh run.");
284
+ }
285
+ const planStatus = toResumePlanStatus(runDir, runId, plan);
286
+ if (options.dryRun)
287
+ return planStatus;
288
+ const pruned = pruneTraceToPlan(trace, plan);
289
+ const populationByGeneration = buildPopulationMap(config, pruned.populations, pruned.candidates, plan);
290
+ const comparisonsByGeneration = groupComparisons(pruned.comparisons);
291
+ const scoresByGeneration = reconstructScores(config, populationByGeneration, comparisonsByGeneration, pruned.scores, plan);
292
+ const tracker = replayBudgetUsage(config, pruned.usage);
293
+ const startedAt = status?.started_at ?? new Date().toISOString();
294
+ await new TraceStore(runDir).writeStatus({
295
+ run_id: runId,
296
+ run_dir: runDir,
297
+ state: "running",
298
+ phase: "resume_planning",
299
+ usage: cloneUsage(tracker.usage),
300
+ started_at: startedAt,
301
+ worker_pid: process.pid,
302
+ updated_at: new Date().toISOString()
303
+ });
304
+ await persistPrunedTrace(runDir, pruned);
305
+ return await runDeepThonk(config, driver, {}, {
306
+ runId,
307
+ startedAt,
308
+ completed: plan.completed,
309
+ populationByGeneration,
310
+ comparisonsByGeneration,
311
+ scoresByGeneration,
312
+ tracker,
313
+ nextPhase: plan.nextPhase
314
+ });
315
+ }
316
+ finally {
317
+ await releaseRunLock(runDir);
318
+ }
319
+ }
320
+ async function markPhaseCompleted(trace, phase, generation) {
321
+ const event = phaseCompletedEventSchema.parse({
322
+ type: "phase.completed",
323
+ phase,
324
+ generation,
325
+ at: new Date().toISOString()
326
+ });
327
+ await trace.event(event);
328
+ }
329
+ function isResumePhaseCompleted(resumeState, phase, generation) {
330
+ return Boolean(resumeState?.completed.has(resumePhaseKey(phase, generation)));
331
+ }
332
+ function resumePopulation(resumeState, generation) {
333
+ const population = resumeState?.populationByGeneration.get(generation);
334
+ if (!population) {
335
+ throw new ConfigError(`Resume state is missing population generation ${generation}.`, {
336
+ code: "resume.population_missing",
337
+ retryable: false,
338
+ fix: "Inspect the run directory for missing population snapshots."
339
+ });
340
+ }
341
+ return population;
342
+ }
343
+ function resumeComparisons(resumeState, generation) {
344
+ const comparisons = resumeState?.comparisonsByGeneration.get(generation);
345
+ if (!comparisons) {
346
+ throw new ConfigError(`Resume state is missing comparisons for generation ${generation}.`, {
347
+ code: "resume.comparisons_missing",
348
+ retryable: false,
349
+ fix: "Inspect the run directory for missing comparison trace rows."
350
+ });
351
+ }
352
+ return comparisons;
353
+ }
354
+ function resumeScores(resumeState, generation, population, comparisons, lambda) {
355
+ return resumeState?.scoresByGeneration.get(generation) ?? fitBradleyTerry(population, comparisons, lambda, generation);
356
+ }
357
+ function phaseRng(seed, phase, generation) {
358
+ return createRng(hashSeed(`${seed}:${phase}:${generation ?? ""}`));
359
+ }
360
+ function hashSeed(value) {
361
+ let hash = 2166136261;
362
+ for (let i = 0; i < value.length; i += 1) {
363
+ hash ^= value.charCodeAt(i);
364
+ hash = Math.imul(hash, 16777619);
365
+ }
366
+ return hash >>> 0;
367
+ }
368
+ function resumePhaseKey(phase, generation) {
369
+ if ((phase === "generation_judging" || phase === "generation_mutation") && generation !== undefined)
370
+ return `${phase}:${generation}`;
371
+ return phase;
372
+ }
373
+ function providerLabel(driver) {
374
+ const value = driver;
375
+ if (typeof value.provider === "string")
376
+ return value.provider;
377
+ if (typeof value.providerName === "string")
378
+ return value.providerName;
379
+ if (typeof value.config?.provider === "string")
380
+ return value.config.provider;
381
+ if (value.baseDriver)
382
+ return providerLabel(value.baseDriver);
383
+ const constructorName = driver.constructor?.name;
384
+ if (constructorName === "FakeDriver")
385
+ return "fake";
386
+ if (constructorName === "SamplingDriver")
387
+ return "sampling";
388
+ if (constructorName === "OpenAiCompatibleDriver")
389
+ return "openai-compatible";
390
+ return undefined;
391
+ }
392
+ function assertStoredOutputConfigComplete(rawConfig) {
393
+ const output = rawConfig.output;
394
+ if (!isRecord(output) || typeof output.includeRawModelOutputs !== "boolean" || typeof output.includePrompts !== "boolean") {
395
+ resumeConfigError("Stored config.json is missing the complete output block required for deterministic resume.", "resume.config_incomplete", "Restore output.includeRawModelOutputs and output.includePrompts, or start a fresh run.");
396
+ }
397
+ }
398
+ function assertResumeProviderMatches(rawConfig, config, driver, options) {
399
+ const runtimeProvider = options.provider ?? providerLabel(driver) ?? config.provider;
400
+ if (runtimeProvider !== config.provider) {
401
+ providerMismatch(`Cannot resume provider ${config.provider} with runtime provider ${runtimeProvider}.`);
402
+ }
403
+ const expectedRoutes = expectedProviderRoutes(rawConfig, config);
404
+ const actualRoutes = runtimeProviderRoutes(driver);
405
+ for (const role of providerRoles) {
406
+ const expected = expectedRoutes[role];
407
+ if (!expected)
408
+ continue;
409
+ const actual = actualRoutes[role];
410
+ if (!actual) {
411
+ providerMismatch(`Cannot resume ${role} route ${routeLabel(expected)} without a matching runtime route.`);
412
+ }
413
+ if (expected.provider !== undefined && actual.provider !== expected.provider) {
414
+ providerMismatch(`Cannot resume ${role} route provider ${expected.provider} with runtime provider ${actual.provider ?? "missing"}.`);
415
+ }
416
+ if (expected.baseUrl !== undefined && normalizeBaseUrl(actual.baseUrl) !== normalizeBaseUrl(expected.baseUrl)) {
417
+ providerMismatch(`Cannot resume ${role} route baseUrl ${expected.baseUrl} with runtime baseUrl ${actual.baseUrl ?? "missing"}.`);
418
+ }
419
+ if (expected.model !== undefined && actual.model !== expected.model) {
420
+ providerMismatch(`Cannot resume ${role} route model ${expected.model} with runtime model ${actual.model ?? "missing"}.`);
421
+ }
422
+ }
423
+ }
424
+ const providerRoles = ["generator", "mutator", "judge", "finalizer"];
425
+ function expectedProviderRoutes(rawConfig, config) {
426
+ const providers = rawConfig.providers;
427
+ if (!isRecord(providers))
428
+ return {};
429
+ const routes = {};
430
+ for (const role of providerRoles) {
431
+ const rawRoute = providers[role];
432
+ if (!isRecord(rawRoute))
433
+ continue;
434
+ routes[role] = {
435
+ provider: stringValue(rawRoute.provider) ?? config.provider,
436
+ baseUrl: stringValue(rawRoute.baseUrl) ?? stringValue(rawRoute.base_url),
437
+ model: stringValue(rawRoute.model) ?? modelForRole(config, role)
438
+ };
439
+ }
440
+ return routes;
441
+ }
442
+ function runtimeProviderRoutes(driver) {
443
+ const routeTable = driver.routes;
444
+ if (!isRecord(routeTable))
445
+ return {};
446
+ const routes = {};
447
+ for (const role of providerRoles) {
448
+ const route = routeTable[role];
449
+ if (!isRecord(route))
450
+ continue;
451
+ const routeDriver = route.driver;
452
+ routes[role] = {
453
+ provider: routeDriver ? providerLabel(routeDriver) : undefined,
454
+ baseUrl: routeDriver ? providerBaseUrl(routeDriver) : undefined,
455
+ model: stringValue(route.model)
456
+ };
457
+ }
458
+ return routes;
459
+ }
460
+ function providerBaseUrl(driver) {
461
+ const value = driver;
462
+ if (typeof value.baseUrl === "string")
463
+ return value.baseUrl;
464
+ if (typeof value.config?.baseUrl === "string")
465
+ return value.config.baseUrl;
466
+ if (value.baseDriver)
467
+ return providerBaseUrl(value.baseDriver);
468
+ return undefined;
469
+ }
470
+ function modelForRole(config, role) {
471
+ if (role === "generator")
472
+ return config.generatorModel;
473
+ if (role === "mutator")
474
+ return config.mutatorModel;
475
+ if (role === "judge")
476
+ return config.judgeModel;
477
+ return config.finalizerModel;
478
+ }
479
+ function routeLabel(route) {
480
+ return [route.provider, route.baseUrl, route.model].filter(Boolean).join("/") || "provider route";
481
+ }
482
+ function providerMismatch(message) {
483
+ resumeConfigError(message, "resume.provider_mismatch", "Use the same provider configuration that created config.json.");
484
+ }
485
+ function normalizeBaseUrl(value) {
486
+ return value?.replace(/\/+$/, "");
487
+ }
488
+ function stringValue(value) {
489
+ return typeof value === "string" && value.length > 0 ? value : undefined;
490
+ }
491
+ function isRecord(value) {
492
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
493
+ }
494
+ async function readRequiredConfig(runDir) {
495
+ const config = await readOptionalJson(runDir, runArtifactFiles.config);
496
+ if (!config) {
497
+ resumeConfigError("Run directory is missing config.json; cannot replay safely.", "resume.config_missing", "Resume only from a DeepThonk run directory with config.json.");
498
+ }
499
+ return config;
500
+ }
501
+ async function readOptionalJson(runDir, fileName) {
502
+ try {
503
+ return JSON.parse(await readFile(join(runDir, fileName), "utf8"));
504
+ }
505
+ catch (error) {
506
+ if (error.code === "ENOENT")
507
+ return undefined;
508
+ throw error;
509
+ }
510
+ }
511
+ function isLiveWorker(pid) {
512
+ if (!pid)
513
+ return true;
514
+ try {
515
+ process.kill(pid, 0);
516
+ return true;
517
+ }
518
+ catch {
519
+ return false;
520
+ }
521
+ }
193
522
  export function rankPopulation(population, scores) {
194
523
  const order = new Map(scores.map((score) => [score.candidateId, score.rank]));
195
524
  return [...population].sort((left, right) => {
@@ -219,6 +548,23 @@ export function copyElites(elites, generation) {
219
548
  export function keepPopulationSize(population, n) {
220
549
  return population.slice(0, n);
221
550
  }
551
+ async function traceDiscardedCandidates(trace, ranked, carriedIds, bottomQuartileIds, generation) {
552
+ for (const candidate of ranked) {
553
+ if (carriedIds.has(candidate.id))
554
+ continue;
555
+ const discardReason = bottomQuartileIds.has(candidate.id) ? "bottom_quartile" : "rounding_trim";
556
+ await trace.writeCandidate({
557
+ ...candidate,
558
+ status: "discarded",
559
+ metadata: {
560
+ ...candidate.metadata,
561
+ discardReason,
562
+ discardedAt: new Date().toISOString()
563
+ }
564
+ });
565
+ await trace.event({ type: "candidate.discarded", candidate_id: candidate.id, generation, reason: discardReason });
566
+ }
567
+ }
222
568
  async function generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards) {
223
569
  const limit = pLimit(config.concurrency.generate);
224
570
  const jobs = Array.from({ length: config.profile.n }, (_, index) => ({
@@ -334,6 +680,13 @@ async function judgePairs(args) {
334
680
  jsonParseFailures += 1;
335
681
  }
336
682
  }
683
+ if (invalidJson || !parsed) {
684
+ throw new ConfigError(`Judge produced ${jsonParseFailures} consecutive invalid-JSON responses (${args.config.retry.invalidJsonRetries + 1} attempts) for comparison ${job.id}. Refusing to synthesize a tie and pollute the ranking.`, {
685
+ code: "judge.persistent_invalid_json",
686
+ retryable: false,
687
+ fix: "The judge model is producing unparseable output. Inspect the raw response (set output.includeRawModelOutputs: true), switch judge models, or raise retry.invalidJsonRetries if the failures are transient."
688
+ });
689
+ }
337
690
  const comparison = {
338
691
  id: job.id,
339
692
  runId: args.runId,
@@ -459,8 +812,46 @@ function resultMetadata(result, config, prompt) {
459
812
  function compactMetadata(metadata) {
460
813
  return Object.fromEntries(Object.entries(metadata).filter(([, value]) => value !== undefined));
461
814
  }
815
+ function summaryProfile(profile) {
816
+ return {
817
+ n: profile.n,
818
+ k: profile.k,
819
+ t: profile.t,
820
+ m: profile.m,
821
+ lambda: profile.lambda,
822
+ sample_temperature: profile.sampleTemperature,
823
+ mutate_temperature: profile.mutateTemperature,
824
+ judge_temperature: profile.judgeTemperature
825
+ };
826
+ }
827
+ function summaryProfileName(profile) {
828
+ for (const name of ["quick", "balanced", "paper"]) {
829
+ if (profilesEqual(profile, builtInProfiles[name]))
830
+ return name;
831
+ }
832
+ return null;
833
+ }
834
+ function profilesEqual(left, right) {
835
+ return (left.n === right.n &&
836
+ left.k === right.k &&
837
+ left.t === right.t &&
838
+ left.m === right.m &&
839
+ left.lambda === right.lambda &&
840
+ left.sampleTemperature === right.sampleTemperature &&
841
+ left.mutateTemperature === right.mutateTemperature &&
842
+ left.judgeTemperature === right.judgeTemperature);
843
+ }
844
+ function summaryModels(config) {
845
+ return {
846
+ generator: config.generatorModel,
847
+ mutator: config.mutatorModel,
848
+ judge: config.judgeModel,
849
+ finalizer: config.finalizerModel ?? null
850
+ };
851
+ }
462
852
  function buildUsageRecord(args) {
463
853
  return {
854
+ schema_version: 1,
464
855
  ts: new Date().toISOString(),
465
856
  phase: args.phase,
466
857
  role: args.role,
@@ -515,4 +906,28 @@ function serializeRunError(error) {
515
906
  retryable: false
516
907
  };
517
908
  }
909
+ async function currentPackageVersion() {
910
+ try {
911
+ const packageJson = JSON.parse(await readFile(new URL("../package.json", import.meta.url), "utf8"));
912
+ return packageJson.version ?? "0.0.0";
913
+ }
914
+ catch {
915
+ return "0.0.0";
916
+ }
917
+ }
918
+ function sameMajorMinor(left, right) {
919
+ if (!left)
920
+ return false;
921
+ const parse = (version) => {
922
+ const match = version.match(/^(\d+)\.(\d+)/);
923
+ if (!match)
924
+ return undefined;
925
+ return [Number(match[1]), Number(match[2])];
926
+ };
927
+ const a = parse(left);
928
+ const b = parse(right);
929
+ if (!a || !b)
930
+ return false;
931
+ return a[0] === b[0] && a[1] === b[1];
932
+ }
518
933
  //# sourceMappingURL=runner.js.map