@deepthonk/core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/runner.js CHANGED
@@ -1,15 +1,19 @@
1
1
  import pLimit from "p-limit";
2
+ import { readFile } from "node:fs/promises";
3
+ import { join } from "node:path";
2
4
  import { z } from "zod";
3
5
  import { fitBradleyTerry } from "./bradleyTerry.js";
4
6
  import { BudgetTracker } from "./budgetTracker.js";
5
7
  import { planBudget, validateProfile } from "./budget.js";
6
8
  import { BudgetExceededError, CancelledError, ConfigError, DeepThonkError } from "./errors.js";
9
+ import { runArtifactFiles } from "./artifacts.js";
7
10
  import { parseJsonObject } from "./json.js";
8
- import { emptyUsage } from "./lifecycle.js";
11
+ import { claimRunLock, emptyUsage, releaseRunLock } from "./lifecycle.js";
9
12
  import { makeKRegularPairs } from "./pairScheduler.js";
10
13
  import { comparePrompt, finalizePrompt, generatePrompt, mutatePrompt } from "./prompts.js";
11
14
  import { createRng } from "./rng.js";
12
- import { runConfigSchema, builtInProfiles } from "./schemas.js";
15
+ import { assertNoPruneInProgress, buildPopulationMap, buildResumePlan, groupComparisons, persistPrunedTrace, pruneTraceToPlan, readResumeTrace, reconstructScores, replayBudgetUsage, resolveResumeRunId, resumeConfigError, toResumePlanStatus } from "./resume.js";
16
+ import { runConfigSchema, builtInProfiles, phaseCompletedEventSchema } from "./schemas.js";
13
17
  import { aggregateCritiques } from "./critique.js";
14
18
  import { TraceStore } from "./traceStore.js";
15
19
  const compareOutputSchema = z.object({
@@ -21,16 +25,27 @@ const compareOutputSchema = z.object({
21
25
  feedback_b: z.string().optional(),
22
26
  selection_reason: z.string().default("")
23
27
  });
24
- export async function runDeepThonk(configInput, driver, control = {}) {
28
+ export async function runDeepThonk(configInput, driver, control = {}, resumeState) {
25
29
  const config = runConfigSchema.parse(configInput);
26
30
  validateProfile(config.profile);
27
31
  enforceBudget(config);
28
- const runId = `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
32
+ const runId = resumeState?.runId ?? `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
29
33
  const trace = new TraceStore(config.runDir);
30
34
  const rng = createRng(config.seed);
31
- const tracker = new BudgetTracker(config);
32
- const startedAt = new Date().toISOString();
33
- await trace.init(config, runId);
35
+ const tracker = resumeState?.tracker ?? new BudgetTracker(config);
36
+ const startedAt = resumeState?.startedAt ?? new Date().toISOString();
37
+ if (!resumeState) {
38
+ await trace.init({ ...config, version: await currentPackageVersion() }, runId);
39
+ }
40
+ else {
41
+ await trace.event({
42
+ type: "run.resumed",
43
+ run_id: runId,
44
+ resumed_at: new Date().toISOString(),
45
+ next_phase: resumeState.nextPhase.phase,
46
+ generation: resumeState.nextPhase.generation
47
+ });
48
+ }
34
49
  let stopped = false;
35
50
  const writeStatus = async (state, phase, extra = {}) => {
36
51
  await trace.writeStatus({
@@ -51,7 +66,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
51
66
  throw new CancelledError(`Run cancelled before ${phase}.`, {
52
67
  code: "run.cancelled",
53
68
  retryable: false,
54
- fix: "Start a new run with a fresh output directory. Automatic replay is not implemented yet."
69
+ fix: "Resume the run with deepthonk resume --continue after the worker has stopped."
55
70
  });
56
71
  }
57
72
  };
@@ -80,40 +95,61 @@ export async function runDeepThonk(configInput, driver, control = {}) {
80
95
  }
81
96
  };
82
97
  try {
83
- await writeStatus("running", "initialized");
84
- await assertNotCancelled("initial population");
85
- let population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
86
- await trace.writePopulation(0, population);
87
- await assertBudget("initial population");
88
- await writeStatus("running", "population_completed", { generation: 0 });
98
+ await writeStatus("running", resumeState ? "resume_replay" : "initialized");
99
+ let population;
100
+ if (isResumePhaseCompleted(resumeState, "initial_generation")) {
101
+ population = resumePopulation(resumeState, 0);
102
+ }
103
+ else {
104
+ await assertNotCancelled("initial population");
105
+ population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
106
+ await trace.writePopulation(0, population);
107
+ await assertBudget("initial population");
108
+ await markPhaseCompleted(trace, "initial_generation");
109
+ await writeStatus("running", "population_completed", { generation: 0 });
110
+ }
89
111
  for (let gen = 1; gen <= config.profile.t; gen += 1) {
90
- await assertNotCancelled(`generation ${gen} comparisons`);
91
- await writeStatus("running", "generation_comparisons", { generation: gen });
92
- const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, rng);
93
- const comparisons = await judgePairs({
94
- config,
95
- driver,
96
- trace,
97
- rng,
98
- runId,
99
- generation: gen,
100
- pairs,
101
- population,
102
- tracker,
103
- guards
104
- });
105
- const scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
106
- await trace.writeScores(gen, scores);
107
- await assertBudget(`generation ${gen} comparisons`);
112
+ if (isResumePhaseCompleted(resumeState, "generation_mutation", gen)) {
113
+ population = resumePopulation(resumeState, gen);
114
+ continue;
115
+ }
116
+ let comparisons;
117
+ let scores;
118
+ if (isResumePhaseCompleted(resumeState, "generation_judging", gen)) {
119
+ comparisons = resumeComparisons(resumeState, gen);
120
+ scores = resumeScores(resumeState, gen, population, comparisons, config.profile.lambda);
121
+ }
122
+ else {
123
+ await assertNotCancelled(`generation ${gen} comparisons`);
124
+ await writeStatus("running", "generation_comparisons", { generation: gen });
125
+ const judgingRng = phaseRng(config.seed, "generation_judging", gen);
126
+ const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, judgingRng);
127
+ comparisons = await judgePairs({
128
+ config,
129
+ driver,
130
+ trace,
131
+ rng: judgingRng,
132
+ runId,
133
+ generation: gen,
134
+ pairs,
135
+ population,
136
+ tracker,
137
+ guards
138
+ });
139
+ scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
140
+ await trace.writeScores(gen, scores);
141
+ await assertBudget(`generation ${gen} comparisons`);
142
+ await markPhaseCompleted(trace, "generation_judging", gen);
143
+ }
108
144
  const ranked = rankPopulation(population, scores);
109
145
  const elites = topQuartile(ranked);
110
146
  const discarded = bottomQuartile(ranked);
111
147
  const survivors = ranked.filter((candidate) => !discarded.has(candidate.id));
112
148
  const mutationParents = survivors.slice(0, config.profile.n - elites.length);
113
149
  const critiquesByCandidate = aggregateCritiques(population, comparisons);
114
- await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
115
150
  await assertNotCancelled(`generation ${gen} mutation`);
116
151
  await writeStatus("running", "generation_mutation", { generation: gen });
152
+ await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
117
153
  const mutants = await mutateSurvivors({
118
154
  config,
119
155
  driver,
@@ -132,26 +168,37 @@ export async function runDeepThonk(configInput, driver, control = {}) {
132
168
  population = keepPopulationSize([...eliteCopies, ...mutants], config.profile.n);
133
169
  await trace.writePopulation(gen, population);
134
170
  await assertBudget(`generation ${gen} mutation`);
171
+ await markPhaseCompleted(trace, "generation_mutation", gen);
135
172
  await writeStatus("running", "generation_completed", { generation: gen });
136
173
  }
137
- await assertNotCancelled("final ranking");
138
- await writeStatus("running", "final_ranking", { generation: "final" });
139
- const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, rng);
140
- const finalComparisons = await judgePairs({
141
- config,
142
- driver,
143
- trace,
144
- rng,
145
- runId,
146
- generation: "final",
147
- pairs: finalPairs,
148
- population,
149
- tracker,
150
- guards
151
- });
152
- const finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
153
- await trace.writeScores("final", finalScores);
154
- await assertBudget("final ranking");
174
+ let finalComparisons;
175
+ let finalScores;
176
+ if (isResumePhaseCompleted(resumeState, "final_judging")) {
177
+ finalComparisons = resumeComparisons(resumeState, "final");
178
+ finalScores = resumeScores(resumeState, "final", population, finalComparisons, config.profile.lambda);
179
+ }
180
+ else {
181
+ await assertNotCancelled("final ranking");
182
+ await writeStatus("running", "final_ranking", { generation: "final" });
183
+ const finalRng = phaseRng(config.seed, "final_judging");
184
+ const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, finalRng);
185
+ finalComparisons = await judgePairs({
186
+ config,
187
+ driver,
188
+ trace,
189
+ rng: finalRng,
190
+ runId,
191
+ generation: "final",
192
+ pairs: finalPairs,
193
+ population,
194
+ tracker,
195
+ guards
196
+ });
197
+ finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
198
+ await trace.writeScores("final", finalScores);
199
+ await assertBudget("final ranking");
200
+ await markPhaseCompleted(trace, "final_judging");
201
+ }
155
202
  const winner = rankPopulation(population, finalScores)[0];
156
203
  if (!winner)
157
204
  throw new ConfigError("Run produced no winner.");
@@ -176,7 +223,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
176
223
  completed_at: completedAt
177
224
  };
178
225
  await trace.writeSummary(summary, finalAnswer, winner.content);
226
+ await markPhaseCompleted(trace, "finalizing");
179
227
  await trace.event({ type: "run.completed", winner_id: winner.id, completed_at: completedAt });
228
+ if (resumeState)
229
+ await trace.event({ type: "run.resumed_completed", winner_id: winner.id, completed_at: completedAt });
180
230
  await writeStatus("completed", "summary", { generation: "final", completed_at: completedAt });
181
231
  return {
182
232
  runId,
@@ -195,6 +245,280 @@ export async function runDeepThonk(configInput, driver, control = {}) {
195
245
  throw error;
196
246
  }
197
247
  }
/**
 * Resume an interrupted run from the artifacts stored in `runDir`.
 *
 * Sequence (order matters):
 *  1. Refuse when a summary artifact already exists.
 *  2. Load the stored config, require a matching major.minor package version,
 *     a complete `output` block and a matching provider configuration.
 *  3. Claim the run lock; it is always released in `finally`.
 *  4. Refuse when the status artifact says the run is running/pending and
 *     `isLiveWorker` reports its recorded worker as alive.
 *  5. Build the resume plan; with `options.dryRun` return the plan status only.
 *  6. Prune the trace to the plan, rebuild populations/comparisons/scores and
 *     budget usage, write a "resume_planning" status, persist the pruned
 *     trace, then hand over to `runDeepThonk` with the reconstructed state.
 *
 * NOTE(review): `resumeConfigError` and `providerMismatch` are used as if they
 * always throw — confirm against ./resume.js.
 *
 * @param {string} runDir - Run directory holding the stored artifacts.
 * @param {object} driver - Model driver handed to `runDeepThonk`.
 * @param {{ dryRun?: boolean, provider?: string }} [options]
 * @returns {Promise<object>} The plan status when `options.dryRun` is set,
 *   otherwise whatever `runDeepThonk` resolves to.
 * @throws {ConfigError} With code "run.directory_locked" when the lock cannot be claimed.
 */
export async function resumeDeepThonk(runDir, driver, options = {}) {
    const priorSummary = await readOptionalJson(runDir, runArtifactFiles.summary);
    if (priorSummary) {
        resumeConfigError("Run already has summary.json; nothing to resume.", "resume.already_complete", "Use deepthonk inspect/result to read the completed run.");
    }

    const storedConfig = await readRequiredConfig(runDir);
    const packageVersion = await currentPackageVersion();
    const storedVersion = typeof storedConfig.version === "string" ? storedConfig.version : undefined;
    if (!sameMajorMinor(storedVersion, packageVersion)) {
        resumeConfigError(`Cannot resume run from version ${String(storedConfig.version ?? "missing")}; current package version is ${packageVersion}. Resume requires matching major.minor.`, "resume.version_mismatch", "Start a fresh run with the current DeepThonk version, or resume with a package version whose major.minor matches the trace.");
    }
    assertStoredOutputConfigComplete(storedConfig);

    // The directory passed by the caller wins over whatever runDir was stored.
    const config = { ...runConfigSchema.parse(storedConfig), runDir };
    validateProfile(config.profile);
    enforceBudget(config);
    assertResumeProviderMatches(storedConfig, config, driver, options);
    // NOTE(review): not awaited — confirm assertNoPruneInProgress is synchronous.
    assertNoPruneInProgress(runDir);

    // The lock is claimed before status/trace are read, so the run id used
    // for it is resolved from the config alone.
    const lockOwner = resolveResumeRunId(config, undefined, []);
    const lockAcquired = await claimRunLock(runDir, lockOwner);
    if (!lockAcquired) {
        throw new ConfigError(`Run directory is already claimed: ${runDir}`, {
            code: "run.directory_locked",
            retryable: false,
            fix: "Wait for the active run to finish, or inspect the existing run.lock file."
        });
    }

    try {
        const status = await readOptionalJson(runDir, runArtifactFiles.status);
        const statusState = status?.state;
        const looksActive = statusState === "running" || statusState === "pending";
        if (looksActive && isLiveWorker(status.worker_pid)) {
            resumeConfigError(`Run is still in flight at phase ${status.phase}.`, "resume.in_flight", "Wait for the worker to finish, or cancel/stop it before resuming.");
        }

        const trace = await readResumeTrace(runDir);
        const runId = resolveResumeRunId(config, status, trace.events);
        const plan = buildResumePlan(config, trace.events);
        if (plan.nextPhase.phase === "summary") {
            resumeConfigError("Trace says finalizing completed, but summary.json is missing.", "resume.inconsistent_trace", "Inspect the run directory and restore summary.json, or start a fresh run.");
        }

        const planStatus = toResumePlanStatus(runDir, runId, plan);
        if (options.dryRun) {
            return planStatus;
        }

        const pruned = pruneTraceToPlan(trace, plan);
        const populationByGeneration = buildPopulationMap(config, pruned.populations, pruned.candidates, plan);
        const comparisonsByGeneration = groupComparisons(pruned.comparisons);
        const scoresByGeneration = reconstructScores(config, populationByGeneration, comparisonsByGeneration, pruned.scores, plan);
        const tracker = replayBudgetUsage(config, pruned.usage);
        const startedAt = status?.started_at ?? new Date().toISOString();

        // Record this process as the worker before the trace is rewritten.
        const planningStatus = {
            run_id: runId,
            run_dir: runDir,
            state: "running",
            phase: "resume_planning",
            usage: cloneUsage(tracker.usage),
            started_at: startedAt,
            worker_pid: process.pid,
            updated_at: new Date().toISOString()
        };
        await new TraceStore(runDir).writeStatus(planningStatus);
        await persistPrunedTrace(runDir, pruned);

        const resumeState = {
            runId,
            startedAt,
            completed: plan.completed,
            populationByGeneration,
            comparisonsByGeneration,
            scoresByGeneration,
            tracker,
            nextPhase: plan.nextPhase
        };
        // Awaited inside the try so the lock is held until the run settles.
        return await runDeepThonk(config, driver, {}, resumeState);
    }
    finally {
        await releaseRunLock(runDir);
    }
}
/**
 * Append a "phase.completed" event to the trace.
 *
 * The payload is validated with `phaseCompletedEventSchema` before it is
 * written, so a malformed phase/generation fails here rather than in the log.
 *
 * @param {object} trace - Trace store exposing an async `event(payload)` method.
 * @param {string} phase - Phase name recorded in the event.
 * @param {number|string} [generation] - Generation the phase belongs to, if any.
 * @returns {Promise<void>}
 */
async function markPhaseCompleted(trace, phase, generation) {
    const payload = {
        type: "phase.completed",
        phase,
        generation,
        at: new Date().toISOString()
    };
    const validated = phaseCompletedEventSchema.parse(payload);
    await trace.event(validated);
}
/**
 * Tell whether the resume state marks `phase` (for `generation`) as done.
 *
 * @param {object|undefined} resumeState - Resume state; absent on a fresh run.
 * @param {string} phase - Phase name.
 * @param {number|string} [generation] - Generation for per-generation phases.
 * @returns {boolean} Always `false` when there is no resume state.
 */
function isResumePhaseCompleted(resumeState, phase, generation) {
    if (resumeState === undefined || resumeState === null) {
        return false;
    }
    const key = resumePhaseKey(phase, generation);
    return Boolean(resumeState.completed.has(key));
}
/**
 * Fetch the stored population snapshot for `generation` from the resume state.
 *
 * @param {object|undefined} resumeState - Resume state carrying `populationByGeneration`.
 * @param {number|string} generation - Generation key to look up.
 * @returns {*} The stored population.
 * @throws {ConfigError} With code "resume.population_missing" when nothing is stored.
 */
function resumePopulation(resumeState, generation) {
    const snapshot = resumeState?.populationByGeneration.get(generation);
    if (snapshot) {
        return snapshot;
    }
    throw new ConfigError(`Resume state is missing population generation ${generation}.`, {
        code: "resume.population_missing",
        retryable: false,
        fix: "Inspect the run directory for missing population snapshots."
    });
}
/**
 * Fetch the stored comparisons for `generation` from the resume state.
 *
 * @param {object|undefined} resumeState - Resume state carrying `comparisonsByGeneration`.
 * @param {number|string} generation - Generation key to look up.
 * @returns {*} The stored comparisons.
 * @throws {ConfigError} With code "resume.comparisons_missing" when nothing is stored.
 */
function resumeComparisons(resumeState, generation) {
    const recorded = resumeState?.comparisonsByGeneration.get(generation);
    if (recorded) {
        return recorded;
    }
    throw new ConfigError(`Resume state is missing comparisons for generation ${generation}.`, {
        code: "resume.comparisons_missing",
        retryable: false,
        fix: "Inspect the run directory for missing comparison trace rows."
    });
}
/**
 * Return the scores stored for `generation`, refitting them when absent.
 *
 * @param {object|undefined} resumeState - Resume state carrying `scoresByGeneration`.
 * @param {number|string} generation - Generation key.
 * @param {Array} population - Population passed to the fallback fit.
 * @param {Array} comparisons - Comparisons passed to the fallback fit.
 * @param {number} lambda - Third argument passed to `fitBradleyTerry`.
 * @returns {*} Stored scores, or the result of `fitBradleyTerry`.
 */
function resumeScores(resumeState, generation, population, comparisons, lambda) {
    const stored = resumeState?.scoresByGeneration.get(generation);
    if (stored !== undefined && stored !== null) {
        return stored;
    }
    return fitBradleyTerry(population, comparisons, lambda, generation);
}
/**
 * Build an RNG whose seed depends only on the run seed, the phase name and
 * the generation, so the same phase always gets the same random stream.
 *
 * @param {number} seed - Run seed.
 * @param {string} phase - Phase name.
 * @param {number|string} [generation] - Generation; omitted becomes an empty segment.
 * @returns {*} Whatever `createRng` returns.
 */
function phaseRng(seed, phase, generation) {
    const seedMaterial = `${seed}:${phase}:${generation ?? ""}`;
    const derivedSeed = hashSeed(seedMaterial);
    return createRng(derivedSeed);
}
/**
 * Hash a string to an unsigned 32-bit integer using 32-bit FNV-1a over the
 * string's UTF-16 code units.
 *
 * @param {string} value - Text to hash.
 * @returns {number} Integer in the range [0, 2^32).
 */
function hashSeed(value) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let state = FNV_OFFSET_BASIS;
    let index = 0;
    while (index < value.length) {
        // XOR the code unit in, then multiply with 32-bit wrap-around.
        state = Math.imul(state ^ value.charCodeAt(index), FNV_PRIME);
        index += 1;
    }
    // Reinterpret the signed 32-bit result as unsigned.
    return state >>> 0;
}
/**
 * Build the key under which a completed phase is tracked.
 *
 * Only "generation_judging" and "generation_mutation" are keyed per
 * generation (`phase:generation`); every other phase uses its bare name.
 *
 * @param {string} phase - Phase name.
 * @param {number|string} [generation] - Generation, if any.
 * @returns {string} The lookup key.
 */
function resumePhaseKey(phase, generation) {
    const perGeneration = phase === "generation_judging" || phase === "generation_mutation";
    return perGeneration && generation !== undefined ? `${phase}:${generation}` : phase;
}
/**
 * Work out a provider label for a driver.
 *
 * Lookup order: `driver.provider`, `driver.providerName`,
 * `driver.config.provider`, then the wrapped `driver.baseDriver`
 * (recursively), and finally a fixed mapping from the constructor name.
 *
 * @param {object} driver - Driver instance to inspect.
 * @returns {string|undefined} The label, or `undefined` when none applies.
 */
function providerLabel(driver) {
    const readers = [
        (candidate) => candidate.provider,
        (candidate) => candidate.providerName,
        (candidate) => candidate.config?.provider
    ];
    for (const read of readers) {
        const label = read(driver);
        if (typeof label === "string") {
            return label;
        }
    }
    if (driver.baseDriver) {
        return providerLabel(driver.baseDriver);
    }
    // A Map avoids accidental hits on Object.prototype keys.
    const labelsByConstructor = new Map([
        ["FakeDriver", "fake"],
        ["SamplingDriver", "sampling"],
        ["OpenAiCompatibleDriver", "openai-compatible"]
    ]);
    return labelsByConstructor.get(driver.constructor?.name);
}
/**
 * Require the stored config to carry a full `output` block, i.e. boolean
 * `includeRawModelOutputs` and `includePrompts` flags.
 *
 * @param {object} rawConfig - Stored config as read from disk, before schema parsing.
 * @returns {void}
 */
function assertStoredOutputConfigComplete(rawConfig) {
    const { output } = rawConfig;
    const complete = isRecord(output)
        && typeof output.includeRawModelOutputs === "boolean"
        && typeof output.includePrompts === "boolean";
    if (!complete) {
        resumeConfigError("Stored config.json is missing the complete output block required for deterministic resume.", "resume.config_incomplete", "Restore output.includeRawModelOutputs and output.includePrompts, or start a fresh run.");
    }
}
/**
 * Check that the runtime provider setup matches the stored config.
 *
 * First the top-level provider is compared (`options.provider`, then the
 * driver's label, then the config's own value). Then, for every role that
 * has a stored route, the runtime route must exist and agree on provider,
 * base URL (trailing slashes ignored) and model — each only when the stored
 * route defines that field.
 *
 * @param {object} rawConfig - Stored config before schema parsing.
 * @param {object} config - Parsed run config.
 * @param {object} driver - Runtime driver.
 * @param {{ provider?: string }} options - Resume options.
 * @returns {void}
 */
function assertResumeProviderMatches(rawConfig, config, driver, options) {
    const runtimeProvider = options.provider ?? providerLabel(driver) ?? config.provider;
    if (runtimeProvider !== config.provider) {
        providerMismatch(`Cannot resume provider ${config.provider} with runtime provider ${runtimeProvider}.`);
    }
    const storedRoutes = expectedProviderRoutes(rawConfig, config);
    const liveRoutes = runtimeProviderRoutes(driver);
    providerRoles.forEach((role) => {
        const wanted = storedRoutes[role];
        if (!wanted) {
            return;
        }
        const live = liveRoutes[role];
        if (!live) {
            providerMismatch(`Cannot resume ${role} route ${routeLabel(wanted)} without a matching runtime route.`);
        }
        const providerDiffers = wanted.provider !== undefined && live.provider !== wanted.provider;
        if (providerDiffers) {
            providerMismatch(`Cannot resume ${role} route provider ${wanted.provider} with runtime provider ${live.provider ?? "missing"}.`);
        }
        const baseUrlDiffers = wanted.baseUrl !== undefined
            && normalizeBaseUrl(live.baseUrl) !== normalizeBaseUrl(wanted.baseUrl);
        if (baseUrlDiffers) {
            providerMismatch(`Cannot resume ${role} route baseUrl ${wanted.baseUrl} with runtime baseUrl ${live.baseUrl ?? "missing"}.`);
        }
        const modelDiffers = wanted.model !== undefined && live.model !== wanted.model;
        if (modelDiffers) {
            providerMismatch(`Cannot resume ${role} route model ${wanted.model} with runtime model ${live.model ?? "missing"}.`);
        }
    });
}
// Roles that may have their own provider route.
const providerRoles = ["generator", "mutator", "judge", "finalizer"];
/**
 * Derive the per-role routes the stored config expects.
 *
 * Only roles whose `providers[role]` entry is a plain object produce a
 * route. Missing fields fall back to the run-level provider and the
 * role's model; the base URL is read from `baseUrl` or `base_url`.
 *
 * @param {object} rawConfig - Stored config before schema parsing.
 * @param {object} config - Parsed run config supplying the fallbacks.
 * @returns {Object<string, {provider: (string|undefined), baseUrl: (string|undefined), model: (string|undefined)}>}
 *   Routes keyed by role; empty when `providers` is not a plain object.
 */
function expectedProviderRoutes(rawConfig, config) {
    const { providers } = rawConfig;
    if (!isRecord(providers)) {
        return {};
    }
    const entries = providerRoles
        .filter((role) => isRecord(providers[role]))
        .map((role) => {
            const stored = providers[role];
            return [role, {
                provider: stringValue(stored.provider) ?? config.provider,
                baseUrl: stringValue(stored.baseUrl) ?? stringValue(stored.base_url),
                model: stringValue(stored.model) ?? modelForRole(config, role)
            }];
        });
    return Object.fromEntries(entries);
}
/**
 * Describe the per-role routes exposed by the runtime driver.
 *
 * Reads `driver.routes`; for each role with a plain-object entry, reports
 * the provider label and base URL of the route's `driver` (when present)
 * and the route's `model`.
 *
 * @param {object} driver - Runtime driver, possibly carrying a `routes` table.
 * @returns {Object<string, {provider: (string|undefined), baseUrl: (string|undefined), model: (string|undefined)}>}
 *   Routes keyed by role; empty when `driver.routes` is not a plain object.
 */
function runtimeProviderRoutes(driver) {
    const table = driver.routes;
    if (!isRecord(table)) {
        return {};
    }
    return providerRoles.reduce((collected, role) => {
        const entry = table[role];
        if (isRecord(entry)) {
            const inner = entry.driver;
            collected[role] = {
                provider: inner ? providerLabel(inner) : undefined,
                baseUrl: inner ? providerBaseUrl(inner) : undefined,
                model: stringValue(entry.model)
            };
        }
        return collected;
    }, {});
}
/**
 * Find the base URL a driver talks to.
 *
 * Lookup order: `driver.baseUrl`, `driver.config.baseUrl`, then the wrapped
 * `driver.baseDriver` (recursively).
 *
 * @param {object} driver - Driver instance to inspect.
 * @returns {string|undefined} The base URL, or `undefined` when none is found.
 */
function providerBaseUrl(driver) {
    const own = driver.baseUrl;
    if (typeof own === "string") {
        return own;
    }
    const configured = driver.config?.baseUrl;
    if (typeof configured === "string") {
        return configured;
    }
    return driver.baseDriver ? providerBaseUrl(driver.baseDriver) : undefined;
}
/**
 * Pick the model configured for a role.
 *
 * @param {object} config - Run config with `generatorModel`, `mutatorModel`,
 *   `judgeModel` and `finalizerModel`.
 * @param {string} role - Role name.
 * @returns {*} The role's model; any unrecognised role gets `finalizerModel`.
 */
function modelForRole(config, role) {
    switch (role) {
        case "generator":
            return config.generatorModel;
        case "mutator":
            return config.mutatorModel;
        case "judge":
            return config.judgeModel;
        default:
            return config.finalizerModel;
    }
}
/**
 * Render a route as "provider/baseUrl/model", skipping empty parts.
 *
 * @param {{provider?: string, baseUrl?: string, model?: string}} route
 * @returns {string} The joined label, or "provider route" when it would be empty.
 */
function routeLabel(route) {
    const parts = [];
    for (const piece of [route.provider, route.baseUrl, route.model]) {
        if (piece) {
            parts.push(piece);
        }
    }
    const label = parts.join("/");
    return label || "provider route";
}
/**
 * Report a provider mismatch through `resumeConfigError`, using the shared
 * "resume.provider_mismatch" code and fix hint.
 *
 * @param {string} message - Description of the mismatch.
 * @returns {void}
 */
function providerMismatch(message) {
    const code = "resume.provider_mismatch";
    const fix = "Use the same provider configuration that created config.json.";
    resumeConfigError(message, code, fix);
}
/**
 * Strip trailing slashes from a base URL so two spellings compare equal.
 *
 * @param {string|undefined|null} value - Base URL, if any.
 * @returns {string|undefined} The trimmed URL; `undefined` for a missing value.
 */
function normalizeBaseUrl(value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    return value.replace(/\/+$/, "");
}
/**
 * Pass a value through only when it is a non-empty string.
 *
 * @param {*} value - Anything.
 * @returns {string|undefined} `value` itself, or `undefined` otherwise.
 */
function stringValue(value) {
    if (typeof value !== "string" || value.length === 0) {
        return undefined;
    }
    return value;
}
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {*} value - Anything.
 * @returns {boolean}
 */
function isRecord(value) {
    return value !== null && typeof value === "object" && !Array.isArray(value);
}
/**
 * Read the stored run config, reporting a resume error when it is absent.
 *
 * @param {string} runDir - Run directory.
 * @returns {Promise<object>} The parsed config artifact.
 */
async function readRequiredConfig(runDir) {
    const stored = await readOptionalJson(runDir, runArtifactFiles.config);
    if (stored) {
        return stored;
    }
    resumeConfigError("Run directory is missing config.json; cannot replay safely.", "resume.config_missing", "Resume only from a DeepThonk run directory with config.json.");
    return stored;
}
/**
 * Read and parse a JSON file inside `runDir`, treating a missing file as
 * "no value" rather than an error.
 *
 * @param {string} runDir - Directory containing the file.
 * @param {string} fileName - File name relative to `runDir`.
 * @returns {Promise<*>} The parsed JSON, or `undefined` when the file does not exist.
 * @throws Any other read failure, and the parse error for malformed JSON.
 */
async function readOptionalJson(runDir, fileName) {
    const target = join(runDir, fileName);
    let text;
    try {
        text = await readFile(target, "utf8");
    }
    catch (error) {
        // Only "file not found" is an expected outcome.
        if (error.code === "ENOENT") {
            return undefined;
        }
        throw error;
    }
    return JSON.parse(text);
}
/**
 * Tell whether the worker process recorded for a run still exists.
 *
 * Signal 0 performs the existence/permission check without delivering a
 * signal. Two outcomes count as alive:
 *  - the probe succeeds, or
 *  - it fails with EPERM: the process exists but belongs to another user.
 *    Treating that as dead would allow a resume while the original worker
 *    is still writing to the run directory.
 * Any other failure (e.g. ESRCH, or an invalid pid value) counts as dead.
 *
 * @param {number|undefined|null} pid - Recorded worker pid.
 * @returns {boolean} `true` when the worker may still be running. A missing
 *   pid is conservatively reported as alive.
 */
function isLiveWorker(pid) {
    if (!pid) {
        return true;
    }
    try {
        process.kill(pid, 0);
        return true;
    }
    catch (error) {
        return error?.code === "EPERM";
    }
}
198
522
  export function rankPopulation(population, scores) {
199
523
  const order = new Map(scores.map((score) => [score.candidateId, score.rank]));
200
524
  return [...population].sort((left, right) => {
@@ -356,6 +680,13 @@ async function judgePairs(args) {
356
680
  jsonParseFailures += 1;
357
681
  }
358
682
  }
683
+ if (invalidJson || !parsed) {
684
+ throw new ConfigError(`Judge produced ${jsonParseFailures} consecutive invalid-JSON responses (${args.config.retry.invalidJsonRetries + 1} attempts) for comparison ${job.id}. Refusing to synthesize a tie and pollute the ranking.`, {
685
+ code: "judge.persistent_invalid_json",
686
+ retryable: false,
687
+ fix: "The judge model is producing unparseable output. Inspect the raw response (set output.includeRawModelOutputs: true), switch judge models, or raise retry.invalidJsonRetries if the failures are transient."
688
+ });
689
+ }
359
690
  const comparison = {
360
691
  id: job.id,
361
692
  runId: args.runId,
@@ -575,4 +906,28 @@ function serializeRunError(error) {
575
906
  retryable: false
576
907
  };
577
908
  }
/**
 * Read the `version` field of the package.json one directory above this
 * module.
 *
 * Best effort: any read or parse failure, and a missing version, yields
 * "0.0.0".
 *
 * @returns {Promise<string>} The package version, or "0.0.0".
 */
async function currentPackageVersion() {
    const FALLBACK_VERSION = "0.0.0";
    try {
        const manifestUrl = new URL("../package.json", import.meta.url);
        const manifestText = await readFile(manifestUrl, "utf8");
        const { version } = JSON.parse(manifestText);
        return version ?? FALLBACK_VERSION;
    }
    catch {
        return FALLBACK_VERSION;
    }
}
/**
 * Compare two version strings on their leading "major.minor" only.
 *
 * @param {string|undefined} left - First version; a missing value never matches.
 * @param {string} right - Second version.
 * @returns {boolean} `true` when both start with `<digits>.<digits>` and those
 *   two numbers are equal; `false` otherwise.
 */
function sameMajorMinor(left, right) {
    if (!left) {
        return false;
    }
    const majorMinorOf = (version) => {
        const found = version.match(/^(\d+)\.(\d+)/);
        return found ? [Number(found[1]), Number(found[2])] : undefined;
    };
    const leftParts = majorMinorOf(left);
    const rightParts = majorMinorOf(right);
    if (leftParts === undefined || rightParts === undefined) {
        return false;
    }
    const [leftMajor, leftMinor] = leftParts;
    const [rightMajor, rightMinor] = rightParts;
    return leftMajor === rightMajor && leftMinor === rightMinor;
}
578
933
  //# sourceMappingURL=runner.js.map