@deepthonk/core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/budgetTracker.js +14 -0
- package/dist/budgetTracker.js.map +1 -1
- package/dist/lifecycle.d.ts +1 -0
- package/dist/lifecycle.js +15 -1
- package/dist/lifecycle.js.map +1 -1
- package/dist/resume.d.ts +51 -0
- package/dist/resume.js +355 -0
- package/dist/resume.js.map +1 -0
- package/dist/runner.d.ts +38 -2
- package/dist/runner.js +406 -51
- package/dist/runner.js.map +1 -1
- package/dist/schemas.d.ts +25 -0
- package/dist/schemas.js +15 -0
- package/dist/schemas.js.map +1 -1
- package/dist/services.js +13 -6
- package/dist/services.js.map +1 -1
- package/package.json +1 -1
package/dist/runner.js
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
import pLimit from "p-limit";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
2
4
|
import { z } from "zod";
|
|
3
5
|
import { fitBradleyTerry } from "./bradleyTerry.js";
|
|
4
6
|
import { BudgetTracker } from "./budgetTracker.js";
|
|
5
7
|
import { planBudget, validateProfile } from "./budget.js";
|
|
6
8
|
import { BudgetExceededError, CancelledError, ConfigError, DeepThonkError } from "./errors.js";
|
|
9
|
+
import { runArtifactFiles } from "./artifacts.js";
|
|
7
10
|
import { parseJsonObject } from "./json.js";
|
|
8
|
-
import { emptyUsage } from "./lifecycle.js";
|
|
11
|
+
import { claimRunLock, emptyUsage, releaseRunLock } from "./lifecycle.js";
|
|
9
12
|
import { makeKRegularPairs } from "./pairScheduler.js";
|
|
10
13
|
import { comparePrompt, finalizePrompt, generatePrompt, mutatePrompt } from "./prompts.js";
|
|
11
14
|
import { createRng } from "./rng.js";
|
|
12
|
-
import {
|
|
15
|
+
import { assertNoPruneInProgress, buildPopulationMap, buildResumePlan, groupComparisons, persistPrunedTrace, pruneTraceToPlan, readResumeTrace, reconstructScores, replayBudgetUsage, resolveResumeRunId, resumeConfigError, toResumePlanStatus } from "./resume.js";
|
|
16
|
+
import { runConfigSchema, builtInProfiles, phaseCompletedEventSchema } from "./schemas.js";
|
|
13
17
|
import { aggregateCritiques } from "./critique.js";
|
|
14
18
|
import { TraceStore } from "./traceStore.js";
|
|
15
19
|
const compareOutputSchema = z.object({
|
|
@@ -21,16 +25,27 @@ const compareOutputSchema = z.object({
|
|
|
21
25
|
feedback_b: z.string().optional(),
|
|
22
26
|
selection_reason: z.string().default("")
|
|
23
27
|
});
|
|
24
|
-
export async function runDeepThonk(configInput, driver, control = {}) {
|
|
28
|
+
export async function runDeepThonk(configInput, driver, control = {}, resumeState) {
|
|
25
29
|
const config = runConfigSchema.parse(configInput);
|
|
26
30
|
validateProfile(config.profile);
|
|
27
31
|
enforceBudget(config);
|
|
28
|
-
const runId = `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
|
|
32
|
+
const runId = resumeState?.runId ?? `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
|
|
29
33
|
const trace = new TraceStore(config.runDir);
|
|
30
34
|
const rng = createRng(config.seed);
|
|
31
|
-
const tracker = new BudgetTracker(config);
|
|
32
|
-
const startedAt = new Date().toISOString();
|
|
33
|
-
|
|
35
|
+
const tracker = resumeState?.tracker ?? new BudgetTracker(config);
|
|
36
|
+
const startedAt = resumeState?.startedAt ?? new Date().toISOString();
|
|
37
|
+
if (!resumeState) {
|
|
38
|
+
await trace.init({ ...config, version: await currentPackageVersion() }, runId);
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
await trace.event({
|
|
42
|
+
type: "run.resumed",
|
|
43
|
+
run_id: runId,
|
|
44
|
+
resumed_at: new Date().toISOString(),
|
|
45
|
+
next_phase: resumeState.nextPhase.phase,
|
|
46
|
+
generation: resumeState.nextPhase.generation
|
|
47
|
+
});
|
|
48
|
+
}
|
|
34
49
|
let stopped = false;
|
|
35
50
|
const writeStatus = async (state, phase, extra = {}) => {
|
|
36
51
|
await trace.writeStatus({
|
|
@@ -51,7 +66,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
51
66
|
throw new CancelledError(`Run cancelled before ${phase}.`, {
|
|
52
67
|
code: "run.cancelled",
|
|
53
68
|
retryable: false,
|
|
54
|
-
fix: "
|
|
69
|
+
fix: "Resume the run with deepthonk resume --continue after the worker has stopped."
|
|
55
70
|
});
|
|
56
71
|
}
|
|
57
72
|
};
|
|
@@ -80,40 +95,61 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
80
95
|
}
|
|
81
96
|
};
|
|
82
97
|
try {
|
|
83
|
-
await writeStatus("running", "initialized");
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
98
|
+
await writeStatus("running", resumeState ? "resume_replay" : "initialized");
|
|
99
|
+
let population;
|
|
100
|
+
if (isResumePhaseCompleted(resumeState, "initial_generation")) {
|
|
101
|
+
population = resumePopulation(resumeState, 0);
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
await assertNotCancelled("initial population");
|
|
105
|
+
population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
|
|
106
|
+
await trace.writePopulation(0, population);
|
|
107
|
+
await assertBudget("initial population");
|
|
108
|
+
await markPhaseCompleted(trace, "initial_generation");
|
|
109
|
+
await writeStatus("running", "population_completed", { generation: 0 });
|
|
110
|
+
}
|
|
89
111
|
for (let gen = 1; gen <= config.profile.t; gen += 1) {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
112
|
+
if (isResumePhaseCompleted(resumeState, "generation_mutation", gen)) {
|
|
113
|
+
population = resumePopulation(resumeState, gen);
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
let comparisons;
|
|
117
|
+
let scores;
|
|
118
|
+
if (isResumePhaseCompleted(resumeState, "generation_judging", gen)) {
|
|
119
|
+
comparisons = resumeComparisons(resumeState, gen);
|
|
120
|
+
scores = resumeScores(resumeState, gen, population, comparisons, config.profile.lambda);
|
|
121
|
+
}
|
|
122
|
+
else {
|
|
123
|
+
await assertNotCancelled(`generation ${gen} comparisons`);
|
|
124
|
+
await writeStatus("running", "generation_comparisons", { generation: gen });
|
|
125
|
+
const judgingRng = phaseRng(config.seed, "generation_judging", gen);
|
|
126
|
+
const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, judgingRng);
|
|
127
|
+
comparisons = await judgePairs({
|
|
128
|
+
config,
|
|
129
|
+
driver,
|
|
130
|
+
trace,
|
|
131
|
+
rng: judgingRng,
|
|
132
|
+
runId,
|
|
133
|
+
generation: gen,
|
|
134
|
+
pairs,
|
|
135
|
+
population,
|
|
136
|
+
tracker,
|
|
137
|
+
guards
|
|
138
|
+
});
|
|
139
|
+
scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
|
|
140
|
+
await trace.writeScores(gen, scores);
|
|
141
|
+
await assertBudget(`generation ${gen} comparisons`);
|
|
142
|
+
await markPhaseCompleted(trace, "generation_judging", gen);
|
|
143
|
+
}
|
|
108
144
|
const ranked = rankPopulation(population, scores);
|
|
109
145
|
const elites = topQuartile(ranked);
|
|
110
146
|
const discarded = bottomQuartile(ranked);
|
|
111
147
|
const survivors = ranked.filter((candidate) => !discarded.has(candidate.id));
|
|
112
148
|
const mutationParents = survivors.slice(0, config.profile.n - elites.length);
|
|
113
149
|
const critiquesByCandidate = aggregateCritiques(population, comparisons);
|
|
114
|
-
await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
|
|
115
150
|
await assertNotCancelled(`generation ${gen} mutation`);
|
|
116
151
|
await writeStatus("running", "generation_mutation", { generation: gen });
|
|
152
|
+
await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
|
|
117
153
|
const mutants = await mutateSurvivors({
|
|
118
154
|
config,
|
|
119
155
|
driver,
|
|
@@ -132,26 +168,37 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
132
168
|
population = keepPopulationSize([...eliteCopies, ...mutants], config.profile.n);
|
|
133
169
|
await trace.writePopulation(gen, population);
|
|
134
170
|
await assertBudget(`generation ${gen} mutation`);
|
|
171
|
+
await markPhaseCompleted(trace, "generation_mutation", gen);
|
|
135
172
|
await writeStatus("running", "generation_completed", { generation: gen });
|
|
136
173
|
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
config
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
174
|
+
let finalComparisons;
|
|
175
|
+
let finalScores;
|
|
176
|
+
if (isResumePhaseCompleted(resumeState, "final_judging")) {
|
|
177
|
+
finalComparisons = resumeComparisons(resumeState, "final");
|
|
178
|
+
finalScores = resumeScores(resumeState, "final", population, finalComparisons, config.profile.lambda);
|
|
179
|
+
}
|
|
180
|
+
else {
|
|
181
|
+
await assertNotCancelled("final ranking");
|
|
182
|
+
await writeStatus("running", "final_ranking", { generation: "final" });
|
|
183
|
+
const finalRng = phaseRng(config.seed, "final_judging");
|
|
184
|
+
const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, finalRng);
|
|
185
|
+
finalComparisons = await judgePairs({
|
|
186
|
+
config,
|
|
187
|
+
driver,
|
|
188
|
+
trace,
|
|
189
|
+
rng: finalRng,
|
|
190
|
+
runId,
|
|
191
|
+
generation: "final",
|
|
192
|
+
pairs: finalPairs,
|
|
193
|
+
population,
|
|
194
|
+
tracker,
|
|
195
|
+
guards
|
|
196
|
+
});
|
|
197
|
+
finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
|
|
198
|
+
await trace.writeScores("final", finalScores);
|
|
199
|
+
await assertBudget("final ranking");
|
|
200
|
+
await markPhaseCompleted(trace, "final_judging");
|
|
201
|
+
}
|
|
155
202
|
const winner = rankPopulation(population, finalScores)[0];
|
|
156
203
|
if (!winner)
|
|
157
204
|
throw new ConfigError("Run produced no winner.");
|
|
@@ -176,7 +223,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
176
223
|
completed_at: completedAt
|
|
177
224
|
};
|
|
178
225
|
await trace.writeSummary(summary, finalAnswer, winner.content);
|
|
226
|
+
await markPhaseCompleted(trace, "finalizing");
|
|
179
227
|
await trace.event({ type: "run.completed", winner_id: winner.id, completed_at: completedAt });
|
|
228
|
+
if (resumeState)
|
|
229
|
+
await trace.event({ type: "run.resumed_completed", winner_id: winner.id, completed_at: completedAt });
|
|
180
230
|
await writeStatus("completed", "summary", { generation: "final", completed_at: completedAt });
|
|
181
231
|
return {
|
|
182
232
|
runId,
|
|
@@ -195,6 +245,280 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
195
245
|
throw error;
|
|
196
246
|
}
|
|
197
247
|
}
|
|
248
|
+
/**
 * Resume an interrupted run from the artifacts stored in `runDir`.
 *
 * Order of operations:
 *  1. Refuse when summary.json already exists.
 *  2. Load config.json and validate it: package major.minor, complete output block,
 *     schema, profile, budget, and provider/route match against `driver`.
 *  3. Claim the run lock (released in `finally`, including on the dry-run path).
 *  4. Refuse when status.json says running/pending and the recorded worker looks alive.
 *  5. Build a resume plan from the trace; with `options.dryRun` return the plan status.
 *  6. Otherwise prune the trace to the plan, rebuild populations/comparisons/scores/budget
 *     usage, persist the pruned trace and continue through `runDeepThonk`.
 *
 * The guard helpers (`resumeConfigError`) are assumed to throw; the code after each guard
 * relies on that — confirm against resume.js.
 *
 * @param {string} runDir - Directory holding the run artifacts.
 * @param {object} driver - Model driver passed through to `runDeepThonk`.
 * @param {{ dryRun?: boolean, provider?: string }} [options] - `dryRun` returns the plan
 *   without touching the trace; `provider` overrides the provider label derived from `driver`.
 * @returns {Promise<object>} The plan status (dry run) or the `runDeepThonk` result.
 * @throws {ConfigError} With code "run.directory_locked" when the lock cannot be claimed.
 */
export async function resumeDeepThonk(runDir, driver, options = {}) {
    const existingSummary = await readOptionalJson(runDir, runArtifactFiles.summary);
    if (existingSummary) {
        resumeConfigError("Run already has summary.json; nothing to resume.", "resume.already_complete", "Use deepthonk inspect/result to read the completed run.");
    }

    const rawConfig = await readRequiredConfig(runDir);
    const currentVersion = await currentPackageVersion();
    const storedVersion = typeof rawConfig.version === "string" ? rawConfig.version : undefined;
    if (!sameMajorMinor(storedVersion, currentVersion)) {
        resumeConfigError(`Cannot resume run from version ${String(rawConfig.version ?? "missing")}; current package version is ${currentVersion}. Resume requires matching major.minor.`, "resume.version_mismatch", "Start a fresh run with the current DeepThonk version, or resume with a package version whose major.minor matches the trace.");
    }
    assertStoredOutputConfigComplete(rawConfig);

    // The stored runDir is overridden so a moved or copied run directory resumes in place.
    const parsedConfig = runConfigSchema.parse(rawConfig);
    const config = { ...parsedConfig, runDir };
    validateProfile(config.profile);
    enforceBudget(config);
    assertResumeProviderMatches(rawConfig, config, driver, options);
    // NOTE(review): called without await; if this helper is async its rejection would be
    // unhandled — confirm it is synchronous.
    assertNoPruneInProgress(runDir);

    // The lock is claimed before status/trace are read, so the run id is resolved from config alone here.
    const lockRunId = resolveResumeRunId(config, undefined, []);
    const claimed = await claimRunLock(runDir, lockRunId);
    if (!claimed) {
        throw new ConfigError(`Run directory is already claimed: ${runDir}`, {
            code: "run.directory_locked",
            retryable: false,
            fix: "Wait for the active run to finish, or inspect the existing run.lock file."
        });
    }

    try {
        const status = await readOptionalJson(runDir, runArtifactFiles.status);
        const looksActive = status?.state === "running" || status?.state === "pending";
        if (looksActive && isLiveWorker(status.worker_pid)) {
            resumeConfigError(`Run is still in flight at phase ${status.phase}.`, "resume.in_flight", "Wait for the worker to finish, or cancel/stop it before resuming.");
        }

        const trace = await readResumeTrace(runDir);
        const runId = resolveResumeRunId(config, status, trace.events);
        const plan = buildResumePlan(config, trace.events);
        if (plan.nextPhase.phase === "summary") {
            resumeConfigError("Trace says finalizing completed, but summary.json is missing.", "resume.inconsistent_trace", "Inspect the run directory and restore summary.json, or start a fresh run.");
        }

        const planStatus = toResumePlanStatus(runDir, runId, plan);
        if (options.dryRun)
            return planStatus;

        // Rebuild in-memory state from the part of the trace the plan keeps.
        const pruned = pruneTraceToPlan(trace, plan);
        const populationByGeneration = buildPopulationMap(config, pruned.populations, pruned.candidates, plan);
        const comparisonsByGeneration = groupComparisons(pruned.comparisons);
        const scoresByGeneration = reconstructScores(config, populationByGeneration, comparisonsByGeneration, pruned.scores, plan);
        const tracker = replayBudgetUsage(config, pruned.usage);
        const startedAt = status?.started_at ?? new Date().toISOString();

        // Status is rewritten with this process's pid before the pruned trace is persisted.
        await new TraceStore(runDir).writeStatus({
            run_id: runId,
            run_dir: runDir,
            state: "running",
            phase: "resume_planning",
            usage: cloneUsage(tracker.usage),
            started_at: startedAt,
            worker_pid: process.pid,
            updated_at: new Date().toISOString()
        });
        await persistPrunedTrace(runDir, pruned);

        return await runDeepThonk(config, driver, {}, {
            runId,
            startedAt,
            completed: plan.completed,
            populationByGeneration,
            comparisonsByGeneration,
            scoresByGeneration,
            tracker,
            nextPhase: plan.nextPhase
        });
    }
    finally {
        await releaseRunLock(runDir);
    }
}
|
|
320
|
+
/**
 * Append a validated "phase.completed" event to the trace.
 *
 * @param {object} trace - Trace store exposing an async `event(payload)` method.
 * @param {string} phase - Name of the phase that just finished.
 * @param {number|string} [generation] - Generation the phase belongs to, when applicable.
 * @returns {Promise<void>}
 */
async function markPhaseCompleted(trace, phase, generation) {
    // Parsing through the schema means an invalid phase/generation fails before anything is written.
    const event = phaseCompletedEventSchema.parse({
        type: "phase.completed",
        phase,
        generation,
        at: new Date().toISOString()
    });
    await trace.event(event);
}
|
|
329
|
+
/**
 * Tell whether the resume state records `phase` (optionally for `generation`) as completed.
 *
 * @param {{ completed: { has(key: string): boolean } } | undefined} resumeState - Absent on a fresh run.
 * @param {string} phase
 * @param {number|string} [generation]
 * @returns {boolean} Always false when there is no resume state.
 */
function isResumePhaseCompleted(resumeState, phase, generation) {
    return Boolean(resumeState?.completed.has(resumePhaseKey(phase, generation)));
}
|
|
332
|
+
/**
 * Fetch the stored population for `generation` from the resume state.
 *
 * @param {{ populationByGeneration: Map<number|string, unknown> } | undefined} resumeState
 * @param {number|string} generation
 * @returns {unknown} The stored population.
 * @throws {ConfigError} Code "resume.population_missing" when no population is stored.
 */
function resumePopulation(resumeState, generation) {
    const population = resumeState?.populationByGeneration.get(generation);
    if (!population) {
        throw new ConfigError(`Resume state is missing population generation ${generation}.`, {
            code: "resume.population_missing",
            retryable: false,
            fix: "Inspect the run directory for missing population snapshots."
        });
    }
    return population;
}
|
|
343
|
+
/**
 * Fetch the stored comparisons for `generation` from the resume state.
 *
 * @param {{ comparisonsByGeneration: Map<number|string, unknown> } | undefined} resumeState
 * @param {number|string} generation
 * @returns {unknown} The stored comparisons.
 * @throws {ConfigError} Code "resume.comparisons_missing" when none are stored.
 */
function resumeComparisons(resumeState, generation) {
    const comparisons = resumeState?.comparisonsByGeneration.get(generation);
    if (!comparisons) {
        throw new ConfigError(`Resume state is missing comparisons for generation ${generation}.`, {
            code: "resume.comparisons_missing",
            retryable: false,
            fix: "Inspect the run directory for missing comparison trace rows."
        });
    }
    return comparisons;
}
|
|
354
|
+
/**
 * Return the stored scores for `generation`, refitting them from the comparisons when the
 * resume state has none (null/undefined).
 *
 * @param {{ scoresByGeneration: Map<number|string, unknown> } | undefined} resumeState
 * @param {number|string} generation
 * @param {unknown} population - Passed to `fitBradleyTerry` on the fallback path.
 * @param {unknown} comparisons - Passed to `fitBradleyTerry` on the fallback path.
 * @param {number} lambda - Passed to `fitBradleyTerry` on the fallback path.
 * @returns {unknown} Stored or freshly fitted scores.
 */
function resumeScores(resumeState, generation, population, comparisons, lambda) {
    const stored = resumeState?.scoresByGeneration.get(generation);
    return stored ?? fitBradleyTerry(population, comparisons, lambda, generation);
}
|
|
357
|
+
/**
 * Build an RNG whose seed depends only on the run seed, the phase name and the generation,
 * so a phase draws the same sequence whether it runs fresh or after a resume.
 *
 * @param {number} seed - Run seed from the config.
 * @param {string} phase - Phase name mixed into the seed.
 * @param {number|string} [generation] - Mixed in as an empty string when omitted.
 * @returns {unknown} Whatever `createRng` returns for the derived seed.
 */
function phaseRng(seed, phase, generation) {
    const seedText = `${seed}:${phase}:${generation ?? ""}`;
    return createRng(hashSeed(seedText));
}
|
|
360
|
+
/**
 * Hash a string to an unsigned 32-bit integer with FNV-1a (XOR, then multiply), applied to
 * UTF-16 code units rather than bytes.
 *
 * @param {string} value - Text to hash.
 * @returns {number} Integer in [0, 2^32).
 */
function hashSeed(value) {
    let hash = 2166136261; // FNV-1a 32-bit offset basis
    for (let i = 0; i < value.length; i += 1) {
        hash ^= value.charCodeAt(i);
        // Math.imul keeps the multiply in 32-bit integer arithmetic (16777619 is the FNV prime).
        hash = Math.imul(hash, 16777619);
    }
    // The bitwise ops above leave a signed int32; >>> 0 reinterprets it as unsigned.
    return hash >>> 0;
}
|
|
368
|
+
/**
 * Build the key under which a completed phase is looked up in the resume state.
 *
 * Only the per-generation phases ("generation_judging", "generation_mutation") carry the
 * generation in their key; every other phase is keyed by its name alone.
 *
 * @param {string} phase
 * @param {number|string} [generation]
 * @returns {string} `phase:generation` for per-generation phases, otherwise `phase`.
 */
function resumePhaseKey(phase, generation) {
    const isPerGeneration = phase === "generation_judging" || phase === "generation_mutation";
    if (isPerGeneration && generation !== undefined)
        return `${phase}:${generation}`;
    return phase;
}
|
|
373
|
+
/**
 * Derive a provider label from a driver object.
 *
 * At each driver the lookup order is: `provider`, `providerName`, `config.provider` (strings
 * only). If none is set and the driver wraps a `baseDriver`, the wrapped driver decides the
 * result. Otherwise the constructor name is mapped for the three known driver classes.
 *
 * A null/undefined driver and a cyclic `baseDriver` chain both yield `undefined` instead of
 * throwing or recursing without end.
 *
 * @param {object} driver
 * @returns {string|undefined} The provider label, or `undefined` when it cannot be determined.
 */
function providerLabel(driver) {
    const visited = new Set();
    let current = driver;
    while (current !== null && current !== undefined && !visited.has(current)) {
        visited.add(current);
        if (typeof current.provider === "string")
            return current.provider;
        if (typeof current.providerName === "string")
            return current.providerName;
        if (typeof current.config?.provider === "string")
            return current.config.provider;
        if (!current.baseDriver) {
            switch (current.constructor?.name) {
                case "FakeDriver":
                    return "fake";
                case "SamplingDriver":
                    return "sampling";
                case "OpenAiCompatibleDriver":
                    return "openai-compatible";
                default:
                    return undefined;
            }
        }
        // A wrapper defers entirely to the driver it wraps.
        current = current.baseDriver;
    }
    return undefined;
}
|
|
392
|
+
/**
 * Reject a stored config whose `output` block is not a record with boolean
 * `includeRawModelOutputs` and `includePrompts` fields.
 *
 * Failure is reported through `resumeConfigError` with code "resume.config_incomplete"
 * (assumed to throw — confirm against resume.js).
 *
 * @param {Record<string, unknown>} rawConfig - config.json exactly as read from disk.
 * @returns {void}
 */
function assertStoredOutputConfigComplete(rawConfig) {
    const output = rawConfig.output;
    const complete = isRecord(output)
        && typeof output.includeRawModelOutputs === "boolean"
        && typeof output.includePrompts === "boolean";
    if (!complete) {
        resumeConfigError("Stored config.json is missing the complete output block required for deterministic resume.", "resume.config_incomplete", "Restore output.includeRawModelOutputs and output.includePrompts, or start a fresh run.");
    }
}
|
|
398
|
+
/**
 * Verify that the runtime driver matches the provider configuration stored with the run.
 *
 * First the top-level provider is compared: `options.provider`, else the label derived from
 * `driver`, else `config.provider` itself. Then, for every role that has a route in the
 * stored config, the runtime route must exist and agree on provider, base URL (compared
 * without trailing slashes) and model. Fields the stored route leaves undefined are not checked.
 *
 * Mismatches are reported through `providerMismatch`, which is assumed to throw; the checks
 * after the missing-route guard rely on that.
 *
 * @param {Record<string, unknown>} rawConfig - config.json as read from disk.
 * @param {object} config - Parsed run config.
 * @param {object} driver - Runtime driver.
 * @param {{ provider?: string }} options
 * @returns {void}
 */
function assertResumeProviderMatches(rawConfig, config, driver, options) {
    const runtimeProvider = options.provider ?? providerLabel(driver) ?? config.provider;
    if (runtimeProvider !== config.provider) {
        providerMismatch(`Cannot resume provider ${config.provider} with runtime provider ${runtimeProvider}.`);
    }

    const expectedRoutes = expectedProviderRoutes(rawConfig, config);
    const actualRoutes = runtimeProviderRoutes(driver);
    for (const role of providerRoles) {
        const expected = expectedRoutes[role];
        if (!expected)
            continue;
        const actual = actualRoutes[role];
        if (!actual) {
            providerMismatch(`Cannot resume ${role} route ${routeLabel(expected)} without a matching runtime route.`);
        }
        if (expected.provider !== undefined && actual.provider !== expected.provider) {
            providerMismatch(`Cannot resume ${role} route provider ${expected.provider} with runtime provider ${actual.provider ?? "missing"}.`);
        }
        if (expected.baseUrl !== undefined && normalizeBaseUrl(actual.baseUrl) !== normalizeBaseUrl(expected.baseUrl)) {
            providerMismatch(`Cannot resume ${role} route baseUrl ${expected.baseUrl} with runtime baseUrl ${actual.baseUrl ?? "missing"}.`);
        }
        if (expected.model !== undefined && actual.model !== expected.model) {
            providerMismatch(`Cannot resume ${role} route model ${expected.model} with runtime model ${actual.model ?? "missing"}.`);
        }
    }
}
|
|
424
|
+
// Role names used as keys when building and comparing the stored and runtime provider route tables.
const providerRoles = ["generator", "mutator", "judge", "finalizer"];
|
|
425
|
+
/**
 * Build the per-role routes the stored config expects.
 *
 * Only roles whose entry under `rawConfig.providers` is a record produce a route. Missing
 * fields fall back to the run-level values: `config.provider` for the provider and the
 * role's model from `modelForRole`. The base URL is read from `baseUrl` or `base_url`.
 *
 * @param {Record<string, unknown>} rawConfig - config.json as read from disk.
 * @param {object} config - Parsed run config.
 * @returns {Record<string, { provider: string, baseUrl: string|undefined, model: string }>}
 *   Routes keyed by role; empty when `providers` is not a record.
 */
function expectedProviderRoutes(rawConfig, config) {
    const providers = rawConfig.providers;
    if (!isRecord(providers))
        return {};
    const routes = {};
    for (const role of providerRoles) {
        const rawRoute = providers[role];
        if (!isRecord(rawRoute))
            continue;
        routes[role] = {
            provider: stringValue(rawRoute.provider) ?? config.provider,
            baseUrl: stringValue(rawRoute.baseUrl) ?? stringValue(rawRoute.base_url),
            model: stringValue(rawRoute.model) ?? modelForRole(config, role)
        };
    }
    return routes;
}
|
|
442
|
+
/**
 * Describe the per-role routes the runtime driver actually exposes.
 *
 * Reads `driver.routes[role]` for each known role. A route's provider and base URL come
 * from its `driver` property (undefined when that property is falsy); the model is the
 * route's own non-empty `model` string.
 *
 * @param {object} driver - Runtime driver, optionally carrying a `routes` record.
 * @returns {Record<string, { provider: string|undefined, baseUrl: string|undefined, model: string|undefined }>}
 *   Routes keyed by role; empty when `driver.routes` is not a record.
 */
function runtimeProviderRoutes(driver) {
    const routeTable = driver.routes;
    if (!isRecord(routeTable))
        return {};
    const routes = {};
    for (const role of providerRoles) {
        const route = routeTable[role];
        if (!isRecord(route))
            continue;
        const routeDriver = route.driver;
        routes[role] = {
            provider: routeDriver ? providerLabel(routeDriver) : undefined,
            baseUrl: routeDriver ? providerBaseUrl(routeDriver) : undefined,
            model: stringValue(route.model)
        };
    }
    return routes;
}
|
|
460
|
+
/**
 * Find the base URL a driver talks to.
 *
 * At each driver the lookup order is `baseUrl`, then `config.baseUrl` (strings only); if
 * neither is set the wrapped `baseDriver`, when present, is consulted next.
 *
 * A null/undefined driver and a cyclic `baseDriver` chain both yield `undefined` instead of
 * throwing or recursing without end.
 *
 * @param {object} driver
 * @returns {string|undefined} The base URL, or `undefined` when none is found.
 */
function providerBaseUrl(driver) {
    const visited = new Set();
    let current = driver;
    while (current !== null && current !== undefined && !visited.has(current)) {
        visited.add(current);
        if (typeof current.baseUrl === "string")
            return current.baseUrl;
        if (typeof current.config?.baseUrl === "string")
            return current.config.baseUrl;
        if (!current.baseDriver)
            return undefined;
        current = current.baseDriver;
    }
    return undefined;
}
|
|
470
|
+
/**
 * Pick the run-level model configured for a provider role.
 *
 * @param {{ generatorModel: string, mutatorModel: string, judgeModel: string, finalizerModel: string }} config
 * @param {string} role - "generator", "mutator" or "judge"; any other value selects the finalizer model.
 * @returns {string} The model configured for that role.
 */
function modelForRole(config, role) {
    switch (role) {
        case "generator":
            return config.generatorModel;
        case "mutator":
            return config.mutatorModel;
        case "judge":
            return config.judgeModel;
        default:
            return config.finalizerModel;
    }
}
|
|
479
|
+
/**
 * Format a route for error messages as its non-empty provider, base URL and model joined
 * by "/".
 *
 * @param {{ provider?: string, baseUrl?: string, model?: string }} route
 * @returns {string} The joined label, or "provider route" when every part is empty.
 */
function routeLabel(route) {
    const parts = [route.provider, route.baseUrl, route.model].filter(Boolean);
    return parts.join("/") || "provider route";
}
|
|
482
|
+
/**
 * Report a provider/route mismatch through `resumeConfigError` with the shared
 * "resume.provider_mismatch" code and remediation text.
 *
 * @param {string} message - Description of the specific mismatch.
 * @returns {void} Callers rely on `resumeConfigError` throwing — confirm against resume.js.
 */
function providerMismatch(message) {
    resumeConfigError(message, "resume.provider_mismatch", "Use the same provider configuration that created config.json.");
}
|
|
485
|
+
/**
 * Strip trailing slashes from a base URL so two spellings of the same endpoint compare equal.
 *
 * @param {string|null|undefined} value
 * @returns {string|undefined} The trimmed URL, or `undefined` for a null/undefined input.
 */
function normalizeBaseUrl(value) {
    return value?.replace(/\/+$/, "");
}
|
|
488
|
+
/**
 * Narrow an unknown value to a non-empty string.
 *
 * @param {unknown} value
 * @returns {string|undefined} `value` when it is a string of length > 0, otherwise `undefined`.
 */
function stringValue(value) {
    if (typeof value !== "string" || value.length === 0)
        return undefined;
    return value;
}
|
|
491
|
+
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
    // typeof null is "object", so null has to be excluded explicitly.
    return value !== null && typeof value === "object" && !Array.isArray(value);
}
|
|
494
|
+
/**
 * Read config.json from the run directory, reporting "resume.config_missing" through
 * `resumeConfigError` when it is absent or parses to a falsy value.
 *
 * @param {string} runDir
 * @returns {Promise<Record<string, unknown>>} The parsed config. Reaching the return with a
 *   missing config is only prevented if `resumeConfigError` throws — confirm against resume.js.
 */
async function readRequiredConfig(runDir) {
    const config = await readOptionalJson(runDir, runArtifactFiles.config);
    if (!config) {
        resumeConfigError("Run directory is missing config.json; cannot replay safely.", "resume.config_missing", "Resume only from a DeepThonk run directory with config.json.");
    }
    return config;
}
|
|
501
|
+
/**
 * Read and parse a JSON file from the run directory, treating a missing file as "no value".
 *
 * @param {string} runDir - Directory containing the file.
 * @param {string} fileName - File name relative to `runDir`.
 * @returns {Promise<unknown>} The parsed JSON, or `undefined` when the file does not exist.
 * @throws Any other read error, and the `SyntaxError` from `JSON.parse` on malformed content.
 */
async function readOptionalJson(runDir, fileName) {
    try {
        const text = await readFile(join(runDir, fileName), "utf8");
        return JSON.parse(text);
    }
    catch (error) {
        // Only "file not found" is an expected absence; everything else is a real failure.
        if (error.code === "ENOENT")
            return undefined;
        throw error;
    }
}
|
|
511
|
+
/**
 * Probe whether the worker process recorded in status.json still exists.
 *
 * A missing pid is treated as alive: without a pid the worker cannot be shown to be gone,
 * so the caller should not assume the run is free to resume.
 *
 * @param {number|undefined} pid - Worker pid from the status file.
 * @returns {boolean} True when the process exists or the pid is missing; false otherwise.
 */
function isLiveWorker(pid) {
    if (!pid)
        return true;
    try {
        // Signal 0 delivers nothing; it only checks that the process can be signalled.
        process.kill(pid, 0);
        return true;
    }
    catch (error) {
        // EPERM means the process exists but belongs to someone we may not signal, so it
        // is still alive. Anything else (ESRCH, invalid pid) means no such worker.
        return error?.code === "EPERM";
    }
}
|
|
198
522
|
export function rankPopulation(population, scores) {
|
|
199
523
|
const order = new Map(scores.map((score) => [score.candidateId, score.rank]));
|
|
200
524
|
return [...population].sort((left, right) => {
|
|
@@ -356,6 +680,13 @@ async function judgePairs(args) {
|
|
|
356
680
|
jsonParseFailures += 1;
|
|
357
681
|
}
|
|
358
682
|
}
|
|
683
|
+
if (invalidJson || !parsed) {
|
|
684
|
+
throw new ConfigError(`Judge produced ${jsonParseFailures} consecutive invalid-JSON responses (${args.config.retry.invalidJsonRetries + 1} attempts) for comparison ${job.id}. Refusing to synthesize a tie and pollute the ranking.`, {
|
|
685
|
+
code: "judge.persistent_invalid_json",
|
|
686
|
+
retryable: false,
|
|
687
|
+
fix: "The judge model is producing unparseable output. Inspect the raw response (set output.includeRawModelOutputs: true), switch judge models, or raise retry.invalidJsonRetries if the failures are transient."
|
|
688
|
+
});
|
|
689
|
+
}
|
|
359
690
|
const comparison = {
|
|
360
691
|
id: job.id,
|
|
361
692
|
runId: args.runId,
|
|
@@ -575,4 +906,28 @@ function serializeRunError(error) {
|
|
|
575
906
|
retryable: false
|
|
576
907
|
};
|
|
577
908
|
}
|
|
909
|
+
/**
 * Read this package's version from the package.json one directory above this module.
 *
 * Best effort by design: an unreadable or malformed package.json, or a `version` field that
 * is not a string, yields "0.0.0" instead of an error.
 *
 * @returns {Promise<string>} The package version, or "0.0.0" when it cannot be determined.
 */
async function currentPackageVersion() {
    try {
        const packageJson = JSON.parse(await readFile(new URL("../package.json", import.meta.url), "utf8"));
        // Callers compare the version as text, so a non-string value is treated as missing.
        return typeof packageJson.version === "string" ? packageJson.version : "0.0.0";
    }
    catch {
        return "0.0.0";
    }
}
|
|
918
|
+
/**
 * Compare two version strings on their leading `major.minor` only.
 *
 * Anything that is not a string starting with `<digits>.<digits>` counts as a mismatch and
 * never throws, so a missing or malformed version on either side blocks the match.
 *
 * @param {string|undefined} left - Version stored with the run.
 * @param {string|undefined} right - Version of the running package.
 * @returns {boolean} True only when both parse and share the same major and minor numbers.
 */
function sameMajorMinor(left, right) {
    const majorMinor = (version) => {
        if (typeof version !== "string")
            return undefined;
        const match = /^(\d+)\.(\d+)/.exec(version);
        return match ? [Number(match[1]), Number(match[2])] : undefined;
    };
    const a = majorMinor(left);
    const b = majorMinor(right);
    if (!a || !b)
        return false;
    return a[0] === b[0] && a[1] === b[1];
}
|
|
578
933
|
//# sourceMappingURL=runner.js.map
|