@deepthonk/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -7
- package/dist/bradleyTerry.js +11 -3
- package/dist/bradleyTerry.js.map +1 -1
- package/dist/budgetTracker.js +14 -0
- package/dist/budgetTracker.js.map +1 -1
- package/dist/lifecycle.d.ts +2 -0
- package/dist/lifecycle.js +15 -1
- package/dist/lifecycle.js.map +1 -1
- package/dist/resume.d.ts +51 -0
- package/dist/resume.js +355 -0
- package/dist/resume.js.map +1 -0
- package/dist/runner.d.ts +38 -2
- package/dist/runner.js +465 -50
- package/dist/runner.js.map +1 -1
- package/dist/schemas.d.ts +27 -0
- package/dist/schemas.js +15 -0
- package/dist/schemas.js.map +1 -1
- package/dist/services.d.ts +5 -1
- package/dist/services.js +15 -8
- package/dist/services.js.map +1 -1
- package/package.json +1 -1
package/dist/runner.js
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
import pLimit from "p-limit";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
2
4
|
import { z } from "zod";
|
|
3
5
|
import { fitBradleyTerry } from "./bradleyTerry.js";
|
|
4
6
|
import { BudgetTracker } from "./budgetTracker.js";
|
|
5
7
|
import { planBudget, validateProfile } from "./budget.js";
|
|
6
8
|
import { BudgetExceededError, CancelledError, ConfigError, DeepThonkError } from "./errors.js";
|
|
9
|
+
import { runArtifactFiles } from "./artifacts.js";
|
|
7
10
|
import { parseJsonObject } from "./json.js";
|
|
8
|
-
import { emptyUsage } from "./lifecycle.js";
|
|
11
|
+
import { claimRunLock, emptyUsage, releaseRunLock } from "./lifecycle.js";
|
|
9
12
|
import { makeKRegularPairs } from "./pairScheduler.js";
|
|
10
13
|
import { comparePrompt, finalizePrompt, generatePrompt, mutatePrompt } from "./prompts.js";
|
|
11
14
|
import { createRng } from "./rng.js";
|
|
12
|
-
import {
|
|
15
|
+
import { assertNoPruneInProgress, buildPopulationMap, buildResumePlan, groupComparisons, persistPrunedTrace, pruneTraceToPlan, readResumeTrace, reconstructScores, replayBudgetUsage, resolveResumeRunId, resumeConfigError, toResumePlanStatus } from "./resume.js";
|
|
16
|
+
import { runConfigSchema, builtInProfiles, phaseCompletedEventSchema } from "./schemas.js";
|
|
13
17
|
import { aggregateCritiques } from "./critique.js";
|
|
14
18
|
import { TraceStore } from "./traceStore.js";
|
|
15
19
|
const compareOutputSchema = z.object({
|
|
@@ -21,16 +25,27 @@ const compareOutputSchema = z.object({
|
|
|
21
25
|
feedback_b: z.string().optional(),
|
|
22
26
|
selection_reason: z.string().default("")
|
|
23
27
|
});
|
|
24
|
-
export async function runDeepThonk(configInput, driver, control = {}) {
|
|
28
|
+
export async function runDeepThonk(configInput, driver, control = {}, resumeState) {
|
|
25
29
|
const config = runConfigSchema.parse(configInput);
|
|
26
30
|
validateProfile(config.profile);
|
|
27
31
|
enforceBudget(config);
|
|
28
|
-
const runId = `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
|
|
32
|
+
const runId = resumeState?.runId ?? `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
|
|
29
33
|
const trace = new TraceStore(config.runDir);
|
|
30
34
|
const rng = createRng(config.seed);
|
|
31
|
-
const tracker = new BudgetTracker(config);
|
|
32
|
-
const startedAt = new Date().toISOString();
|
|
33
|
-
|
|
35
|
+
const tracker = resumeState?.tracker ?? new BudgetTracker(config);
|
|
36
|
+
const startedAt = resumeState?.startedAt ?? new Date().toISOString();
|
|
37
|
+
if (!resumeState) {
|
|
38
|
+
await trace.init({ ...config, version: await currentPackageVersion() }, runId);
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
await trace.event({
|
|
42
|
+
type: "run.resumed",
|
|
43
|
+
run_id: runId,
|
|
44
|
+
resumed_at: new Date().toISOString(),
|
|
45
|
+
next_phase: resumeState.nextPhase.phase,
|
|
46
|
+
generation: resumeState.nextPhase.generation
|
|
47
|
+
});
|
|
48
|
+
}
|
|
34
49
|
let stopped = false;
|
|
35
50
|
const writeStatus = async (state, phase, extra = {}) => {
|
|
36
51
|
await trace.writeStatus({
|
|
@@ -51,7 +66,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
51
66
|
throw new CancelledError(`Run cancelled before ${phase}.`, {
|
|
52
67
|
code: "run.cancelled",
|
|
53
68
|
retryable: false,
|
|
54
|
-
fix: "
|
|
69
|
+
fix: "Resume the run with deepthonk resume --continue after the worker has stopped."
|
|
55
70
|
});
|
|
56
71
|
}
|
|
57
72
|
};
|
|
@@ -80,31 +95,52 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
80
95
|
}
|
|
81
96
|
};
|
|
82
97
|
try {
|
|
83
|
-
await writeStatus("running", "initialized");
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
98
|
+
await writeStatus("running", resumeState ? "resume_replay" : "initialized");
|
|
99
|
+
let population;
|
|
100
|
+
if (isResumePhaseCompleted(resumeState, "initial_generation")) {
|
|
101
|
+
population = resumePopulation(resumeState, 0);
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
await assertNotCancelled("initial population");
|
|
105
|
+
population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
|
|
106
|
+
await trace.writePopulation(0, population);
|
|
107
|
+
await assertBudget("initial population");
|
|
108
|
+
await markPhaseCompleted(trace, "initial_generation");
|
|
109
|
+
await writeStatus("running", "population_completed", { generation: 0 });
|
|
110
|
+
}
|
|
89
111
|
for (let gen = 1; gen <= config.profile.t; gen += 1) {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
112
|
+
if (isResumePhaseCompleted(resumeState, "generation_mutation", gen)) {
|
|
113
|
+
population = resumePopulation(resumeState, gen);
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
let comparisons;
|
|
117
|
+
let scores;
|
|
118
|
+
if (isResumePhaseCompleted(resumeState, "generation_judging", gen)) {
|
|
119
|
+
comparisons = resumeComparisons(resumeState, gen);
|
|
120
|
+
scores = resumeScores(resumeState, gen, population, comparisons, config.profile.lambda);
|
|
121
|
+
}
|
|
122
|
+
else {
|
|
123
|
+
await assertNotCancelled(`generation ${gen} comparisons`);
|
|
124
|
+
await writeStatus("running", "generation_comparisons", { generation: gen });
|
|
125
|
+
const judgingRng = phaseRng(config.seed, "generation_judging", gen);
|
|
126
|
+
const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, judgingRng);
|
|
127
|
+
comparisons = await judgePairs({
|
|
128
|
+
config,
|
|
129
|
+
driver,
|
|
130
|
+
trace,
|
|
131
|
+
rng: judgingRng,
|
|
132
|
+
runId,
|
|
133
|
+
generation: gen,
|
|
134
|
+
pairs,
|
|
135
|
+
population,
|
|
136
|
+
tracker,
|
|
137
|
+
guards
|
|
138
|
+
});
|
|
139
|
+
scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
|
|
140
|
+
await trace.writeScores(gen, scores);
|
|
141
|
+
await assertBudget(`generation ${gen} comparisons`);
|
|
142
|
+
await markPhaseCompleted(trace, "generation_judging", gen);
|
|
143
|
+
}
|
|
108
144
|
const ranked = rankPopulation(population, scores);
|
|
109
145
|
const elites = topQuartile(ranked);
|
|
110
146
|
const discarded = bottomQuartile(ranked);
|
|
@@ -113,6 +149,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
113
149
|
const critiquesByCandidate = aggregateCritiques(population, comparisons);
|
|
114
150
|
await assertNotCancelled(`generation ${gen} mutation`);
|
|
115
151
|
await writeStatus("running", "generation_mutation", { generation: gen });
|
|
152
|
+
await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
|
|
116
153
|
const mutants = await mutateSurvivors({
|
|
117
154
|
config,
|
|
118
155
|
driver,
|
|
@@ -131,26 +168,37 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
131
168
|
population = keepPopulationSize([...eliteCopies, ...mutants], config.profile.n);
|
|
132
169
|
await trace.writePopulation(gen, population);
|
|
133
170
|
await assertBudget(`generation ${gen} mutation`);
|
|
171
|
+
await markPhaseCompleted(trace, "generation_mutation", gen);
|
|
134
172
|
await writeStatus("running", "generation_completed", { generation: gen });
|
|
135
173
|
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
config
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
174
|
+
let finalComparisons;
|
|
175
|
+
let finalScores;
|
|
176
|
+
if (isResumePhaseCompleted(resumeState, "final_judging")) {
|
|
177
|
+
finalComparisons = resumeComparisons(resumeState, "final");
|
|
178
|
+
finalScores = resumeScores(resumeState, "final", population, finalComparisons, config.profile.lambda);
|
|
179
|
+
}
|
|
180
|
+
else {
|
|
181
|
+
await assertNotCancelled("final ranking");
|
|
182
|
+
await writeStatus("running", "final_ranking", { generation: "final" });
|
|
183
|
+
const finalRng = phaseRng(config.seed, "final_judging");
|
|
184
|
+
const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, finalRng);
|
|
185
|
+
finalComparisons = await judgePairs({
|
|
186
|
+
config,
|
|
187
|
+
driver,
|
|
188
|
+
trace,
|
|
189
|
+
rng: finalRng,
|
|
190
|
+
runId,
|
|
191
|
+
generation: "final",
|
|
192
|
+
pairs: finalPairs,
|
|
193
|
+
population,
|
|
194
|
+
tracker,
|
|
195
|
+
guards
|
|
196
|
+
});
|
|
197
|
+
finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
|
|
198
|
+
await trace.writeScores("final", finalScores);
|
|
199
|
+
await assertBudget("final ranking");
|
|
200
|
+
await markPhaseCompleted(trace, "final_judging");
|
|
201
|
+
}
|
|
154
202
|
const winner = rankPopulation(population, finalScores)[0];
|
|
155
203
|
if (!winner)
|
|
156
204
|
throw new ConfigError("Run produced no winner.");
|
|
@@ -162,6 +210,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
162
210
|
const summary = {
|
|
163
211
|
run_id: runId,
|
|
164
212
|
winner_id: winner.id,
|
|
213
|
+
profile: summaryProfile(config.profile),
|
|
214
|
+
profile_name: summaryProfileName(config.profile),
|
|
215
|
+
prompt_style: config.promptStyle,
|
|
216
|
+
models: summaryModels(config),
|
|
165
217
|
calls: tracker.usage.calls,
|
|
166
218
|
usage: cloneUsage(tracker.usage),
|
|
167
219
|
ranked_winner_answer_path: "artifacts/winner.txt",
|
|
@@ -171,7 +223,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
171
223
|
completed_at: completedAt
|
|
172
224
|
};
|
|
173
225
|
await trace.writeSummary(summary, finalAnswer, winner.content);
|
|
226
|
+
await markPhaseCompleted(trace, "finalizing");
|
|
174
227
|
await trace.event({ type: "run.completed", winner_id: winner.id, completed_at: completedAt });
|
|
228
|
+
if (resumeState)
|
|
229
|
+
await trace.event({ type: "run.resumed_completed", winner_id: winner.id, completed_at: completedAt });
|
|
175
230
|
await writeStatus("completed", "summary", { generation: "final", completed_at: completedAt });
|
|
176
231
|
return {
|
|
177
232
|
runId,
|
|
@@ -190,6 +245,280 @@ export async function runDeepThonk(configInput, driver, control = {}) {
|
|
|
190
245
|
throw error;
|
|
191
246
|
}
|
|
192
247
|
}
|
|
248
|
+
/**
 * Resume an interrupted run from the artifacts stored in `runDir`.
 *
 * The stored config is re-validated, the run lock is claimed, the trace is
 * turned into a resume plan, and execution is handed to `runDeepThonk` with
 * the reconstructed state. When `options.dryRun` is set, the plan status is
 * returned before anything is pruned, written, or executed.
 *
 * NOTE(review): every `resumeConfigError(...)` call below is written as if
 * the helper throws; confirm against resume.js.
 *
 * @param {string} runDir - Directory holding the run artifacts.
 * @param {object} driver - Model driver, checked against the stored provider configuration.
 * @param {object} [options] - `dryRun` is read here; the object is also passed to the provider check.
 * @returns {Promise<object>} The plan status (dry run) or whatever `runDeepThonk` resolves to.
 * @throws {ConfigError} When the run directory lock cannot be claimed.
 */
export async function resumeDeepThonk(runDir, driver, options = {}) {
    const summaryOnDisk = await readOptionalJson(runDir, runArtifactFiles.summary);
    if (summaryOnDisk) {
        resumeConfigError("Run already has summary.json; nothing to resume.", "resume.already_complete", "Use deepthonk inspect/result to read the completed run.");
    }
    const storedConfig = await readRequiredConfig(runDir);
    const packageVersion = await currentPackageVersion();
    const storedVersion = typeof storedConfig.version === "string" ? storedConfig.version : undefined;
    if (!sameMajorMinor(storedVersion, packageVersion)) {
        resumeConfigError(`Cannot resume run from version ${String(storedConfig.version ?? "missing")}; current package version is ${packageVersion}. Resume requires matching major.minor.`, "resume.version_mismatch", "Start a fresh run with the current DeepThonk version, or resume with a package version whose major.minor matches the trace.");
    }
    assertStoredOutputConfigComplete(storedConfig);
    // The directory passed by the caller wins over any runDir stored in config.json.
    const config = { ...runConfigSchema.parse(storedConfig), runDir };
    validateProfile(config.profile);
    enforceBudget(config);
    assertResumeProviderMatches(storedConfig, config, driver, options);
    // NOTE(review): not awaited in the original either — confirm this helper is synchronous.
    assertNoPruneInProgress(runDir);
    const lockRunId = resolveResumeRunId(config, undefined, []);
    const lockClaimed = await claimRunLock(runDir, lockRunId);
    if (!lockClaimed) {
        throw new ConfigError(`Run directory is already claimed: ${runDir}`, {
            code: "run.directory_locked",
            retryable: false,
            fix: "Wait for the active run to finish, or inspect the existing run.lock file."
        });
    }
    try {
        const status = await readOptionalJson(runDir, runArtifactFiles.status);
        const recordedState = status?.state;
        const looksActive = recordedState === "running" || recordedState === "pending";
        if (looksActive && isLiveWorker(status.worker_pid)) {
            resumeConfigError(`Run is still in flight at phase ${status.phase}.`, "resume.in_flight", "Wait for the worker to finish, or cancel/stop it before resuming.");
        }
        const trace = await readResumeTrace(runDir);
        const runId = resolveResumeRunId(config, status, trace.events);
        const plan = buildResumePlan(config, trace.events);
        if (plan.nextPhase.phase === "summary") {
            resumeConfigError("Trace says finalizing completed, but summary.json is missing.", "resume.inconsistent_trace", "Inspect the run directory and restore summary.json, or start a fresh run.");
        }
        const planStatus = toResumePlanStatus(runDir, runId, plan);
        if (options.dryRun) {
            return planStatus;
        }
        // Rebuild the in-memory state for the phases the plan marks as completed.
        const pruned = pruneTraceToPlan(trace, plan);
        const populationByGeneration = buildPopulationMap(config, pruned.populations, pruned.candidates, plan);
        const comparisonsByGeneration = groupComparisons(pruned.comparisons);
        const scoresByGeneration = reconstructScores(config, populationByGeneration, comparisonsByGeneration, pruned.scores, plan);
        const tracker = replayBudgetUsage(config, pruned.usage);
        const startedAt = status?.started_at ?? new Date().toISOString();
        const statusStore = new TraceStore(runDir);
        // Status is rewritten with this process's pid before the pruned trace is persisted.
        await statusStore.writeStatus({
            run_id: runId,
            run_dir: runDir,
            state: "running",
            phase: "resume_planning",
            usage: cloneUsage(tracker.usage),
            started_at: startedAt,
            worker_pid: process.pid,
            updated_at: new Date().toISOString()
        });
        await persistPrunedTrace(runDir, pruned);
        const resumeState = {
            runId,
            startedAt,
            completed: plan.completed,
            populationByGeneration,
            comparisonsByGeneration,
            scoresByGeneration,
            tracker,
            nextPhase: plan.nextPhase
        };
        // `await` is required so the lock is released only after the run settles.
        return await runDeepThonk(config, driver, {}, resumeState);
    }
    finally {
        await releaseRunLock(runDir);
    }
}
|
|
320
|
+
/**
 * Append a `phase.completed` event to the trace.
 *
 * The payload is passed through `phaseCompletedEventSchema.parse` before it
 * is written, and the parsed value is what gets recorded.
 *
 * @param {object} trace - Trace store exposing an async `event(...)` method.
 * @param {string} phase - Name of the phase that finished.
 * @param {number|string} [generation] - Generation the phase belongs to, if any.
 * @returns {Promise<void>}
 */
async function markPhaseCompleted(trace, phase, generation) {
    const payload = {
        type: "phase.completed",
        phase,
        generation,
        at: new Date().toISOString()
    };
    const validated = phaseCompletedEventSchema.parse(payload);
    await trace.event(validated);
}
|
|
329
|
+
/**
 * Report whether the resume state marks a phase as already completed.
 *
 * @param {object|undefined} resumeState - Resume state; absent on a fresh run.
 * @param {string} phase - Phase name.
 * @param {number|string} [generation] - Generation for per-generation phases.
 * @returns {boolean} `false` when there is no resume state.
 */
function isResumePhaseCompleted(resumeState, phase, generation) {
    if (resumeState == null) {
        return false;
    }
    const key = resumePhaseKey(phase, generation);
    return Boolean(resumeState.completed.has(key));
}
|
|
332
|
+
/**
 * Look up the population stored for a generation in the resume state.
 *
 * @param {object|undefined} resumeState - Resume state with a `populationByGeneration` map.
 * @param {number} generation - Generation key to look up.
 * @returns {*} The stored population entry.
 * @throws {ConfigError} When no (truthy) population is stored for that generation.
 */
function resumePopulation(resumeState, generation) {
    const snapshot = resumeState?.populationByGeneration.get(generation);
    if (snapshot) {
        return snapshot;
    }
    throw new ConfigError(`Resume state is missing population generation ${generation}.`, {
        code: "resume.population_missing",
        retryable: false,
        fix: "Inspect the run directory for missing population snapshots."
    });
}
|
|
343
|
+
/**
 * Look up the comparisons stored for a generation in the resume state.
 *
 * @param {object|undefined} resumeState - Resume state with a `comparisonsByGeneration` map.
 * @param {number|string} generation - Generation key (callers also pass `"final"`).
 * @returns {*} The stored comparisons entry.
 * @throws {ConfigError} When no (truthy) entry is stored for that generation.
 */
function resumeComparisons(resumeState, generation) {
    const stored = resumeState?.comparisonsByGeneration.get(generation);
    if (stored) {
        return stored;
    }
    throw new ConfigError(`Resume state is missing comparisons for generation ${generation}.`, {
        code: "resume.comparisons_missing",
        retryable: false,
        fix: "Inspect the run directory for missing comparison trace rows."
    });
}
|
|
354
|
+
/**
 * Return the scores stored for a generation, refitting them when absent.
 *
 * @param {object|undefined} resumeState - Resume state with a `scoresByGeneration` map.
 * @param {number|string} generation - Generation key (callers also pass `"final"`).
 * @param {Array} population - Population used if scores must be refitted.
 * @param {Array} comparisons - Comparisons used if scores must be refitted.
 * @param {number} lambda - Passed through to `fitBradleyTerry`.
 * @returns {*} Stored scores, or the result of `fitBradleyTerry` when none are stored.
 */
function resumeScores(resumeState, generation, population, comparisons, lambda) {
    const stored = resumeState?.scoresByGeneration.get(generation);
    if (stored !== undefined && stored !== null) {
        return stored;
    }
    return fitBradleyTerry(population, comparisons, lambda, generation);
}
|
|
357
|
+
/**
 * Build a random generator whose seed depends on the run seed, the phase
 * name, and (when given) the generation.
 *
 * @param {number} seed - Run seed.
 * @param {string} phase - Phase name mixed into the seed.
 * @param {number|string} [generation] - Generation mixed into the seed; empty when omitted.
 * @returns {*} Whatever `createRng` returns for the derived seed.
 */
function phaseRng(seed, phase, generation) {
    const seedMaterial = `${seed}:${phase}:${generation ?? ""}`;
    const derivedSeed = hashSeed(seedMaterial);
    return createRng(derivedSeed);
}
|
|
360
|
+
/**
 * Hash a string to an unsigned 32-bit integer using FNV-1a
 * (xor, then multiply), applied to the string's UTF-16 code units.
 *
 * @param {string} value - Text to hash.
 * @returns {number} Integer in the range [0, 2^32).
 */
function hashSeed(value) {
    const offsetBasis = 2166136261;
    const prime = 16777619;
    let state = offsetBasis;
    let position = 0;
    while (position < value.length) {
        // Math.imul keeps the multiplication in 32-bit integer arithmetic.
        state = Math.imul(state ^ value.charCodeAt(position), prime);
        position += 1;
    }
    // Reinterpret the signed 32-bit state as unsigned.
    return state >>> 0;
}
|
|
368
|
+
/**
 * Build the key under which a completed phase is tracked.
 *
 * Only `generation_judging` and `generation_mutation` are keyed per
 * generation (`"<phase>:<generation>"`); every other phase, and those two
 * when `generation` is `undefined`, use the bare phase name.
 *
 * @param {string} phase - Phase name.
 * @param {number|string} [generation] - Generation for per-generation phases.
 * @returns {string} The lookup key.
 */
function resumePhaseKey(phase, generation) {
    const isPerGeneration = phase === "generation_judging" || phase === "generation_mutation";
    if (!isPerGeneration || generation === undefined) {
        return phase;
    }
    return `${phase}:${generation}`;
}
|
|
373
|
+
/**
 * Work out a provider label for a driver.
 *
 * Checked in order: a string `provider`, a string `providerName`, a string
 * `config.provider`, then the label of a wrapped `baseDriver`, and finally
 * the constructor name for the three known driver classes.
 *
 * @param {object} driver - Driver instance (possibly wrapping another driver).
 * @returns {string|undefined} The label, or `undefined` when none can be determined.
 */
function providerLabel(driver) {
    const declared = [driver.provider, driver.providerName, driver.config?.provider]
        .find((candidate) => typeof candidate === "string");
    if (declared !== undefined) {
        return declared;
    }
    if (driver.baseDriver) {
        return providerLabel(driver.baseDriver);
    }
    const labelsByConstructor = new Map([
        ["FakeDriver", "fake"],
        ["SamplingDriver", "sampling"],
        ["OpenAiCompatibleDriver", "openai-compatible"]
    ]);
    return labelsByConstructor.get(driver.constructor?.name);
}
|
|
392
|
+
/**
 * Require the stored config to carry a complete `output` block: a record
 * whose `includeRawModelOutputs` and `includePrompts` are both booleans.
 * Anything else is reported through `resumeConfigError`.
 *
 * @param {object} rawConfig - config.json contents as read from disk.
 * @returns {void}
 */
function assertStoredOutputConfigComplete(rawConfig) {
    const { output } = rawConfig;
    const isComplete = isRecord(output)
        && typeof output.includeRawModelOutputs === "boolean"
        && typeof output.includePrompts === "boolean";
    if (isComplete) {
        return;
    }
    resumeConfigError("Stored config.json is missing the complete output block required for deterministic resume.", "resume.config_incomplete", "Restore output.includeRawModelOutputs and output.includePrompts, or start a fresh run.");
}
|
|
398
|
+
/**
 * Check that the runtime provider setup matches the one stored with the run.
 *
 * First the top-level provider is compared (`options.provider`, else the
 * driver's label, else the stored provider itself). Then, for every role
 * that has a stored route, the runtime route must exist and agree on each
 * of provider, baseUrl (ignoring trailing slashes) and model that the
 * stored route defines. Mismatches are reported via `providerMismatch`.
 *
 * @param {object} rawConfig - config.json contents as read from disk.
 * @param {object} config - Parsed run configuration.
 * @param {object} driver - Runtime driver.
 * @param {object} options - Resume options; `provider` is read.
 * @returns {void}
 */
function assertResumeProviderMatches(rawConfig, config, driver, options) {
    const storedProvider = config.provider;
    const runtimeProvider = options.provider ?? providerLabel(driver) ?? storedProvider;
    if (runtimeProvider !== storedProvider) {
        providerMismatch(`Cannot resume provider ${storedProvider} with runtime provider ${runtimeProvider}.`);
    }
    const storedRoutes = expectedProviderRoutes(rawConfig, config);
    const liveRoutes = runtimeProviderRoutes(driver);
    for (const role of providerRoles) {
        const expected = storedRoutes[role];
        if (!expected) {
            // Nothing was stored for this role, so there is nothing to compare.
            continue;
        }
        const actual = liveRoutes[role];
        if (!actual) {
            providerMismatch(`Cannot resume ${role} route ${routeLabel(expected)} without a matching runtime route.`);
        }
        const providerDiffers = expected.provider !== undefined && actual.provider !== expected.provider;
        if (providerDiffers) {
            providerMismatch(`Cannot resume ${role} route provider ${expected.provider} with runtime provider ${actual.provider ?? "missing"}.`);
        }
        const baseUrlDiffers = expected.baseUrl !== undefined
            && normalizeBaseUrl(actual.baseUrl) !== normalizeBaseUrl(expected.baseUrl);
        if (baseUrlDiffers) {
            providerMismatch(`Cannot resume ${role} route baseUrl ${expected.baseUrl} with runtime baseUrl ${actual.baseUrl ?? "missing"}.`);
        }
        const modelDiffers = expected.model !== undefined && actual.model !== expected.model;
        if (modelDiffers) {
            providerMismatch(`Cannot resume ${role} route model ${expected.model} with runtime model ${actual.model ?? "missing"}.`);
        }
    }
}
|
|
424
|
+
// Roles whose provider routes are compared between stored config and runtime driver.
const providerRoles = [
    "generator",
    "mutator",
    "judge",
    "finalizer"
];
|
|
425
|
+
/**
 * Collect the per-role provider routes described by the stored config.
 *
 * Only roles whose entry under `rawConfig.providers` is a record are
 * included. A missing provider falls back to `config.provider`, a missing
 * model to the role's model from `config`; `baseUrl` also accepts the
 * `base_url` spelling.
 *
 * @param {object} rawConfig - config.json contents as read from disk.
 * @param {object} config - Parsed run configuration.
 * @returns {object} Map of role name to `{ provider, baseUrl, model }`; empty when nothing is stored.
 */
function expectedProviderRoutes(rawConfig, config) {
    const routes = {};
    const { providers } = rawConfig;
    if (!isRecord(providers)) {
        return routes;
    }
    for (const role of providerRoles) {
        const stored = providers[role];
        if (isRecord(stored)) {
            routes[role] = {
                provider: stringValue(stored.provider) ?? config.provider,
                baseUrl: stringValue(stored.baseUrl) ?? stringValue(stored.base_url),
                model: stringValue(stored.model) ?? modelForRole(config, role)
            };
        }
    }
    return routes;
}
|
|
442
|
+
/**
 * Collect the per-role provider routes exposed by the runtime driver.
 *
 * Reads `driver.routes`; only roles whose entry is a record are included.
 * Provider and baseUrl come from the route's own `driver` when it has one.
 *
 * @param {object} driver - Runtime driver, optionally carrying a `routes` table.
 * @returns {object} Map of role name to `{ provider, baseUrl, model }`; empty when there is no route table.
 */
function runtimeProviderRoutes(driver) {
    const routes = {};
    const table = driver.routes;
    if (!isRecord(table)) {
        return routes;
    }
    for (const role of providerRoles) {
        const entry = table[role];
        if (isRecord(entry)) {
            const entryDriver = entry.driver;
            routes[role] = {
                provider: entryDriver ? providerLabel(entryDriver) : undefined,
                baseUrl: entryDriver ? providerBaseUrl(entryDriver) : undefined,
                model: stringValue(entry.model)
            };
        }
    }
    return routes;
}
|
|
460
|
+
/**
 * Find the base URL of a driver.
 *
 * Checked in order: a string `baseUrl`, a string `config.baseUrl`, then the
 * base URL of a wrapped `baseDriver`.
 *
 * @param {object} driver - Driver instance (possibly wrapping another driver).
 * @returns {string|undefined} The base URL, or `undefined` when none is found.
 */
function providerBaseUrl(driver) {
    const ownUrl = [driver.baseUrl, driver.config?.baseUrl]
        .find((candidate) => typeof candidate === "string");
    if (ownUrl !== undefined) {
        return ownUrl;
    }
    return driver.baseDriver ? providerBaseUrl(driver.baseDriver) : undefined;
}
|
|
470
|
+
/**
 * Pick the configured model for a role.
 *
 * @param {object} config - Run configuration with the per-role model fields.
 * @param {string} role - `"generator"`, `"mutator"` or `"judge"`; any other value selects the finalizer model.
 * @returns {*} The matching model field from `config`.
 */
function modelForRole(config, role) {
    switch (role) {
        case "generator":
            return config.generatorModel;
        case "mutator":
            return config.mutatorModel;
        case "judge":
            return config.judgeModel;
        default:
            return config.finalizerModel;
    }
}
|
|
479
|
+
/**
 * Render a route as `provider/baseUrl/model`, leaving out empty parts.
 *
 * @param {object} route - Route with optional `provider`, `baseUrl`, `model`.
 * @returns {string} The joined label, or `"provider route"` when every part is empty.
 */
function routeLabel(route) {
    const { provider, baseUrl, model } = route;
    const label = [provider, baseUrl, model]
        .filter((part) => Boolean(part))
        .join("/");
    return label === "" ? "provider route" : label;
}
|
|
482
|
+
/**
 * Report a provider mismatch through `resumeConfigError`, using the
 * `resume.provider_mismatch` code and a fixed remediation hint.
 *
 * @param {string} message - Description of what differs.
 * @returns {void}
 */
function providerMismatch(message) {
    const code = "resume.provider_mismatch";
    const fix = "Use the same provider configuration that created config.json.";
    resumeConfigError(message, code, fix);
}
|
|
485
|
+
/**
 * Strip trailing slashes from a base URL so two spellings compare equal.
 *
 * @param {string|null|undefined} value - Base URL, if any.
 * @returns {string|undefined} The URL without trailing `/`; `undefined` for a missing value.
 */
function normalizeBaseUrl(value) {
    if (value === undefined || value === null) {
        return undefined;
    }
    return value.replace(/\/+$/, "");
}
|
|
488
|
+
/**
 * Pass through non-empty strings; map everything else to `undefined`.
 *
 * @param {*} value - Candidate value.
 * @returns {string|undefined} `value` when it is a non-empty string.
 */
function stringValue(value) {
    if (typeof value !== "string" || value.length === 0) {
        return undefined;
    }
    return value;
}
|
|
491
|
+
/**
 * Test whether a value is a non-null object that is not an array.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} `true` for objects other than arrays; `false` for primitives, `null`, arrays and functions.
 */
function isRecord(value) {
    if (typeof value !== "object" || value === null) {
        return false;
    }
    return !Array.isArray(value);
}
|
|
494
|
+
/**
 * Read the run's stored config, reporting a resume error when it is absent.
 *
 * @param {string} runDir - Directory holding the run artifacts.
 * @returns {Promise<*>} The parsed config file contents.
 */
async function readRequiredConfig(runDir) {
    const storedConfig = await readOptionalJson(runDir, runArtifactFiles.config);
    if (storedConfig) {
        return storedConfig;
    }
    // NOTE(review): presumably throws; the return below only runs if it does not.
    resumeConfigError("Run directory is missing config.json; cannot replay safely.", "resume.config_missing", "Resume only from a DeepThonk run directory with config.json.");
    return storedConfig;
}
|
|
501
|
+
/**
 * Read and parse a JSON file from the run directory, treating a missing
 * file as "no value".
 *
 * @param {string} runDir - Directory holding the run artifacts.
 * @param {string} fileName - File name relative to `runDir`.
 * @returns {Promise<*>} The parsed JSON, or `undefined` when the file does not exist.
 * @throws Any other read error, and parse errors for malformed JSON.
 */
async function readOptionalJson(runDir, fileName) {
    try {
        const text = await readFile(join(runDir, fileName), "utf8");
        return JSON.parse(text);
    }
    catch (error) {
        // Only "file not found" is an expected absence; everything else propagates.
        if (error.code !== "ENOENT") {
            throw error;
        }
        return undefined;
    }
}
|
|
511
|
+
/**
 * Decide whether the worker recorded in status.json may still be running.
 *
 * A missing pid is treated as live, so the caller refuses to resume when
 * liveness cannot be established. Otherwise the process is probed with
 * signal 0, which checks for existence without delivering a signal.
 *
 * @param {number|undefined} pid - Worker pid recorded in the run status.
 * @returns {boolean} `true` when the worker may be alive, `false` when the probe shows it is gone.
 */
function isLiveWorker(pid) {
    if (!pid) {
        return true;
    }
    try {
        process.kill(pid, 0);
        return true;
    }
    catch (error) {
        // EPERM means the process exists but belongs to another user: it is
        // alive, we just may not signal it. Any other failure is treated as
        // "no such worker", as before.
        return error?.code === "EPERM";
    }
}
|
|
193
522
|
export function rankPopulation(population, scores) {
|
|
194
523
|
const order = new Map(scores.map((score) => [score.candidateId, score.rank]));
|
|
195
524
|
return [...population].sort((left, right) => {
|
|
@@ -219,6 +548,23 @@ export function copyElites(elites, generation) {
|
|
|
219
548
|
/**
 * Cap a population at `n` members, keeping the leading entries.
 *
 * @param {Array} population - Candidates in priority order.
 * @param {number} n - Maximum number of candidates to keep.
 * @returns {Array} A new array with at most the first `n` entries.
 */
export function keepPopulationSize(population, n) {
    const trimmed = population.slice(0, n);
    return trimmed;
}
|
|
551
|
+
/**
 * Record every ranked candidate that is not carried forward as discarded.
 *
 * For each such candidate, in rank order, a candidate row with status
 * `"discarded"` is written and then a `candidate.discarded` event is
 * emitted. The reason is `"bottom_quartile"` when the id is in
 * `bottomQuartileIds`, otherwise `"rounding_trim"`.
 *
 * @param {object} trace - Trace store exposing async `writeCandidate` and `event`.
 * @param {Array<object>} ranked - Candidates in rank order.
 * @param {{has(id: *): boolean}} carriedIds - Ids that survive; these are skipped.
 * @param {{has(id: *): boolean}} bottomQuartileIds - Ids that fell in the bottom quartile.
 * @param {number|string} generation - Generation recorded on the event.
 * @returns {Promise<void>}
 */
async function traceDiscardedCandidates(trace, ranked, carriedIds, bottomQuartileIds, generation) {
    const dropped = ranked.filter((candidate) => !carriedIds.has(candidate.id));
    // Writes stay sequential so trace rows keep rank order.
    for (const candidate of dropped) {
        const reason = bottomQuartileIds.has(candidate.id) ? "bottom_quartile" : "rounding_trim";
        const metadata = {
            ...candidate.metadata,
            discardReason: reason,
            discardedAt: new Date().toISOString()
        };
        await trace.writeCandidate({ ...candidate, status: "discarded", metadata });
        await trace.event({
            type: "candidate.discarded",
            candidate_id: candidate.id,
            generation,
            reason
        });
    }
}
|
|
222
568
|
async function generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards) {
|
|
223
569
|
const limit = pLimit(config.concurrency.generate);
|
|
224
570
|
const jobs = Array.from({ length: config.profile.n }, (_, index) => ({
|
|
@@ -334,6 +680,13 @@ async function judgePairs(args) {
|
|
|
334
680
|
jsonParseFailures += 1;
|
|
335
681
|
}
|
|
336
682
|
}
|
|
683
|
+
if (invalidJson || !parsed) {
|
|
684
|
+
throw new ConfigError(`Judge produced ${jsonParseFailures} consecutive invalid-JSON responses (${args.config.retry.invalidJsonRetries + 1} attempts) for comparison ${job.id}. Refusing to synthesize a tie and pollute the ranking.`, {
|
|
685
|
+
code: "judge.persistent_invalid_json",
|
|
686
|
+
retryable: false,
|
|
687
|
+
fix: "The judge model is producing unparseable output. Inspect the raw response (set output.includeRawModelOutputs: true), switch judge models, or raise retry.invalidJsonRetries if the failures are transient."
|
|
688
|
+
});
|
|
689
|
+
}
|
|
337
690
|
const comparison = {
|
|
338
691
|
id: job.id,
|
|
339
692
|
runId: args.runId,
|
|
@@ -459,8 +812,46 @@ function resultMetadata(result, config, prompt) {
|
|
|
459
812
|
/**
 * Copy a metadata object, leaving out keys whose value is `undefined`.
 * Other falsy values (`null`, `0`, `""`, `false`) are kept.
 *
 * @param {object} metadata - Source metadata.
 * @returns {object} New object without the `undefined`-valued keys.
 */
function compactMetadata(metadata) {
    const kept = [];
    for (const [key, value] of Object.entries(metadata)) {
        if (value !== undefined) {
            kept.push([key, value]);
        }
    }
    return Object.fromEntries(kept);
}
|
|
815
|
+
/**
 * Project a profile onto the shape stored in the run summary: the
 * structural fields keep their names and the temperature fields are
 * renamed to snake_case.
 *
 * @param {object} profile - Run profile.
 * @returns {object} Summary-ready copy of the profile fields.
 */
function summaryProfile(profile) {
    const { n, k, t, m, lambda, sampleTemperature, mutateTemperature, judgeTemperature } = profile;
    return {
        n,
        k,
        t,
        m,
        lambda,
        sample_temperature: sampleTemperature,
        mutate_temperature: mutateTemperature,
        judge_temperature: judgeTemperature
    };
}
|
|
827
|
+
/**
 * Name the built-in profile a profile is equal to, if any.
 *
 * `quick`, `balanced` and `paper` are tried in that order; the first match wins.
 *
 * @param {object} profile - Run profile.
 * @returns {string|null} Built-in profile name, or `null` for a custom profile.
 */
function summaryProfileName(profile) {
    const builtInNames = ["quick", "balanced", "paper"];
    const matched = builtInNames.find((name) => profilesEqual(profile, builtInProfiles[name]));
    return matched ?? null;
}
|
|
834
|
+
/**
 * Compare two profiles field by field using strict equality on the eight
 * fields that define a profile.
 *
 * @param {object} left - First profile.
 * @param {object} right - Second profile.
 * @returns {boolean} `true` when every compared field is strictly equal.
 */
function profilesEqual(left, right) {
    const comparedFields = [
        "n",
        "k",
        "t",
        "m",
        "lambda",
        "sampleTemperature",
        "mutateTemperature",
        "judgeTemperature"
    ];
    return comparedFields.every((field) => left[field] === right[field]);
}
|
|
844
|
+
/**
 * Collect the per-role model names for the run summary.
 *
 * @param {object} config - Run configuration with the per-role model fields.
 * @returns {object} `{ generator, mutator, judge, finalizer }`; `finalizer` is `null` when not configured.
 */
function summaryModels(config) {
    const { generatorModel, mutatorModel, judgeModel, finalizerModel } = config;
    return {
        generator: generatorModel,
        mutator: mutatorModel,
        judge: judgeModel,
        finalizer: finalizerModel ?? null
    };
}
|
|
462
852
|
function buildUsageRecord(args) {
|
|
463
853
|
return {
|
|
854
|
+
schema_version: 1,
|
|
464
855
|
ts: new Date().toISOString(),
|
|
465
856
|
phase: args.phase,
|
|
466
857
|
role: args.role,
|
|
@@ -515,4 +906,28 @@ function serializeRunError(error) {
|
|
|
515
906
|
retryable: false
|
|
516
907
|
};
|
|
517
908
|
}
|
|
909
|
+
/**
 * Read this package's version from the `package.json` one directory above
 * this module.
 *
 * Best effort: if the file cannot be read or parsed, or has no version,
 * `"0.0.0"` is returned instead of failing.
 *
 * @returns {Promise<string>} The package version, or `"0.0.0"` as a fallback.
 */
async function currentPackageVersion() {
    const fallbackVersion = "0.0.0";
    try {
        const manifestUrl = new URL("../package.json", import.meta.url);
        const manifest = JSON.parse(await readFile(manifestUrl, "utf8"));
        return manifest.version ?? fallbackVersion;
    }
    catch {
        return fallbackVersion;
    }
}
|
|
918
|
+
/**
 * Check whether two version strings share the same major and minor numbers.
 *
 * Only the leading `<digits>.<digits>` of each version is considered, and
 * the parts are compared numerically.
 *
 * @param {string|undefined} left - First version; a missing or empty value never matches.
 * @param {string} right - Second version.
 * @returns {boolean} `true` when both versions parse and their major.minor parts are equal.
 */
function sameMajorMinor(left, right) {
    if (!left) {
        return false;
    }
    const majorMinor = (version) => {
        const found = version.match(/^(\d+)\.(\d+)/);
        return found ? [Number(found[1]), Number(found[2])] : undefined;
    };
    const leftParts = majorMinor(left);
    const rightParts = majorMinor(right);
    if (!leftParts || !rightParts) {
        return false;
    }
    return leftParts.every((part, index) => part === rightParts[index]);
}
|
|
518
933
|
//# sourceMappingURL=runner.js.map
|