npm - @deepthonk/core - Versions diffs - 0.1.1 → 0.1.2 - Mend

@deepthonk/core 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/runner.js CHANGED Viewed

@@ -1,15 +1,19 @@
 import pLimit from "p-limit";
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
 import { z } from "zod";
 import { fitBradleyTerry } from "./bradleyTerry.js";
 import { BudgetTracker } from "./budgetTracker.js";
 import { planBudget, validateProfile } from "./budget.js";
 import { BudgetExceededError, CancelledError, ConfigError, DeepThonkError } from "./errors.js";
+import { runArtifactFiles } from "./artifacts.js";
 import { parseJsonObject } from "./json.js";
-import { emptyUsage } from "./lifecycle.js";
+import { claimRunLock, emptyUsage, releaseRunLock } from "./lifecycle.js";
 import { makeKRegularPairs } from "./pairScheduler.js";
 import { comparePrompt, finalizePrompt, generatePrompt, mutatePrompt } from "./prompts.js";
 import { createRng } from "./rng.js";
-import { runConfigSchema, builtInProfiles } from "./schemas.js";
+import { assertNoPruneInProgress, buildPopulationMap, buildResumePlan, groupComparisons, persistPrunedTrace, pruneTraceToPlan, readResumeTrace, reconstructScores, replayBudgetUsage, resolveResumeRunId, resumeConfigError, toResumePlanStatus } from "./resume.js";
+import { runConfigSchema, builtInProfiles, phaseCompletedEventSchema } from "./schemas.js";
 import { aggregateCritiques } from "./critique.js";
 import { TraceStore } from "./traceStore.js";
 const compareOutputSchema = z.object({
@@ -21,16 +25,27 @@ const compareOutputSchema = z.object({
     feedback_b: z.string().optional(),
     selection_reason: z.string().default("")
 });
-export async function runDeepThonk(configInput, driver, control = {}) {
+export async function runDeepThonk(configInput, driver, control = {}, resumeState) {
     const config = runConfigSchema.parse(configInput);
     validateProfile(config.profile);
     enforceBudget(config);
-    const runId = `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
+    const runId = resumeState?.runId ?? `run_${new Date().toISOString().replace(/[:.]/g, "-")}_${Math.abs(config.seed)}`;
     const trace = new TraceStore(config.runDir);
     const rng = createRng(config.seed);
-    const tracker = new BudgetTracker(config);
-    const startedAt = new Date().toISOString();
-    await trace.init(config, runId);
+    const tracker = resumeState?.tracker ?? new BudgetTracker(config);
+    const startedAt = resumeState?.startedAt ?? new Date().toISOString();
+    if (!resumeState) {
+        await trace.init({ ...config, version: await currentPackageVersion() }, runId);
+    }
+    else {
+        await trace.event({
+            type: "run.resumed",
+            run_id: runId,
+            resumed_at: new Date().toISOString(),
+            next_phase: resumeState.nextPhase.phase,
+            generation: resumeState.nextPhase.generation
+        });
+    }
     let stopped = false;
     const writeStatus = async (state, phase, extra = {}) => {
         await trace.writeStatus({
@@ -51,7 +66,7 @@ export async function runDeepThonk(configInput, driver, control = {}) {
             throw new CancelledError(`Run cancelled before ${phase}.`, {
                 code: "run.cancelled",
                 retryable: false,
-                fix: "Start a new run with a fresh output directory. Automatic replay is not implemented yet."
+                fix: "Resume the run with deepthonk resume --continue after the worker has stopped."
             });
         }
     };
@@ -80,40 +95,61 @@ export async function runDeepThonk(configInput, driver, control = {}) {
         }
     };
     try {
-        await writeStatus("running", "initialized");
-        await assertNotCancelled("initial population");
-        let population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
-        await trace.writePopulation(0, population);
-        await assertBudget("initial population");
-        await writeStatus("running", "population_completed", { generation: 0 });
+        await writeStatus("running", resumeState ? "resume_replay" : "initialized");
+        let population;
+        if (isResumePhaseCompleted(resumeState, "initial_generation")) {
+            population = resumePopulation(resumeState, 0);
+        }
+        else {
+            await assertNotCancelled("initial population");
+            population = await generateInitialPopulation(config, driver, trace, rng, runId, tracker, guards);
+            await trace.writePopulation(0, population);
+            await assertBudget("initial population");
+            await markPhaseCompleted(trace, "initial_generation");
+            await writeStatus("running", "population_completed", { generation: 0 });
+        }
         for (let gen = 1; gen <= config.profile.t; gen += 1) {
-            await assertNotCancelled(`generation ${gen} comparisons`);
-            await writeStatus("running", "generation_comparisons", { generation: gen });
-            const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, rng);
-            const comparisons = await judgePairs({
-                config,
-                driver,
-                trace,
-                rng,
-                runId,
-                generation: gen,
-                pairs,
-                population,
-                tracker,
-                guards
-            });
-            const scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
-            await trace.writeScores(gen, scores);
-            await assertBudget(`generation ${gen} comparisons`);
+            if (isResumePhaseCompleted(resumeState, "generation_mutation", gen)) {
+                population = resumePopulation(resumeState, gen);
+                continue;
+            }
+            let comparisons;
+            let scores;
+            if (isResumePhaseCompleted(resumeState, "generation_judging", gen)) {
+                comparisons = resumeComparisons(resumeState, gen);
+                scores = resumeScores(resumeState, gen, population, comparisons, config.profile.lambda);
+            }
+            else {
+                await assertNotCancelled(`generation ${gen} comparisons`);
+                await writeStatus("running", "generation_comparisons", { generation: gen });
+                const judgingRng = phaseRng(config.seed, "generation_judging", gen);
+                const pairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.k, judgingRng);
+                comparisons = await judgePairs({
+                    config,
+                    driver,
+                    trace,
+                    rng: judgingRng,
+                    runId,
+                    generation: gen,
+                    pairs,
+                    population,
+                    tracker,
+                    guards
+                });
+                scores = fitBradleyTerry(population, comparisons, config.profile.lambda, gen);
+                await trace.writeScores(gen, scores);
+                await assertBudget(`generation ${gen} comparisons`);
+                await markPhaseCompleted(trace, "generation_judging", gen);
+            }
             const ranked = rankPopulation(population, scores);
             const elites = topQuartile(ranked);
             const discarded = bottomQuartile(ranked);
             const survivors = ranked.filter((candidate) => !discarded.has(candidate.id));
             const mutationParents = survivors.slice(0, config.profile.n - elites.length);
             const critiquesByCandidate = aggregateCritiques(population, comparisons);
-            await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
             await assertNotCancelled(`generation ${gen} mutation`);
             await writeStatus("running", "generation_mutation", { generation: gen });
+            await traceDiscardedCandidates(trace, ranked, new Set([...elites, ...mutationParents].map((candidate) => candidate.id)), discarded, gen);
             const mutants = await mutateSurvivors({
                 config,
                 driver,
@@ -132,26 +168,37 @@ export async function runDeepThonk(configInput, driver, control = {}) {
             population = keepPopulationSize([...eliteCopies, ...mutants], config.profile.n);
             await trace.writePopulation(gen, population);
             await assertBudget(`generation ${gen} mutation`);
+            await markPhaseCompleted(trace, "generation_mutation", gen);
             await writeStatus("running", "generation_completed", { generation: gen });
         }
-        await assertNotCancelled("final ranking");
-        await writeStatus("running", "final_ranking", { generation: "final" });
-        const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, rng);
-        const finalComparisons = await judgePairs({
-            config,
-            driver,
-            trace,
-            rng,
-            runId,
-            generation: "final",
-            pairs: finalPairs,
-            population,
-            tracker,
-            guards
-        });
-        const finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
-        await trace.writeScores("final", finalScores);
-        await assertBudget("final ranking");
+        let finalComparisons;
+        let finalScores;
+        if (isResumePhaseCompleted(resumeState, "final_judging")) {
+            finalComparisons = resumeComparisons(resumeState, "final");
+            finalScores = resumeScores(resumeState, "final", population, finalComparisons, config.profile.lambda);
+        }
+        else {
+            await assertNotCancelled("final ranking");
+            await writeStatus("running", "final_ranking", { generation: "final" });
+            const finalRng = phaseRng(config.seed, "final_judging");
+            const finalPairs = makeKRegularPairs(population.map((candidate) => candidate.id), config.profile.m, finalRng);
+            finalComparisons = await judgePairs({
+                config,
+                driver,
+                trace,
+                rng: finalRng,
+                runId,
+                generation: "final",
+                pairs: finalPairs,
+                population,
+                tracker,
+                guards
+            });
+            finalScores = fitBradleyTerry(population, finalComparisons, config.profile.lambda, "final");
+            await trace.writeScores("final", finalScores);
+            await assertBudget("final ranking");
+            await markPhaseCompleted(trace, "final_judging");
+        }
         const winner = rankPopulation(population, finalScores)[0];
         if (!winner)
             throw new ConfigError("Run produced no winner.");
@@ -176,7 +223,10 @@ export async function runDeepThonk(configInput, driver, control = {}) {
             completed_at: completedAt
         };
         await trace.writeSummary(summary, finalAnswer, winner.content);
+        await markPhaseCompleted(trace, "finalizing");
         await trace.event({ type: "run.completed", winner_id: winner.id, completed_at: completedAt });
+        if (resumeState)
+            await trace.event({ type: "run.resumed_completed", winner_id: winner.id, completed_at: completedAt });
         await writeStatus("completed", "summary", { generation: "final", completed_at: completedAt });
         return {
             runId,
@@ -195,6 +245,280 @@ export async function runDeepThonk(configInput, driver, control = {}) {
         throw error;
     }
 }
+/**
+ * Resumes an interrupted run from the artifacts stored in `runDir`.
+ *
+ * Validates the stored config (package version, output block, profile, budget,
+ * provider routes), claims the run lock, builds a resume plan from the trace
+ * events, and re-enters runDeepThonk with the reconstructed state. Once the lock
+ * has been claimed it is released in `finally` on every exit path.
+ *
+ * @param runDir  Run directory holding config.json, the status file, and the trace.
+ * @param driver  Model driver; checked against the stored provider configuration.
+ * @param options Optional settings. `dryRun` is read here; the whole object is also
+ *                forwarded to assertResumeProviderMatches, which reads `provider`.
+ * @returns The plan status when `options.dryRun` is set, otherwise the result of runDeepThonk.
+ * @throws ConfigError with code "run.directory_locked" when the lock cannot be claimed.
+ *         Other failures go through resumeConfigError, which is called without `throw`
+ *         and is presumably expected never to return — confirm in resume.js.
+ */
+export async function resumeDeepThonk(runDir, driver, options = {}) {
+    // A summary on disk means the run already finished.
+    const existingSummary = await readOptionalJson(runDir, runArtifactFiles.summary);
+    if (existingSummary) {
+        resumeConfigError("Run already has summary.json; nothing to resume.", "resume.already_complete", "Use deepthonk inspect/result to read the completed run.");
+    }
+    const rawConfig = await readRequiredConfig(runDir);
+    const currentVersion = await currentPackageVersion();
+    // A non-string stored version is treated as missing, which sameMajorMinor rejects.
+    if (!sameMajorMinor(typeof rawConfig.version === "string" ? rawConfig.version : undefined, currentVersion)) {
+        resumeConfigError(`Cannot resume run from version ${String(rawConfig.version ?? "missing")}; current package version is ${currentVersion}. Resume requires matching major.minor.`, "resume.version_mismatch", "Start a fresh run with the current DeepThonk version, or resume with a package version whose major.minor matches the trace.");
+    }
+    assertStoredOutputConfigComplete(rawConfig);
+    const parsedConfig = runConfigSchema.parse(rawConfig);
+    // The directory being resumed overrides whatever runDir was stored in config.json.
+    const config = { ...parsedConfig, runDir };
+    validateProfile(config.profile);
+    enforceBudget(config);
+    assertResumeProviderMatches(rawConfig, config, driver, options);
+    // NOTE(review): called without await — if assertNoPruneInProgress is async, its
+    // rejection would not stop the resume here; confirm in resume.js.
+    assertNoPruneInProgress(runDir);
+    // Status and trace have not been read yet, so the lock id is resolved from config alone.
+    const lockRunId = resolveResumeRunId(config, undefined, []);
+    const claimed = await claimRunLock(runDir, lockRunId);
+    if (!claimed) {
+        throw new ConfigError(`Run directory is already claimed: ${runDir}`, {
+            code: "run.directory_locked",
+            retryable: false,
+            fix: "Wait for the active run to finish, or inspect the existing run.lock file."
+        });
+    }
+    try {
+        const status = await readOptionalJson(runDir, runArtifactFiles.status);
+        // Refuse to resume while the recorded worker still appears to be alive.
+        if ((status?.state === "running" || status?.state === "pending") && isLiveWorker(status.worker_pid)) {
+            resumeConfigError(`Run is still in flight at phase ${status.phase}.`, "resume.in_flight", "Wait for the worker to finish, or cancel/stop it before resuming.");
+        }
+        const trace = await readResumeTrace(runDir);
+        const runId = resolveResumeRunId(config, status, trace.events);
+        const plan = buildResumePlan(config, trace.events);
+        // The summary file was already found missing above, so a plan pointing at "summary" is contradictory.
+        if (plan.nextPhase.phase === "summary") {
+            resumeConfigError("Trace says finalizing completed, but summary.json is missing.", "resume.inconsistent_trace", "Inspect the run directory and restore summary.json, or start a fresh run.");
+        }
+        const planStatus = toResumePlanStatus(runDir, runId, plan);
+        // Dry run: report the plan without writing status or persisting a pruned trace.
+        if (options.dryRun)
+            return planStatus;
+        const pruned = pruneTraceToPlan(trace, plan);
+        const populationByGeneration = buildPopulationMap(config, pruned.populations, pruned.candidates, plan);
+        const comparisonsByGeneration = groupComparisons(pruned.comparisons);
+        const scoresByGeneration = reconstructScores(config, populationByGeneration, comparisonsByGeneration, pruned.scores, plan);
+        const tracker = replayBudgetUsage(config, pruned.usage);
+        // Keep the original start time when the status file recorded one.
+        const startedAt = status?.started_at ?? new Date().toISOString();
+        // Status is written with this process's pid before the pruned trace is persisted.
+        await new TraceStore(runDir).writeStatus({
+            run_id: runId,
+            run_dir: runDir,
+            state: "running",
+            phase: "resume_planning",
+            usage: cloneUsage(tracker.usage),
+            started_at: startedAt,
+            worker_pid: process.pid,
+            updated_at: new Date().toISOString()
+        });
+        await persistPrunedTrace(runDir, pruned);
+        // The third argument (`control`) is passed as an empty object.
+        return await runDeepThonk(config, driver, {}, {
+            runId,
+            startedAt,
+            completed: plan.completed,
+            populationByGeneration,
+            comparisonsByGeneration,
+            scoresByGeneration,
+            tracker,
+            nextPhase: plan.nextPhase
+        });
+    }
+    finally {
+        await releaseRunLock(runDir);
+    }
+}
+/**
+ * Appends a "phase.completed" event to the trace.
+ *
+ * The payload is validated with phaseCompletedEventSchema before it is written,
+ * and is stamped with the current time in ISO-8601 form.
+ *
+ * @param trace      Trace store exposing an async `event(payload)` method.
+ * @param phase      Name of the phase that just finished.
+ * @param generation Optional generation the phase belongs to.
+ */
+async function markPhaseCompleted(trace, phase, generation) {
+    const payload = {
+        type: "phase.completed",
+        phase,
+        generation,
+        at: new Date().toISOString()
+    };
+    const validated = phaseCompletedEventSchema.parse(payload);
+    await trace.event(validated);
+}
+/**
+ * Tells whether the resume state records the given phase as already completed.
+ *
+ * @param resumeState Resume state carrying a `completed` set of phase keys, or nullish for a fresh run.
+ * @param phase       Phase name.
+ * @param generation  Optional generation for per-generation phases.
+ * @returns false when there is no resume state, otherwise whether the phase key is in `completed`.
+ */
+function isResumePhaseCompleted(resumeState, phase, generation) {
+    // Fresh runs have no resume state, so nothing has been completed yet.
+    if (resumeState == null) {
+        return false;
+    }
+    const key = resumePhaseKey(phase, generation);
+    return Boolean(resumeState.completed.has(key));
+}
+/**
+ * Looks up the population snapshot restored for a generation.
+ *
+ * @param resumeState Resume state carrying a `populationByGeneration` map.
+ * @param generation  Generation key to look up.
+ * @returns The stored population for that generation.
+ * @throws ConfigError (code "resume.population_missing") when no snapshot is stored.
+ */
+function resumePopulation(resumeState, generation) {
+    const snapshot = resumeState?.populationByGeneration.get(generation);
+    if (snapshot) {
+        return snapshot;
+    }
+    throw new ConfigError(`Resume state is missing population generation ${generation}.`, {
+        code: "resume.population_missing",
+        retryable: false,
+        fix: "Inspect the run directory for missing population snapshots."
+    });
+}
+/**
+ * Looks up the comparison rows restored for a generation.
+ *
+ * @param resumeState Resume state carrying a `comparisonsByGeneration` map.
+ * @param generation  Generation key to look up (a number or "final" at the visible call sites).
+ * @returns The stored comparisons for that generation.
+ * @throws ConfigError (code "resume.comparisons_missing") when none are stored.
+ */
+function resumeComparisons(resumeState, generation) {
+    const rows = resumeState?.comparisonsByGeneration.get(generation);
+    if (rows) {
+        return rows;
+    }
+    throw new ConfigError(`Resume state is missing comparisons for generation ${generation}.`, {
+        code: "resume.comparisons_missing",
+        retryable: false,
+        fix: "Inspect the run directory for missing comparison trace rows."
+    });
+}
+/**
+ * Returns the scores restored for a generation, refitting them when none were stored.
+ *
+ * @param resumeState Resume state carrying a `scoresByGeneration` map.
+ * @param generation  Generation key to look up.
+ * @param population  Population passed to fitBradleyTerry on the fallback path.
+ * @param comparisons Comparisons passed to fitBradleyTerry on the fallback path.
+ * @param lambda      Value passed through to fitBradleyTerry as its third argument.
+ * @returns Stored scores, or the result of fitBradleyTerry when the stored entry is null/undefined.
+ */
+function resumeScores(resumeState, generation, population, comparisons, lambda) {
+    const stored = resumeState?.scoresByGeneration.get(generation);
+    if (stored === undefined || stored === null) {
+        return fitBradleyTerry(population, comparisons, lambda, generation);
+    }
+    return stored;
+}
+/**
+ * Builds a random generator for one phase, seeded from the run seed, the phase name,
+ * and (when given) the generation.
+ *
+ * @param seed       Run seed.
+ * @param phase      Phase name mixed into the derived seed.
+ * @param generation Optional generation; a missing value contributes an empty string.
+ * @returns The generator returned by createRng for the hashed seed text.
+ */
+function phaseRng(seed, phase, generation) {
+    const generationPart = generation ?? "";
+    const seedText = `${seed}:${phase}:${generationPart}`;
+    return createRng(hashSeed(seedText));
+}
+/**
+ * Hashes a string to an unsigned 32-bit integer with the FNV-1a scheme
+ * (offset basis 2166136261, prime 16777619), one UTF-16 code unit at a time.
+ *
+ * @param value Text to hash.
+ * @returns Hash in the range 0..2^32-1.
+ */
+function hashSeed(value) {
+    // split("") yields UTF-16 code units, matching an index-based charCodeAt walk.
+    const mixed = value
+        .split("")
+        .reduce((acc, unit) => Math.imul(acc ^ unit.charCodeAt(0), 16777619), 2166136261);
+    // Reinterpret the signed 32-bit result as unsigned.
+    return mixed >>> 0;
+}
+/**
+ * Builds the key under which a phase is tracked in the resume state's `completed` set.
+ *
+ * Only "generation_judging" and "generation_mutation" are keyed per generation;
+ * every other phase, or a per-generation phase without a generation, uses the bare name.
+ *
+ * @param phase      Phase name.
+ * @param generation Optional generation.
+ * @returns `${phase}:${generation}` for per-generation phases, otherwise `phase`.
+ */
+function resumePhaseKey(phase, generation) {
+    const isPerGeneration = phase === "generation_judging" || phase === "generation_mutation";
+    return isPerGeneration && generation !== undefined ? `${phase}:${generation}` : phase;
+}
+/**
+ * Derives a provider label for a driver.
+ *
+ * Lookup order: a string `provider`, a string `providerName`, a string
+ * `config.provider`, the label of a wrapped `baseDriver`, and finally a fixed
+ * mapping from the constructor name.
+ *
+ * @param driver Driver object to inspect.
+ * @returns The provider label, or undefined when none of the lookups match.
+ */
+function providerLabel(driver) {
+    for (const key of ["provider", "providerName"]) {
+        if (typeof driver[key] === "string") {
+            return driver[key];
+        }
+    }
+    if (typeof driver.config?.provider === "string") {
+        return driver.config.provider;
+    }
+    // Wrapping drivers defer to whatever they wrap.
+    if (driver.baseDriver) {
+        return providerLabel(driver.baseDriver);
+    }
+    // A Map (not an object literal) so inherited names such as "toString" never match.
+    const labelByConstructor = new Map([
+        ["FakeDriver", "fake"],
+        ["SamplingDriver", "sampling"],
+        ["OpenAiCompatibleDriver", "openai-compatible"]
+    ]);
+    return labelByConstructor.get(driver.constructor?.name);
+}
+/**
+ * Checks that the stored config carries a complete `output` block.
+ *
+ * The block must be a plain record whose `includeRawModelOutputs` and
+ * `includePrompts` are both booleans; otherwise the failure is reported through
+ * resumeConfigError with code "resume.config_incomplete".
+ *
+ * @param rawConfig Parsed contents of the stored config.json.
+ */
+function assertStoredOutputConfigComplete(rawConfig) {
+    const output = rawConfig.output;
+    const isComplete = isRecord(output)
+        && typeof output.includeRawModelOutputs === "boolean"
+        && typeof output.includePrompts === "boolean";
+    if (isComplete) {
+        return;
+    }
+    resumeConfigError("Stored config.json is missing the complete output block required for deterministic resume.", "resume.config_incomplete", "Restore output.includeRawModelOutputs and output.includePrompts, or start a fresh run.");
+}
+/**
+ * Checks that the runtime driver matches the provider configuration stored with the run.
+ *
+ * First compares the top-level provider (an explicit `options.provider`, else the
+ * driver's label, else the stored provider itself). Then, for every role that has a
+ * stored route, requires a runtime route whose provider, base URL (ignoring trailing
+ * slashes) and model agree with each stored field that is defined. Every mismatch is
+ * reported through providerMismatch.
+ *
+ * @param rawConfig Parsed contents of the stored config.json.
+ * @param config    Schema-parsed config.
+ * @param driver    Runtime driver.
+ * @param options   Resume options; only `provider` is read here.
+ */
+function assertResumeProviderMatches(rawConfig, config, driver, options) {
+    const runtimeProvider = options.provider ?? providerLabel(driver) ?? config.provider;
+    if (runtimeProvider !== config.provider) {
+        providerMismatch(`Cannot resume provider ${config.provider} with runtime provider ${runtimeProvider}.`);
+    }
+    const storedRoutes = expectedProviderRoutes(rawConfig, config);
+    const liveRoutes = runtimeProviderRoutes(driver);
+    for (const role of providerRoles) {
+        const stored = storedRoutes[role];
+        // Roles without a stored route impose no constraint.
+        if (stored) {
+            const live = liveRoutes[role];
+            if (!live) {
+                providerMismatch(`Cannot resume ${role} route ${routeLabel(stored)} without a matching runtime route.`);
+            }
+            const providerDiffers = stored.provider !== undefined && live.provider !== stored.provider;
+            if (providerDiffers) {
+                providerMismatch(`Cannot resume ${role} route provider ${stored.provider} with runtime provider ${live.provider ?? "missing"}.`);
+            }
+            const baseUrlDiffers = stored.baseUrl !== undefined && normalizeBaseUrl(live.baseUrl) !== normalizeBaseUrl(stored.baseUrl);
+            if (baseUrlDiffers) {
+                providerMismatch(`Cannot resume ${role} route baseUrl ${stored.baseUrl} with runtime baseUrl ${live.baseUrl ?? "missing"}.`);
+            }
+            const modelDiffers = stored.model !== undefined && live.model !== stored.model;
+            if (modelDiffers) {
+                providerMismatch(`Cannot resume ${role} route model ${stored.model} with runtime model ${live.model ?? "missing"}.`);
+            }
+        }
+    }
+}
+// Roles whose provider routes are looked up and compared on resume, in iteration order.
+const providerRoles = ["generator", "mutator", "judge", "finalizer"];
+/**
+ * Collects the per-role provider routes recorded in the stored config.
+ *
+ * Only roles whose entry under `rawConfig.providers` is a plain record are included.
+ * A missing provider falls back to `config.provider`, a missing model to the model
+ * configured for that role, and the base URL is read from `baseUrl` or `base_url`.
+ *
+ * @param rawConfig Parsed contents of the stored config.json.
+ * @param config    Schema-parsed config used for the fallbacks.
+ * @returns Object keyed by role with `{ provider, baseUrl, model }`; empty when there is no `providers` record.
+ */
+function expectedProviderRoutes(rawConfig, config) {
+    const providers = rawConfig.providers;
+    if (!isRecord(providers)) {
+        return {};
+    }
+    const entries = providerRoles.flatMap((role) => {
+        const stored = providers[role];
+        if (!isRecord(stored)) {
+            return [];
+        }
+        const route = {
+            provider: stringValue(stored.provider) ?? config.provider,
+            baseUrl: stringValue(stored.baseUrl) ?? stringValue(stored.base_url),
+            model: stringValue(stored.model) ?? modelForRole(config, role)
+        };
+        return [[role, route]];
+    });
+    return Object.fromEntries(entries);
+}
+/**
+ * Collects the per-role routes exposed by the runtime driver's `routes` table.
+ *
+ * Only roles whose table entry is a plain record are included. Provider and base
+ * URL come from the route's own `driver` when present; the model comes from the
+ * route's `model` string.
+ *
+ * @param driver Runtime driver, optionally carrying a `routes` record.
+ * @returns Object keyed by role with `{ provider, baseUrl, model }`; empty when there is no `routes` record.
+ */
+function runtimeProviderRoutes(driver) {
+    const table = driver.routes;
+    const collected = {};
+    if (!isRecord(table)) {
+        return collected;
+    }
+    for (const role of providerRoles) {
+        const entry = table[role];
+        if (isRecord(entry)) {
+            const inner = entry.driver;
+            collected[role] = {
+                provider: inner ? providerLabel(inner) : undefined,
+                baseUrl: inner ? providerBaseUrl(inner) : undefined,
+                model: stringValue(entry.model)
+            };
+        }
+    }
+    return collected;
+}
+/**
+ * Finds the base URL a driver talks to.
+ *
+ * Lookup order: a string `baseUrl`, a string `config.baseUrl`, then the base URL
+ * of a wrapped `baseDriver`.
+ *
+ * @param driver Driver object to inspect.
+ * @returns The base URL, or undefined when none is found.
+ */
+function providerBaseUrl(driver) {
+    if (typeof driver.baseUrl === "string") {
+        return driver.baseUrl;
+    }
+    if (typeof driver.config?.baseUrl === "string") {
+        return driver.config.baseUrl;
+    }
+    // Wrapping drivers defer to whatever they wrap.
+    return driver.baseDriver ? providerBaseUrl(driver.baseDriver) : undefined;
+}
+/**
+ * Picks the configured model for a role.
+ *
+ * @param config Config carrying generatorModel, mutatorModel, judgeModel and finalizerModel.
+ * @param role   Role name; anything other than "generator", "mutator" or "judge" yields the finalizer model.
+ * @returns The model configured for that role.
+ */
+function modelForRole(config, role) {
+    switch (role) {
+        case "generator":
+            return config.generatorModel;
+        case "mutator":
+            return config.mutatorModel;
+        case "judge":
+            return config.judgeModel;
+        default:
+            return config.finalizerModel;
+    }
+}
+/**
+ * Formats a route for error messages by joining its provider, base URL and model with "/".
+ *
+ * @param route Object with optional `provider`, `baseUrl` and `model`.
+ * @returns The joined truthy parts, or "provider route" when there are none.
+ */
+function routeLabel(route) {
+    const parts = [route.provider, route.baseUrl, route.model].filter((part) => Boolean(part));
+    const label = parts.join("/");
+    return label === "" ? "provider route" : label;
+}
+/**
+ * Reports a provider mismatch through resumeConfigError with the shared
+ * "resume.provider_mismatch" code and fix hint.
+ *
+ * @param message Description of the specific mismatch.
+ */
+function providerMismatch(message) {
+    const code = "resume.provider_mismatch";
+    const fix = "Use the same provider configuration that created config.json.";
+    resumeConfigError(message, code, fix);
+}
+/**
+ * Strips trailing slashes from a base URL so equivalent URLs compare equal.
+ *
+ * @param value Base URL string, or null/undefined.
+ * @returns The URL without trailing "/" characters, or undefined for a null/undefined input.
+ */
+function normalizeBaseUrl(value) {
+    if (value === undefined || value === null) {
+        return undefined;
+    }
+    return value.replace(/\/+$/, "");
+}
+/**
+ * Narrows a value to a non-empty string.
+ *
+ * @param value Any value.
+ * @returns The value itself when it is a string of length > 0, otherwise undefined.
+ */
+function stringValue(value) {
+    if (typeof value !== "string") {
+        return undefined;
+    }
+    return value.length > 0 ? value : undefined;
+}
+/**
+ * Tells whether a value is a non-null, non-array object.
+ *
+ * @param value Any value.
+ * @returns true for objects other than arrays and null, false otherwise.
+ */
+function isRecord(value) {
+    // typeof null is "object", so null is excluded explicitly.
+    if (value === null || typeof value !== "object") {
+        return false;
+    }
+    return !Array.isArray(value);
+}
+/**
+ * Reads the stored config.json of a run directory, treating a missing (or falsy)
+ * config as a resume error with code "resume.config_missing".
+ *
+ * @param runDir Run directory to read from.
+ * @returns The parsed config.
+ */
+async function readRequiredConfig(runDir) {
+    const stored = await readOptionalJson(runDir, runArtifactFiles.config);
+    if (stored) {
+        return stored;
+    }
+    resumeConfigError("Run directory is missing config.json; cannot replay safely.", "resume.config_missing", "Resume only from a DeepThonk run directory with config.json.");
+    return stored;
+}
+/**
+ * Reads and parses a JSON file inside a run directory, tolerating its absence.
+ *
+ * @param runDir   Directory containing the file.
+ * @param fileName File name relative to `runDir`.
+ * @returns The parsed JSON value, or undefined when the file does not exist (ENOENT).
+ * @throws Any other read error, and the JSON.parse error for malformed content.
+ */
+async function readOptionalJson(runDir, fileName) {
+    const filePath = join(runDir, fileName);
+    let text;
+    try {
+        text = await readFile(filePath, "utf8");
+    }
+    catch (error) {
+        // Only a missing file is optional; every other failure is surfaced.
+        if (error.code === "ENOENT") {
+            return undefined;
+        }
+        throw error;
+    }
+    return JSON.parse(text);
+}
+/**
+ * Reports whether the worker process recorded for a run still exists.
+ *
+ * The probe sends signal 0, which performs the existence and permission checks
+ * without delivering a signal. A failure with code EPERM means the process exists
+ * but this process may not signal it, so it is still reported as live; any other
+ * failure (such as ESRCH) means it is gone.
+ *
+ * @param pid Process id from the status file; may be missing.
+ * @returns true when the worker is alive or cannot be ruled out, false when it is gone.
+ */
+function isLiveWorker(pid) {
+    // With no recorded pid there is nothing to probe; stay conservative and assume a live worker.
+    if (!pid)
+        return true;
+    try {
+        process.kill(pid, 0);
+        return true;
+    }
+    catch (error) {
+        // EPERM: the process exists but is owned by someone else — resuming over it would be unsafe.
+        return error?.code === "EPERM";
+    }
+}
 export function rankPopulation(population, scores) {
     const order = new Map(scores.map((score) => [score.candidateId, score.rank]));
     return [...population].sort((left, right) => {
@@ -356,6 +680,13 @@ async function judgePairs(args) {
                 jsonParseFailures += 1;
             }
         }
+        if (invalidJson || !parsed) {
+            throw new ConfigError(`Judge produced ${jsonParseFailures} consecutive invalid-JSON responses (${args.config.retry.invalidJsonRetries + 1} attempts) for comparison ${job.id}. Refusing to synthesize a tie and pollute the ranking.`, {
+                code: "judge.persistent_invalid_json",
+                retryable: false,
+                fix: "The judge model is producing unparseable output. Inspect the raw response (set output.includeRawModelOutputs: true), switch judge models, or raise retry.invalidJsonRetries if the failures are transient."
+            });
+        }
         const comparison = {
             id: job.id,
             runId: args.runId,
@@ -575,4 +906,28 @@ function serializeRunError(error) {
         retryable: false
     };
 }
+/**
+ * Reads the `version` field of the package.json one directory above this module.
+ *
+ * Best effort: a missing version, an unreadable file, or malformed JSON all yield "0.0.0".
+ *
+ * @returns The package version, or "0.0.0" as the fallback.
+ */
+async function currentPackageVersion() {
+    const fallback = "0.0.0";
+    try {
+        const manifestUrl = new URL("../package.json", import.meta.url);
+        const manifest = JSON.parse(await readFile(manifestUrl, "utf8"));
+        return manifest.version ?? fallback;
+    }
+    catch {
+        return fallback;
+    }
+}
+/**
+ * Compares two version strings by their leading `major.minor` numbers.
+ *
+ * @param left  Version string, or a falsy value when unknown.
+ * @param right Version string to compare against.
+ * @returns true only when both start with `<digits>.<digits>` and the numbers are equal;
+ *          false when `left` is falsy or either string does not start that way.
+ */
+function sameMajorMinor(left, right) {
+    if (!left) {
+        return false;
+    }
+    const majorMinor = (version) => {
+        const found = version.match(/^(\d+)\.(\d+)/);
+        return found ? [Number(found[1]), Number(found[2])] : undefined;
+    };
+    const leftParts = majorMinor(left);
+    const rightParts = majorMinor(right);
+    if (!leftParts || !rightParts) {
+        return false;
+    }
+    return leftParts.every((part, index) => part === rightParts[index]);
+}
 //# sourceMappingURL=runner.js.map