npm - @lingjingai/scriptctl - Versions diffs - 0.11.3 → 0.11.5 - Mend

@lingjingai/scriptctl 0.11.3 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/dist/cli.js +17 -2
package/dist/cli.js.map +1 -1
package/dist/common.d.ts +1 -1
package/dist/common.js +29 -14
package/dist/common.js.map +1 -1
package/dist/domain/direct-core.d.ts +13 -8
package/dist/domain/direct-core.js +53 -13
package/dist/domain/direct-core.js.map +1 -1
package/dist/domain/script-core.d.ts +1 -0
package/dist/domain/script-core.js +11 -2
package/dist/domain/script-core.js.map +1 -1
package/dist/help-text.js +2 -2
package/dist/infra/providers.js +16 -0
package/dist/infra/providers.js.map +1 -1
package/dist/usecases/direct.d.ts +5 -3
package/dist/usecases/direct.js +416 -566
package/dist/usecases/direct.js.map +1 -1
package/dist/usecases/parse.d.ts +15 -0
package/dist/usecases/parse.js +324 -0
package/dist/usecases/parse.js.map +1 -0
package/dist/usecases/script.js +25 -3
package/dist/usecases/script.js.map +1 -1
package/package.json +1 -1

package/dist/usecases/direct.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import * as fs from "node:fs";
 import * as path from "node:path";
-import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, PARSE_MD_SPEC, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, deleteTree, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
-import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, parseAssetDoc, parseMarkdownBatch, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
+import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
+import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, classifyProviderError, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
 import { validateScript } from "../domain/script-core.js";
 import { makeProvider } from "../infra/providers.js";
 import { makeSourceManifest, prepareSource, } from "../infra/converters.js";
@@ -54,18 +54,6 @@ export function readRunState(workspace) {
         return {};
     }
 }
-function failureSignature(items) {
-    if (!isList(items))
-        return [];
-    const out = [];
-    for (const item of items) {
-        const s = strOf(item).trim();
-        if (s)
-            out.push(s);
-    }
-    out.sort();
-    return out;
-}
 export function addInspectedTarget(workspace, target) {
     const state = readRunState(workspace);
     const targets = [];
@@ -147,9 +135,6 @@ function episodeErrorPath(dir, ep) {
 function episodeResultKey(ep) {
     return `ep_${pad3(Number(ep["episode"]))}`;
 }
-function episodeResultsIndexPath(dir) {
-    return path.join(dir, "index.json");
-}
 function batchResultKey(batch) {
     const bid = strOf(batch["batch_id"]).trim();
     if (bid)
@@ -165,9 +150,6 @@ function batchMarkdownPath(dir, batch) {
 function batchErrorPath(dir, batch) {
     return path.join(dir, `${batchResultKey(batch)}.error.json`);
 }
-function batchResultsIndexPath(dir) {
-    return path.join(dir, "index.json");
-}
 function persistBatchResult(dir, batch, result) {
     const rawMd = result["_raw_markdown"];
     delete result["_raw_markdown"];
@@ -181,130 +163,81 @@ function persistBatchResult(dir, batch, result) {
         deletePath(mdPath);
     }
 }
-function readBatchResultsIndex(dir) {
-    const p = batchResultsIndexPath(dir);
-    if (!exists(p))
-        return { version: 1, batches: {} };
-    let data;
-    try {
-        data = readJson(p);
-    }
-    catch {
-        return { version: 1, batches: {} };
-    }
-    if (!isDict(data))
-        return { version: 1, batches: {} };
-    if (!isDict(data["batches"]))
-        data["batches"] = {};
-    if (!("version" in data))
-        data["version"] = 1;
-    return data;
-}
-function writeBatchResultsIndex(dir, index) {
-    writeJson(batchResultsIndexPath(dir), index);
-}
-function updateBatchResultMetadata(dir, batch, providerName, model) {
-    const index = readBatchResultsIndex(dir);
-    const batches = index["batches"] ?? {};
-    batches[batchResultKey(batch)] = {
-        episode: Number(batch["episode"]),
-        part: Number(batch["part"]),
-        provider: providerName,
-        model,
-        extracted_at: checkpointTimestamp(),
-    };
-    index["batches"] = batches;
-    writeBatchResultsIndex(dir, index);
+function episodeMetaPath(dir, ep) {
+    return path.join(dir, `${episodeResultKey(ep)}.meta.json`);
 }
-function removeBatchResultMetadata(dir, batch) {
-    const index = readBatchResultsIndex(dir);
-    const batches = index["batches"] ?? {};
-    const key = batchResultKey(batch);
-    if (key in batches) {
-        delete batches[key];
-        index["batches"] = batches;
-        writeBatchResultsIndex(dir, index);
-    }
+function batchMetaPath(dir, batch) {
+    return path.join(dir, `${batchResultKey(batch)}.meta.json`);
 }
-function readEpisodeResultsIndex(dir) {
-    const p = episodeResultsIndexPath(dir);
-    if (!exists(p))
-        return { version: 1, episodes: {} };
-    let data;
+function readUnitMeta(metaPath) {
+    if (!exists(metaPath))
+        return null;
     try {
-        data = readJson(p);
+        const data = readJson(metaPath);
+        return isDict(data) ? data : null;
     }
     catch {
-        return { version: 1, episodes: {} };
-    }
-    if (!isDict(data))
-        return { version: 1, episodes: {} };
-    if (!isDict(data["episodes"]))
-        data["episodes"] = {};
-    if (!("version" in data))
-        data["version"] = 1;
-    return data;
+        return null;
+    }
 }
-function writeEpisodeResultsIndex(dir, index) {
-    writeJson(episodeResultsIndexPath(dir), index);
+function writeUnitMeta(metaPath, meta) {
+    fs.mkdirSync(path.dirname(metaPath), { recursive: true });
+    writeJson(metaPath, meta);
 }
-function updateEpisodeResultMetadata(dir, ep, providerName, model) {
-    const index = readEpisodeResultsIndex(dir);
-    const episodes = index["episodes"] ?? {};
-    episodes[episodeResultKey(ep)] = {
-        provider: providerName,
-        model,
-        extracted_at: checkpointTimestamp(),
-    };
-    index["episodes"] = episodes;
-    writeEpisodeResultsIndex(dir, index);
+function removeUnitMeta(metaPath) {
+    if (exists(metaPath))
+        deletePath(metaPath);
 }
-function removeEpisodeResultMetadata(dir, ep) {
-    const index = readEpisodeResultsIndex(dir);
-    const episodes = index["episodes"] ?? {};
-    const key = episodeResultKey(ep);
-    if (key in episodes) {
-        delete episodes[key];
-        index["episodes"] = episodes;
-        writeEpisodeResultsIndex(dir, index);
-    }
+function stampEpisodeMeta(dir, ep, inputHash, provenance, providerName, model) {
+    writeUnitMeta(episodeMetaPath(dir, ep), {
+        schema: 1, key: episodeResultKey(ep), episode: Number(ep["episode"]),
+        input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
+    });
 }
-function compactResultHasMultiRefs(data) {
-    for (const scene of asList(data["sc"])) {
-        if (!isDict(scene))
+function stampBatchMeta(dir, batch, inputHash, provenance, providerName, model) {
+    writeUnitMeta(batchMetaPath(dir, batch), {
+        schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
+        input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
+    });
+}
+// Content-address one episode/batch plan unit by hashing: the contract version,
+// the exact source span text, the plan item (unstable title fields excluded),
+// and provider/model. A change to any of these rotates THAT unit's hash only.
+export function computeUnitHash(sourceText, unit, providerName, model) {
+    const span = isDict(unit["source_span"]) ? unit["source_span"] : {};
+    const start = Number(span["start"] ?? 0);
+    const end = Number(span["end"] ?? 0);
+    const spanText = sourceText.slice(start, end);
+    const planText = JSON.stringify(unit, checkpointReplacer());
+    return sha256Text([String(DIRECT_CONTRACT_VERSION), spanText, planText, providerName ?? "", model ?? ""].join("\u0000"));
+}
+// Delete result/meta/error/markdown files whose unit key is no longer in the
+// current plan (e.g. the source shed an episode). Driven solely by the plan's
+// key set — it never inspects hashes, content, or run_state, so it can only
+// remove units the plan no longer references. Also deletes the legacy v3 `index.json`.
+function gcOrphanUnits(dir, liveKeys) {
+    if (!exists(dir))
+        return [];
+    const removed = [];
+    for (const name of fs.readdirSync(dir)) {
+        if (name === "index.json") {
+            deletePath(path.join(dir, name));
+            removed.push(name);
             continue;
-        for (const action of asList(scene["a"])) {
-            if (!isDict(action))
-                continue;
-            const refs = action["r"];
-            if (isList(refs) && refs.length > 1)
-                return true;
+        }
+        const key = name.replace(/\.(meta\.json|error\.json|json|md)$/, "");
+        if (key === name)
+            continue; // not a recognized unit artifact
+        if (!liveKeys.has(key)) {
+            deletePath(path.join(dir, name));
+            removed.push(name);
         }
     }
-    return false;
-}
-export function initCheckpoint(sourceText, plan) {
-    const planText = JSON.stringify(plan, checkpointReplacer());
-    return {
-        contract_version: DIRECT_CONTRACT_VERSION,
-        source_sha256: sha256Text(sourceText),
-        episode_plan_sha256: sha256Text(planText),
-        total_episodes: Number(plan["total_episodes"] ?? asList(plan["episodes"]).length),
-    };
-}
-export function initBatchCheckpoint(sourceText, batchPlan) {
-    const planText = JSON.stringify(batchPlan, checkpointReplacer());
-    return {
-        contract_version: DIRECT_CONTRACT_VERSION,
-        source_sha256: sha256Text(sourceText),
-        batch_plan_sha256: sha256Text(planText),
-        total_batches: Number(batchPlan["total_batches"] ?? asList(batchPlan["batches"]).length),
-    };
+    return removed;
 }
 // Title fields are LLM-mutated downstream by enrichEpisodePlanTitles, so they
-// must be excluded from checkpoint hashes — otherwise every rerun gets a fresh
-// SHA, the previous checkpoint never matches, and the whole batch pipeline
-// re-extracts from scratch.
+// must be excluded from unit hashes — otherwise every rerun gets a fresh SHA,
+// the cached unit never matches, and that unit re-extracts from scratch.
 const CHECKPOINT_UNSTABLE_KEYS = new Set(["title", "generated_title", "title_status", "title_source"]);
 function checkpointReplacer() {
     // Python's json.dumps(sort_keys=True) sorts keys recursively. Replicate by walking and sorting.
@@ -323,121 +256,47 @@ function checkpointReplacer() {
         return value;
     };
 }
-function checkpointSourceMatches(previous, current) {
-    if (!previous || Object.keys(previous).length === 0)
-        return false;
-    const keys = ["contract_version", "source_sha256", "episode_plan_sha256", "total_episodes"];
-    return keys.every((k) => previous[k] === current[k]);
-}
-function batchCheckpointMatches(previous, current) {
-    if (!previous || Object.keys(previous).length === 0)
-        return false;
-    const keys = ["contract_version", "source_sha256", "batch_plan_sha256", "total_batches"];
-    return keys.every((k) => previous[k] === current[k]);
-}
-function resetInitOutputs(dd) {
-    for (const dirname of ["episode_results", "batch_results"]) {
-        const target = path.join(dd, dirname);
-        if (exists(target))
-            deleteTree(target);
-    }
-    for (const name of ["script.initial.json", "validation.json", "batch_plan.json", "asset_curation.json", "asset_metadata.json"]) {
-        const p = path.join(dd, name);
-        if (exists(p))
-            deletePath(p);
-    }
-}
-function resetBatchOutputs(dd) {
-    const batchResultsDir = path.join(dd, "batch_results");
-    if (exists(batchResultsDir))
-        deleteTree(batchResultsDir);
-}
-function loadCheckpointedEpisode(sourceText, episodeResultsDir, ep, providerName, model, previousProvider) {
+// Non-destructive reuse: a cached episode result is reusable iff its sidecar
+// meta records the same input_hash we compute for the current plan unit. The
+// hash subsumes the old provider / source_span / episode-number / contract
+// checks — any of those changing rotates the hash. On any mismatch or read
+// failure we return null and let the caller re-extract and overwrite; we NEVER
+// delete the cached file pre-emptively (that was the data-loss root cause).
+export function loadCachedEpisode(sourceText, episodeResultsDir, ep, expectedHash) {
+    const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
+    if (!meta || meta["input_hash"] !== expectedHash)
+        return null;
+    if (meta["status"] === "terminal")
+        return null;
     const p = episodeResultPath(episodeResultsDir, ep);
     if (!exists(p))
         return null;
-    let result;
     try {
-        const data = readJson(p);
-        const metadata = isDict(data["_scriptctl"]) ? data["_scriptctl"] : {};
-        const index = readEpisodeResultsIndex(episodeResultsDir);
-        let indexEntry = {};
-        const eps = index["episodes"];
-        if (isDict(eps)) {
-            const entry = eps[episodeResultKey(ep)];
-            if (isDict(entry))
-                indexEntry = entry;
-        }
-        const resultProvider = strOf(metadata["provider"] || indexEntry["provider"] || previousProvider).trim();
-        if (providerName && resultProvider && resultProvider !== providerName) {
-            throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
-        }
-        result = normalizeEpisodeResult(data, ep);
+        const result = normalizeEpisodeResult(readJson(p), ep);
         validateEpisodeExtractionQuality(sourceText, ep, result);
-        if (!("sc" in data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
-            writeJson(p, compactEpisodeResult(result));
-            if (providerName && model)
-                updateEpisodeResultMetadata(episodeResultsDir, ep, providerName, model);
-        }
+        return result;
     }
     catch {
-        try {
-            deletePath(p);
-        }
-        catch {
-            // ignore
-        }
-        removeEpisodeResultMetadata(episodeResultsDir, ep);
         return null;
     }
-    if (Number(result["episode"] ?? 0) !== Number(ep["episode"]))
+}
+export function loadCachedBatch(sourceText, batchResultsDir, batch, expectedHash) {
+    const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
+    if (!meta || meta["input_hash"] !== expectedHash)
         return null;
-    if (JSON.stringify(result["source_span"]) !== JSON.stringify(ep["source_span"]))
+    if (meta["status"] === "terminal")
         return null;
-    return result;
-}
-function loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider) {
     const p = batchResultPath(batchResultsDir, batch);
     if (!exists(p))
         return null;
-    let result;
     try {
-        const data = readJson(p);
-        const index = readBatchResultsIndex(batchResultsDir);
-        let indexEntry = {};
-        const batches = index["batches"];
-        if (isDict(batches)) {
-            const entry = batches[batchResultKey(batch)];
-            if (isDict(entry))
-                indexEntry = entry;
-        }
-        const resultProvider = strOf(indexEntry["provider"] || previousProvider).trim();
-        if (providerName && resultProvider && resultProvider !== providerName) {
-            throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
-        }
-        result = normalizeEpisodeResult(data, batch);
+        const result = normalizeEpisodeResult(readJson(p), batch);
         validateBatchExtractionQuality(sourceText, batch, result);
-        if (!("sc" in data) || compactResultHasMultiRefs(data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
-            persistBatchResult(batchResultsDir, batch, result);
-            if (providerName && model)
-                updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
-        }
+        return result;
     }
     catch {
-        try {
-            deletePath(p);
-        }
-        catch {
-            // ignore
-        }
-        removeBatchResultMetadata(batchResultsDir, batch);
         return null;
     }
-    if (Number(result["episode"] ?? 0) !== Number(batch["episode"]))
-        return null;
-    if (JSON.stringify(result["source_span"]) !== JSON.stringify(batch["source_span"]))
-        return null;
-    return result;
 }
 function mergeScene(target, source) {
     if ((target["location_name"] === "" || target["location_name"] === "未知场景" || target["location_name"] === null || target["location_name"] === undefined) &&
@@ -530,33 +389,9 @@ async function providerExtractAssetCurationLocal(provider, sourceText, script) {
     }
     return {};
 }
-function writeEpisodeFailure(dir, ep, exc) {
-    const err = exc;
-    const error = {
-        episode: Number(ep["episode"]),
-        title: ep["title"],
-        source_span: ep["source_span"],
-        error_type: err?.name || "Error",
-        message: (err?.message || err?.name || "Error").slice(0, 500),
-        failed_at: checkpointTimestamp(),
-    };
-    if (exc instanceof CliError) {
-        if (exc.required.length > 0)
-            error["required"] = exc.required;
-        if (exc.received.length > 0)
-            error["received"] = exc.received;
-        if (exc.nextSteps.length > 0)
-            error["next"] = exc.nextSteps;
-    }
-    const resultPath = episodeResultPath(dir, ep);
-    if (exists(resultPath))
-        deletePath(resultPath);
-    removeEpisodeResultMetadata(dir, ep);
-    writeJson(episodeErrorPath(dir, ep), error);
-    return error;
-}
-function writeBatchFailure(dir, batch, exc) {
+function writeBatchFailure(dir, batch, exc, inputHash, providerName, model) {
     const err = exc;
+    const terminal = classifyProviderError(exc) === "terminal";
     const error = {
         batch_id: batchResultKey(batch),
         episode: Number(batch["episode"]),
@@ -565,6 +400,8 @@ function writeBatchFailure(dir, batch, exc) {
         line_range: batch["line_range"],
         error_type: err?.name || "Error",
         message: (err?.message || err?.name || "Error").slice(0, 500),
+        terminal,
+        input_hash: inputHash,
         failed_at: checkpointTimestamp(),
     };
     if (exc instanceof CliError) {
@@ -578,7 +415,16 @@ function writeBatchFailure(dir, batch, exc) {
     const resultPath = batchResultPath(dir, batch);
     if (exists(resultPath))
         deletePath(resultPath);
-    removeBatchResultMetadata(dir, batch);
+    if (terminal) {
+        writeUnitMeta(batchMetaPath(dir, batch), {
+            schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
+            input_hash: inputHash, provenance: "extracted", status: "terminal",
+            provider: providerName, model, extracted_at: checkpointTimestamp(),
+        });
+    }
+    else {
+        removeUnitMeta(batchMetaPath(dir, batch));
+    }
     writeJson(batchErrorPath(dir, batch), error);
     return error;
 }
@@ -633,6 +479,9 @@ export async function commandInit(opts) {
     const workspace = strOf(opts["workspace_path"] || "workspace");
     const providerName = strOf(opts["provider"] || DEFAULT_PROVIDER);
     const model = strOf(opts["model"] || process.env.SCRIPTCTL_ANTHROPIC_MODEL || DEFAULT_MODEL);
+    // When set, retry batches a prior run marked terminal (content-filtered)
+    // instead of skipping them — e.g. after the provider's filter was adjusted.
+    const retryTerminal = Boolean(opts["retry_terminal"]);
     let concurrency;
     try {
         concurrency = parseInt(strOf(opts["concurrency"] || DEFAULT_CONCURRENCY), 10);
@@ -717,7 +566,6 @@ export async function commandInit(opts) {
     }
     const dd = directDir(workspace);
     fs.mkdirSync(dd, { recursive: true });
-    const previousStateBeforeInit = readRunState(workspace);
     updateRunState(workspace, {
         status: "init_running",
         command: "direct init",
@@ -830,17 +678,6 @@ export async function commandInit(opts) {
             nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
         });
     }
-    const checkpoint = initCheckpoint(sourceText, plan);
-    const batchCheckpoint = initBatchCheckpoint(sourceText, batchPlan);
-    const previousState = previousStateBeforeInit;
-    const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
-    const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
-    const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
-    const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
-    if (!checkpointReused)
-        resetInitOutputs(dd);
-    else if (!batchCheckpointReused)
-        resetBatchOutputs(dd);
     writeJson(path.join(dd, "source_manifest.json"), manifest);
     writeJson(path.join(dd, "episode_plan.json"), plan);
     writeJson(path.join(dd, "batch_plan.json"), batchPlan);
@@ -848,13 +685,15 @@ export async function commandInit(opts) {
     const batchResultsDir = path.join(dd, "batch_results");
     fs.mkdirSync(episodeResultsDir, { recursive: true });
     fs.mkdirSync(batchResultsDir, { recursive: true });
+    // Orphan-only GC: drop result/meta/error/md files for units the current
+    // plan no longer references (e.g. the source shed an episode). It is driven
+    // solely by the plan: units the plan still references are never touched. It
+    // also deletes the legacy v3 index.json. There is no whole-directory reset any more.
+    gcOrphanUnits(episodeResultsDir, new Set(asList(plan["episodes"]).map((ep) => episodeResultKey(ep))));
+    gcOrphanUnits(batchResultsDir, new Set(asList(batchPlan["batches"]).map((b) => batchResultKey(b))));
     updateRunState(workspace, {
         status: "init_running",
         init_stage: "batch_extract",
-        checkpoint,
-        batch_checkpoint: batchCheckpoint,
-        checkpoint_reused: checkpointReused,
-        batch_checkpoint_reused: batchCheckpointReused,
         batch_mode: batchMode,
         batch_target_lines: batchTargetLines,
         batch_max_chars: batchMaxChars,
@@ -873,11 +712,11 @@ export async function commandInit(opts) {
             batchesByEpisode.set(epNum, []);
         batchesByEpisode.get(epNum).push(batch);
     }
-    const previousProvider = strOf(previousState["provider"]).trim() || null;
+    // Per-unit reuse: each episode is judged independently by its own input hash,
+    // so a source edit to one episode invalidates only that episode — not every episode.
     for (const episode of asList(plan["episodes"])) {
-        const cached = checkpointReused
-            ? loadCheckpointedEpisode(sourceText, episodeResultsDir, episode, providerName, model, previousProvider)
-            : null;
+        const epHash = computeUnitHash(sourceText, episode, providerName, model);
+        const cached = loadCachedEpisode(sourceText, episodeResultsDir, episode, epHash);
         if (cached !== null) {
             results.push(cached);
             skipped.push(Number(episode["episode"]));
@@ -887,7 +726,7 @@ export async function commandInit(opts) {
                 if (!exists(batchResultPath(batchResultsDir, cachedBatch))) {
                     const backfilled = recoverBatchFromSource(sourceText, cachedBatch);
                     persistBatchResult(batchResultsDir, cachedBatch, backfilled);
-                    updateBatchResultMetadata(batchResultsDir, cachedBatch, providerName, model);
+                    stampBatchMeta(batchResultsDir, cachedBatch, computeUnitHash(sourceText, cachedBatch, providerName, model), "recovered", providerName, model);
                 }
                 const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
                 if (exists(errorPath))
@@ -900,11 +739,19 @@ export async function commandInit(opts) {
     }
     const batchResults = [];
     const skippedBatches = [];
+    const terminalSkipped = [];
     const pending = [];
     for (const batch of pendingBatches) {
-        const cachedBatch = batchCheckpointReused
-            ? loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider)
-            : null;
+        const bHash = computeUnitHash(sourceText, batch, providerName, model);
+        // A terminal failure (content filter) with the same input hash will fail the
+        // same way — skip it instead of re-calling the provider, unless --retry-terminal
+        // is set or the source/provider/model changed (which rotates the hash).
+        const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
+        if (!retryTerminal && meta && meta["status"] === "terminal" && meta["input_hash"] === bHash) {
+            terminalSkipped.push(batchResultKey(batch));
+            continue;
+        }
+        const cachedBatch = loadCachedBatch(sourceText, batchResultsDir, batch, bHash);
         if (cachedBatch !== null) {
             cachedBatch["_batch_id"] = batchResultKey(batch);
             cachedBatch["_batch_part"] = Number(batch["part"]);
@@ -931,12 +778,12 @@ export async function commandInit(opts) {
             result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
             batchResults.push(result);
             persistBatchResult(batchResultsDir, batch, result);
-            updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
+            stampBatchMeta(batchResultsDir, batch, computeUnitHash(sourceText, batch, providerName, model), "extracted", providerName, model);
             if (exists(errorPath))
                 deletePath(errorPath);
         }
         else {
-            failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
+            failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error, computeUnitHash(sourceText, batch, providerName, model), providerName, model));
         }
     }
     results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
@@ -955,82 +802,94 @@ export async function commandInit(opts) {
         return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
     });
     const completedBatches = skippedEpisodeBatchCount + batchResults.length;
-    if (failures.length > 0) {
-        const failedEpisodes = [...new Set(failures.map((it) => Number(it["episode"])))].sort((a, b) => a - b);
-        const failedBatches = failures.map((it) => strOf(it["batch_id"]));
-        const currentFailureSignature = failureSignature(failedBatches);
-        const previousFailureSignature = failureSignature(previousState["failed_batches"]);
-        const sameFailuresRepeated = checkpointReused &&
-            batchCheckpointReused &&
-            currentFailureSignature.length > 0 &&
-            currentFailureSignature.length === previousFailureSignature.length &&
-            currentFailureSignature.every((v, idx) => v === previousFailureSignature[idx]) &&
-            ["init_incomplete", "init_stalled"].includes(strOf(previousState["status"]));
-        const previousFailureStreak = normalizeInt(previousState["failure_streak"], 0);
-        const failureStreak = sameFailuresRepeated ? previousFailureStreak + 1 : 1;
-        const failureTitle = sameFailuresRepeated
-            ? "INIT STALLED: Same batches keep failing"
-            : "INIT INCOMPLETE: Batch extraction failed";
-        const nextSteps = sameFailuresRepeated
-            ? [
-                "Run direct inspect --target issue to read failed batch details.",
-                "Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
-            ]
-            : [
-                "Run direct inspect --target issue to review failed batches.",
-                "Rerun the same init once if failures look transient; completed checkpoints will be reused.",
-            ];
-        const failedEpisodeSet = new Set(failedEpisodes);
-        const skippedSet = new Set(skipped);
-        const batchResultsByEpisode = new Map();
-        for (const result of batchResults) {
-            const ep = Number(result["episode"] ?? 0);
-            if (!batchResultsByEpisode.has(ep))
-                batchResultsByEpisode.set(ep, []);
-            batchResultsByEpisode.get(ep).push(result);
-        }
+    const transientFailures = failures.filter((it) => !it["terminal"]);
+    const terminalFailures = failures.filter((it) => Boolean(it["terminal"]));
+    const skippedSet = new Set(skipped);
+    // Merge every fully-completed, non-cached episode into an episode_results
+    // checkpoint. Episodes still missing a batch (a failure this run, or a batch
+    // a prior run marked terminal and we skipped) are left unmerged so a rerun or
+    // an override can complete them.
+    const batchResultsByEpisode = new Map();
+    for (const result of batchResults) {
+        const ep = Number(result["episode"] ?? 0);
+        if (!batchResultsByEpisode.has(ep))
+            batchResultsByEpisode.set(ep, []);
+        batchResultsByEpisode.get(ep).push(result);
+    }
+    try {
         for (const episode of asList(plan["episodes"])) {
             const episodeNum = Number(episode["episode"]);
-            if (skippedSet.has(episodeNum) || failedEpisodeSet.has(episodeNum))
+            if (skippedSet.has(episodeNum))
                 continue;
             const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
-            if (expectedBatches && (batchResultsByEpisode.get(episodeNum) ?? []).length === expectedBatches) {
-                const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
-                validateEpisodeExtractionQuality(sourceText, episode, result);
-                results.push(result);
-                writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
-                updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
-            }
+            if (!expectedBatches || (batchResultsByEpisode.get(episodeNum) ?? []).length !== expectedBatches)
+                continue;
+            const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
+            validateEpisodeExtractionQuality(sourceText, episode, result);
+            results.push(result);
+            writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
+            stampEpisodeMeta(episodeResultsDir, episode, computeUnitHash(sourceText, episode, providerName, model), "extracted", providerName, model);
+            const errorPath = episodeErrorPath(episodeResultsDir, episode);
+            if (exists(errorPath))
+                deletePath(errorPath);
         }
+    }
+    catch (exc) {
+        const e = exc;
+        throw initFailedReport(workspace, {
+            title: "INIT FAILED: Episode merge failed",
+            stage: "episode_merge",
+            required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
+            received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
+            nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
+            updates: { batch_completed: completedBatches },
+        });
+    }
+    results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
+    // Classify episodes that could not be assembled. An episode blocked by ANY
+    // transient batch (timeout/5xx) can still complete on rerun → it blocks init.
+    // An episode blocked only by terminal (content-filtered) batches is held out:
+    // the rest of the script ships, and the operator overrides the blocked unit.
+    const completedEpisodeNums = new Set(results.map((r) => Number(r["episode"])));
+    const transientEpisodeSet = new Set(transientFailures.map((it) => Number(it["episode"])));
+    const incompleteEpisodes = asList(plan["episodes"]).map((ep) => Number(ep["episode"])).filter((n) => !completedEpisodeNums.has(n));
+    const transientBlocked = incompleteEpisodes.filter((n) => transientEpisodeSet.has(n)).sort((a, b) => a - b);
+    const heldOutEpisodes = incompleteEpisodes.filter((n) => !transientEpisodeSet.has(n)).sort((a, b) => a - b);
+    if (transientBlocked.length > 0) {
         updateRunState(workspace, {
-            status: sameFailuresRepeated ? "init_stalled" : "init_incomplete",
+            status: "init_incomplete",
             init_stage: "batch_extract",
-            checkpoint,
-            batch_checkpoint: batchCheckpoint,
             episode_total: asList(plan["episodes"]).length,
             episode_completed: results.length,
             episode_reused: skipped.length,
-            episode_failed: failedEpisodes.length,
-            failed_episodes: failedEpisodes,
+            episode_failed: incompleteEpisodes.length,
+            failed_episodes: transientBlocked,
+            held_out_episodes: heldOutEpisodes,
             batch_total: asList(batchPlan["batches"]).length,
             batch_completed: completedBatches,
             batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
             batch_failed: failures.length,
-            failed_batches: failedBatches,
-            failure_signature: currentFailureSignature,
-            failure_streak: failureStreak,
-            last_error: { title: failureTitle, failed_at: checkpointTimestamp() },
-            exportable: false,
+            batch_terminal: terminalFailures.length,
+            transient_failed_batches: transientFailures.map((it) => strOf(it["batch_id"])),
+            terminal_failed_batches: terminalFailures.map((it) => strOf(it["batch_id"])),
+            last_error: { title: "INIT INCOMPLETE: Batch extraction failed", failed_at: checkpointTimestamp() },
         });
-        const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
+        const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]} [${it["terminal"] ? "terminal" : "transient"}]: ${it["error_type"]} - ${it["message"]}`);
+        const next = [
+            "Run direct inspect --target issue to review failed batches.",
+            "Rerun the same init to retry transient failures; completed units are reused.",
+        ];
+        if (terminalFailures.length > 0) {
+            next.push("Terminal (content-filtered) batches will not clear on retry — use `direct override <unit> --from <file>` or soften the source.");
+        }
         const report = {
-            title: failureTitle,
+            title: "INIT INCOMPLETE: Batch extraction failed",
             result: [
                 `episodes total: ${asList(plan["episodes"]).length}`,
                 `completed: ${results.length}`,
                 `reused: ${skipped.length}`,
-                `failed episodes: ${failedEpisodes.length}`,
-                `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${failures.length} failed`,
+                `held out (terminal): ${heldOutEpisodes.length}`,
+                `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${transientFailures.length} transient, ${terminalFailures.length} terminal`,
                 `provider: ${providerName}`,
             ],
             artifacts: [
@@ -1043,81 +902,49 @@ export async function commandInit(opts) {
                 path.join(dd, "run_state.json"),
             ],
             issues,
-            next: nextSteps,
+            next,
         };
         return [report, EXIT_RUNTIME];
     }
     updateRunState(workspace, {
         status: "init_running",
         init_stage: "episode_merge",
-        checkpoint,
-        batch_checkpoint: batchCheckpoint,
         episode_total: asList(plan["episodes"]).length,
         episode_completed: results.length,
         episode_reused: skipped.length,
         episode_failed: 0,
         failed_episodes: [],
+        held_out_episodes: heldOutEpisodes,
         batch_total: asList(batchPlan["batches"]).length,
         batch_completed: completedBatches,
         batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
-        batch_failed: 0,
-        failed_batches: [],
-        failure_signature: [],
-        failure_streak: 0,
+        batch_failed: terminalFailures.length,
+        batch_terminal: terminalFailures.length,
         last_error: null,
     });
+    // Drop transient/cleared error markers, but KEEP terminal ones so `direct
+    // status` and export gating can see which episodes are held out.
     for (const dir of [batchResultsDir, episodeResultsDir]) {
         if (!exists(dir))
             continue;
         for (const name of fs.readdirSync(dir)) {
-            if (name.endsWith(".error.json")) {
-                try {
-                    deletePath(path.join(dir, name));
-                }
-                catch {
-                    // ignore
-                }
-            }
-        }
-    }
-    try {
-        const batchResultsByEpisode = new Map();
-        for (const result of batchResults) {
-            const ep = Number(result["episode"] ?? 0);
-            if (!batchResultsByEpisode.has(ep))
-                batchResultsByEpisode.set(ep, []);
-            batchResultsByEpisode.get(ep).push(result);
-        }
-        const skippedSet = new Set(skipped);
-        for (const episode of asList(plan["episodes"])) {
-            const episodeNum = Number(episode["episode"]);
-            if (skippedSet.has(episodeNum))
+            if (!name.endsWith(".error.json"))
                 continue;
-            const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
-            validateEpisodeExtractionQuality(sourceText, episode, result);
-            results.push(result);
-            writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
-            updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
-            const errorPath = episodeErrorPath(episodeResultsDir, episode);
-            if (exists(errorPath))
-                deletePath(errorPath);
+            const errPath = path.join(dir, name);
+            try {
+                const err = readJson(errPath);
+                if (!isDict(err) || !err["terminal"])
+                    deletePath(errPath);
+            }
+            catch {
+                deletePath(errPath);
+            }
         }
     }
-    catch (exc) {
-        const e = exc;
-        throw initFailedReport(workspace, {
-            title: "INIT FAILED: Episode merge failed",
-            stage: "episode_merge",
-            required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
-            received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
-            nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
-            updates: { checkpoint, batch_checkpoint: batchCheckpoint, batch_completed: completedBatches },
-        });
-    }
     results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
     let script;
     try {
-        updateRunState(workspace, { status: "init_running", init_stage: "script_merge", checkpoint, batch_checkpoint: batchCheckpoint });
+        updateRunState(workspace, { status: "init_running", init_stage: "script_merge" });
         script = mergeEpisodeResults(results, strOf(info["projectName"]) || path.basename(source, path.extname(source)));
     }
     catch (exc) {
@@ -1128,11 +955,11 @@ export async function commandInit(opts) {
             required: ["complete episode_results/*.json"],
             received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
             nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
-            updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
+            updates: { episode_completed: results.length },
         });
     }
     try {
-        updateRunState(workspace, { status: "init_running", init_stage: "asset_curation", checkpoint, batch_checkpoint: batchCheckpoint });
+        updateRunState(workspace, { status: "init_running", init_stage: "asset_curation" });
         const rawCuration = await providerExtractAssetCurationLocal(provider, sourceText, script);
         const curation = curateScriptAssets(script, rawCuration);
         writeJson(path.join(dd, "asset_curation.json"), curation);
@@ -1146,7 +973,7 @@ export async function commandInit(opts) {
                 required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
                 received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
                 nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
-                updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
+                updates: { episode_completed: results.length },
             });
         }
         const e = exc;
@@ -1156,11 +983,11 @@ export async function commandInit(opts) {
             required: ["provider location merge decisions and deterministic asset reuse curation"],
             received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
             nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
-            updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
+            updates: { episode_completed: results.length },
         });
     }
     try {
-        updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract", checkpoint, batch_checkpoint: batchCheckpoint });
+        updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract" });
         let metadata = provider.extractMetadata ? await provider.extractMetadata(sourceText, script) : {};
         if (!isDict(metadata))
             metadata = {};
@@ -1176,7 +1003,7 @@ export async function commandInit(opts) {
                 required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
                 received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
                 nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
-                updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
+                updates: { episode_completed: results.length },
             });
         }
         const e = exc;
@@ -1186,12 +1013,12 @@ export async function commandInit(opts) {
             required: ["provider metadata for worldview, role_type, and asset descriptions"],
             received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
             nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
-            updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
+            updates: { episode_completed: results.length },
         });
     }
     const scriptPath = path.join(dd, "script.initial.json");
     writeJson(scriptPath, script);
-    updateRunState(workspace, { status: "init_running", init_stage: "validate", checkpoint, batch_checkpoint: batchCheckpoint });
+    updateRunState(workspace, { status: "init_running", init_stage: "validate" });
     let validation;
     try {
         validation = validateScript(workspace, scriptPath);
@@ -1204,7 +1031,7 @@ export async function commandInit(opts) {
             required: ["script.initial.json that can be validated"],
             received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
             nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
-            updates: { checkpoint, script_path: scriptPath },
+            updates: { script_path: scriptPath },
         });
     }
     const passed = Boolean(validation["passed"]);
@@ -1213,10 +1040,6 @@ export async function commandInit(opts) {
         status,
         command: "direct init",
         init_stage: "complete",
-        checkpoint,
-        batch_checkpoint: batchCheckpoint,
-        checkpoint_reused: checkpointReused,
-        batch_checkpoint_reused: batchCheckpointReused,
         provider: providerName,
         model,
         concurrency,
@@ -1232,19 +1055,17 @@ export async function commandInit(opts) {
         episode_reused: skipped.length,
         episode_failed: 0,
         failed_episodes: [],
+        held_out_episodes: heldOutEpisodes,
         batch_total: asList(batchPlan["batches"]).length,
         batch_completed: completedBatches,
         batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
-        batch_failed: 0,
-        failed_batches: [],
-        failure_signature: [],
-        failure_streak: 0,
+        batch_failed: terminalFailures.length,
+        batch_terminal: terminalFailures.length,
         last_error: null,
         review_status: "pending",
         review_missing: [...REVIEW_TARGETS],
         inspected_targets: [],
         patch_count: 0,
-        exportable: providerName !== "mock",
     });
     const title = passed
         ? "INIT COMPLETE: Initial script ready"
@@ -1258,9 +1079,9 @@ export async function commandInit(opts) {
             `actions: ${stats["actions"] ?? 0}`,
             `validation: ${passed ? "passed" : "needs repair"}`,
             `provider: ${providerName}`,
-            `episode checkpoint reused: ${skipped.length}`,
+            `episodes reused: ${skipped.length}`,
             `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed`,
-            `batch checkpoint reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
+            `batches reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
             "agent_review: pending",
         ],
         artifacts: [
@@ -1286,188 +1107,217 @@ export async function commandInit(opts) {
     };
     return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
 }
-export function summarizeIssues(issues) {
-    if (issues.length === 0)
-        return [];
-    const counts = {};
-    for (const item of issues) {
-        const sev = strOf(item["severity"]);
-        counts[sev] = (counts[sev] ?? 0) + 1;
-    }
-    const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
-    const first = issues[0];
-    return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
-}
 // ---------------------------------------------------------------------------
-// command_parse — subagent-authored md workspace → script.initial.json
-//
-// Deterministic, no-LLM counterpart of `direct init`, but a DISTINCT layout from
-// write/direct: the 正文 md carries only the screenplay (no asset registration at
-// all), and each asset KIND is registered in its own file —
-// 人物.md / 场景.md / 道具.md / 发声源.md (+ optional 梗概.md for the whole-script
-// synopsis). It assembles the same script.initial.json and hands off to the
-// existing direct inspect/validate/export downstream (zero changes there).
+// command_override — inject a human extraction for a unit the provider can't
+// produce (content-filtered). The override is content-addressed exactly like a
+// provider result, so init reuses it and never re-calls the provider, and the
+// non-destructive GC never deletes it. We compute the input_hash from the plan
+// ourselves, so the operator never hand-edits source_span.
 // ---------------------------------------------------------------------------
-const _EP_FILE_RE = /^ep[_-]?0*(\d+)\.(?:md|markdown)$/i;
-const ASSET_DOC_SPECS = [
-    { kind: "actors", names: ["人物.md", "角色.md", "characters.md", "actors.md"] },
-    { kind: "locations", names: ["场景.md", "地点.md", "locations.md"] },
-    { kind: "props", names: ["道具.md", "props.md"] },
-    { kind: "speakers", names: ["发声源.md", "speakers.md"] },
-];
-const SYNOPSIS_DOC_NAMES = ["梗概.md", "全文梗概.md", "synopsis.md"];
-function firstExisting(dir, names) {
-    for (const n of names) {
-        const p = path.join(dir, n);
-        if (exists(p) && fs.statSync(p).isFile())
-            return p;
-    }
-    return null;
-}
-function collectEpisodeMdFiles(dir) {
-    if (!exists(dir) || !fs.statSync(dir).isDirectory())
-        return [];
-    const out = [];
-    for (const name of fs.readdirSync(dir)) {
-        const m = _EP_FILE_RE.exec(name);
-        if (!m)
-            continue;
-        const full = path.join(dir, name);
-        if (!fs.statSync(full).isFile())
-            continue;
-        out.push({ path: full, episode: parseInt(m[1], 10) });
+export function commandOverride(opts) { // Stores an operator-supplied extraction (the JSON file at opts.from) as the saved result for one planned unit named by the first positional argument.
+    const workspace = strOf(opts["workspace_path"] || "workspace"); // falls back to the literal directory name workspace when the option is unset
+    const unit = strOf(asList(opts["_args"])[0]).trim(); // unit key = first positional argument
+    const fromPath = strOf(opts["from"]).trim();
+    const dd = directDir(workspace);
+    const state = readRunState(workspace);
+    const providerName = strOf(opts["provider"] || state["provider"] || DEFAULT_PROVIDER); // precedence: explicit option, then run state, then the default constant
+    const model = strOf(opts["model"] || state["model"] || DEFAULT_MODEL); // same precedence as providerName
+    const isEpisode = /^ep_\d+$/.test(unit); // ep_ plus digits selects an episode unit; anything else must be bat_ plus digits
+    if (!isEpisode && !/^bat_\d+$/.test(unit)) { // reject keys shaped like neither an episode nor a batch
+        throw new CliError("OVERRIDE BLOCKED: Invalid unit", "Invalid unit key.", {
+            exitCode: EXIT_USAGE,
+            required: ["<unit>: ep_NNN or bat_NNNN"],
+            received: [`<unit>: ${unit || "<empty>"}`],
+            nextSteps: ["Pass an episode (ep_007) or batch (bat_0012) key shown by direct status."],
+        });
     }
-    out.sort((a, b) => a.episode - b.episode);
-    return out;
-}
-export function commandParse(opts) {
-    if (opts["spec"]) {
-        return [{ title: "PARSE SPEC: md 工作区写法", body: PARSE_MD_SPEC }, EXIT_OK];
+    if (!fromPath || !exists(fromPath)) { // an empty option value and a nonexistent path are reported the same way
+        throw new CliError("OVERRIDE BLOCKED: --from not found", "Override source file not found.", {
+            exitCode: EXIT_INPUT,
+            required: ["--from <path>: readable JSON extraction for the unit"],
+            received: [`--from: ${fromPath || "<missing>"}`],
+            nextSteps: ["Provide a JSON file with scenes/actions for the unit."],
+        });
     }
-    const workspace = strOf(opts["workspace_path"] || "workspace");
-    const args = asList(opts["_args"]);
-    const mdDir = strOf(opts["md_dir"] || args[0] || path.join(workspace, "parse"));
-    if (!exists(mdDir) || !fs.statSync(mdDir).isDirectory()) {
-        throw new CliError("PARSE BLOCKED: md workspace not found", "md workspace not found.", {
+    const planPath = path.join(dd, isEpisode ? "episode_plan.json" : "batch_plan.json"); // the plan file consulted depends on the unit kind
+    if (!exists(planPath)) {
+        throw new CliError("OVERRIDE BLOCKED: Plan not found", "Plan not found.", {
             exitCode: EXIT_INPUT,
-            required: ["a directory with per-episode 正文 md + 人物/场景/道具/发声源 md"],
-            received: [mdDir],
-            nextSteps: ["Pass the md workspace dir: scriptctl parse <dir>. Run `scriptctl parse --spec` for the format."],
+            required: [isEpisode ? "episode_plan.json" : "batch_plan.json"],
+            received: [planPath],
+            nextSteps: ["Run scriptctl direct init first."],
         });
     }
-    let episodesDir = strOf(opts["episodes_dir"]).trim();
-    if (!episodesDir) {
-        const sub = path.join(mdDir, "episodes");
-        episodesDir = exists(sub) && fs.statSync(sub).isDirectory() ? sub : mdDir;
+    const plan = readJson(planPath);
+    const planUnits = asList(plan[isEpisode ? "episodes" : "batches"]);
+    const planItem = planUnits.find((u) => (isEpisode ? episodeResultKey(u) : batchResultKey(u)) === unit) ?? null; // plan entry whose result key equals the requested unit, or null when absent
+    if (!planItem) {
+        throw new CliError("OVERRIDE BLOCKED: Unit not in current plan", "Unit not in current plan.", {
+            exitCode: EXIT_INPUT,
+            required: [`${unit} present in ${isEpisode ? "episode_plan.json" : "batch_plan.json"}`],
+            received: [`${unit}: not found among ${planUnits.length} units`],
+            nextSteps: ["Use a unit key from direct status; rerun init if the plan changed."],
+        });
     }
-    const bodyFiles = collectEpisodeMdFiles(episodesDir);
-    if (bodyFiles.length === 0) {
-        throw new CliError("PARSE BLOCKED: no episode md found", "no episode md found.", {
+    const sourceTextPath = path.join(workspace, "source.txt"); // resolved under the workspace root, not under dd
+    if (!exists(sourceTextPath)) {
+        throw new CliError("OVERRIDE BLOCKED: source.txt missing", "source.txt missing.", {
             exitCode: EXIT_INPUT,
-            required: ["per-episode body md named like ep_001.md"],
-            received: [episodesDir],
-            nextSteps: ["Add per-episode 正文 md (ep_001.md, ep_002.md, ...). Run `scriptctl parse --spec` for the format."],
+            required: [sourceTextPath],
+            received: ["<missing>"],
+            nextSteps: ["Run scriptctl direct init first."],
         });
     }
-    // Each asset KIND is registered in its own file; the 正文 md carries none.
-    // Fold them all into one bible fragment (per-kind arrays).
-    const bible = { actors: [], locations: [], props: [], speakers: [], state_definitions: [] };
-    const assetDocsFound = [];
-    for (const spec of ASSET_DOC_SPECS) {
-        const p = firstExisting(mdDir, spec.names);
-        if (!p)
-            continue;
-        assetDocsFound.push(path.basename(p));
-        const parsed = parseAssetDoc(readText(p), spec.kind);
-        for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
-            bible[key].push(...asList(parsed[key]));
-        }
+    const sourceText = readText(sourceTextPath);
+    let data;
+    try {
+        data = readJson(fromPath);
     }
-    // Optional whole-script synopsis (梗概.md); strip a leading `# 梗概` header line.
-    let globalSynopsis = "";
-    const synPath = firstExisting(mdDir, SYNOPSIS_DOC_NAMES);
-    if (synPath)
-        globalSynopsis = readText(synPath).replace(/^\s*#\s+\S[^\n]*\n/, "").trim();
-    const results = [];
-    const sourceChunks = [];
-    for (const file of bodyFiles) {
-        const bodyText = readText(file.path);
-        sourceChunks.push(`# ep_${pad3(file.episode)}\n${bodyText.trim()}`);
-        try {
-            results.push(parseMarkdownBatch(bodyText, { episode: file.episode, part: 1 }, { fragmentMode: true }));
-        }
-        catch (exc) {
-            const e = exc;
-            throw new CliError("PARSE BLOCKED: episode md invalid", "episode md invalid.", {
-                exitCode: EXIT_INPUT,
-                required: ["per-episode 正文 md following `scriptctl parse --spec`"],
-                received: [`${path.basename(file.path)}: ${(e?.message ?? "").slice(0, 200)}`],
-                nextSteps: ["Fix the episode md and re-run parse."],
-            });
-        }
+    catch (exc) { // any failure raised by readJson is reported as an input error
+        throw new CliError("OVERRIDE BLOCKED: --from invalid JSON", "Override JSON invalid.", {
+            exitCode: EXIT_INPUT,
+            required: ["valid extraction JSON"],
+            received: [`${fromPath}: ${exc.message}`],
+            nextSteps: ["Fix the JSON and retry."],
+        });
     }
-    results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
-    // Fold the registered assets into the first episode result so their
-    // descriptions / states flow into the merge. Names are deduplicated globally
-    // by mergeEpisodeResults, so registering them first gives the canonical
-    // (registry) descriptions priority over anything implied by scene references.
-    if (results.length > 0) {
-        const first = results[0];
-        for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
-            first[key] = [...asList(bible[key]), ...asList(first[key])];
-        }
+    const result = normalizeEpisodeResult(data, planItem); // the same normalizer is applied to episode and batch units
+    if (Number(result["episode"]) !== Number(planItem["episode"])) { // the normalized episode number must equal the one on the plan entry
+        throw new CliError("OVERRIDE BLOCKED: Episode mismatch", "Episode mismatch.", {
+            exitCode: EXIT_USAGE,
+            required: [`episode ${Number(planItem["episode"])}`],
+            received: [`episode ${Number(result["episode"])}`],
+            nextSteps: ["Provide an extraction for the correct episode."],
+        });
     }
-    const title = strOf(opts["title"]).trim() || path.basename(path.resolve(mdDir));
-    const script = mergeEpisodeResults(results, title);
-    if (globalSynopsis)
-        script["synopsis"] = globalSynopsis;
+    try {
+        if (isEpisode)
+            validateEpisodeExtractionQuality(sourceText, planItem, result);
+        else
+            validateBatchExtractionQuality(sourceText, planItem, result);
+    }
+    catch (exc) {
+        if (exc instanceof CliError) // a CliError raised by the validators passes through unchanged
+            throw exc;
+        throw new CliError("OVERRIDE BLOCKED: Extraction invalid", "Extraction invalid.", { // every other error type is wrapped as a usage error
+            exitCode: EXIT_USAGE,
+            required: ["valid action types (dialogue/inner_thought/action)"],
+            received: [exc.message.slice(0, 160)],
+            nextSteps: ["Fix the override extraction and retry."],
+        });
+    }
+    const dir = path.join(dd, isEpisode ? "episode_results" : "batch_results");
+    fs.mkdirSync(dir, { recursive: true });
+    const hash = computeUnitHash(sourceText, planItem, providerName, model); // hash inputs: source text, plan entry, provider, model. NOTE(review): presumably init reuses this result only when it derives the same hash, so provider and model here should match the init run — confirm
+    if (isEpisode) {
+        writeJson(episodeResultPath(dir, planItem), compactEpisodeResult(result));
+        stampEpisodeMeta(dir, planItem, hash, "override", providerName, model); // meta is stamped with the override label
+        const errPath = episodeErrorPath(dir, planItem); // remove an existing error file for this unit, if any
+        if (exists(errPath))
+            deletePath(errPath);
+    }
+    else {
+        persistBatchResult(dir, planItem, result);
+        stampBatchMeta(dir, planItem, hash, "override", providerName, model); // meta is stamped with the override label
+        const errPath = batchErrorPath(dir, planItem); // remove an existing error file for this unit, if any
+        if (exists(errPath))
+            deletePath(errPath);
+    }
+    const report = {
+        title: "OVERRIDE COMPLETE: Unit extraction injected",
+        result: [
+            `unit: ${unit}`,
+            `kind: ${isEpisode ? "episode" : "batch"}`,
+            `provenance: override`,
+            `provider/model: ${providerName} / ${model}`,
+            `scenes: ${asList(result["scenes"]).length}`,
+        ],
+        artifacts: [dir, path.join(dd, "run_state.json")], // NOTE(review): run_state.json is listed here but no run-state write is visible in this function — confirm this is intended
+        next: ["Rerun scriptctl direct init — the override is reused without re-calling the provider."],
+    };
+    return [report, EXIT_OK]; // pair of report object and exit code
+}
+// ---------------------------------------------------------------------------
+// command_status — rebuild the progress view from on-disk meta/error sidecars.
+// run_state is just a cache of this; deleting it loses nothing.
+// ---------------------------------------------------------------------------
+/**
+ * Build the `direct status` report from the per-unit meta sidecars on disk.
+ *
+ * Reads episode_plan.json and batch_plan.json from the directory returned by
+ * directDir(workspace), tallies batch metas by status and provenance, and
+ * lists the episodes that own a terminal batch but have no episode meta with
+ * status "ok" ("held out").
+ *
+ * @param {object} opts - Command options; `workspace_path` falls back to "workspace".
+ * @returns {Array} A `[report, EXIT_OK]` pair.
+ * @throws {CliError} With exit code EXIT_INPUT when either plan file is missing.
+ */
+export function commandStatus(opts) {
+    const workspace = strOf(opts["workspace_path"] || "workspace");
     const dd = directDir(workspace);
-    fs.mkdirSync(dd, { recursive: true });
-    // Write source.txt so the existing direct validate/export downstream (which
-    // gates on source.txt existing) works unchanged. For a parse-origin script the
-    // authored md *is* the source, so we persist the concatenated bodies.
-    fs.mkdirSync(workspace, { recursive: true });
-    fs.writeFileSync(path.join(workspace, "source.txt"), sourceChunks.join("\n\n") + "\n", "utf-8");
-    const scriptPath = path.join(dd, "script.initial.json");
-    writeJson(scriptPath, script);
-    const validation = validateScript(workspace, scriptPath, { requireSource: false });
-    const passed = Boolean(validation["passed"]);
-    updateRunState(workspace, {
-        status: passed ? "ready_for_agent" : "needs_agent_repair",
-        command: "parse",
-        init_stage: "complete",
-        provider: "parse",
-        source_path: path.resolve(mdDir),
-        script_path: scriptPath,
-        validation_path: path.join(dd, "validation.json"),
-        episode_total: results.length,
-        episode_completed: results.length,
-        review_status: "pending",
-        review_missing: [...REVIEW_TARGETS],
-        inspected_targets: [],
-        patch_count: 0,
-        exportable: true,
-        last_error: null,
-    });
-    const stats = validation["stats"] ?? {};
+    const episodePlanPath = path.join(dd, "episode_plan.json");
+    const batchPlanPath = path.join(dd, "batch_plan.json");
+    const hasEpisodePlan = exists(episodePlanPath);
+    const hasBatchPlan = exists(batchPlanPath);
+    if (!hasEpisodePlan || !hasBatchPlan) {
+        throw new CliError("STATUS BLOCKED: Plan not found", "Plan not found.", {
+            exitCode: EXIT_INPUT,
+            required: ["episode_plan.json and batch_plan.json"],
+            // Report both files so a missing batch plan is not masked by an
+            // existing episode plan.
+            received: [
+                hasEpisodePlan ? "episode_plan.json ok" : "episode_plan.json missing",
+                hasBatchPlan ? "batch_plan.json ok" : "batch_plan.json missing",
+            ],
+            nextSteps: ["Run scriptctl direct init first."],
+        });
+    }
+    const episodes = asList(readJson(episodePlanPath)["episodes"]);
+    const batches = asList(readJson(batchPlanPath)["batches"]);
+    const episodeResultsDir = path.join(dd, "episode_results");
+    const batchResultsDir = path.join(dd, "batch_results");
+    const count = { ok: 0, override: 0, recovered: 0, terminal: 0, missing: 0 };
+    // Episode numbers that own at least one terminal batch. Collected in the same
+    // pass as the tally so each batch meta sidecar is read only once.
+    const terminalEpisodes = new Set();
+    for (const batch of batches) {
+        const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
+        if (!meta) {
+            count.missing++;
+            continue;
+        }
+        if (meta["status"] === "terminal") {
+            count.terminal++;
+            terminalEpisodes.add(Number(batch["episode"]));
+            continue;
+        }
+        // Any meta that is not terminal counts as ok; override/recovered are
+        // sub-tallies of ok, not separate buckets.
+        count.ok++;
+        if (meta["provenance"] === "override")
+            count.override++;
+        else if (meta["provenance"] === "recovered")
+            count.recovered++;
+    }
+    const completedEpisodes = [];
+    for (const ep of episodes) {
+        const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
+        if (meta && meta["status"] === "ok")
+            completedEpisodes.push(Number(ep["episode"]));
+    }
+    // Held out = episodes with at least one terminal batch and no episode result.
+    const completedSet = new Set(completedEpisodes);
+    const heldOutEpisodes = [...terminalEpisodes]
+        .filter((epNum) => !completedSet.has(epNum))
+        .sort((a, b) => a - b);
+    // Pick the follow-up hint from the actual counts instead of a fixed example,
+    // and do not claim completeness while batches are still pending.
+    let nextSteps;
+    if (heldOutEpisodes.length > 0) {
+        nextSteps = [`Override held-out episodes with direct override, or export ${completedEpisodes.length}/${episodes.length} with direct export --allow-incomplete.`];
+    }
+    else if (count.missing > 0) {
+        nextSteps = ["Rerun scriptctl direct init to process pending batches."];
+    }
+    else {
+        nextSteps = ["All units accounted for."];
+    }
     const report = {
-        title: passed
-            ? "PARSE COMPLETE: Initial script ready"
-            : "PARSE NEEDS AGENT: Initial script written with repair issues",
+        title: "DIRECT STATUS",
         result: [
-            `episodes: ${stats["episodes"] ?? results.length}`,
-            `scenes: ${stats["scenes"] ?? 0}`,
-            `actions: ${stats["actions"] ?? 0}`,
-            `asset docs: ${assetDocsFound.join(" / ") || "(none)"}`,
-            `synopsis: ${globalSynopsis ? "yes" : "no"}`,
-            `validation: ${passed ? "passed" : "needs repair"}`,
-            "agent_review: pending",
+            `episodes: ${completedEpisodes.length}/${episodes.length} complete`,
+            `batches: ${count.ok}/${batches.length} ok (override ${count.override}, recovered ${count.recovered})`,
+            `terminal batches: ${count.terminal}`,
+            `pending batches: ${count.missing}`,
+            `held out episodes: ${heldOutEpisodes.length === 0 ? "-" : heldOutEpisodes.join(", ")}`,
         ],
-        artifacts: [scriptPath, path.join(dd, "validation.json"), path.join(dd, "run_state.json")],
-        issues: summarizeIssues(asList(validation["issues"])),
-        next: ["Run direct inspect (episode/asset/issue) for the two-stage review; apply patches if needed; then direct validate/export."],
+        artifacts: [batchResultsDir, episodeResultsDir, path.join(dd, "run_state.json")],
+        next: nextSteps,
     };
-    return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
+    return [report, EXIT_OK];
+}
+/**
+ * Condense a list of validation issues into at most two report lines.
+ *
+ * @param {Array<object>} issues - Issue records; `severity`, `code` and
+ *   `summary` are read from each record.
+ * @returns {string[]} An empty array when there are no issues; otherwise
+ *   `["<sev>: <n>; ...", "first: <code> - <summary>"]`, with severities ordered
+ *   by `localeCompare`.
+ */
+export function summarizeIssues(issues) {
+    if (issues.length === 0)
+        return [];
+    // A Map keeps the tally free of inherited Object.prototype keys, so a
+    // severity such as "constructor" or "__proto__" is counted like any other.
+    const counts = new Map();
+    for (const item of issues) {
+        const sev = strOf(item["severity"]);
+        counts.set(sev, (counts.get(sev) ?? 0) + 1);
+    }
+    const parts = [...counts.entries()]
+        .sort(([a], [b]) => a.localeCompare(b))
+        .map(([sev, c]) => `${sev}: ${c}`);
+    const first = issues[0];
+    return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
 }
 // ---------------------------------------------------------------------------
 // command_validate