@lingjingai/scriptctl 0.11.3 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, PARSE_MD_SPEC, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, deleteTree, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
4
- import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, parseAssetDoc, parseMarkdownBatch, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
3
+ import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
4
+ import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, classifyProviderError, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
5
5
  import { validateScript } from "../domain/script-core.js";
6
6
  import { makeProvider } from "../infra/providers.js";
7
7
  import { makeSourceManifest, prepareSource, } from "../infra/converters.js";
@@ -54,18 +54,6 @@ export function readRunState(workspace) {
54
54
  return {};
55
55
  }
56
56
  }
57
- function failureSignature(items) {
58
- if (!isList(items))
59
- return [];
60
- const out = [];
61
- for (const item of items) {
62
- const s = strOf(item).trim();
63
- if (s)
64
- out.push(s);
65
- }
66
- out.sort();
67
- return out;
68
- }
69
57
  export function addInspectedTarget(workspace, target) {
70
58
  const state = readRunState(workspace);
71
59
  const targets = [];
@@ -147,9 +135,6 @@ function episodeErrorPath(dir, ep) {
147
135
  function episodeResultKey(ep) {
148
136
  return `ep_${pad3(Number(ep["episode"]))}`;
149
137
  }
150
- function episodeResultsIndexPath(dir) {
151
- return path.join(dir, "index.json");
152
- }
153
138
  function batchResultKey(batch) {
154
139
  const bid = strOf(batch["batch_id"]).trim();
155
140
  if (bid)
@@ -165,9 +150,6 @@ function batchMarkdownPath(dir, batch) {
165
150
  function batchErrorPath(dir, batch) {
166
151
  return path.join(dir, `${batchResultKey(batch)}.error.json`);
167
152
  }
168
- function batchResultsIndexPath(dir) {
169
- return path.join(dir, "index.json");
170
- }
171
153
  function persistBatchResult(dir, batch, result) {
172
154
  const rawMd = result["_raw_markdown"];
173
155
  delete result["_raw_markdown"];
@@ -181,130 +163,81 @@ function persistBatchResult(dir, batch, result) {
181
163
  deletePath(mdPath);
182
164
  }
183
165
  }
184
- function readBatchResultsIndex(dir) {
185
- const p = batchResultsIndexPath(dir);
186
- if (!exists(p))
187
- return { version: 1, batches: {} };
188
- let data;
189
- try {
190
- data = readJson(p);
191
- }
192
- catch {
193
- return { version: 1, batches: {} };
194
- }
195
- if (!isDict(data))
196
- return { version: 1, batches: {} };
197
- if (!isDict(data["batches"]))
198
- data["batches"] = {};
199
- if (!("version" in data))
200
- data["version"] = 1;
201
- return data;
202
- }
203
- function writeBatchResultsIndex(dir, index) {
204
- writeJson(batchResultsIndexPath(dir), index);
205
- }
206
- function updateBatchResultMetadata(dir, batch, providerName, model) {
207
- const index = readBatchResultsIndex(dir);
208
- const batches = index["batches"] ?? {};
209
- batches[batchResultKey(batch)] = {
210
- episode: Number(batch["episode"]),
211
- part: Number(batch["part"]),
212
- provider: providerName,
213
- model,
214
- extracted_at: checkpointTimestamp(),
215
- };
216
- index["batches"] = batches;
217
- writeBatchResultsIndex(dir, index);
166
+ function episodeMetaPath(dir, ep) {
167
+ return path.join(dir, `${episodeResultKey(ep)}.meta.json`);
218
168
  }
219
- function removeBatchResultMetadata(dir, batch) {
220
- const index = readBatchResultsIndex(dir);
221
- const batches = index["batches"] ?? {};
222
- const key = batchResultKey(batch);
223
- if (key in batches) {
224
- delete batches[key];
225
- index["batches"] = batches;
226
- writeBatchResultsIndex(dir, index);
227
- }
169
+ function batchMetaPath(dir, batch) {
170
+ return path.join(dir, `${batchResultKey(batch)}.meta.json`);
228
171
  }
229
- function readEpisodeResultsIndex(dir) {
230
- const p = episodeResultsIndexPath(dir);
231
- if (!exists(p))
232
- return { version: 1, episodes: {} };
233
- let data;
172
+ function readUnitMeta(metaPath) {
173
+ if (!exists(metaPath))
174
+ return null;
234
175
  try {
235
- data = readJson(p);
176
+ const data = readJson(metaPath);
177
+ return isDict(data) ? data : null;
236
178
  }
237
179
  catch {
238
- return { version: 1, episodes: {} };
239
- }
240
- if (!isDict(data))
241
- return { version: 1, episodes: {} };
242
- if (!isDict(data["episodes"]))
243
- data["episodes"] = {};
244
- if (!("version" in data))
245
- data["version"] = 1;
246
- return data;
180
+ return null;
181
+ }
247
182
  }
248
- function writeEpisodeResultsIndex(dir, index) {
249
- writeJson(episodeResultsIndexPath(dir), index);
183
+ function writeUnitMeta(metaPath, meta) {
184
+ fs.mkdirSync(path.dirname(metaPath), { recursive: true });
185
+ writeJson(metaPath, meta);
250
186
  }
251
- function updateEpisodeResultMetadata(dir, ep, providerName, model) {
252
- const index = readEpisodeResultsIndex(dir);
253
- const episodes = index["episodes"] ?? {};
254
- episodes[episodeResultKey(ep)] = {
255
- provider: providerName,
256
- model,
257
- extracted_at: checkpointTimestamp(),
258
- };
259
- index["episodes"] = episodes;
260
- writeEpisodeResultsIndex(dir, index);
187
+ function removeUnitMeta(metaPath) {
188
+ if (exists(metaPath))
189
+ deletePath(metaPath);
261
190
  }
262
- function removeEpisodeResultMetadata(dir, ep) {
263
- const index = readEpisodeResultsIndex(dir);
264
- const episodes = index["episodes"] ?? {};
265
- const key = episodeResultKey(ep);
266
- if (key in episodes) {
267
- delete episodes[key];
268
- index["episodes"] = episodes;
269
- writeEpisodeResultsIndex(dir, index);
270
- }
191
+ function stampEpisodeMeta(dir, ep, inputHash, provenance, providerName, model) {
192
+ writeUnitMeta(episodeMetaPath(dir, ep), {
193
+ schema: 1, key: episodeResultKey(ep), episode: Number(ep["episode"]),
194
+ input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
195
+ });
271
196
  }
272
- function compactResultHasMultiRefs(data) {
273
- for (const scene of asList(data["sc"])) {
274
- if (!isDict(scene))
197
+ function stampBatchMeta(dir, batch, inputHash, provenance, providerName, model) {
198
+ writeUnitMeta(batchMetaPath(dir, batch), {
199
+ schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
200
+ input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
201
+ });
202
+ }
203
+ // Content-address a single episode/batch plan unit: the contract version, the
204
+ // exact source span text, the title-stable plan item, and provider/model. Any
205
+ // change to an input that would alter extraction rotates the hash for THAT unit only.
206
+ export function computeUnitHash(sourceText, unit, providerName, model) {
207
+ const span = isDict(unit["source_span"]) ? unit["source_span"] : {};
208
+ const start = Number(span["start"] ?? 0);
209
+ const end = Number(span["end"] ?? 0);
210
+ const spanText = sourceText.slice(start, end);
211
+ const planText = JSON.stringify(unit, checkpointReplacer());
212
+ return sha256Text([String(DIRECT_CONTRACT_VERSION), spanText, planText, providerName ?? "", model ?? ""].join("\u0000"));
213
+ }
214
+ // Delete result/meta/error/markdown files whose unit key is no longer in the
215
+ // current plan (e.g. the source shed an episode). Pure function of the plan —
216
+ // it never inspects hashes, content, or run_state, so it can only remove units
217
+ // the plan no longer references. Also retires the legacy v3 `index.json`.
218
+ function gcOrphanUnits(dir, liveKeys) {
219
+ if (!exists(dir))
220
+ return [];
221
+ const removed = [];
222
+ for (const name of fs.readdirSync(dir)) {
223
+ if (name === "index.json") {
224
+ deletePath(path.join(dir, name));
225
+ removed.push(name);
275
226
  continue;
276
- for (const action of asList(scene["a"])) {
277
- if (!isDict(action))
278
- continue;
279
- const refs = action["r"];
280
- if (isList(refs) && refs.length > 1)
281
- return true;
227
+ }
228
+ const key = name.replace(/\.(meta\.json|error\.json|json|md)$/, "");
229
+ if (key === name)
230
+ continue; // not a recognized unit artifact
231
+ if (!liveKeys.has(key)) {
232
+ deletePath(path.join(dir, name));
233
+ removed.push(name);
282
234
  }
283
235
  }
284
- return false;
285
- }
286
- export function initCheckpoint(sourceText, plan) {
287
- const planText = JSON.stringify(plan, checkpointReplacer());
288
- return {
289
- contract_version: DIRECT_CONTRACT_VERSION,
290
- source_sha256: sha256Text(sourceText),
291
- episode_plan_sha256: sha256Text(planText),
292
- total_episodes: Number(plan["total_episodes"] ?? asList(plan["episodes"]).length),
293
- };
294
- }
295
- export function initBatchCheckpoint(sourceText, batchPlan) {
296
- const planText = JSON.stringify(batchPlan, checkpointReplacer());
297
- return {
298
- contract_version: DIRECT_CONTRACT_VERSION,
299
- source_sha256: sha256Text(sourceText),
300
- batch_plan_sha256: sha256Text(planText),
301
- total_batches: Number(batchPlan["total_batches"] ?? asList(batchPlan["batches"]).length),
302
- };
236
+ return removed;
303
237
  }
304
238
  // Title fields are LLM-mutated downstream by enrichEpisodePlanTitles, so they
305
- // must be excluded from checkpoint hashes — otherwise every rerun gets a fresh
306
- // SHA, the previous checkpoint never matches, and the whole batch pipeline
307
- // re-extracts from scratch.
239
+ // must be excluded from unit hashes — otherwise every rerun gets a fresh SHA,
240
+ // the cached unit never matches, and that unit re-extracts from scratch.
308
241
  const CHECKPOINT_UNSTABLE_KEYS = new Set(["title", "generated_title", "title_status", "title_source"]);
309
242
  function checkpointReplacer() {
310
243
  // Python's json.dumps(sort_keys=True) sorts keys recursively. Replicate by walking and sorting.
@@ -323,121 +256,47 @@ function checkpointReplacer() {
323
256
  return value;
324
257
  };
325
258
  }
326
- function checkpointSourceMatches(previous, current) {
327
- if (!previous || Object.keys(previous).length === 0)
328
- return false;
329
- const keys = ["contract_version", "source_sha256", "episode_plan_sha256", "total_episodes"];
330
- return keys.every((k) => previous[k] === current[k]);
331
- }
332
- function batchCheckpointMatches(previous, current) {
333
- if (!previous || Object.keys(previous).length === 0)
334
- return false;
335
- const keys = ["contract_version", "source_sha256", "batch_plan_sha256", "total_batches"];
336
- return keys.every((k) => previous[k] === current[k]);
337
- }
338
- function resetInitOutputs(dd) {
339
- for (const dirname of ["episode_results", "batch_results"]) {
340
- const target = path.join(dd, dirname);
341
- if (exists(target))
342
- deleteTree(target);
343
- }
344
- for (const name of ["script.initial.json", "validation.json", "batch_plan.json", "asset_curation.json", "asset_metadata.json"]) {
345
- const p = path.join(dd, name);
346
- if (exists(p))
347
- deletePath(p);
348
- }
349
- }
350
- function resetBatchOutputs(dd) {
351
- const batchResultsDir = path.join(dd, "batch_results");
352
- if (exists(batchResultsDir))
353
- deleteTree(batchResultsDir);
354
- }
355
- function loadCheckpointedEpisode(sourceText, episodeResultsDir, ep, providerName, model, previousProvider) {
259
+ // Non-destructive reuse: a cached episode result is reusable iff its sidecar
260
+ // meta records the same input_hash we compute for the current plan unit. The
261
+ // hash subsumes the old provider / source_span / episode-number / contract
262
+ // checks any of those changing rotates the hash. On any mismatch or read
263
+ // failure we return null and let the caller re-extract and overwrite; we NEVER
264
+ // delete the cached file pre-emptively (that was the data-loss root cause).
265
+ export function loadCachedEpisode(sourceText, episodeResultsDir, ep, expectedHash) {
266
+ const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
267
+ if (!meta || meta["input_hash"] !== expectedHash)
268
+ return null;
269
+ if (meta["status"] === "terminal")
270
+ return null;
356
271
  const p = episodeResultPath(episodeResultsDir, ep);
357
272
  if (!exists(p))
358
273
  return null;
359
- let result;
360
274
  try {
361
- const data = readJson(p);
362
- const metadata = isDict(data["_scriptctl"]) ? data["_scriptctl"] : {};
363
- const index = readEpisodeResultsIndex(episodeResultsDir);
364
- let indexEntry = {};
365
- const eps = index["episodes"];
366
- if (isDict(eps)) {
367
- const entry = eps[episodeResultKey(ep)];
368
- if (isDict(entry))
369
- indexEntry = entry;
370
- }
371
- const resultProvider = strOf(metadata["provider"] || indexEntry["provider"] || previousProvider).trim();
372
- if (providerName && resultProvider && resultProvider !== providerName) {
373
- throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
374
- }
375
- result = normalizeEpisodeResult(data, ep);
275
+ const result = normalizeEpisodeResult(readJson(p), ep);
376
276
  validateEpisodeExtractionQuality(sourceText, ep, result);
377
- if (!("sc" in data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
378
- writeJson(p, compactEpisodeResult(result));
379
- if (providerName && model)
380
- updateEpisodeResultMetadata(episodeResultsDir, ep, providerName, model);
381
- }
277
+ return result;
382
278
  }
383
279
  catch {
384
- try {
385
- deletePath(p);
386
- }
387
- catch {
388
- // ignore
389
- }
390
- removeEpisodeResultMetadata(episodeResultsDir, ep);
391
280
  return null;
392
281
  }
393
- if (Number(result["episode"] ?? 0) !== Number(ep["episode"]))
282
+ }
283
+ export function loadCachedBatch(sourceText, batchResultsDir, batch, expectedHash) {
284
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
285
+ if (!meta || meta["input_hash"] !== expectedHash)
394
286
  return null;
395
- if (JSON.stringify(result["source_span"]) !== JSON.stringify(ep["source_span"]))
287
+ if (meta["status"] === "terminal")
396
288
  return null;
397
- return result;
398
- }
399
- function loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider) {
400
289
  const p = batchResultPath(batchResultsDir, batch);
401
290
  if (!exists(p))
402
291
  return null;
403
- let result;
404
292
  try {
405
- const data = readJson(p);
406
- const index = readBatchResultsIndex(batchResultsDir);
407
- let indexEntry = {};
408
- const batches = index["batches"];
409
- if (isDict(batches)) {
410
- const entry = batches[batchResultKey(batch)];
411
- if (isDict(entry))
412
- indexEntry = entry;
413
- }
414
- const resultProvider = strOf(indexEntry["provider"] || previousProvider).trim();
415
- if (providerName && resultProvider && resultProvider !== providerName) {
416
- throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
417
- }
418
- result = normalizeEpisodeResult(data, batch);
293
+ const result = normalizeEpisodeResult(readJson(p), batch);
419
294
  validateBatchExtractionQuality(sourceText, batch, result);
420
- if (!("sc" in data) || compactResultHasMultiRefs(data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
421
- persistBatchResult(batchResultsDir, batch, result);
422
- if (providerName && model)
423
- updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
424
- }
295
+ return result;
425
296
  }
426
297
  catch {
427
- try {
428
- deletePath(p);
429
- }
430
- catch {
431
- // ignore
432
- }
433
- removeBatchResultMetadata(batchResultsDir, batch);
434
298
  return null;
435
299
  }
436
- if (Number(result["episode"] ?? 0) !== Number(batch["episode"]))
437
- return null;
438
- if (JSON.stringify(result["source_span"]) !== JSON.stringify(batch["source_span"]))
439
- return null;
440
- return result;
441
300
  }
442
301
  function mergeScene(target, source) {
443
302
  if ((target["location_name"] === "" || target["location_name"] === "未知场景" || target["location_name"] === null || target["location_name"] === undefined) &&
@@ -530,33 +389,9 @@ async function providerExtractAssetCurationLocal(provider, sourceText, script) {
530
389
  }
531
390
  return {};
532
391
  }
533
- function writeEpisodeFailure(dir, ep, exc) {
534
- const err = exc;
535
- const error = {
536
- episode: Number(ep["episode"]),
537
- title: ep["title"],
538
- source_span: ep["source_span"],
539
- error_type: err?.name || "Error",
540
- message: (err?.message || err?.name || "Error").slice(0, 500),
541
- failed_at: checkpointTimestamp(),
542
- };
543
- if (exc instanceof CliError) {
544
- if (exc.required.length > 0)
545
- error["required"] = exc.required;
546
- if (exc.received.length > 0)
547
- error["received"] = exc.received;
548
- if (exc.nextSteps.length > 0)
549
- error["next"] = exc.nextSteps;
550
- }
551
- const resultPath = episodeResultPath(dir, ep);
552
- if (exists(resultPath))
553
- deletePath(resultPath);
554
- removeEpisodeResultMetadata(dir, ep);
555
- writeJson(episodeErrorPath(dir, ep), error);
556
- return error;
557
- }
558
- function writeBatchFailure(dir, batch, exc) {
392
+ function writeBatchFailure(dir, batch, exc, inputHash, providerName, model) {
559
393
  const err = exc;
394
+ const terminal = classifyProviderError(exc) === "terminal";
560
395
  const error = {
561
396
  batch_id: batchResultKey(batch),
562
397
  episode: Number(batch["episode"]),
@@ -565,6 +400,8 @@ function writeBatchFailure(dir, batch, exc) {
565
400
  line_range: batch["line_range"],
566
401
  error_type: err?.name || "Error",
567
402
  message: (err?.message || err?.name || "Error").slice(0, 500),
403
+ terminal,
404
+ input_hash: inputHash,
568
405
  failed_at: checkpointTimestamp(),
569
406
  };
570
407
  if (exc instanceof CliError) {
@@ -578,7 +415,16 @@ function writeBatchFailure(dir, batch, exc) {
578
415
  const resultPath = batchResultPath(dir, batch);
579
416
  if (exists(resultPath))
580
417
  deletePath(resultPath);
581
- removeBatchResultMetadata(dir, batch);
418
+ if (terminal) {
419
+ writeUnitMeta(batchMetaPath(dir, batch), {
420
+ schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
421
+ input_hash: inputHash, provenance: "extracted", status: "terminal",
422
+ provider: providerName, model, extracted_at: checkpointTimestamp(),
423
+ });
424
+ }
425
+ else {
426
+ removeUnitMeta(batchMetaPath(dir, batch));
427
+ }
582
428
  writeJson(batchErrorPath(dir, batch), error);
583
429
  return error;
584
430
  }
@@ -633,6 +479,9 @@ export async function commandInit(opts) {
633
479
  const workspace = strOf(opts["workspace_path"] || "workspace");
634
480
  const providerName = strOf(opts["provider"] || DEFAULT_PROVIDER);
635
481
  const model = strOf(opts["model"] || process.env.SCRIPTCTL_ANTHROPIC_MODEL || DEFAULT_MODEL);
482
+ // When set, retry batches a prior run marked terminal (content-filtered)
483
+ // instead of skipping them — e.g. after the provider's filter was adjusted.
484
+ const retryTerminal = Boolean(opts["retry_terminal"]);
636
485
  let concurrency;
637
486
  try {
638
487
  concurrency = parseInt(strOf(opts["concurrency"] || DEFAULT_CONCURRENCY), 10);
@@ -717,7 +566,6 @@ export async function commandInit(opts) {
717
566
  }
718
567
  const dd = directDir(workspace);
719
568
  fs.mkdirSync(dd, { recursive: true });
720
- const previousStateBeforeInit = readRunState(workspace);
721
569
  updateRunState(workspace, {
722
570
  status: "init_running",
723
571
  command: "direct init",
@@ -830,17 +678,6 @@ export async function commandInit(opts) {
830
678
  nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
831
679
  });
832
680
  }
833
- const checkpoint = initCheckpoint(sourceText, plan);
834
- const batchCheckpoint = initBatchCheckpoint(sourceText, batchPlan);
835
- const previousState = previousStateBeforeInit;
836
- const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
837
- const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
838
- const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
839
- const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
840
- if (!checkpointReused)
841
- resetInitOutputs(dd);
842
- else if (!batchCheckpointReused)
843
- resetBatchOutputs(dd);
844
681
  writeJson(path.join(dd, "source_manifest.json"), manifest);
845
682
  writeJson(path.join(dd, "episode_plan.json"), plan);
846
683
  writeJson(path.join(dd, "batch_plan.json"), batchPlan);
@@ -848,13 +685,15 @@ export async function commandInit(opts) {
848
685
  const batchResultsDir = path.join(dd, "batch_results");
849
686
  fs.mkdirSync(episodeResultsDir, { recursive: true });
850
687
  fs.mkdirSync(batchResultsDir, { recursive: true });
688
+ // Non-destructive GC: drop result/meta/error/md files for units the current
689
+ // plan no longer references (e.g. the source shed an episode). Pure function
690
+ // of the plan — it never touches a unit the plan still references, and retires
691
+ // the legacy v3 index.json. There is no whole-directory reset any more.
692
+ gcOrphanUnits(episodeResultsDir, new Set(asList(plan["episodes"]).map((ep) => episodeResultKey(ep))));
693
+ gcOrphanUnits(batchResultsDir, new Set(asList(batchPlan["batches"]).map((b) => batchResultKey(b))));
851
694
  updateRunState(workspace, {
852
695
  status: "init_running",
853
696
  init_stage: "batch_extract",
854
- checkpoint,
855
- batch_checkpoint: batchCheckpoint,
856
- checkpoint_reused: checkpointReused,
857
- batch_checkpoint_reused: batchCheckpointReused,
858
697
  batch_mode: batchMode,
859
698
  batch_target_lines: batchTargetLines,
860
699
  batch_max_chars: batchMaxChars,
@@ -873,11 +712,11 @@ export async function commandInit(opts) {
873
712
  batchesByEpisode.set(epNum, []);
874
713
  batchesByEpisode.get(epNum).push(batch);
875
714
  }
876
- const previousProvider = strOf(previousState["provider"]).trim() || null;
715
+ // Per-unit reuse: each episode is judged independently by its own input hash,
716
+ // so a source edit to one episode invalidates only that episode — not all 33.
877
717
  for (const episode of asList(plan["episodes"])) {
878
- const cached = checkpointReused
879
- ? loadCheckpointedEpisode(sourceText, episodeResultsDir, episode, providerName, model, previousProvider)
880
- : null;
718
+ const epHash = computeUnitHash(sourceText, episode, providerName, model);
719
+ const cached = loadCachedEpisode(sourceText, episodeResultsDir, episode, epHash);
881
720
  if (cached !== null) {
882
721
  results.push(cached);
883
722
  skipped.push(Number(episode["episode"]));
@@ -887,7 +726,7 @@ export async function commandInit(opts) {
887
726
  if (!exists(batchResultPath(batchResultsDir, cachedBatch))) {
888
727
  const backfilled = recoverBatchFromSource(sourceText, cachedBatch);
889
728
  persistBatchResult(batchResultsDir, cachedBatch, backfilled);
890
- updateBatchResultMetadata(batchResultsDir, cachedBatch, providerName, model);
729
+ stampBatchMeta(batchResultsDir, cachedBatch, computeUnitHash(sourceText, cachedBatch, providerName, model), "recovered", providerName, model);
891
730
  }
892
731
  const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
893
732
  if (exists(errorPath))
@@ -900,11 +739,19 @@ export async function commandInit(opts) {
900
739
  }
901
740
  const batchResults = [];
902
741
  const skippedBatches = [];
742
+ const terminalSkipped = [];
903
743
  const pending = [];
904
744
  for (const batch of pendingBatches) {
905
- const cachedBatch = batchCheckpointReused
906
- ? loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider)
907
- : null;
745
+ const bHash = computeUnitHash(sourceText, batch, providerName, model);
746
+ // A terminal failure (content filter) with the same input hash will fail the
747
+ // same way — skip it instead of re-calling the provider, unless --retry-terminal
748
+ // is set or the source/provider/model changed (which rotates the hash).
749
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
750
+ if (!retryTerminal && meta && meta["status"] === "terminal" && meta["input_hash"] === bHash) {
751
+ terminalSkipped.push(batchResultKey(batch));
752
+ continue;
753
+ }
754
+ const cachedBatch = loadCachedBatch(sourceText, batchResultsDir, batch, bHash);
908
755
  if (cachedBatch !== null) {
909
756
  cachedBatch["_batch_id"] = batchResultKey(batch);
910
757
  cachedBatch["_batch_part"] = Number(batch["part"]);
@@ -931,12 +778,12 @@ export async function commandInit(opts) {
931
778
  result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
932
779
  batchResults.push(result);
933
780
  persistBatchResult(batchResultsDir, batch, result);
934
- updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
781
+ stampBatchMeta(batchResultsDir, batch, computeUnitHash(sourceText, batch, providerName, model), "extracted", providerName, model);
935
782
  if (exists(errorPath))
936
783
  deletePath(errorPath);
937
784
  }
938
785
  else {
939
- failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
786
+ failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error, computeUnitHash(sourceText, batch, providerName, model), providerName, model));
940
787
  }
941
788
  }
942
789
  results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
@@ -955,82 +802,94 @@ export async function commandInit(opts) {
955
802
  return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
956
803
  });
957
804
  const completedBatches = skippedEpisodeBatchCount + batchResults.length;
958
- if (failures.length > 0) {
959
- const failedEpisodes = [...new Set(failures.map((it) => Number(it["episode"])))].sort((a, b) => a - b);
960
- const failedBatches = failures.map((it) => strOf(it["batch_id"]));
961
- const currentFailureSignature = failureSignature(failedBatches);
962
- const previousFailureSignature = failureSignature(previousState["failed_batches"]);
963
- const sameFailuresRepeated = checkpointReused &&
964
- batchCheckpointReused &&
965
- currentFailureSignature.length > 0 &&
966
- currentFailureSignature.length === previousFailureSignature.length &&
967
- currentFailureSignature.every((v, idx) => v === previousFailureSignature[idx]) &&
968
- ["init_incomplete", "init_stalled"].includes(strOf(previousState["status"]));
969
- const previousFailureStreak = normalizeInt(previousState["failure_streak"], 0);
970
- const failureStreak = sameFailuresRepeated ? previousFailureStreak + 1 : 1;
971
- const failureTitle = sameFailuresRepeated
972
- ? "INIT STALLED: Same batches keep failing"
973
- : "INIT INCOMPLETE: Batch extraction failed";
974
- const nextSteps = sameFailuresRepeated
975
- ? [
976
- "Run direct inspect --target issue to read failed batch details.",
977
- "Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
978
- ]
979
- : [
980
- "Run direct inspect --target issue to review failed batches.",
981
- "Rerun the same init once if failures look transient; completed checkpoints will be reused.",
982
- ];
983
- const failedEpisodeSet = new Set(failedEpisodes);
984
- const skippedSet = new Set(skipped);
985
- const batchResultsByEpisode = new Map();
986
- for (const result of batchResults) {
987
- const ep = Number(result["episode"] ?? 0);
988
- if (!batchResultsByEpisode.has(ep))
989
- batchResultsByEpisode.set(ep, []);
990
- batchResultsByEpisode.get(ep).push(result);
991
- }
805
+ const transientFailures = failures.filter((it) => !it["terminal"]);
806
+ const terminalFailures = failures.filter((it) => Boolean(it["terminal"]));
807
+ const skippedSet = new Set(skipped);
808
+ // Merge every fully-completed, non-cached episode into an episode_results
809
+ // checkpoint. Episodes still missing a batch (a failure this run, or a batch
810
+ // a prior run marked terminal and we skipped) are left unmerged so a rerun or
811
+ // an override can complete them.
812
+ const batchResultsByEpisode = new Map();
813
+ for (const result of batchResults) {
814
+ const ep = Number(result["episode"] ?? 0);
815
+ if (!batchResultsByEpisode.has(ep))
816
+ batchResultsByEpisode.set(ep, []);
817
+ batchResultsByEpisode.get(ep).push(result);
818
+ }
819
+ try {
992
820
  for (const episode of asList(plan["episodes"])) {
993
821
  const episodeNum = Number(episode["episode"]);
994
- if (skippedSet.has(episodeNum) || failedEpisodeSet.has(episodeNum))
822
+ if (skippedSet.has(episodeNum))
995
823
  continue;
996
824
  const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
997
- if (expectedBatches && (batchResultsByEpisode.get(episodeNum) ?? []).length === expectedBatches) {
998
- const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
999
- validateEpisodeExtractionQuality(sourceText, episode, result);
1000
- results.push(result);
1001
- writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
1002
- updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
1003
- }
825
+ if (!expectedBatches || (batchResultsByEpisode.get(episodeNum) ?? []).length !== expectedBatches)
826
+ continue;
827
+ const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
828
+ validateEpisodeExtractionQuality(sourceText, episode, result);
829
+ results.push(result);
830
+ writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
831
+ stampEpisodeMeta(episodeResultsDir, episode, computeUnitHash(sourceText, episode, providerName, model), "extracted", providerName, model);
832
+ const errorPath = episodeErrorPath(episodeResultsDir, episode);
833
+ if (exists(errorPath))
834
+ deletePath(errorPath);
1004
835
  }
836
+ }
837
+ catch (exc) {
838
+ const e = exc;
839
+ throw initFailedReport(workspace, {
840
+ title: "INIT FAILED: Episode merge failed",
841
+ stage: "episode_merge",
842
+ required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
843
+ received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
844
+ nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
845
+ updates: { batch_completed: completedBatches },
846
+ });
847
+ }
848
+ results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
849
+ // Classify episodes that could not be assembled. An episode blocked by ANY
850
+ // transient batch (timeout/5xx) can still complete on rerun → it blocks init.
851
+ // An episode blocked only by terminal (content-filtered) batches is held out:
852
+ // the rest of the script ships, and the operator overrides the blocked unit.
853
+ const completedEpisodeNums = new Set(results.map((r) => Number(r["episode"])));
854
+ const transientEpisodeSet = new Set(transientFailures.map((it) => Number(it["episode"])));
855
+ const incompleteEpisodes = asList(plan["episodes"]).map((ep) => Number(ep["episode"])).filter((n) => !completedEpisodeNums.has(n));
856
+ const transientBlocked = incompleteEpisodes.filter((n) => transientEpisodeSet.has(n)).sort((a, b) => a - b);
857
+ const heldOutEpisodes = incompleteEpisodes.filter((n) => !transientEpisodeSet.has(n)).sort((a, b) => a - b);
858
+ if (transientBlocked.length > 0) {
1005
859
  updateRunState(workspace, {
1006
- status: sameFailuresRepeated ? "init_stalled" : "init_incomplete",
860
+ status: "init_incomplete",
1007
861
  init_stage: "batch_extract",
1008
- checkpoint,
1009
- batch_checkpoint: batchCheckpoint,
1010
862
  episode_total: asList(plan["episodes"]).length,
1011
863
  episode_completed: results.length,
1012
864
  episode_reused: skipped.length,
1013
- episode_failed: failedEpisodes.length,
1014
- failed_episodes: failedEpisodes,
865
+ episode_failed: incompleteEpisodes.length,
866
+ failed_episodes: transientBlocked,
867
+ held_out_episodes: heldOutEpisodes,
1015
868
  batch_total: asList(batchPlan["batches"]).length,
1016
869
  batch_completed: completedBatches,
1017
870
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1018
871
  batch_failed: failures.length,
1019
- failed_batches: failedBatches,
1020
- failure_signature: currentFailureSignature,
1021
- failure_streak: failureStreak,
1022
- last_error: { title: failureTitle, failed_at: checkpointTimestamp() },
1023
- exportable: false,
872
+ batch_terminal: terminalFailures.length,
873
+ transient_failed_batches: transientFailures.map((it) => strOf(it["batch_id"])),
874
+ terminal_failed_batches: terminalFailures.map((it) => strOf(it["batch_id"])),
875
+ last_error: { title: "INIT INCOMPLETE: Batch extraction failed", failed_at: checkpointTimestamp() },
1024
876
  });
1025
- const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
877
+ const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]} [${it["terminal"] ? "terminal" : "transient"}]: ${it["error_type"]} - ${it["message"]}`);
878
+ const next = [
879
+ "Run direct inspect --target issue to review failed batches.",
880
+ "Rerun the same init to retry transient failures; completed units are reused.",
881
+ ];
882
+ if (terminalFailures.length > 0) {
883
+ next.push("Terminal (content-filtered) batches will not clear on retry — use `direct override <unit> --from <file>` or soften the source.");
884
+ }
1026
885
  const report = {
1027
- title: failureTitle,
886
+ title: "INIT INCOMPLETE: Batch extraction failed",
1028
887
  result: [
1029
888
  `episodes total: ${asList(plan["episodes"]).length}`,
1030
889
  `completed: ${results.length}`,
1031
890
  `reused: ${skipped.length}`,
1032
- `failed episodes: ${failedEpisodes.length}`,
1033
- `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${failures.length} failed`,
891
+ `held out (terminal): ${heldOutEpisodes.length}`,
892
+ `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${transientFailures.length} transient, ${terminalFailures.length} terminal`,
1034
893
  `provider: ${providerName}`,
1035
894
  ],
1036
895
  artifacts: [
@@ -1043,81 +902,49 @@ export async function commandInit(opts) {
1043
902
  path.join(dd, "run_state.json"),
1044
903
  ],
1045
904
  issues,
1046
- next: nextSteps,
905
+ next,
1047
906
  };
1048
907
  return [report, EXIT_RUNTIME];
1049
908
  }
1050
909
  updateRunState(workspace, {
1051
910
  status: "init_running",
1052
911
  init_stage: "episode_merge",
1053
- checkpoint,
1054
- batch_checkpoint: batchCheckpoint,
1055
912
  episode_total: asList(plan["episodes"]).length,
1056
913
  episode_completed: results.length,
1057
914
  episode_reused: skipped.length,
1058
915
  episode_failed: 0,
1059
916
  failed_episodes: [],
917
+ held_out_episodes: heldOutEpisodes,
1060
918
  batch_total: asList(batchPlan["batches"]).length,
1061
919
  batch_completed: completedBatches,
1062
920
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1063
- batch_failed: 0,
1064
- failed_batches: [],
1065
- failure_signature: [],
1066
- failure_streak: 0,
921
+ batch_failed: terminalFailures.length,
922
+ batch_terminal: terminalFailures.length,
1067
923
  last_error: null,
1068
924
  });
925
+ // Drop transient/cleared error markers, but KEEP terminal ones so `direct
926
+ // status` and export gating can see which episodes are held out.
1069
927
  for (const dir of [batchResultsDir, episodeResultsDir]) {
1070
928
  if (!exists(dir))
1071
929
  continue;
1072
930
  for (const name of fs.readdirSync(dir)) {
1073
- if (name.endsWith(".error.json")) {
1074
- try {
1075
- deletePath(path.join(dir, name));
1076
- }
1077
- catch {
1078
- // ignore
1079
- }
1080
- }
1081
- }
1082
- }
1083
- try {
1084
- const batchResultsByEpisode = new Map();
1085
- for (const result of batchResults) {
1086
- const ep = Number(result["episode"] ?? 0);
1087
- if (!batchResultsByEpisode.has(ep))
1088
- batchResultsByEpisode.set(ep, []);
1089
- batchResultsByEpisode.get(ep).push(result);
1090
- }
1091
- const skippedSet = new Set(skipped);
1092
- for (const episode of asList(plan["episodes"])) {
1093
- const episodeNum = Number(episode["episode"]);
1094
- if (skippedSet.has(episodeNum))
931
+ if (!name.endsWith(".error.json"))
1095
932
  continue;
1096
- const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
1097
- validateEpisodeExtractionQuality(sourceText, episode, result);
1098
- results.push(result);
1099
- writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
1100
- updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
1101
- const errorPath = episodeErrorPath(episodeResultsDir, episode);
1102
- if (exists(errorPath))
1103
- deletePath(errorPath);
933
+ const errPath = path.join(dir, name);
934
+ try {
935
+ const err = readJson(errPath);
936
+ if (!isDict(err) || !err["terminal"])
937
+ deletePath(errPath);
938
+ }
939
+ catch {
940
+ deletePath(errPath);
941
+ }
1104
942
  }
1105
943
  }
1106
- catch (exc) {
1107
- const e = exc;
1108
- throw initFailedReport(workspace, {
1109
- title: "INIT FAILED: Episode merge failed",
1110
- stage: "episode_merge",
1111
- required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
1112
- received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1113
- nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
1114
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, batch_completed: completedBatches },
1115
- });
1116
- }
1117
944
  results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
1118
945
  let script;
1119
946
  try {
1120
- updateRunState(workspace, { status: "init_running", init_stage: "script_merge", checkpoint, batch_checkpoint: batchCheckpoint });
947
+ updateRunState(workspace, { status: "init_running", init_stage: "script_merge" });
1121
948
  script = mergeEpisodeResults(results, strOf(info["projectName"]) || path.basename(source, path.extname(source)));
1122
949
  }
1123
950
  catch (exc) {
@@ -1128,11 +955,11 @@ export async function commandInit(opts) {
1128
955
  required: ["complete episode_results/*.json"],
1129
956
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1130
957
  nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
1131
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
958
+ updates: { episode_completed: results.length },
1132
959
  });
1133
960
  }
1134
961
  try {
1135
- updateRunState(workspace, { status: "init_running", init_stage: "asset_curation", checkpoint, batch_checkpoint: batchCheckpoint });
962
+ updateRunState(workspace, { status: "init_running", init_stage: "asset_curation" });
1136
963
  const rawCuration = await providerExtractAssetCurationLocal(provider, sourceText, script);
1137
964
  const curation = curateScriptAssets(script, rawCuration);
1138
965
  writeJson(path.join(dd, "asset_curation.json"), curation);
@@ -1146,7 +973,7 @@ export async function commandInit(opts) {
1146
973
  required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
1147
974
  received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
1148
975
  nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
1149
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
976
+ updates: { episode_completed: results.length },
1150
977
  });
1151
978
  }
1152
979
  const e = exc;
@@ -1156,11 +983,11 @@ export async function commandInit(opts) {
1156
983
  required: ["provider location merge decisions and deterministic asset reuse curation"],
1157
984
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1158
985
  nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
1159
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
986
+ updates: { episode_completed: results.length },
1160
987
  });
1161
988
  }
1162
989
  try {
1163
- updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract", checkpoint, batch_checkpoint: batchCheckpoint });
990
+ updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract" });
1164
991
  let metadata = provider.extractMetadata ? await provider.extractMetadata(sourceText, script) : {};
1165
992
  if (!isDict(metadata))
1166
993
  metadata = {};
@@ -1176,7 +1003,7 @@ export async function commandInit(opts) {
1176
1003
  required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
1177
1004
  received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
1178
1005
  nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
1179
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
1006
+ updates: { episode_completed: results.length },
1180
1007
  });
1181
1008
  }
1182
1009
  const e = exc;
@@ -1186,12 +1013,12 @@ export async function commandInit(opts) {
1186
1013
  required: ["provider metadata for worldview, role_type, and asset descriptions"],
1187
1014
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1188
1015
  nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
1189
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
1016
+ updates: { episode_completed: results.length },
1190
1017
  });
1191
1018
  }
1192
1019
  const scriptPath = path.join(dd, "script.initial.json");
1193
1020
  writeJson(scriptPath, script);
1194
- updateRunState(workspace, { status: "init_running", init_stage: "validate", checkpoint, batch_checkpoint: batchCheckpoint });
1021
+ updateRunState(workspace, { status: "init_running", init_stage: "validate" });
1195
1022
  let validation;
1196
1023
  try {
1197
1024
  validation = validateScript(workspace, scriptPath);
@@ -1204,7 +1031,7 @@ export async function commandInit(opts) {
1204
1031
  required: ["script.initial.json that can be validated"],
1205
1032
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1206
1033
  nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
1207
- updates: { checkpoint, script_path: scriptPath },
1034
+ updates: { script_path: scriptPath },
1208
1035
  });
1209
1036
  }
1210
1037
  const passed = Boolean(validation["passed"]);
@@ -1213,10 +1040,6 @@ export async function commandInit(opts) {
1213
1040
  status,
1214
1041
  command: "direct init",
1215
1042
  init_stage: "complete",
1216
- checkpoint,
1217
- batch_checkpoint: batchCheckpoint,
1218
- checkpoint_reused: checkpointReused,
1219
- batch_checkpoint_reused: batchCheckpointReused,
1220
1043
  provider: providerName,
1221
1044
  model,
1222
1045
  concurrency,
@@ -1232,19 +1055,17 @@ export async function commandInit(opts) {
1232
1055
  episode_reused: skipped.length,
1233
1056
  episode_failed: 0,
1234
1057
  failed_episodes: [],
1058
+ held_out_episodes: heldOutEpisodes,
1235
1059
  batch_total: asList(batchPlan["batches"]).length,
1236
1060
  batch_completed: completedBatches,
1237
1061
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1238
- batch_failed: 0,
1239
- failed_batches: [],
1240
- failure_signature: [],
1241
- failure_streak: 0,
1062
+ batch_failed: terminalFailures.length,
1063
+ batch_terminal: terminalFailures.length,
1242
1064
  last_error: null,
1243
1065
  review_status: "pending",
1244
1066
  review_missing: [...REVIEW_TARGETS],
1245
1067
  inspected_targets: [],
1246
1068
  patch_count: 0,
1247
- exportable: providerName !== "mock",
1248
1069
  });
1249
1070
  const title = passed
1250
1071
  ? "INIT COMPLETE: Initial script ready"
@@ -1258,9 +1079,9 @@ export async function commandInit(opts) {
1258
1079
  `actions: ${stats["actions"] ?? 0}`,
1259
1080
  `validation: ${passed ? "passed" : "needs repair"}`,
1260
1081
  `provider: ${providerName}`,
1261
- `episode checkpoint reused: ${skipped.length}`,
1082
+ `episodes reused: ${skipped.length}`,
1262
1083
  `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed`,
1263
- `batch checkpoint reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
1084
+ `batches reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
1264
1085
  "agent_review: pending",
1265
1086
  ],
1266
1087
  artifacts: [
@@ -1286,188 +1107,217 @@ export async function commandInit(opts) {
1286
1107
  };
1287
1108
  return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
1288
1109
  }
1289
- export function summarizeIssues(issues) {
1290
- if (issues.length === 0)
1291
- return [];
1292
- const counts = {};
1293
- for (const item of issues) {
1294
- const sev = strOf(item["severity"]);
1295
- counts[sev] = (counts[sev] ?? 0) + 1;
1296
- }
1297
- const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
1298
- const first = issues[0];
1299
- return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
1300
- }
1301
1110
  // ---------------------------------------------------------------------------
1302
- // command_parse — subagent-authored md workspace → script.initial.json
1303
- //
1304
- // Deterministic, no-LLM counterpart of `direct init`, but a DISTINCT layout from
1305
- // write/direct: the 正文 md carries only the screenplay (no asset registration at
1306
- // all), and each asset KIND is registered in its own file —
1307
- // 人物.md / 场景.md / 道具.md / 发声源.md (+ optional 梗概.md for the whole-script
1308
- // synopsis). It assembles the same script.initial.json and hands off to the
1309
- // existing direct inspect/validate/export downstream (zero changes there).
1111
+ // command_override — inject a human extraction for a unit the provider can't
1112
+ // produce (content-filtered). The override is content-addressed exactly like a
1113
+ // provider result, so init reuses it and never re-calls the provider, and the
1114
+ // non-destructive GC never deletes it. We compute the input_hash from the plan
1115
+ // ourselves, so the operator never hand-edits source_span.
1310
1116
  // ---------------------------------------------------------------------------
1311
- const _EP_FILE_RE = /^ep[_-]?0*(\d+)\.(?:md|markdown)$/i;
1312
- const ASSET_DOC_SPECS = [
1313
- { kind: "actors", names: ["人物.md", "角色.md", "characters.md", "actors.md"] },
1314
- { kind: "locations", names: ["场景.md", "地点.md", "locations.md"] },
1315
- { kind: "props", names: ["道具.md", "props.md"] },
1316
- { kind: "speakers", names: ["发声源.md", "speakers.md"] },
1317
- ];
1318
- const SYNOPSIS_DOC_NAMES = ["梗概.md", "全文梗概.md", "synopsis.md"];
1319
- function firstExisting(dir, names) {
1320
- for (const n of names) {
1321
- const p = path.join(dir, n);
1322
- if (exists(p) && fs.statSync(p).isFile())
1323
- return p;
1324
- }
1325
- return null;
1326
- }
1327
- function collectEpisodeMdFiles(dir) {
1328
- if (!exists(dir) || !fs.statSync(dir).isDirectory())
1329
- return [];
1330
- const out = [];
1331
- for (const name of fs.readdirSync(dir)) {
1332
- const m = _EP_FILE_RE.exec(name);
1333
- if (!m)
1334
- continue;
1335
- const full = path.join(dir, name);
1336
- if (!fs.statSync(full).isFile())
1337
- continue;
1338
- out.push({ path: full, episode: parseInt(m[1], 10) });
1117
+ export function commandOverride(opts) {
1118
+ const workspace = strOf(opts["workspace_path"] || "workspace");
1119
+ const unit = strOf(asList(opts["_args"])[0]).trim();
1120
+ const fromPath = strOf(opts["from"]).trim();
1121
+ const dd = directDir(workspace);
1122
+ const state = readRunState(workspace);
1123
+ const providerName = strOf(opts["provider"] || state["provider"] || DEFAULT_PROVIDER);
1124
+ const model = strOf(opts["model"] || state["model"] || DEFAULT_MODEL);
1125
+ const isEpisode = /^ep_\d+$/.test(unit);
1126
+ if (!isEpisode && !/^bat_\d+$/.test(unit)) {
1127
+ throw new CliError("OVERRIDE BLOCKED: Invalid unit", "Invalid unit key.", {
1128
+ exitCode: EXIT_USAGE,
1129
+ required: ["<unit>: ep_NNN or bat_NNNN"],
1130
+ received: [`<unit>: ${unit || "<empty>"}`],
1131
+ nextSteps: ["Pass an episode (ep_007) or batch (bat_0012) key shown by direct status."],
1132
+ });
1339
1133
  }
1340
- out.sort((a, b) => a.episode - b.episode);
1341
- return out;
1342
- }
1343
- export function commandParse(opts) {
1344
- if (opts["spec"]) {
1345
- return [{ title: "PARSE SPEC: md 工作区写法", body: PARSE_MD_SPEC }, EXIT_OK];
1134
+ if (!fromPath || !exists(fromPath)) {
1135
+ throw new CliError("OVERRIDE BLOCKED: --from not found", "Override source file not found.", {
1136
+ exitCode: EXIT_INPUT,
1137
+ required: ["--from <path>: readable JSON extraction for the unit"],
1138
+ received: [`--from: ${fromPath || "<missing>"}`],
1139
+ nextSteps: ["Provide a JSON file with scenes/actions for the unit."],
1140
+ });
1346
1141
  }
1347
- const workspace = strOf(opts["workspace_path"] || "workspace");
1348
- const args = asList(opts["_args"]);
1349
- const mdDir = strOf(opts["md_dir"] || args[0] || path.join(workspace, "parse"));
1350
- if (!exists(mdDir) || !fs.statSync(mdDir).isDirectory()) {
1351
- throw new CliError("PARSE BLOCKED: md workspace not found", "md workspace not found.", {
1142
+ const planPath = path.join(dd, isEpisode ? "episode_plan.json" : "batch_plan.json");
1143
+ if (!exists(planPath)) {
1144
+ throw new CliError("OVERRIDE BLOCKED: Plan not found", "Plan not found.", {
1352
1145
  exitCode: EXIT_INPUT,
1353
- required: ["a directory with per-episode 正文 md + 人物/场景/道具/发声源 md"],
1354
- received: [mdDir],
1355
- nextSteps: ["Pass the md workspace dir: scriptctl parse <dir>. Run `scriptctl parse --spec` for the format."],
1146
+ required: [isEpisode ? "episode_plan.json" : "batch_plan.json"],
1147
+ received: [planPath],
1148
+ nextSteps: ["Run scriptctl direct init first."],
1356
1149
  });
1357
1150
  }
1358
- let episodesDir = strOf(opts["episodes_dir"]).trim();
1359
- if (!episodesDir) {
1360
- const sub = path.join(mdDir, "episodes");
1361
- episodesDir = exists(sub) && fs.statSync(sub).isDirectory() ? sub : mdDir;
1151
+ const plan = readJson(planPath);
1152
+ const planUnits = asList(plan[isEpisode ? "episodes" : "batches"]);
1153
+ const planItem = planUnits.find((u) => (isEpisode ? episodeResultKey(u) : batchResultKey(u)) === unit) ?? null;
1154
+ if (!planItem) {
1155
+ throw new CliError("OVERRIDE BLOCKED: Unit not in current plan", "Unit not in current plan.", {
1156
+ exitCode: EXIT_INPUT,
1157
+ required: [`${unit} present in ${isEpisode ? "episode_plan.json" : "batch_plan.json"}`],
1158
+ received: [`${unit}: not found among ${planUnits.length} units`],
1159
+ nextSteps: ["Use a unit key from direct status; rerun init if the plan changed."],
1160
+ });
1362
1161
  }
1363
- const bodyFiles = collectEpisodeMdFiles(episodesDir);
1364
- if (bodyFiles.length === 0) {
1365
- throw new CliError("PARSE BLOCKED: no episode md found", "no episode md found.", {
1162
+ const sourceTextPath = path.join(workspace, "source.txt");
1163
+ if (!exists(sourceTextPath)) {
1164
+ throw new CliError("OVERRIDE BLOCKED: source.txt missing", "source.txt missing.", {
1366
1165
  exitCode: EXIT_INPUT,
1367
- required: ["per-episode body md named like ep_001.md"],
1368
- received: [episodesDir],
1369
- nextSteps: ["Add per-episode 正文 md (ep_001.md, ep_002.md, ...). Run `scriptctl parse --spec` for the format."],
1166
+ required: [sourceTextPath],
1167
+ received: ["<missing>"],
1168
+ nextSteps: ["Run scriptctl direct init first."],
1370
1169
  });
1371
1170
  }
1372
- // Each asset KIND is registered in its own file; the 正文 md carries none.
1373
- // Fold them all into one bible fragment (per-kind arrays).
1374
- const bible = { actors: [], locations: [], props: [], speakers: [], state_definitions: [] };
1375
- const assetDocsFound = [];
1376
- for (const spec of ASSET_DOC_SPECS) {
1377
- const p = firstExisting(mdDir, spec.names);
1378
- if (!p)
1379
- continue;
1380
- assetDocsFound.push(path.basename(p));
1381
- const parsed = parseAssetDoc(readText(p), spec.kind);
1382
- for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
1383
- bible[key].push(...asList(parsed[key]));
1384
- }
1171
+ const sourceText = readText(sourceTextPath);
1172
+ let data;
1173
+ try {
1174
+ data = readJson(fromPath);
1385
1175
  }
1386
- // Optional whole-script synopsis (梗概.md); strip a leading `# 梗概` header line.
1387
- let globalSynopsis = "";
1388
- const synPath = firstExisting(mdDir, SYNOPSIS_DOC_NAMES);
1389
- if (synPath)
1390
- globalSynopsis = readText(synPath).replace(/^\s*#\s+\S[^\n]*\n/, "").trim();
1391
- const results = [];
1392
- const sourceChunks = [];
1393
- for (const file of bodyFiles) {
1394
- const bodyText = readText(file.path);
1395
- sourceChunks.push(`# ep_${pad3(file.episode)}\n${bodyText.trim()}`);
1396
- try {
1397
- results.push(parseMarkdownBatch(bodyText, { episode: file.episode, part: 1 }, { fragmentMode: true }));
1398
- }
1399
- catch (exc) {
1400
- const e = exc;
1401
- throw new CliError("PARSE BLOCKED: episode md invalid", "episode md invalid.", {
1402
- exitCode: EXIT_INPUT,
1403
- required: ["per-episode 正文 md following `scriptctl parse --spec`"],
1404
- received: [`${path.basename(file.path)}: ${(e?.message ?? "").slice(0, 200)}`],
1405
- nextSteps: ["Fix the episode md and re-run parse."],
1406
- });
1407
- }
1176
+ catch (exc) {
1177
+ throw new CliError("OVERRIDE BLOCKED: --from invalid JSON", "Override JSON invalid.", {
1178
+ exitCode: EXIT_INPUT,
1179
+ required: ["valid extraction JSON"],
1180
+ received: [`${fromPath}: ${exc.message}`],
1181
+ nextSteps: ["Fix the JSON and retry."],
1182
+ });
1408
1183
  }
1409
- results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
1410
- // Fold the registered assets into the first episode result so their
1411
- // descriptions / states flow into the merge. Names are deduplicated globally
1412
- // by mergeEpisodeResults, so registering them first gives the canonical
1413
- // (registry) descriptions priority over anything implied by scene references.
1414
- if (results.length > 0) {
1415
- const first = results[0];
1416
- for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
1417
- first[key] = [...asList(bible[key]), ...asList(first[key])];
1418
- }
1184
+ const result = normalizeEpisodeResult(data, planItem);
1185
+ if (Number(result["episode"]) !== Number(planItem["episode"])) {
1186
+ throw new CliError("OVERRIDE BLOCKED: Episode mismatch", "Episode mismatch.", {
1187
+ exitCode: EXIT_USAGE,
1188
+ required: [`episode ${Number(planItem["episode"])}`],
1189
+ received: [`episode ${Number(result["episode"])}`],
1190
+ nextSteps: ["Provide an extraction for the correct episode."],
1191
+ });
1419
1192
  }
1420
- const title = strOf(opts["title"]).trim() || path.basename(path.resolve(mdDir));
1421
- const script = mergeEpisodeResults(results, title);
1422
- if (globalSynopsis)
1423
- script["synopsis"] = globalSynopsis;
1193
+ try {
1194
+ if (isEpisode)
1195
+ validateEpisodeExtractionQuality(sourceText, planItem, result);
1196
+ else
1197
+ validateBatchExtractionQuality(sourceText, planItem, result);
1198
+ }
1199
+ catch (exc) {
1200
+ if (exc instanceof CliError)
1201
+ throw exc;
1202
+ throw new CliError("OVERRIDE BLOCKED: Extraction invalid", "Extraction invalid.", {
1203
+ exitCode: EXIT_USAGE,
1204
+ required: ["valid action types (dialogue/inner_thought/action)"],
1205
+ received: [exc.message.slice(0, 160)],
1206
+ nextSteps: ["Fix the override extraction and retry."],
1207
+ });
1208
+ }
1209
+ const dir = path.join(dd, isEpisode ? "episode_results" : "batch_results");
1210
+ fs.mkdirSync(dir, { recursive: true });
1211
+ const hash = computeUnitHash(sourceText, planItem, providerName, model);
1212
+ if (isEpisode) {
1213
+ writeJson(episodeResultPath(dir, planItem), compactEpisodeResult(result));
1214
+ stampEpisodeMeta(dir, planItem, hash, "override", providerName, model);
1215
+ const errPath = episodeErrorPath(dir, planItem);
1216
+ if (exists(errPath))
1217
+ deletePath(errPath);
1218
+ }
1219
+ else {
1220
+ persistBatchResult(dir, planItem, result);
1221
+ stampBatchMeta(dir, planItem, hash, "override", providerName, model);
1222
+ const errPath = batchErrorPath(dir, planItem);
1223
+ if (exists(errPath))
1224
+ deletePath(errPath);
1225
+ }
1226
+ const report = {
1227
+ title: "OVERRIDE COMPLETE: Unit extraction injected",
1228
+ result: [
1229
+ `unit: ${unit}`,
1230
+ `kind: ${isEpisode ? "episode" : "batch"}`,
1231
+ `provenance: override`,
1232
+ `provider/model: ${providerName} / ${model}`,
1233
+ `scenes: ${asList(result["scenes"]).length}`,
1234
+ ],
1235
+ artifacts: [dir, path.join(dd, "run_state.json")],
1236
+ next: ["Rerun scriptctl direct init — the override is reused without re-calling the provider."],
1237
+ };
1238
+ return [report, EXIT_OK];
1239
+ }
1240
+ // ---------------------------------------------------------------------------
1241
+ // command_status — rebuild the progress view from on-disk meta/error sidecars.
1242
+ // run_state is just a cache of this; deleting it loses nothing.
1243
+ // ---------------------------------------------------------------------------
1244
+ export function commandStatus(opts) {
1245
+ const workspace = strOf(opts["workspace_path"] || "workspace");
1424
1246
  const dd = directDir(workspace);
1425
- fs.mkdirSync(dd, { recursive: true });
1426
- // Write source.txt so the existing direct validate/export downstream (which
1427
- // gates on source.txt existing) works unchanged. For a parse-origin script the
1428
- // authored md *is* the source, so we persist the concatenated bodies.
1429
- fs.mkdirSync(workspace, { recursive: true });
1430
- fs.writeFileSync(path.join(workspace, "source.txt"), sourceChunks.join("\n\n") + "\n", "utf-8");
1431
- const scriptPath = path.join(dd, "script.initial.json");
1432
- writeJson(scriptPath, script);
1433
- const validation = validateScript(workspace, scriptPath, { requireSource: false });
1434
- const passed = Boolean(validation["passed"]);
1435
- updateRunState(workspace, {
1436
- status: passed ? "ready_for_agent" : "needs_agent_repair",
1437
- command: "parse",
1438
- init_stage: "complete",
1439
- provider: "parse",
1440
- source_path: path.resolve(mdDir),
1441
- script_path: scriptPath,
1442
- validation_path: path.join(dd, "validation.json"),
1443
- episode_total: results.length,
1444
- episode_completed: results.length,
1445
- review_status: "pending",
1446
- review_missing: [...REVIEW_TARGETS],
1447
- inspected_targets: [],
1448
- patch_count: 0,
1449
- exportable: true,
1450
- last_error: null,
1451
- });
1452
- const stats = validation["stats"] ?? {};
1247
+ const episodePlanPath = path.join(dd, "episode_plan.json");
1248
+ const batchPlanPath = path.join(dd, "batch_plan.json");
1249
+ if (!exists(episodePlanPath) || !exists(batchPlanPath)) {
1250
+ throw new CliError("STATUS BLOCKED: Plan not found", "Plan not found.", {
1251
+ exitCode: EXIT_INPUT,
1252
+ required: ["episode_plan.json and batch_plan.json"],
1253
+ received: [exists(episodePlanPath) ? "episode_plan.json ok" : "episode_plan.json missing"],
1254
+ nextSteps: ["Run scriptctl direct init first."],
1255
+ });
1256
+ }
1257
+ const episodes = asList(readJson(episodePlanPath)["episodes"]);
1258
+ const batches = asList(readJson(batchPlanPath)["batches"]);
1259
+ const episodeResultsDir = path.join(dd, "episode_results");
1260
+ const batchResultsDir = path.join(dd, "batch_results");
1261
+ const count = { ok: 0, override: 0, recovered: 0, terminal: 0, missing: 0 };
1262
+ for (const batch of batches) {
1263
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
1264
+ if (!meta) {
1265
+ count.missing++;
1266
+ continue;
1267
+ }
1268
+ if (meta["status"] === "terminal") {
1269
+ count.terminal++;
1270
+ continue;
1271
+ }
1272
+ count.ok++;
1273
+ if (meta["provenance"] === "override")
1274
+ count.override++;
1275
+ else if (meta["provenance"] === "recovered")
1276
+ count.recovered++;
1277
+ }
1278
+ const completedEpisodes = [];
1279
+ for (const ep of episodes) {
1280
+ const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
1281
+ if (meta && meta["status"] === "ok")
1282
+ completedEpisodes.push(Number(ep["episode"]));
1283
+ }
1284
+ // Held out = episodes with at least one terminal batch and no episode result.
1285
+ const completedSet = new Set(completedEpisodes);
1286
+ const heldOut = new Set();
1287
+ for (const batch of batches) {
1288
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
1289
+ const epNum = Number(batch["episode"]);
1290
+ if (meta && meta["status"] === "terminal" && !completedSet.has(epNum))
1291
+ heldOut.add(epNum);
1292
+ }
1293
+ const heldOutEpisodes = [...heldOut].sort((a, b) => a - b);
1453
1294
  const report = {
1454
- title: passed
1455
- ? "PARSE COMPLETE: Initial script ready"
1456
- : "PARSE NEEDS AGENT: Initial script written with repair issues",
1295
+ title: "DIRECT STATUS",
1457
1296
  result: [
1458
- `episodes: ${stats["episodes"] ?? results.length}`,
1459
- `scenes: ${stats["scenes"] ?? 0}`,
1460
- `actions: ${stats["actions"] ?? 0}`,
1461
- `asset docs: ${assetDocsFound.join(" / ") || "(none)"}`,
1462
- `synopsis: ${globalSynopsis ? "yes" : "no"}`,
1463
- `validation: ${passed ? "passed" : "needs repair"}`,
1464
- "agent_review: pending",
1297
+ `episodes: ${completedEpisodes.length}/${episodes.length} complete`,
1298
+ `batches: ${count.ok}/${batches.length} ok (override ${count.override}, recovered ${count.recovered})`,
1299
+ `terminal batches: ${count.terminal}`,
1300
+ `pending batches: ${count.missing}`,
1301
+ `held out episodes: ${heldOutEpisodes.length === 0 ? "-" : heldOutEpisodes.join(", ")}`,
1465
1302
  ],
1466
- artifacts: [scriptPath, path.join(dd, "validation.json"), path.join(dd, "run_state.json")],
1467
- issues: summarizeIssues(asList(validation["issues"])),
1468
- next: ["Run direct inspect (episode/asset/issue) for the two-stage review; apply patches if needed; then direct validate/export."],
1303
+ artifacts: [batchResultsDir, episodeResultsDir, path.join(dd, "run_state.json")],
1304
+ next: heldOutEpisodes.length > 0
1305
+ ? ["Override held-out episodes with direct override, or export 32/33 with direct export --allow-incomplete."]
1306
+ : ["All units accounted for."],
1469
1307
  };
1470
- return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
1308
+ return [report, EXIT_OK];
1309
+ }
1310
+ export function summarizeIssues(issues) {
1311
+ if (issues.length === 0)
1312
+ return [];
1313
+ const counts = {};
1314
+ for (const item of issues) {
1315
+ const sev = strOf(item["severity"]);
1316
+ counts[sev] = (counts[sev] ?? 0) + 1;
1317
+ }
1318
+ const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
1319
+ const first = issues[0];
1320
+ return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
1471
1321
  }
1472
1322
  // ---------------------------------------------------------------------------
1473
1323
  // command_validate