@lingjingai/scriptctl 0.11.4 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,8 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, PARSE_MD_SPEC, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, deleteTree, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
4
- import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, parseAssetDoc, parseMarkdownBatch, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
3
+ import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
4
+ import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, classifyProviderError, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
5
5
  import { validateScript } from "../domain/script-core.js";
6
- import { ScriptOutputApiError } from "../infra/script-output-store.js";
7
- import { apiErrorToCli, currentRevisionOrZero, scriptOutputClient, sortDeep } from "./script.js";
8
6
  import { makeProvider } from "../infra/providers.js";
9
7
  import { makeSourceManifest, prepareSource, } from "../infra/converters.js";
10
8
  function strOf(v) {
@@ -56,18 +54,6 @@ export function readRunState(workspace) {
56
54
  return {};
57
55
  }
58
56
  }
59
- function failureSignature(items) {
60
- if (!isList(items))
61
- return [];
62
- const out = [];
63
- for (const item of items) {
64
- const s = strOf(item).trim();
65
- if (s)
66
- out.push(s);
67
- }
68
- out.sort();
69
- return out;
70
- }
71
57
  export function addInspectedTarget(workspace, target) {
72
58
  const state = readRunState(workspace);
73
59
  const targets = [];
@@ -149,9 +135,6 @@ function episodeErrorPath(dir, ep) {
149
135
  function episodeResultKey(ep) {
150
136
  return `ep_${pad3(Number(ep["episode"]))}`;
151
137
  }
152
- function episodeResultsIndexPath(dir) {
153
- return path.join(dir, "index.json");
154
- }
155
138
  function batchResultKey(batch) {
156
139
  const bid = strOf(batch["batch_id"]).trim();
157
140
  if (bid)
@@ -167,9 +150,6 @@ function batchMarkdownPath(dir, batch) {
167
150
  function batchErrorPath(dir, batch) {
168
151
  return path.join(dir, `${batchResultKey(batch)}.error.json`);
169
152
  }
170
- function batchResultsIndexPath(dir) {
171
- return path.join(dir, "index.json");
172
- }
173
153
  function persistBatchResult(dir, batch, result) {
174
154
  const rawMd = result["_raw_markdown"];
175
155
  delete result["_raw_markdown"];
@@ -183,130 +163,81 @@ function persistBatchResult(dir, batch, result) {
183
163
  deletePath(mdPath);
184
164
  }
185
165
  }
186
- function readBatchResultsIndex(dir) {
187
- const p = batchResultsIndexPath(dir);
188
- if (!exists(p))
189
- return { version: 1, batches: {} };
190
- let data;
191
- try {
192
- data = readJson(p);
193
- }
194
- catch {
195
- return { version: 1, batches: {} };
196
- }
197
- if (!isDict(data))
198
- return { version: 1, batches: {} };
199
- if (!isDict(data["batches"]))
200
- data["batches"] = {};
201
- if (!("version" in data))
202
- data["version"] = 1;
203
- return data;
204
- }
205
- function writeBatchResultsIndex(dir, index) {
206
- writeJson(batchResultsIndexPath(dir), index);
207
- }
208
- function updateBatchResultMetadata(dir, batch, providerName, model) {
209
- const index = readBatchResultsIndex(dir);
210
- const batches = index["batches"] ?? {};
211
- batches[batchResultKey(batch)] = {
212
- episode: Number(batch["episode"]),
213
- part: Number(batch["part"]),
214
- provider: providerName,
215
- model,
216
- extracted_at: checkpointTimestamp(),
217
- };
218
- index["batches"] = batches;
219
- writeBatchResultsIndex(dir, index);
166
+ function episodeMetaPath(dir, ep) {
167
+ return path.join(dir, `${episodeResultKey(ep)}.meta.json`);
220
168
  }
221
- function removeBatchResultMetadata(dir, batch) {
222
- const index = readBatchResultsIndex(dir);
223
- const batches = index["batches"] ?? {};
224
- const key = batchResultKey(batch);
225
- if (key in batches) {
226
- delete batches[key];
227
- index["batches"] = batches;
228
- writeBatchResultsIndex(dir, index);
229
- }
169
+ function batchMetaPath(dir, batch) {
170
+ return path.join(dir, `${batchResultKey(batch)}.meta.json`);
230
171
  }
231
- function readEpisodeResultsIndex(dir) {
232
- const p = episodeResultsIndexPath(dir);
233
- if (!exists(p))
234
- return { version: 1, episodes: {} };
235
- let data;
172
+ function readUnitMeta(metaPath) {
173
+ if (!exists(metaPath))
174
+ return null;
236
175
  try {
237
- data = readJson(p);
176
+ const data = readJson(metaPath);
177
+ return isDict(data) ? data : null;
238
178
  }
239
179
  catch {
240
- return { version: 1, episodes: {} };
241
- }
242
- if (!isDict(data))
243
- return { version: 1, episodes: {} };
244
- if (!isDict(data["episodes"]))
245
- data["episodes"] = {};
246
- if (!("version" in data))
247
- data["version"] = 1;
248
- return data;
180
+ return null;
181
+ }
249
182
  }
250
- function writeEpisodeResultsIndex(dir, index) {
251
- writeJson(episodeResultsIndexPath(dir), index);
183
+ function writeUnitMeta(metaPath, meta) {
184
+ fs.mkdirSync(path.dirname(metaPath), { recursive: true });
185
+ writeJson(metaPath, meta);
252
186
  }
253
- function updateEpisodeResultMetadata(dir, ep, providerName, model) {
254
- const index = readEpisodeResultsIndex(dir);
255
- const episodes = index["episodes"] ?? {};
256
- episodes[episodeResultKey(ep)] = {
257
- provider: providerName,
258
- model,
259
- extracted_at: checkpointTimestamp(),
260
- };
261
- index["episodes"] = episodes;
262
- writeEpisodeResultsIndex(dir, index);
187
+ function removeUnitMeta(metaPath) {
188
+ if (exists(metaPath))
189
+ deletePath(metaPath);
263
190
  }
264
- function removeEpisodeResultMetadata(dir, ep) {
265
- const index = readEpisodeResultsIndex(dir);
266
- const episodes = index["episodes"] ?? {};
267
- const key = episodeResultKey(ep);
268
- if (key in episodes) {
269
- delete episodes[key];
270
- index["episodes"] = episodes;
271
- writeEpisodeResultsIndex(dir, index);
272
- }
191
+ function stampEpisodeMeta(dir, ep, inputHash, provenance, providerName, model) {
192
+ writeUnitMeta(episodeMetaPath(dir, ep), {
193
+ schema: 1, key: episodeResultKey(ep), episode: Number(ep["episode"]),
194
+ input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
195
+ });
273
196
  }
274
- function compactResultHasMultiRefs(data) {
275
- for (const scene of asList(data["sc"])) {
276
- if (!isDict(scene))
197
+ function stampBatchMeta(dir, batch, inputHash, provenance, providerName, model) {
198
+ writeUnitMeta(batchMetaPath(dir, batch), {
199
+ schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
200
+ input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
201
+ });
202
+ }
203
+ // Content-address a single episode/batch plan unit: the contract version, the
204
+ // exact source span text, the title-stable plan item, and provider/model. Any
205
+ // change to what would alter extraction rotates the hash for THAT unit only.
206
+ export function computeUnitHash(sourceText, unit, providerName, model) {
207
+ const span = isDict(unit["source_span"]) ? unit["source_span"] : {};
208
+ const start = Number(span["start"] ?? 0);
209
+ const end = Number(span["end"] ?? 0);
210
+ const spanText = sourceText.slice(start, end);
211
+ const planText = JSON.stringify(unit, checkpointReplacer());
212
+ return sha256Text([String(DIRECT_CONTRACT_VERSION), spanText, planText, providerName ?? "", model ?? ""].join("\u0000"));
213
+ }
214
+ // Delete result/meta/error/markdown files whose unit key is no longer in the
215
+ // current plan (e.g. the source shed an episode). Pure function of the plan —
216
+ // it never inspects hashes, content, or run_state, so it can only remove units
217
+ // the plan no longer references. Also retires the legacy v3 `index.json`.
218
+ function gcOrphanUnits(dir, liveKeys) {
219
+ if (!exists(dir))
220
+ return [];
221
+ const removed = [];
222
+ for (const name of fs.readdirSync(dir)) {
223
+ if (name === "index.json") {
224
+ deletePath(path.join(dir, name));
225
+ removed.push(name);
277
226
  continue;
278
- for (const action of asList(scene["a"])) {
279
- if (!isDict(action))
280
- continue;
281
- const refs = action["r"];
282
- if (isList(refs) && refs.length > 1)
283
- return true;
227
+ }
228
+ const key = name.replace(/\.(meta\.json|error\.json|json|md)$/, "");
229
+ if (key === name)
230
+ continue; // not a recognized unit artifact
231
+ if (!liveKeys.has(key)) {
232
+ deletePath(path.join(dir, name));
233
+ removed.push(name);
284
234
  }
285
235
  }
286
- return false;
287
- }
288
- export function initCheckpoint(sourceText, plan) {
289
- const planText = JSON.stringify(plan, checkpointReplacer());
290
- return {
291
- contract_version: DIRECT_CONTRACT_VERSION,
292
- source_sha256: sha256Text(sourceText),
293
- episode_plan_sha256: sha256Text(planText),
294
- total_episodes: Number(plan["total_episodes"] ?? asList(plan["episodes"]).length),
295
- };
296
- }
297
- export function initBatchCheckpoint(sourceText, batchPlan) {
298
- const planText = JSON.stringify(batchPlan, checkpointReplacer());
299
- return {
300
- contract_version: DIRECT_CONTRACT_VERSION,
301
- source_sha256: sha256Text(sourceText),
302
- batch_plan_sha256: sha256Text(planText),
303
- total_batches: Number(batchPlan["total_batches"] ?? asList(batchPlan["batches"]).length),
304
- };
236
+ return removed;
305
237
  }
306
238
  // Title fields are LLM-mutated downstream by enrichEpisodePlanTitles, so they
307
- // must be excluded from checkpoint hashes — otherwise every rerun gets a fresh
308
- // SHA, the previous checkpoint never matches, and the whole batch pipeline
309
- // re-extracts from scratch.
239
+ // must be excluded from unit hashes — otherwise every rerun gets a fresh SHA,
240
+ // the cached unit never matches, and that unit re-extracts from scratch.
310
241
  const CHECKPOINT_UNSTABLE_KEYS = new Set(["title", "generated_title", "title_status", "title_source"]);
311
242
  function checkpointReplacer() {
312
243
  // Python's json.dumps(sort_keys=True) sorts keys recursively. Replicate by walking and sorting.
@@ -325,121 +256,47 @@ function checkpointReplacer() {
325
256
  return value;
326
257
  };
327
258
  }
328
- function checkpointSourceMatches(previous, current) {
329
- if (!previous || Object.keys(previous).length === 0)
330
- return false;
331
- const keys = ["contract_version", "source_sha256", "episode_plan_sha256", "total_episodes"];
332
- return keys.every((k) => previous[k] === current[k]);
333
- }
334
- function batchCheckpointMatches(previous, current) {
335
- if (!previous || Object.keys(previous).length === 0)
336
- return false;
337
- const keys = ["contract_version", "source_sha256", "batch_plan_sha256", "total_batches"];
338
- return keys.every((k) => previous[k] === current[k]);
339
- }
340
- function resetInitOutputs(dd) {
341
- for (const dirname of ["episode_results", "batch_results"]) {
342
- const target = path.join(dd, dirname);
343
- if (exists(target))
344
- deleteTree(target);
345
- }
346
- for (const name of ["script.initial.json", "validation.json", "batch_plan.json", "asset_curation.json", "asset_metadata.json"]) {
347
- const p = path.join(dd, name);
348
- if (exists(p))
349
- deletePath(p);
350
- }
351
- }
352
- function resetBatchOutputs(dd) {
353
- const batchResultsDir = path.join(dd, "batch_results");
354
- if (exists(batchResultsDir))
355
- deleteTree(batchResultsDir);
356
- }
357
- function loadCheckpointedEpisode(sourceText, episodeResultsDir, ep, providerName, model, previousProvider) {
259
+ // Non-destructive reuse: a cached episode result is reusable iff its sidecar
260
+ // meta records the same input_hash we compute for the current plan unit. The
261
+ // hash subsumes the old provider / source_span / episode-number / contract
262
+ // checks any of those changing rotates the hash. On any mismatch or read
263
+ // failure we return null and let the caller re-extract and overwrite; we NEVER
264
+ // delete the cached file pre-emptively (that was the data-loss root cause).
265
+ export function loadCachedEpisode(sourceText, episodeResultsDir, ep, expectedHash) {
266
+ const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
267
+ if (!meta || meta["input_hash"] !== expectedHash)
268
+ return null;
269
+ if (meta["status"] === "terminal")
270
+ return null;
358
271
  const p = episodeResultPath(episodeResultsDir, ep);
359
272
  if (!exists(p))
360
273
  return null;
361
- let result;
362
274
  try {
363
- const data = readJson(p);
364
- const metadata = isDict(data["_scriptctl"]) ? data["_scriptctl"] : {};
365
- const index = readEpisodeResultsIndex(episodeResultsDir);
366
- let indexEntry = {};
367
- const eps = index["episodes"];
368
- if (isDict(eps)) {
369
- const entry = eps[episodeResultKey(ep)];
370
- if (isDict(entry))
371
- indexEntry = entry;
372
- }
373
- const resultProvider = strOf(metadata["provider"] || indexEntry["provider"] || previousProvider).trim();
374
- if (providerName && resultProvider && resultProvider !== providerName) {
375
- throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
376
- }
377
- result = normalizeEpisodeResult(data, ep);
275
+ const result = normalizeEpisodeResult(readJson(p), ep);
378
276
  validateEpisodeExtractionQuality(sourceText, ep, result);
379
- if (!("sc" in data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
380
- writeJson(p, compactEpisodeResult(result));
381
- if (providerName && model)
382
- updateEpisodeResultMetadata(episodeResultsDir, ep, providerName, model);
383
- }
277
+ return result;
384
278
  }
385
279
  catch {
386
- try {
387
- deletePath(p);
388
- }
389
- catch {
390
- // ignore
391
- }
392
- removeEpisodeResultMetadata(episodeResultsDir, ep);
393
280
  return null;
394
281
  }
395
- if (Number(result["episode"] ?? 0) !== Number(ep["episode"]))
282
+ }
283
+ export function loadCachedBatch(sourceText, batchResultsDir, batch, expectedHash) {
284
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
285
+ if (!meta || meta["input_hash"] !== expectedHash)
396
286
  return null;
397
- if (JSON.stringify(result["source_span"]) !== JSON.stringify(ep["source_span"]))
287
+ if (meta["status"] === "terminal")
398
288
  return null;
399
- return result;
400
- }
401
- function loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider) {
402
289
  const p = batchResultPath(batchResultsDir, batch);
403
290
  if (!exists(p))
404
291
  return null;
405
- let result;
406
292
  try {
407
- const data = readJson(p);
408
- const index = readBatchResultsIndex(batchResultsDir);
409
- let indexEntry = {};
410
- const batches = index["batches"];
411
- if (isDict(batches)) {
412
- const entry = batches[batchResultKey(batch)];
413
- if (isDict(entry))
414
- indexEntry = entry;
415
- }
416
- const resultProvider = strOf(indexEntry["provider"] || previousProvider).trim();
417
- if (providerName && resultProvider && resultProvider !== providerName) {
418
- throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
419
- }
420
- result = normalizeEpisodeResult(data, batch);
293
+ const result = normalizeEpisodeResult(readJson(p), batch);
421
294
  validateBatchExtractionQuality(sourceText, batch, result);
422
- if (!("sc" in data) || compactResultHasMultiRefs(data) || ["episode", "title", "source_span", "_scriptctl"].some((k) => k in data)) {
423
- persistBatchResult(batchResultsDir, batch, result);
424
- if (providerName && model)
425
- updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
426
- }
295
+ return result;
427
296
  }
428
297
  catch {
429
- try {
430
- deletePath(p);
431
- }
432
- catch {
433
- // ignore
434
- }
435
- removeBatchResultMetadata(batchResultsDir, batch);
436
298
  return null;
437
299
  }
438
- if (Number(result["episode"] ?? 0) !== Number(batch["episode"]))
439
- return null;
440
- if (JSON.stringify(result["source_span"]) !== JSON.stringify(batch["source_span"]))
441
- return null;
442
- return result;
443
300
  }
444
301
  function mergeScene(target, source) {
445
302
  if ((target["location_name"] === "" || target["location_name"] === "未知场景" || target["location_name"] === null || target["location_name"] === undefined) &&
@@ -532,33 +389,9 @@ async function providerExtractAssetCurationLocal(provider, sourceText, script) {
532
389
  }
533
390
  return {};
534
391
  }
535
- function writeEpisodeFailure(dir, ep, exc) {
536
- const err = exc;
537
- const error = {
538
- episode: Number(ep["episode"]),
539
- title: ep["title"],
540
- source_span: ep["source_span"],
541
- error_type: err?.name || "Error",
542
- message: (err?.message || err?.name || "Error").slice(0, 500),
543
- failed_at: checkpointTimestamp(),
544
- };
545
- if (exc instanceof CliError) {
546
- if (exc.required.length > 0)
547
- error["required"] = exc.required;
548
- if (exc.received.length > 0)
549
- error["received"] = exc.received;
550
- if (exc.nextSteps.length > 0)
551
- error["next"] = exc.nextSteps;
552
- }
553
- const resultPath = episodeResultPath(dir, ep);
554
- if (exists(resultPath))
555
- deletePath(resultPath);
556
- removeEpisodeResultMetadata(dir, ep);
557
- writeJson(episodeErrorPath(dir, ep), error);
558
- return error;
559
- }
560
- function writeBatchFailure(dir, batch, exc) {
392
+ function writeBatchFailure(dir, batch, exc, inputHash, providerName, model) {
561
393
  const err = exc;
394
+ const terminal = classifyProviderError(exc) === "terminal";
562
395
  const error = {
563
396
  batch_id: batchResultKey(batch),
564
397
  episode: Number(batch["episode"]),
@@ -567,6 +400,8 @@ function writeBatchFailure(dir, batch, exc) {
567
400
  line_range: batch["line_range"],
568
401
  error_type: err?.name || "Error",
569
402
  message: (err?.message || err?.name || "Error").slice(0, 500),
403
+ terminal,
404
+ input_hash: inputHash,
570
405
  failed_at: checkpointTimestamp(),
571
406
  };
572
407
  if (exc instanceof CliError) {
@@ -580,7 +415,16 @@ function writeBatchFailure(dir, batch, exc) {
580
415
  const resultPath = batchResultPath(dir, batch);
581
416
  if (exists(resultPath))
582
417
  deletePath(resultPath);
583
- removeBatchResultMetadata(dir, batch);
418
+ if (terminal) {
419
+ writeUnitMeta(batchMetaPath(dir, batch), {
420
+ schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
421
+ input_hash: inputHash, provenance: "extracted", status: "terminal",
422
+ provider: providerName, model, extracted_at: checkpointTimestamp(),
423
+ });
424
+ }
425
+ else {
426
+ removeUnitMeta(batchMetaPath(dir, batch));
427
+ }
584
428
  writeJson(batchErrorPath(dir, batch), error);
585
429
  return error;
586
430
  }
@@ -635,6 +479,9 @@ export async function commandInit(opts) {
635
479
  const workspace = strOf(opts["workspace_path"] || "workspace");
636
480
  const providerName = strOf(opts["provider"] || DEFAULT_PROVIDER);
637
481
  const model = strOf(opts["model"] || process.env.SCRIPTCTL_ANTHROPIC_MODEL || DEFAULT_MODEL);
482
+ // When set, retry batches a prior run marked terminal (content-filtered)
483
+ // instead of skipping them — e.g. after the provider's filter was adjusted.
484
+ const retryTerminal = Boolean(opts["retry_terminal"]);
638
485
  let concurrency;
639
486
  try {
640
487
  concurrency = parseInt(strOf(opts["concurrency"] || DEFAULT_CONCURRENCY), 10);
@@ -719,7 +566,6 @@ export async function commandInit(opts) {
719
566
  }
720
567
  const dd = directDir(workspace);
721
568
  fs.mkdirSync(dd, { recursive: true });
722
- const previousStateBeforeInit = readRunState(workspace);
723
569
  updateRunState(workspace, {
724
570
  status: "init_running",
725
571
  command: "direct init",
@@ -832,17 +678,6 @@ export async function commandInit(opts) {
832
678
  nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
833
679
  });
834
680
  }
835
- const checkpoint = initCheckpoint(sourceText, plan);
836
- const batchCheckpoint = initBatchCheckpoint(sourceText, batchPlan);
837
- const previousState = previousStateBeforeInit;
838
- const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
839
- const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
840
- const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
841
- const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
842
- if (!checkpointReused)
843
- resetInitOutputs(dd);
844
- else if (!batchCheckpointReused)
845
- resetBatchOutputs(dd);
846
681
  writeJson(path.join(dd, "source_manifest.json"), manifest);
847
682
  writeJson(path.join(dd, "episode_plan.json"), plan);
848
683
  writeJson(path.join(dd, "batch_plan.json"), batchPlan);
@@ -850,13 +685,15 @@ export async function commandInit(opts) {
850
685
  const batchResultsDir = path.join(dd, "batch_results");
851
686
  fs.mkdirSync(episodeResultsDir, { recursive: true });
852
687
  fs.mkdirSync(batchResultsDir, { recursive: true });
688
+ // Non-destructive GC: drop result/meta/error/md files for units the current
689
+ // plan no longer references (e.g. the source shed an episode). Pure function
690
+ // of the plan — it never touches a unit the plan still references, and retires
691
+ // the legacy v3 index.json. There is no whole-directory reset any more.
692
+ gcOrphanUnits(episodeResultsDir, new Set(asList(plan["episodes"]).map((ep) => episodeResultKey(ep))));
693
+ gcOrphanUnits(batchResultsDir, new Set(asList(batchPlan["batches"]).map((b) => batchResultKey(b))));
853
694
  updateRunState(workspace, {
854
695
  status: "init_running",
855
696
  init_stage: "batch_extract",
856
- checkpoint,
857
- batch_checkpoint: batchCheckpoint,
858
- checkpoint_reused: checkpointReused,
859
- batch_checkpoint_reused: batchCheckpointReused,
860
697
  batch_mode: batchMode,
861
698
  batch_target_lines: batchTargetLines,
862
699
  batch_max_chars: batchMaxChars,
@@ -875,11 +712,11 @@ export async function commandInit(opts) {
875
712
  batchesByEpisode.set(epNum, []);
876
713
  batchesByEpisode.get(epNum).push(batch);
877
714
  }
878
- const previousProvider = strOf(previousState["provider"]).trim() || null;
715
+ // Per-unit reuse: each episode is judged independently by its own input hash,
716
+ // so a source edit to one episode invalidates only that episode — not all 33.
879
717
  for (const episode of asList(plan["episodes"])) {
880
- const cached = checkpointReused
881
- ? loadCheckpointedEpisode(sourceText, episodeResultsDir, episode, providerName, model, previousProvider)
882
- : null;
718
+ const epHash = computeUnitHash(sourceText, episode, providerName, model);
719
+ const cached = loadCachedEpisode(sourceText, episodeResultsDir, episode, epHash);
883
720
  if (cached !== null) {
884
721
  results.push(cached);
885
722
  skipped.push(Number(episode["episode"]));
@@ -889,7 +726,7 @@ export async function commandInit(opts) {
889
726
  if (!exists(batchResultPath(batchResultsDir, cachedBatch))) {
890
727
  const backfilled = recoverBatchFromSource(sourceText, cachedBatch);
891
728
  persistBatchResult(batchResultsDir, cachedBatch, backfilled);
892
- updateBatchResultMetadata(batchResultsDir, cachedBatch, providerName, model);
729
+ stampBatchMeta(batchResultsDir, cachedBatch, computeUnitHash(sourceText, cachedBatch, providerName, model), "recovered", providerName, model);
893
730
  }
894
731
  const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
895
732
  if (exists(errorPath))
@@ -902,11 +739,19 @@ export async function commandInit(opts) {
902
739
  }
903
740
  const batchResults = [];
904
741
  const skippedBatches = [];
742
+ const terminalSkipped = [];
905
743
  const pending = [];
906
744
  for (const batch of pendingBatches) {
907
- const cachedBatch = batchCheckpointReused
908
- ? loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider)
909
- : null;
745
+ const bHash = computeUnitHash(sourceText, batch, providerName, model);
746
+ // A terminal failure (content filter) with the same input hash will fail the
747
+ // same way — skip it instead of re-calling the provider, unless --retry-terminal
748
+ // or the source/provider changed (which rotates the hash).
749
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
750
+ if (!retryTerminal && meta && meta["status"] === "terminal" && meta["input_hash"] === bHash) {
751
+ terminalSkipped.push(batchResultKey(batch));
752
+ continue;
753
+ }
754
+ const cachedBatch = loadCachedBatch(sourceText, batchResultsDir, batch, bHash);
910
755
  if (cachedBatch !== null) {
911
756
  cachedBatch["_batch_id"] = batchResultKey(batch);
912
757
  cachedBatch["_batch_part"] = Number(batch["part"]);
@@ -933,12 +778,12 @@ export async function commandInit(opts) {
933
778
  result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
934
779
  batchResults.push(result);
935
780
  persistBatchResult(batchResultsDir, batch, result);
936
- updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
781
+ stampBatchMeta(batchResultsDir, batch, computeUnitHash(sourceText, batch, providerName, model), "extracted", providerName, model);
937
782
  if (exists(errorPath))
938
783
  deletePath(errorPath);
939
784
  }
940
785
  else {
941
- failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
786
+ failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error, computeUnitHash(sourceText, batch, providerName, model), providerName, model));
942
787
  }
943
788
  }
944
789
  results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
@@ -957,82 +802,94 @@ export async function commandInit(opts) {
957
802
  return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
958
803
  });
959
804
  const completedBatches = skippedEpisodeBatchCount + batchResults.length;
960
- if (failures.length > 0) {
961
- const failedEpisodes = [...new Set(failures.map((it) => Number(it["episode"])))].sort((a, b) => a - b);
962
- const failedBatches = failures.map((it) => strOf(it["batch_id"]));
963
- const currentFailureSignature = failureSignature(failedBatches);
964
- const previousFailureSignature = failureSignature(previousState["failed_batches"]);
965
- const sameFailuresRepeated = checkpointReused &&
966
- batchCheckpointReused &&
967
- currentFailureSignature.length > 0 &&
968
- currentFailureSignature.length === previousFailureSignature.length &&
969
- currentFailureSignature.every((v, idx) => v === previousFailureSignature[idx]) &&
970
- ["init_incomplete", "init_stalled"].includes(strOf(previousState["status"]));
971
- const previousFailureStreak = normalizeInt(previousState["failure_streak"], 0);
972
- const failureStreak = sameFailuresRepeated ? previousFailureStreak + 1 : 1;
973
- const failureTitle = sameFailuresRepeated
974
- ? "INIT STALLED: Same batches keep failing"
975
- : "INIT INCOMPLETE: Batch extraction failed";
976
- const nextSteps = sameFailuresRepeated
977
- ? [
978
- "Run direct inspect --target issue to read failed batch details.",
979
- "Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
980
- ]
981
- : [
982
- "Run direct inspect --target issue to review failed batches.",
983
- "Rerun the same init once if failures look transient; completed checkpoints will be reused.",
984
- ];
985
- const failedEpisodeSet = new Set(failedEpisodes);
986
- const skippedSet = new Set(skipped);
987
- const batchResultsByEpisode = new Map();
988
- for (const result of batchResults) {
989
- const ep = Number(result["episode"] ?? 0);
990
- if (!batchResultsByEpisode.has(ep))
991
- batchResultsByEpisode.set(ep, []);
992
- batchResultsByEpisode.get(ep).push(result);
993
- }
805
+ const transientFailures = failures.filter((it) => !it["terminal"]);
806
+ const terminalFailures = failures.filter((it) => Boolean(it["terminal"]));
807
+ const skippedSet = new Set(skipped);
808
+ // Merge every fully-completed, non-cached episode into an episode_results
809
+ // checkpoint. Episodes still missing a batch (a failure this run, or a batch
810
+ // a prior run marked terminal and we skipped) are left unmerged so a rerun or
811
+ // an override can complete them.
812
+ const batchResultsByEpisode = new Map();
813
+ for (const result of batchResults) {
814
+ const ep = Number(result["episode"] ?? 0);
815
+ if (!batchResultsByEpisode.has(ep))
816
+ batchResultsByEpisode.set(ep, []);
817
+ batchResultsByEpisode.get(ep).push(result);
818
+ }
819
+ try {
994
820
  for (const episode of asList(plan["episodes"])) {
995
821
  const episodeNum = Number(episode["episode"]);
996
- if (skippedSet.has(episodeNum) || failedEpisodeSet.has(episodeNum))
822
+ if (skippedSet.has(episodeNum))
997
823
  continue;
998
824
  const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
999
- if (expectedBatches && (batchResultsByEpisode.get(episodeNum) ?? []).length === expectedBatches) {
1000
- const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
1001
- validateEpisodeExtractionQuality(sourceText, episode, result);
1002
- results.push(result);
1003
- writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
1004
- updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
1005
- }
825
+ if (!expectedBatches || (batchResultsByEpisode.get(episodeNum) ?? []).length !== expectedBatches)
826
+ continue;
827
+ const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
828
+ validateEpisodeExtractionQuality(sourceText, episode, result);
829
+ results.push(result);
830
+ writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
831
+ stampEpisodeMeta(episodeResultsDir, episode, computeUnitHash(sourceText, episode, providerName, model), "extracted", providerName, model);
832
+ const errorPath = episodeErrorPath(episodeResultsDir, episode);
833
+ if (exists(errorPath))
834
+ deletePath(errorPath);
1006
835
  }
836
+ }
837
+ catch (exc) {
838
+ const e = exc;
839
+ throw initFailedReport(workspace, {
840
+ title: "INIT FAILED: Episode merge failed",
841
+ stage: "episode_merge",
842
+ required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
843
+ received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
844
+ nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
845
+ updates: { batch_completed: completedBatches },
846
+ });
847
+ }
848
+ results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
849
+ // Classify episodes that could not be assembled. An episode blocked by ANY
850
+ // transient batch (timeout/5xx) can still complete on rerun → it blocks init.
851
+ // An episode blocked only by terminal (content-filtered) batches is held out:
852
+ // the rest of the script ships, and the operator overrides the blocked unit.
853
+ const completedEpisodeNums = new Set(results.map((r) => Number(r["episode"])));
854
+ const transientEpisodeSet = new Set(transientFailures.map((it) => Number(it["episode"])));
855
+ const incompleteEpisodes = asList(plan["episodes"]).map((ep) => Number(ep["episode"])).filter((n) => !completedEpisodeNums.has(n));
856
+ const transientBlocked = incompleteEpisodes.filter((n) => transientEpisodeSet.has(n)).sort((a, b) => a - b);
857
+ const heldOutEpisodes = incompleteEpisodes.filter((n) => !transientEpisodeSet.has(n)).sort((a, b) => a - b);
858
+ if (transientBlocked.length > 0) {
1007
859
  updateRunState(workspace, {
1008
- status: sameFailuresRepeated ? "init_stalled" : "init_incomplete",
860
+ status: "init_incomplete",
1009
861
  init_stage: "batch_extract",
1010
- checkpoint,
1011
- batch_checkpoint: batchCheckpoint,
1012
862
  episode_total: asList(plan["episodes"]).length,
1013
863
  episode_completed: results.length,
1014
864
  episode_reused: skipped.length,
1015
- episode_failed: failedEpisodes.length,
1016
- failed_episodes: failedEpisodes,
865
+ episode_failed: incompleteEpisodes.length,
866
+ failed_episodes: transientBlocked,
867
+ held_out_episodes: heldOutEpisodes,
1017
868
  batch_total: asList(batchPlan["batches"]).length,
1018
869
  batch_completed: completedBatches,
1019
870
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1020
871
  batch_failed: failures.length,
1021
- failed_batches: failedBatches,
1022
- failure_signature: currentFailureSignature,
1023
- failure_streak: failureStreak,
1024
- last_error: { title: failureTitle, failed_at: checkpointTimestamp() },
1025
- exportable: false,
872
+ batch_terminal: terminalFailures.length,
873
+ transient_failed_batches: transientFailures.map((it) => strOf(it["batch_id"])),
874
+ terminal_failed_batches: terminalFailures.map((it) => strOf(it["batch_id"])),
875
+ last_error: { title: "INIT INCOMPLETE: Batch extraction failed", failed_at: checkpointTimestamp() },
1026
876
  });
1027
- const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
877
+ const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]} [${it["terminal"] ? "terminal" : "transient"}]: ${it["error_type"]} - ${it["message"]}`);
878
+ const next = [
879
+ "Run direct inspect --target issue to review failed batches.",
880
+ "Rerun the same init to retry transient failures; completed units are reused.",
881
+ ];
882
+ if (terminalFailures.length > 0) {
883
+ next.push("Terminal (content-filtered) batches will not clear on retry — use `direct override <unit> --from <file>` or soften the source.");
884
+ }
1028
885
  const report = {
1029
- title: failureTitle,
886
+ title: "INIT INCOMPLETE: Batch extraction failed",
1030
887
  result: [
1031
888
  `episodes total: ${asList(plan["episodes"]).length}`,
1032
889
  `completed: ${results.length}`,
1033
890
  `reused: ${skipped.length}`,
1034
- `failed episodes: ${failedEpisodes.length}`,
1035
- `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${failures.length} failed`,
891
+ `held out (terminal): ${heldOutEpisodes.length}`,
892
+ `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${transientFailures.length} transient, ${terminalFailures.length} terminal`,
1036
893
  `provider: ${providerName}`,
1037
894
  ],
1038
895
  artifacts: [
@@ -1045,81 +902,49 @@ export async function commandInit(opts) {
1045
902
  path.join(dd, "run_state.json"),
1046
903
  ],
1047
904
  issues,
1048
- next: nextSteps,
905
+ next,
1049
906
  };
1050
907
  return [report, EXIT_RUNTIME];
1051
908
  }
1052
909
  updateRunState(workspace, {
1053
910
  status: "init_running",
1054
911
  init_stage: "episode_merge",
1055
- checkpoint,
1056
- batch_checkpoint: batchCheckpoint,
1057
912
  episode_total: asList(plan["episodes"]).length,
1058
913
  episode_completed: results.length,
1059
914
  episode_reused: skipped.length,
1060
915
  episode_failed: 0,
1061
916
  failed_episodes: [],
917
+ held_out_episodes: heldOutEpisodes,
1062
918
  batch_total: asList(batchPlan["batches"]).length,
1063
919
  batch_completed: completedBatches,
1064
920
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1065
- batch_failed: 0,
1066
- failed_batches: [],
1067
- failure_signature: [],
1068
- failure_streak: 0,
921
+ batch_failed: terminalFailures.length,
922
+ batch_terminal: terminalFailures.length,
1069
923
  last_error: null,
1070
924
  });
925
+ // Drop transient/cleared error markers, but KEEP terminal ones so `direct
926
+ // status` and export gating can see which episodes are held out.
1071
927
  for (const dir of [batchResultsDir, episodeResultsDir]) {
1072
928
  if (!exists(dir))
1073
929
  continue;
1074
930
  for (const name of fs.readdirSync(dir)) {
1075
- if (name.endsWith(".error.json")) {
1076
- try {
1077
- deletePath(path.join(dir, name));
1078
- }
1079
- catch {
1080
- // ignore
1081
- }
1082
- }
1083
- }
1084
- }
1085
- try {
1086
- const batchResultsByEpisode = new Map();
1087
- for (const result of batchResults) {
1088
- const ep = Number(result["episode"] ?? 0);
1089
- if (!batchResultsByEpisode.has(ep))
1090
- batchResultsByEpisode.set(ep, []);
1091
- batchResultsByEpisode.get(ep).push(result);
1092
- }
1093
- const skippedSet = new Set(skipped);
1094
- for (const episode of asList(plan["episodes"])) {
1095
- const episodeNum = Number(episode["episode"]);
1096
- if (skippedSet.has(episodeNum))
931
+ if (!name.endsWith(".error.json"))
1097
932
  continue;
1098
- const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
1099
- validateEpisodeExtractionQuality(sourceText, episode, result);
1100
- results.push(result);
1101
- writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
1102
- updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
1103
- const errorPath = episodeErrorPath(episodeResultsDir, episode);
1104
- if (exists(errorPath))
1105
- deletePath(errorPath);
933
+ const errPath = path.join(dir, name);
934
+ try {
935
+ const err = readJson(errPath);
936
+ if (!isDict(err) || !err["terminal"])
937
+ deletePath(errPath);
938
+ }
939
+ catch {
940
+ deletePath(errPath);
941
+ }
1106
942
  }
1107
943
  }
1108
- catch (exc) {
1109
- const e = exc;
1110
- throw initFailedReport(workspace, {
1111
- title: "INIT FAILED: Episode merge failed",
1112
- stage: "episode_merge",
1113
- required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
1114
- received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1115
- nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
1116
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, batch_completed: completedBatches },
1117
- });
1118
- }
1119
944
  results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
1120
945
  let script;
1121
946
  try {
1122
- updateRunState(workspace, { status: "init_running", init_stage: "script_merge", checkpoint, batch_checkpoint: batchCheckpoint });
947
+ updateRunState(workspace, { status: "init_running", init_stage: "script_merge" });
1123
948
  script = mergeEpisodeResults(results, strOf(info["projectName"]) || path.basename(source, path.extname(source)));
1124
949
  }
1125
950
  catch (exc) {
@@ -1130,11 +955,11 @@ export async function commandInit(opts) {
1130
955
  required: ["complete episode_results/*.json"],
1131
956
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1132
957
  nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
1133
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
958
+ updates: { episode_completed: results.length },
1134
959
  });
1135
960
  }
1136
961
  try {
1137
- updateRunState(workspace, { status: "init_running", init_stage: "asset_curation", checkpoint, batch_checkpoint: batchCheckpoint });
962
+ updateRunState(workspace, { status: "init_running", init_stage: "asset_curation" });
1138
963
  const rawCuration = await providerExtractAssetCurationLocal(provider, sourceText, script);
1139
964
  const curation = curateScriptAssets(script, rawCuration);
1140
965
  writeJson(path.join(dd, "asset_curation.json"), curation);
@@ -1148,7 +973,7 @@ export async function commandInit(opts) {
1148
973
  required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
1149
974
  received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
1150
975
  nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
1151
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
976
+ updates: { episode_completed: results.length },
1152
977
  });
1153
978
  }
1154
979
  const e = exc;
@@ -1158,11 +983,11 @@ export async function commandInit(opts) {
1158
983
  required: ["provider location merge decisions and deterministic asset reuse curation"],
1159
984
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1160
985
  nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
1161
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
986
+ updates: { episode_completed: results.length },
1162
987
  });
1163
988
  }
1164
989
  try {
1165
- updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract", checkpoint, batch_checkpoint: batchCheckpoint });
990
+ updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract" });
1166
991
  let metadata = provider.extractMetadata ? await provider.extractMetadata(sourceText, script) : {};
1167
992
  if (!isDict(metadata))
1168
993
  metadata = {};
@@ -1178,7 +1003,7 @@ export async function commandInit(opts) {
1178
1003
  required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
1179
1004
  received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
1180
1005
  nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
1181
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
1006
+ updates: { episode_completed: results.length },
1182
1007
  });
1183
1008
  }
1184
1009
  const e = exc;
@@ -1188,12 +1013,12 @@ export async function commandInit(opts) {
1188
1013
  required: ["provider metadata for worldview, role_type, and asset descriptions"],
1189
1014
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1190
1015
  nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
1191
- updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
1016
+ updates: { episode_completed: results.length },
1192
1017
  });
1193
1018
  }
1194
1019
  const scriptPath = path.join(dd, "script.initial.json");
1195
1020
  writeJson(scriptPath, script);
1196
- updateRunState(workspace, { status: "init_running", init_stage: "validate", checkpoint, batch_checkpoint: batchCheckpoint });
1021
+ updateRunState(workspace, { status: "init_running", init_stage: "validate" });
1197
1022
  let validation;
1198
1023
  try {
1199
1024
  validation = validateScript(workspace, scriptPath);
@@ -1206,7 +1031,7 @@ export async function commandInit(opts) {
1206
1031
  required: ["script.initial.json that can be validated"],
1207
1032
  received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
1208
1033
  nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
1209
- updates: { checkpoint, script_path: scriptPath },
1034
+ updates: { script_path: scriptPath },
1210
1035
  });
1211
1036
  }
1212
1037
  const passed = Boolean(validation["passed"]);
@@ -1215,10 +1040,6 @@ export async function commandInit(opts) {
1215
1040
  status,
1216
1041
  command: "direct init",
1217
1042
  init_stage: "complete",
1218
- checkpoint,
1219
- batch_checkpoint: batchCheckpoint,
1220
- checkpoint_reused: checkpointReused,
1221
- batch_checkpoint_reused: batchCheckpointReused,
1222
1043
  provider: providerName,
1223
1044
  model,
1224
1045
  concurrency,
@@ -1234,19 +1055,17 @@ export async function commandInit(opts) {
1234
1055
  episode_reused: skipped.length,
1235
1056
  episode_failed: 0,
1236
1057
  failed_episodes: [],
1058
+ held_out_episodes: heldOutEpisodes,
1237
1059
  batch_total: asList(batchPlan["batches"]).length,
1238
1060
  batch_completed: completedBatches,
1239
1061
  batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
1240
- batch_failed: 0,
1241
- failed_batches: [],
1242
- failure_signature: [],
1243
- failure_streak: 0,
1062
+ batch_failed: terminalFailures.length,
1063
+ batch_terminal: terminalFailures.length,
1244
1064
  last_error: null,
1245
1065
  review_status: "pending",
1246
1066
  review_missing: [...REVIEW_TARGETS],
1247
1067
  inspected_targets: [],
1248
1068
  patch_count: 0,
1249
- exportable: providerName !== "mock",
1250
1069
  });
1251
1070
  const title = passed
1252
1071
  ? "INIT COMPLETE: Initial script ready"
@@ -1260,9 +1079,9 @@ export async function commandInit(opts) {
1260
1079
  `actions: ${stats["actions"] ?? 0}`,
1261
1080
  `validation: ${passed ? "passed" : "needs repair"}`,
1262
1081
  `provider: ${providerName}`,
1263
- `episode checkpoint reused: ${skipped.length}`,
1082
+ `episodes reused: ${skipped.length}`,
1264
1083
  `batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed`,
1265
- `batch checkpoint reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
1084
+ `batches reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
1266
1085
  "agent_review: pending",
1267
1086
  ],
1268
1087
  artifacts: [
@@ -1288,277 +1107,217 @@ export async function commandInit(opts) {
1288
1107
  };
1289
1108
  return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
1290
1109
  }
1291
- export function summarizeIssues(issues) {
1292
- if (issues.length === 0)
1293
- return [];
1294
- const counts = {};
1295
- for (const item of issues) {
1296
- const sev = strOf(item["severity"]);
1297
- counts[sev] = (counts[sev] ?? 0) + 1;
1298
- }
1299
- const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
1300
- const first = issues[0];
1301
- return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
1302
- }
1303
1110
  // ---------------------------------------------------------------------------
1304
- // command_parse — subagent-authored md workspace → script.initial.json
1305
- //
1306
- // Deterministic, no-LLM counterpart of `direct init`, but a DISTINCT layout from
1307
- // write/direct: the 正文 md carries only the screenplay (no asset registration at
1308
- // all), and each asset KIND is registered in its own file —
1309
- // 人物.md / 场景.md / 道具.md / 发声源.md (+ optional 梗概.md for the whole-script
1310
- // synopsis). It assembles the same script.initial.json and hands off to the
1311
- // existing direct inspect/validate/export downstream (zero changes there).
1111
+ // command_override — inject a human extraction for a unit the provider can't
1112
+ // produce (content-filtered). The override is content-addressed exactly like a
1113
+ // provider result, so init reuses it and never re-calls the provider, and the
1114
+ // non-destructive GC never deletes it. We compute the input_hash from the plan
1115
+ // ourselves, so the operator never hand-edits source_span.
1312
1116
  // ---------------------------------------------------------------------------
1313
- const _EP_FILE_RE = /^ep[_-]?0*(\d+)\.(?:md|markdown)$/i;
1314
- const ASSET_DOC_SPECS = [
1315
- { kind: "actors", names: ["人物.md", "角色.md", "characters.md", "actors.md"] },
1316
- { kind: "locations", names: ["场景.md", "地点.md", "locations.md"] },
1317
- { kind: "props", names: ["道具.md", "props.md"] },
1318
- { kind: "speakers", names: ["发声源.md", "speakers.md"] },
1319
- ];
1320
- const SYNOPSIS_DOC_NAMES = ["梗概.md", "全文梗概.md", "synopsis.md"];
1321
- const META_DOC_NAMES = ["元信息.md", "meta.md"];
1322
- // Parse whole-script metadata from 元信息.md. Lines like `worldview: 现代` /
1323
- // `世界观:现代` / `- style: 都市甜宠` / `主角: 陈墨, 苏晴`.
1324
- function parseMetaDoc(text) {
1325
- const out = {};
1326
- for (const line of text.split(/\r?\n/)) {
1327
- const m = /^\s*[-*]?\s*(title|worldview|style|protagonists|标题|世界观|风格|主角|主角列表)\s*[::]\s*(.+?)\s*$/i.exec(line);
1328
- if (!m)
1329
- continue;
1330
- const key = m[1].toLowerCase();
1331
- const val = m[2].trim();
1332
- if (!val)
1333
- continue;
1334
- if (key === "title" || key === "标题")
1335
- out.title = val;
1336
- else if (key === "worldview" || key === "世界观")
1337
- out.worldview = val;
1338
- else if (key === "style" || key === "风格")
1339
- out.style = val;
1340
- else if (key === "protagonists" || key === "主角" || key === "主角列表") {
1341
- out.protagonists = val.split(/[,,、]/).map((s) => s.trim()).filter(Boolean);
1342
- }
1117
+ export function commandOverride(opts) {
1118
+ const workspace = strOf(opts["workspace_path"] || "workspace");
1119
+ const unit = strOf(asList(opts["_args"])[0]).trim();
1120
+ const fromPath = strOf(opts["from"]).trim();
1121
+ const dd = directDir(workspace);
1122
+ const state = readRunState(workspace);
1123
+ const providerName = strOf(opts["provider"] || state["provider"] || DEFAULT_PROVIDER);
1124
+ const model = strOf(opts["model"] || state["model"] || DEFAULT_MODEL);
1125
+ const isEpisode = /^ep_\d+$/.test(unit);
1126
+ if (!isEpisode && !/^bat_\d+$/.test(unit)) {
1127
+ throw new CliError("OVERRIDE BLOCKED: Invalid unit", "Invalid unit key.", {
1128
+ exitCode: EXIT_USAGE,
1129
+ required: ["<unit>: ep_NNN or bat_NNNN"],
1130
+ received: [`<unit>: ${unit || "<empty>"}`],
1131
+ nextSteps: ["Pass an episode (ep_007) or batch (bat_0012) key shown by direct status."],
1132
+ });
1343
1133
  }
1344
- return out;
1345
- }
1346
- function firstExisting(dir, names) {
1347
- for (const n of names) {
1348
- const p = path.join(dir, n);
1349
- if (exists(p) && fs.statSync(p).isFile())
1350
- return p;
1134
+ if (!fromPath || !exists(fromPath)) {
1135
+ throw new CliError("OVERRIDE BLOCKED: --from not found", "Override source file not found.", {
1136
+ exitCode: EXIT_INPUT,
1137
+ required: ["--from <path>: readable JSON extraction for the unit"],
1138
+ received: [`--from: ${fromPath || "<missing>"}`],
1139
+ nextSteps: ["Provide a JSON file with scenes/actions for the unit."],
1140
+ });
1351
1141
  }
1352
- return null;
1353
- }
1354
- function collectEpisodeMdFiles(dir) {
1355
- if (!exists(dir) || !fs.statSync(dir).isDirectory())
1356
- return [];
1357
- const out = [];
1358
- for (const name of fs.readdirSync(dir)) {
1359
- const m = _EP_FILE_RE.exec(name);
1360
- if (!m)
1361
- continue;
1362
- const full = path.join(dir, name);
1363
- if (!fs.statSync(full).isFile())
1364
- continue;
1365
- out.push({ path: full, episode: parseInt(m[1], 10) });
1142
+ const planPath = path.join(dd, isEpisode ? "episode_plan.json" : "batch_plan.json");
1143
+ if (!exists(planPath)) {
1144
+ throw new CliError("OVERRIDE BLOCKED: Plan not found", "Plan not found.", {
1145
+ exitCode: EXIT_INPUT,
1146
+ required: [isEpisode ? "episode_plan.json" : "batch_plan.json"],
1147
+ received: [planPath],
1148
+ nextSteps: ["Run scriptctl direct init first."],
1149
+ });
1366
1150
  }
1367
- out.sort((a, b) => a.episode - b.episode);
1368
- return out;
1369
- }
1370
- export async function commandParse(opts) {
1371
- if (opts["spec"]) {
1372
- return [{ title: "PARSE SPEC: md 工作区写法", body: PARSE_MD_SPEC }, EXIT_OK];
1151
+ const plan = readJson(planPath);
1152
+ const planUnits = asList(plan[isEpisode ? "episodes" : "batches"]);
1153
+ const planItem = planUnits.find((u) => (isEpisode ? episodeResultKey(u) : batchResultKey(u)) === unit) ?? null;
1154
+ if (!planItem) {
1155
+ throw new CliError("OVERRIDE BLOCKED: Unit not in current plan", "Unit not in current plan.", {
1156
+ exitCode: EXIT_INPUT,
1157
+ required: [`${unit} present in ${isEpisode ? "episode_plan.json" : "batch_plan.json"}`],
1158
+ received: [`${unit}: not found among ${planUnits.length} units`],
1159
+ nextSteps: ["Use a unit key from direct status; rerun init if the plan changed."],
1160
+ });
1373
1161
  }
1374
- const workspace = strOf(opts["workspace_path"] || "workspace");
1375
- const args = asList(opts["_args"]);
1376
- const mdDir = strOf(opts["md_dir"] || args[0] || path.join(workspace, "parse"));
1377
- if (!exists(mdDir) || !fs.statSync(mdDir).isDirectory()) {
1378
- throw new CliError("PARSE BLOCKED: md workspace not found", "md workspace not found.", {
1162
+ const sourceTextPath = path.join(workspace, "source.txt");
1163
+ if (!exists(sourceTextPath)) {
1164
+ throw new CliError("OVERRIDE BLOCKED: source.txt missing", "source.txt missing.", {
1379
1165
  exitCode: EXIT_INPUT,
1380
- required: ["a directory with per-episode 正文 md + 人物/场景/道具/发声源 md"],
1381
- received: [mdDir],
1382
- nextSteps: ["Pass the md workspace dir: scriptctl parse <dir>. Run `scriptctl parse --spec` for the format."],
1166
+ required: [sourceTextPath],
1167
+ received: ["<missing>"],
1168
+ nextSteps: ["Run scriptctl direct init first."],
1383
1169
  });
1384
1170
  }
1385
- let episodesDir = strOf(opts["episodes_dir"]).trim();
1386
- if (!episodesDir) {
1387
- const sub = path.join(mdDir, "episodes");
1388
- episodesDir = exists(sub) && fs.statSync(sub).isDirectory() ? sub : mdDir;
1171
+ const sourceText = readText(sourceTextPath);
1172
+ let data;
1173
+ try {
1174
+ data = readJson(fromPath);
1389
1175
  }
1390
- const bodyFiles = collectEpisodeMdFiles(episodesDir);
1391
- if (bodyFiles.length === 0) {
1392
- throw new CliError("PARSE BLOCKED: no episode md found", "no episode md found.", {
1176
+ catch (exc) {
1177
+ throw new CliError("OVERRIDE BLOCKED: --from invalid JSON", "Override JSON invalid.", {
1393
1178
  exitCode: EXIT_INPUT,
1394
- required: ["per-episode body md named like ep_001.md"],
1395
- received: [episodesDir],
1396
- nextSteps: ["Add per-episode 正文 md (ep_001.md, ep_002.md, ...). Run `scriptctl parse --spec` for the format."],
1179
+ required: ["valid extraction JSON"],
1180
+ received: [`${fromPath}: ${exc.message}`],
1181
+ nextSteps: ["Fix the JSON and retry."],
1397
1182
  });
1398
1183
  }
1399
- // Each asset KIND is registered in its own file; the 正文 md carries none.
1400
- // Fold them all into one bible fragment (per-kind arrays).
1401
- const bible = { actors: [], locations: [], props: [], speakers: [], state_definitions: [] };
1402
- const assetDocsFound = [];
1403
- for (const spec of ASSET_DOC_SPECS) {
1404
- const p = firstExisting(mdDir, spec.names);
1405
- if (!p)
1406
- continue;
1407
- assetDocsFound.push(path.basename(p));
1408
- const parsed = parseAssetDoc(readText(p), spec.kind);
1409
- for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
1410
- bible[key].push(...asList(parsed[key]));
1411
- }
1184
+ const result = normalizeEpisodeResult(data, planItem);
1185
+ if (Number(result["episode"]) !== Number(planItem["episode"])) {
1186
+ throw new CliError("OVERRIDE BLOCKED: Episode mismatch", "Episode mismatch.", {
1187
+ exitCode: EXIT_USAGE,
1188
+ required: [`episode ${Number(planItem["episode"])}`],
1189
+ received: [`episode ${Number(result["episode"])}`],
1190
+ nextSteps: ["Provide an extraction for the correct episode."],
1191
+ });
1412
1192
  }
1413
- // Optional whole-script synopsis (梗概.md); strip a leading `# 梗概` header line.
1414
- let globalSynopsis = "";
1415
- const synPath = firstExisting(mdDir, SYNOPSIS_DOC_NAMES);
1416
- if (synPath)
1417
- globalSynopsis = readText(synPath).replace(/^\s*#\s+\S[^\n]*\n/, "").trim();
1418
- // Optional whole-script metadata (元信息.md): worldview / style / title.
1419
- const metaPath = firstExisting(mdDir, META_DOC_NAMES);
1420
- const meta = metaPath ? parseMetaDoc(readText(metaPath)) : {};
1421
- const results = [];
1422
- const sourceChunks = [];
1423
- for (const file of bodyFiles) {
1424
- const bodyText = readText(file.path);
1425
- sourceChunks.push(`# ep_${pad3(file.episode)}\n${bodyText.trim()}`);
1426
- try {
1427
- results.push(parseMarkdownBatch(bodyText, { episode: file.episode, part: 1 }, { fragmentMode: true }));
1428
- }
1429
- catch (exc) {
1430
- const e = exc;
1431
- throw new CliError("PARSE BLOCKED: episode md invalid", "episode md invalid.", {
1432
- exitCode: EXIT_INPUT,
1433
- required: ["per-episode 正文 md following `scriptctl parse --spec`"],
1434
- received: [`${path.basename(file.path)}: ${(e?.message ?? "").slice(0, 200)}`],
1435
- nextSteps: ["Fix the episode md and re-run parse."],
1436
- });
1437
- }
1193
+ try {
1194
+ if (isEpisode)
1195
+ validateEpisodeExtractionQuality(sourceText, planItem, result);
1196
+ else
1197
+ validateBatchExtractionQuality(sourceText, planItem, result);
1438
1198
  }
1439
- results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
1440
- // Fold the registered assets into the first episode result so their
1441
- // descriptions / states flow into the merge. Names are deduplicated globally
1442
- // by mergeEpisodeResults, so registering them first gives the canonical
1443
- // (registry) descriptions priority over anything implied by scene references.
1444
- if (results.length > 0) {
1445
- const first = results[0];
1446
- for (const key of ["actors", "locations", "props", "speakers", "state_definitions"]) {
1447
- first[key] = [...asList(bible[key]), ...asList(first[key])];
1448
- }
1199
+ catch (exc) {
1200
+ if (exc instanceof CliError)
1201
+ throw exc;
1202
+ throw new CliError("OVERRIDE BLOCKED: Extraction invalid", "Extraction invalid.", {
1203
+ exitCode: EXIT_USAGE,
1204
+ required: ["valid action types (dialogue/inner_thought/action)"],
1205
+ received: [exc.message.slice(0, 160)],
1206
+ nextSteps: ["Fix the override extraction and retry."],
1207
+ });
1449
1208
  }
1450
- const title = (meta.title || strOf(opts["title"]).trim() || path.basename(path.resolve(mdDir)));
1451
- const script = mergeEpisodeResults(results, title);
1452
- if (globalSynopsis)
1453
- script["synopsis"] = globalSynopsis;
1454
- if (meta.worldview) {
1455
- script["worldview"] = meta.worldview;
1456
- script["worldview_raw"] = meta.worldview;
1457
- }
1458
- if (meta.style)
1459
- script["style"] = meta.style;
1460
- // role_type (主角/配角) is md-driven via 元信息.md `主角:`, not a json patch.
1461
- if (meta.protagonists && meta.protagonists.length > 0) {
1462
- const leads = new Set(meta.protagonists);
1463
- for (const actor of asList(script["actors"])) {
1464
- actor["role_type"] = leads.has(strOf(actor["actor_name"])) ? "主角" : "配角";
1465
- }
1209
+ const dir = path.join(dd, isEpisode ? "episode_results" : "batch_results");
1210
+ fs.mkdirSync(dir, { recursive: true });
1211
+ const hash = computeUnitHash(sourceText, planItem, providerName, model);
1212
+ if (isEpisode) {
1213
+ writeJson(episodeResultPath(dir, planItem), compactEpisodeResult(result));
1214
+ stampEpisodeMeta(dir, planItem, hash, "override", providerName, model);
1215
+ const errPath = episodeErrorPath(dir, planItem);
1216
+ if (exists(errPath))
1217
+ deletePath(errPath);
1218
+ }
1219
+ else {
1220
+ persistBatchResult(dir, planItem, result);
1221
+ stampBatchMeta(dir, planItem, hash, "override", providerName, model);
1222
+ const errPath = batchErrorPath(dir, planItem);
1223
+ if (exists(errPath))
1224
+ deletePath(errPath);
1466
1225
  }
1226
+ const report = {
1227
+ title: "OVERRIDE COMPLETE: Unit extraction injected",
1228
+ result: [
1229
+ `unit: ${unit}`,
1230
+ `kind: ${isEpisode ? "episode" : "batch"}`,
1231
+ `provenance: override`,
1232
+ `provider/model: ${providerName} / ${model}`,
1233
+ `scenes: ${asList(result["scenes"]).length}`,
1234
+ ],
1235
+ artifacts: [dir, path.join(dd, "run_state.json")],
1236
+ next: ["Rerun scriptctl direct init — the override is reused without re-calling the provider."],
1237
+ };
1238
+ return [report, EXIT_OK];
1239
+ }
1240
+ // ---------------------------------------------------------------------------
1241
+ // command_status — rebuild the progress view from on-disk meta/error sidecars.
1242
+ // run_state is just a cache of this; deleting it loses nothing.
1243
+ // ---------------------------------------------------------------------------
1244
+ export function commandStatus(opts) {
1245
+ const workspace = strOf(opts["workspace_path"] || "workspace");
1467
1246
  const dd = directDir(workspace);
1468
- fs.mkdirSync(dd, { recursive: true });
1469
- // Write source.txt so the existing direct validate/export downstream (which
1470
- // gates on source.txt existing) works unchanged. For a parse-origin script the
1471
- // authored md *is* the source, so we persist the concatenated bodies.
1472
- fs.mkdirSync(workspace, { recursive: true });
1473
- fs.writeFileSync(path.join(workspace, "source.txt"), sourceChunks.join("\n\n") + "\n", "utf-8");
1474
- const scriptPath = path.join(dd, "script.initial.json");
1475
- writeJson(scriptPath, script);
1476
- const validation = validateScript(workspace, scriptPath, { requireSource: false });
1477
- const passed = Boolean(validation["passed"]);
1478
- updateRunState(workspace, {
1479
- status: passed ? "ready_for_agent" : "needs_agent_repair",
1480
- command: "parse",
1481
- init_stage: "complete",
1482
- provider: "parse",
1483
- source_path: path.resolve(mdDir),
1484
- script_path: scriptPath,
1485
- validation_path: path.join(dd, "validation.json"),
1486
- episode_total: results.length,
1487
- episode_completed: results.length,
1488
- review_status: "pending",
1489
- review_missing: [...REVIEW_TARGETS],
1490
- inspected_targets: [],
1491
- patch_count: 0,
1492
- exportable: true,
1493
- last_error: null,
1494
- });
1495
- const stats = validation["stats"] ?? {};
1496
- const blockingOrError = Boolean(validation["has_blocking"]) ||
1497
- asList(validation["issues"]).some((it) => isDict(it) && (it["severity"] === "blocking" || it["severity"] === "error"));
1498
- // --publish: md → 校验 → 直接入库(不经 direct 的 inspect/review/export 门禁)。
1499
- // md 是唯一真相源:校验不过就报问题、不发布,让 agent 直接改 md 后重跑 parse --publish。
1500
- if (opts["publish"]) {
1501
- if (blockingOrError) {
1502
- return [{
1503
- title: "PARSE PUBLISH BLOCKED: 校验未过,改 md 后重跑",
1504
- result: [
1505
- `asset docs: ${assetDocsFound.join(" / ") || "(none)"}`,
1506
- `validation: needs repair`,
1507
- ],
1508
- artifacts: [scriptPath, path.join(dd, "validation.json")],
1509
- issues: summarizeIssues(asList(validation["issues"])),
1510
- next: ["按 issue 直接改对应的 ep_*.md / 人物·场景·道具·发声源.md / 元信息.md,再重跑 `scriptctl parse <dir> --publish`。"],
1511
- }, EXIT_NEEDS_AGENT];
1512
- }
1513
- const client = scriptOutputClient(opts);
1514
- const baseRevision = await currentRevisionOrZero(client);
1515
- const scriptHash = sha256Text(JSON.stringify(sortDeep(script)));
1516
- const requestId = strOf(opts["request_id"]).trim() || `scriptctl-parse:${scriptHash}`;
1517
- let replaceRes;
1518
- try {
1519
- replaceRes = await client.replaceScript({ requestId, baseRevision, script, source: "ctl" });
1247
+ const episodePlanPath = path.join(dd, "episode_plan.json");
1248
+ const batchPlanPath = path.join(dd, "batch_plan.json");
1249
+ if (!exists(episodePlanPath) || !exists(batchPlanPath)) {
1250
+ throw new CliError("STATUS BLOCKED: Plan not found", "Plan not found.", {
1251
+ exitCode: EXIT_INPUT,
1252
+ required: ["episode_plan.json and batch_plan.json"],
1253
+ received: [exists(episodePlanPath) ? "episode_plan.json ok" : "episode_plan.json missing"],
1254
+ nextSteps: ["Run scriptctl direct init first."],
1255
+ });
1256
+ }
1257
+ const episodes = asList(readJson(episodePlanPath)["episodes"]);
1258
+ const batches = asList(readJson(batchPlanPath)["batches"]);
1259
+ const episodeResultsDir = path.join(dd, "episode_results");
1260
+ const batchResultsDir = path.join(dd, "batch_results");
1261
+ const count = { ok: 0, override: 0, recovered: 0, terminal: 0, missing: 0 };
1262
+ for (const batch of batches) {
1263
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
1264
+ if (!meta) {
1265
+ count.missing++;
1266
+ continue;
1520
1267
  }
1521
- catch (exc) {
1522
- if (exc instanceof ScriptOutputApiError)
1523
- throw apiErrorToCli("PARSE PUBLISH BLOCKED: 入库写入失败", exc);
1524
- throw exc;
1268
+ if (meta["status"] === "terminal") {
1269
+ count.terminal++;
1270
+ continue;
1525
1271
  }
1526
- return [{
1527
- title: "PARSE PUBLISHED: 剧本已入库",
1528
- result: [
1529
- `episodes: ${stats["episodes"] ?? results.length}`,
1530
- `scenes: ${stats["scenes"] ?? 0}`,
1531
- `actions: ${stats["actions"] ?? 0}`,
1532
- `worldview: ${meta.worldview || "(unset)"}`,
1533
- `base_revision: ${baseRevision}`,
1534
- `revision: ${replaceRes["revision"]}`,
1535
- `idempotent: ${replaceRes["idempotent"] ?? false}`,
1536
- ],
1537
- artifacts: [scriptPath],
1538
- next: ["剧本已入库。若发现问题,直接改 md `scriptctl parse <dir> --publish` 覆盖。"],
1539
- }, EXIT_OK];
1272
+ count.ok++;
1273
+ if (meta["provenance"] === "override")
1274
+ count.override++;
1275
+ else if (meta["provenance"] === "recovered")
1276
+ count.recovered++;
1277
+ }
1278
+ const completedEpisodes = [];
1279
+ for (const ep of episodes) {
1280
+ const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
1281
+ if (meta && meta["status"] === "ok")
1282
+ completedEpisodes.push(Number(ep["episode"]));
1283
+ }
1284
+ // Held out = episodes with at least one terminal batch and no episode result.
1285
+ const completedSet = new Set(completedEpisodes);
1286
+ const heldOut = new Set();
1287
+ for (const batch of batches) {
1288
+ const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
1289
+ const epNum = Number(batch["episode"]);
1290
+ if (meta && meta["status"] === "terminal" && !completedSet.has(epNum))
1291
+ heldOut.add(epNum);
1540
1292
  }
1293
+ const heldOutEpisodes = [...heldOut].sort((a, b) => a - b);
1541
1294
  const report = {
1542
- title: passed
1543
- ? "PARSE COMPLETE: 中间稿已生成(加 --publish 直接入库)"
1544
- : "PARSE NEEDS AGENT: 校验有问题,直接改 md 再 parse",
1295
+ title: "DIRECT STATUS",
1545
1296
  result: [
1546
- `episodes: ${stats["episodes"] ?? results.length}`,
1547
- `scenes: ${stats["scenes"] ?? 0}`,
1548
- `actions: ${stats["actions"] ?? 0}`,
1549
- `asset docs: ${assetDocsFound.join(" / ") || "(none)"}`,
1550
- `worldview: ${meta.worldview || "(unset)"}`,
1551
- `style: ${meta.style || "(unset)"}`,
1552
- `synopsis: ${globalSynopsis ? "yes" : "no"}`,
1553
- `validation: ${passed ? "passed" : "needs repair"}`,
1297
+ `episodes: ${completedEpisodes.length}/${episodes.length} complete`,
1298
+ `batches: ${count.ok}/${batches.length} ok (override ${count.override}, recovered ${count.recovered})`,
1299
+ `terminal batches: ${count.terminal}`,
1300
+ `pending batches: ${count.missing}`,
1301
+ `held out episodes: ${heldOutEpisodes.length === 0 ? "-" : heldOutEpisodes.join(", ")}`,
1554
1302
  ],
1555
- artifacts: [scriptPath, path.join(dd, "validation.json")],
1556
- issues: summarizeIssues(asList(validation["issues"])),
1557
- next: passed
1558
- ? [" `--publish` 直接入库。"]
1559
- : ["按 issue 直接改对应的 md(ep_*.md / 资产 md / 元信息.md),再重跑 parse。"],
1303
+ artifacts: [batchResultsDir, episodeResultsDir, path.join(dd, "run_state.json")],
1304
+ next: heldOutEpisodes.length > 0
1305
+ ? ["Override held-out episodes with direct override, or export 32/33 with direct export --allow-incomplete."]
1306
+ : ["All units accounted for."],
1560
1307
  };
1561
- return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
1308
+ return [report, EXIT_OK];
1309
+ }
1310
/**
 * Condense a list of issue records into a short, two-line summary.
 *
 * @param {Array<object>} issues - Issue records; each one is read for its
 *   "severity" key, and the first one also for "code" and "summary".
 * @returns {string[]} An empty array when `issues` is empty. Otherwise two
 *   strings: a "; "-joined per-severity count (labels ordered with
 *   `localeCompare`), and a "first: <code> - <summary>" line describing
 *   the first issue in the list.
 */
export function summarizeIssues(issues) {
    if (issues.length === 0) {
        return [];
    }
    // Tally how many issues carry each severity label.
    // NOTE(review): strOf is defined elsewhere; presumably it coerces the value to a string — confirm.
    const tally = {};
    for (const issue of issues) {
        const label = strOf(issue["severity"]);
        const seenSoFar = tally[label] ?? 0;
        tally[label] = seenSoFar + 1;
    }
    // Order the labels so the breakdown does not depend on input order.
    const labels = Object.keys(tally);
    labels.sort((left, right) => left.localeCompare(right));
    const breakdown = labels.map((label) => `${label}: ${tally[label]}`).join("; ");
    // Only the first issue is spelled out in detail.
    const head = issues[0];
    const headline = `first: ${head["code"]} - ${head["summary"]}`;
    return [breakdown, headline];
}
1563
1322
  // ---------------------------------------------------------------------------
1564
1323
  // command_validate