@lingjingai/scriptctl 0.11.4 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +14 -3
- package/dist/cli.js.map +1 -1
- package/dist/common.d.ts +1 -1
- package/dist/common.js +4 -1
- package/dist/common.js.map +1 -1
- package/dist/domain/direct-core.d.ts +2 -0
- package/dist/domain/direct-core.js +30 -3
- package/dist/domain/direct-core.js.map +1 -1
- package/dist/domain/script-core.d.ts +1 -0
- package/dist/domain/script-core.js +11 -2
- package/dist/domain/script-core.js.map +1 -1
- package/dist/help-text.js +2 -2
- package/dist/infra/providers.js +16 -0
- package/dist/infra/providers.js.map +1 -1
- package/dist/usecases/direct.d.ts +5 -3
- package/dist/usecases/direct.js +410 -651
- package/dist/usecases/direct.js.map +1 -1
- package/dist/usecases/parse.d.ts +15 -0
- package/dist/usecases/parse.js +324 -0
- package/dist/usecases/parse.js.map +1 -0
- package/dist/usecases/script.js +25 -3
- package/dist/usecases/script.js.map +1 -1
- package/package.json +1 -1
package/dist/usecases/direct.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
-
import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE,
|
|
4
|
-
import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt,
|
|
3
|
+
import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
|
|
4
|
+
import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, classifyProviderError, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
|
|
5
5
|
import { validateScript } from "../domain/script-core.js";
|
|
6
|
-
import { ScriptOutputApiError } from "../infra/script-output-store.js";
|
|
7
|
-
import { apiErrorToCli, currentRevisionOrZero, scriptOutputClient, sortDeep } from "./script.js";
|
|
8
6
|
import { makeProvider } from "../infra/providers.js";
|
|
9
7
|
import { makeSourceManifest, prepareSource, } from "../infra/converters.js";
|
|
10
8
|
function strOf(v) {
|
|
@@ -56,18 +54,6 @@ export function readRunState(workspace) {
|
|
|
56
54
|
return {};
|
|
57
55
|
}
|
|
58
56
|
}
|
|
59
|
-
function failureSignature(items) {
|
|
60
|
-
if (!isList(items))
|
|
61
|
-
return [];
|
|
62
|
-
const out = [];
|
|
63
|
-
for (const item of items) {
|
|
64
|
-
const s = strOf(item).trim();
|
|
65
|
-
if (s)
|
|
66
|
-
out.push(s);
|
|
67
|
-
}
|
|
68
|
-
out.sort();
|
|
69
|
-
return out;
|
|
70
|
-
}
|
|
71
57
|
export function addInspectedTarget(workspace, target) {
|
|
72
58
|
const state = readRunState(workspace);
|
|
73
59
|
const targets = [];
|
|
@@ -149,9 +135,6 @@ function episodeErrorPath(dir, ep) {
|
|
|
149
135
|
function episodeResultKey(ep) {
|
|
150
136
|
return `ep_${pad3(Number(ep["episode"]))}`;
|
|
151
137
|
}
|
|
152
|
-
function episodeResultsIndexPath(dir) {
|
|
153
|
-
return path.join(dir, "index.json");
|
|
154
|
-
}
|
|
155
138
|
function batchResultKey(batch) {
|
|
156
139
|
const bid = strOf(batch["batch_id"]).trim();
|
|
157
140
|
if (bid)
|
|
@@ -167,9 +150,6 @@ function batchMarkdownPath(dir, batch) {
|
|
|
167
150
|
function batchErrorPath(dir, batch) {
|
|
168
151
|
return path.join(dir, `${batchResultKey(batch)}.error.json`);
|
|
169
152
|
}
|
|
170
|
-
function batchResultsIndexPath(dir) {
|
|
171
|
-
return path.join(dir, "index.json");
|
|
172
|
-
}
|
|
173
153
|
function persistBatchResult(dir, batch, result) {
|
|
174
154
|
const rawMd = result["_raw_markdown"];
|
|
175
155
|
delete result["_raw_markdown"];
|
|
@@ -183,130 +163,81 @@ function persistBatchResult(dir, batch, result) {
|
|
|
183
163
|
deletePath(mdPath);
|
|
184
164
|
}
|
|
185
165
|
}
|
|
186
|
-
function
|
|
187
|
-
|
|
188
|
-
if (!exists(p))
|
|
189
|
-
return { version: 1, batches: {} };
|
|
190
|
-
let data;
|
|
191
|
-
try {
|
|
192
|
-
data = readJson(p);
|
|
193
|
-
}
|
|
194
|
-
catch {
|
|
195
|
-
return { version: 1, batches: {} };
|
|
196
|
-
}
|
|
197
|
-
if (!isDict(data))
|
|
198
|
-
return { version: 1, batches: {} };
|
|
199
|
-
if (!isDict(data["batches"]))
|
|
200
|
-
data["batches"] = {};
|
|
201
|
-
if (!("version" in data))
|
|
202
|
-
data["version"] = 1;
|
|
203
|
-
return data;
|
|
204
|
-
}
|
|
205
|
-
function writeBatchResultsIndex(dir, index) {
|
|
206
|
-
writeJson(batchResultsIndexPath(dir), index);
|
|
207
|
-
}
|
|
208
|
-
function updateBatchResultMetadata(dir, batch, providerName, model) {
|
|
209
|
-
const index = readBatchResultsIndex(dir);
|
|
210
|
-
const batches = index["batches"] ?? {};
|
|
211
|
-
batches[batchResultKey(batch)] = {
|
|
212
|
-
episode: Number(batch["episode"]),
|
|
213
|
-
part: Number(batch["part"]),
|
|
214
|
-
provider: providerName,
|
|
215
|
-
model,
|
|
216
|
-
extracted_at: checkpointTimestamp(),
|
|
217
|
-
};
|
|
218
|
-
index["batches"] = batches;
|
|
219
|
-
writeBatchResultsIndex(dir, index);
|
|
166
|
+
function episodeMetaPath(dir, ep) {
|
|
167
|
+
return path.join(dir, `${episodeResultKey(ep)}.meta.json`);
|
|
220
168
|
}
|
|
221
|
-
function
|
|
222
|
-
|
|
223
|
-
const batches = index["batches"] ?? {};
|
|
224
|
-
const key = batchResultKey(batch);
|
|
225
|
-
if (key in batches) {
|
|
226
|
-
delete batches[key];
|
|
227
|
-
index["batches"] = batches;
|
|
228
|
-
writeBatchResultsIndex(dir, index);
|
|
229
|
-
}
|
|
169
|
+
function batchMetaPath(dir, batch) {
|
|
170
|
+
return path.join(dir, `${batchResultKey(batch)}.meta.json`);
|
|
230
171
|
}
|
|
231
|
-
function
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
return { version: 1, episodes: {} };
|
|
235
|
-
let data;
|
|
172
|
+
function readUnitMeta(metaPath) {
|
|
173
|
+
if (!exists(metaPath))
|
|
174
|
+
return null;
|
|
236
175
|
try {
|
|
237
|
-
data = readJson(
|
|
176
|
+
const data = readJson(metaPath);
|
|
177
|
+
return isDict(data) ? data : null;
|
|
238
178
|
}
|
|
239
179
|
catch {
|
|
240
|
-
return
|
|
241
|
-
}
|
|
242
|
-
if (!isDict(data))
|
|
243
|
-
return { version: 1, episodes: {} };
|
|
244
|
-
if (!isDict(data["episodes"]))
|
|
245
|
-
data["episodes"] = {};
|
|
246
|
-
if (!("version" in data))
|
|
247
|
-
data["version"] = 1;
|
|
248
|
-
return data;
|
|
180
|
+
return null;
|
|
181
|
+
}
|
|
249
182
|
}
|
|
250
|
-
function
|
|
251
|
-
|
|
183
|
+
function writeUnitMeta(metaPath, meta) {
|
|
184
|
+
fs.mkdirSync(path.dirname(metaPath), { recursive: true });
|
|
185
|
+
writeJson(metaPath, meta);
|
|
252
186
|
}
|
|
253
|
-
function
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
episodes[episodeResultKey(ep)] = {
|
|
257
|
-
provider: providerName,
|
|
258
|
-
model,
|
|
259
|
-
extracted_at: checkpointTimestamp(),
|
|
260
|
-
};
|
|
261
|
-
index["episodes"] = episodes;
|
|
262
|
-
writeEpisodeResultsIndex(dir, index);
|
|
187
|
+
function removeUnitMeta(metaPath) {
|
|
188
|
+
if (exists(metaPath))
|
|
189
|
+
deletePath(metaPath);
|
|
263
190
|
}
|
|
264
|
-
function
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
delete episodes[key];
|
|
270
|
-
index["episodes"] = episodes;
|
|
271
|
-
writeEpisodeResultsIndex(dir, index);
|
|
272
|
-
}
|
|
191
|
+
function stampEpisodeMeta(dir, ep, inputHash, provenance, providerName, model) {
|
|
192
|
+
writeUnitMeta(episodeMetaPath(dir, ep), {
|
|
193
|
+
schema: 1, key: episodeResultKey(ep), episode: Number(ep["episode"]),
|
|
194
|
+
input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
|
|
195
|
+
});
|
|
273
196
|
}
|
|
274
|
-
function
|
|
275
|
-
|
|
276
|
-
|
|
197
|
+
function stampBatchMeta(dir, batch, inputHash, provenance, providerName, model) {
|
|
198
|
+
writeUnitMeta(batchMetaPath(dir, batch), {
|
|
199
|
+
schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
|
|
200
|
+
input_hash: inputHash, provenance, status: "ok", provider: providerName, model, extracted_at: checkpointTimestamp(),
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
// Content-address a single episode/batch plan unit: the contract version, the
|
|
204
|
+
// exact source span text, the title-stable plan item, and provider/model. Any
|
|
205
|
+
// change to what would alter extraction rotates the hash for THAT unit only.
|
|
206
|
+
export function computeUnitHash(sourceText, unit, providerName, model) {
|
|
207
|
+
const span = isDict(unit["source_span"]) ? unit["source_span"] : {};
|
|
208
|
+
const start = Number(span["start"] ?? 0);
|
|
209
|
+
const end = Number(span["end"] ?? 0);
|
|
210
|
+
const spanText = sourceText.slice(start, end);
|
|
211
|
+
const planText = JSON.stringify(unit, checkpointReplacer());
|
|
212
|
+
return sha256Text([String(DIRECT_CONTRACT_VERSION), spanText, planText, providerName ?? "", model ?? ""].join("\u0000"));
|
|
213
|
+
}
|
|
214
|
+
// Delete result/meta/error/markdown files whose unit key is no longer in the
|
|
215
|
+
// current plan (e.g. the source shed an episode). Pure function of the plan —
|
|
216
|
+
// it never inspects hashes, content, or run_state, so it can only remove units
|
|
217
|
+
// the plan no longer references. Also retires the legacy v3 `index.json`.
|
|
218
|
+
function gcOrphanUnits(dir, liveKeys) {
|
|
219
|
+
if (!exists(dir))
|
|
220
|
+
return [];
|
|
221
|
+
const removed = [];
|
|
222
|
+
for (const name of fs.readdirSync(dir)) {
|
|
223
|
+
if (name === "index.json") {
|
|
224
|
+
deletePath(path.join(dir, name));
|
|
225
|
+
removed.push(name);
|
|
277
226
|
continue;
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
227
|
+
}
|
|
228
|
+
const key = name.replace(/\.(meta\.json|error\.json|json|md)$/, "");
|
|
229
|
+
if (key === name)
|
|
230
|
+
continue; // not a recognized unit artifact
|
|
231
|
+
if (!liveKeys.has(key)) {
|
|
232
|
+
deletePath(path.join(dir, name));
|
|
233
|
+
removed.push(name);
|
|
284
234
|
}
|
|
285
235
|
}
|
|
286
|
-
return
|
|
287
|
-
}
|
|
288
|
-
export function initCheckpoint(sourceText, plan) {
|
|
289
|
-
const planText = JSON.stringify(plan, checkpointReplacer());
|
|
290
|
-
return {
|
|
291
|
-
contract_version: DIRECT_CONTRACT_VERSION,
|
|
292
|
-
source_sha256: sha256Text(sourceText),
|
|
293
|
-
episode_plan_sha256: sha256Text(planText),
|
|
294
|
-
total_episodes: Number(plan["total_episodes"] ?? asList(plan["episodes"]).length),
|
|
295
|
-
};
|
|
296
|
-
}
|
|
297
|
-
export function initBatchCheckpoint(sourceText, batchPlan) {
|
|
298
|
-
const planText = JSON.stringify(batchPlan, checkpointReplacer());
|
|
299
|
-
return {
|
|
300
|
-
contract_version: DIRECT_CONTRACT_VERSION,
|
|
301
|
-
source_sha256: sha256Text(sourceText),
|
|
302
|
-
batch_plan_sha256: sha256Text(planText),
|
|
303
|
-
total_batches: Number(batchPlan["total_batches"] ?? asList(batchPlan["batches"]).length),
|
|
304
|
-
};
|
|
236
|
+
return removed;
|
|
305
237
|
}
|
|
306
238
|
// Title fields are LLM-mutated downstream by enrichEpisodePlanTitles, so they
|
|
307
|
-
// must be excluded from
|
|
308
|
-
//
|
|
309
|
-
// re-extracts from scratch.
|
|
239
|
+
// must be excluded from unit hashes — otherwise every rerun gets a fresh SHA,
|
|
240
|
+
// the cached unit never matches, and that unit re-extracts from scratch.
|
|
310
241
|
const CHECKPOINT_UNSTABLE_KEYS = new Set(["title", "generated_title", "title_status", "title_source"]);
|
|
311
242
|
function checkpointReplacer() {
|
|
312
243
|
// Python's json.dumps(sort_keys=True) sorts keys recursively. Replicate by walking and sorting.
|
|
@@ -325,121 +256,47 @@ function checkpointReplacer() {
|
|
|
325
256
|
return value;
|
|
326
257
|
};
|
|
327
258
|
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
function
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
function resetInitOutputs(dd) {
|
|
341
|
-
for (const dirname of ["episode_results", "batch_results"]) {
|
|
342
|
-
const target = path.join(dd, dirname);
|
|
343
|
-
if (exists(target))
|
|
344
|
-
deleteTree(target);
|
|
345
|
-
}
|
|
346
|
-
for (const name of ["script.initial.json", "validation.json", "batch_plan.json", "asset_curation.json", "asset_metadata.json"]) {
|
|
347
|
-
const p = path.join(dd, name);
|
|
348
|
-
if (exists(p))
|
|
349
|
-
deletePath(p);
|
|
350
|
-
}
|
|
351
|
-
}
|
|
352
|
-
function resetBatchOutputs(dd) {
|
|
353
|
-
const batchResultsDir = path.join(dd, "batch_results");
|
|
354
|
-
if (exists(batchResultsDir))
|
|
355
|
-
deleteTree(batchResultsDir);
|
|
356
|
-
}
|
|
357
|
-
function loadCheckpointedEpisode(sourceText, episodeResultsDir, ep, providerName, model, previousProvider) {
|
|
259
|
+
// Non-destructive reuse: a cached episode result is reusable iff its sidecar
|
|
260
|
+
// meta records the same input_hash we compute for the current plan unit. The
|
|
261
|
+
// hash subsumes the old provider / source_span / episode-number / contract
|
|
262
|
+
// checks — any of those changing rotates the hash. On any mismatch or read
|
|
263
|
+
// failure we return null and let the caller re-extract and overwrite; we NEVER
|
|
264
|
+
// delete the cached file pre-emptively (that was the data-loss root cause).
|
|
265
|
+
export function loadCachedEpisode(sourceText, episodeResultsDir, ep, expectedHash) {
|
|
266
|
+
const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
|
|
267
|
+
if (!meta || meta["input_hash"] !== expectedHash)
|
|
268
|
+
return null;
|
|
269
|
+
if (meta["status"] === "terminal")
|
|
270
|
+
return null;
|
|
358
271
|
const p = episodeResultPath(episodeResultsDir, ep);
|
|
359
272
|
if (!exists(p))
|
|
360
273
|
return null;
|
|
361
|
-
let result;
|
|
362
274
|
try {
|
|
363
|
-
const
|
|
364
|
-
const metadata = isDict(data["_scriptctl"]) ? data["_scriptctl"] : {};
|
|
365
|
-
const index = readEpisodeResultsIndex(episodeResultsDir);
|
|
366
|
-
let indexEntry = {};
|
|
367
|
-
const eps = index["episodes"];
|
|
368
|
-
if (isDict(eps)) {
|
|
369
|
-
const entry = eps[episodeResultKey(ep)];
|
|
370
|
-
if (isDict(entry))
|
|
371
|
-
indexEntry = entry;
|
|
372
|
-
}
|
|
373
|
-
const resultProvider = strOf(metadata["provider"] || indexEntry["provider"] || previousProvider).trim();
|
|
374
|
-
if (providerName && resultProvider && resultProvider !== providerName) {
|
|
375
|
-
throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
|
|
376
|
-
}
|
|
377
|
-
result = normalizeEpisodeResult(data, ep);
|
|
275
|
+
const result = normalizeEpisodeResult(readJson(p), ep);
|
|
378
276
|
validateEpisodeExtractionQuality(sourceText, ep, result);
|
|
379
|
-
|
|
380
|
-
writeJson(p, compactEpisodeResult(result));
|
|
381
|
-
if (providerName && model)
|
|
382
|
-
updateEpisodeResultMetadata(episodeResultsDir, ep, providerName, model);
|
|
383
|
-
}
|
|
277
|
+
return result;
|
|
384
278
|
}
|
|
385
279
|
catch {
|
|
386
|
-
try {
|
|
387
|
-
deletePath(p);
|
|
388
|
-
}
|
|
389
|
-
catch {
|
|
390
|
-
// ignore
|
|
391
|
-
}
|
|
392
|
-
removeEpisodeResultMetadata(episodeResultsDir, ep);
|
|
393
280
|
return null;
|
|
394
281
|
}
|
|
395
|
-
|
|
282
|
+
}
|
|
283
|
+
export function loadCachedBatch(sourceText, batchResultsDir, batch, expectedHash) {
|
|
284
|
+
const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
|
|
285
|
+
if (!meta || meta["input_hash"] !== expectedHash)
|
|
396
286
|
return null;
|
|
397
|
-
if (
|
|
287
|
+
if (meta["status"] === "terminal")
|
|
398
288
|
return null;
|
|
399
|
-
return result;
|
|
400
|
-
}
|
|
401
|
-
function loadCheckpointedBatch(sourceText, batchResultsDir, batch, providerName, model, previousProvider) {
|
|
402
289
|
const p = batchResultPath(batchResultsDir, batch);
|
|
403
290
|
if (!exists(p))
|
|
404
291
|
return null;
|
|
405
|
-
let result;
|
|
406
292
|
try {
|
|
407
|
-
const
|
|
408
|
-
const index = readBatchResultsIndex(batchResultsDir);
|
|
409
|
-
let indexEntry = {};
|
|
410
|
-
const batches = index["batches"];
|
|
411
|
-
if (isDict(batches)) {
|
|
412
|
-
const entry = batches[batchResultKey(batch)];
|
|
413
|
-
if (isDict(entry))
|
|
414
|
-
indexEntry = entry;
|
|
415
|
-
}
|
|
416
|
-
const resultProvider = strOf(indexEntry["provider"] || previousProvider).trim();
|
|
417
|
-
if (providerName && resultProvider && resultProvider !== providerName) {
|
|
418
|
-
throw new Error(`checkpoint provider mismatch: ${resultProvider} != ${providerName}`);
|
|
419
|
-
}
|
|
420
|
-
result = normalizeEpisodeResult(data, batch);
|
|
293
|
+
const result = normalizeEpisodeResult(readJson(p), batch);
|
|
421
294
|
validateBatchExtractionQuality(sourceText, batch, result);
|
|
422
|
-
|
|
423
|
-
persistBatchResult(batchResultsDir, batch, result);
|
|
424
|
-
if (providerName && model)
|
|
425
|
-
updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
|
|
426
|
-
}
|
|
295
|
+
return result;
|
|
427
296
|
}
|
|
428
297
|
catch {
|
|
429
|
-
try {
|
|
430
|
-
deletePath(p);
|
|
431
|
-
}
|
|
432
|
-
catch {
|
|
433
|
-
// ignore
|
|
434
|
-
}
|
|
435
|
-
removeBatchResultMetadata(batchResultsDir, batch);
|
|
436
298
|
return null;
|
|
437
299
|
}
|
|
438
|
-
if (Number(result["episode"] ?? 0) !== Number(batch["episode"]))
|
|
439
|
-
return null;
|
|
440
|
-
if (JSON.stringify(result["source_span"]) !== JSON.stringify(batch["source_span"]))
|
|
441
|
-
return null;
|
|
442
|
-
return result;
|
|
443
300
|
}
|
|
444
301
|
function mergeScene(target, source) {
|
|
445
302
|
if ((target["location_name"] === "" || target["location_name"] === "未知场景" || target["location_name"] === null || target["location_name"] === undefined) &&
|
|
@@ -532,33 +389,9 @@ async function providerExtractAssetCurationLocal(provider, sourceText, script) {
|
|
|
532
389
|
}
|
|
533
390
|
return {};
|
|
534
391
|
}
|
|
535
|
-
function
|
|
536
|
-
const err = exc;
|
|
537
|
-
const error = {
|
|
538
|
-
episode: Number(ep["episode"]),
|
|
539
|
-
title: ep["title"],
|
|
540
|
-
source_span: ep["source_span"],
|
|
541
|
-
error_type: err?.name || "Error",
|
|
542
|
-
message: (err?.message || err?.name || "Error").slice(0, 500),
|
|
543
|
-
failed_at: checkpointTimestamp(),
|
|
544
|
-
};
|
|
545
|
-
if (exc instanceof CliError) {
|
|
546
|
-
if (exc.required.length > 0)
|
|
547
|
-
error["required"] = exc.required;
|
|
548
|
-
if (exc.received.length > 0)
|
|
549
|
-
error["received"] = exc.received;
|
|
550
|
-
if (exc.nextSteps.length > 0)
|
|
551
|
-
error["next"] = exc.nextSteps;
|
|
552
|
-
}
|
|
553
|
-
const resultPath = episodeResultPath(dir, ep);
|
|
554
|
-
if (exists(resultPath))
|
|
555
|
-
deletePath(resultPath);
|
|
556
|
-
removeEpisodeResultMetadata(dir, ep);
|
|
557
|
-
writeJson(episodeErrorPath(dir, ep), error);
|
|
558
|
-
return error;
|
|
559
|
-
}
|
|
560
|
-
function writeBatchFailure(dir, batch, exc) {
|
|
392
|
+
function writeBatchFailure(dir, batch, exc, inputHash, providerName, model) {
|
|
561
393
|
const err = exc;
|
|
394
|
+
const terminal = classifyProviderError(exc) === "terminal";
|
|
562
395
|
const error = {
|
|
563
396
|
batch_id: batchResultKey(batch),
|
|
564
397
|
episode: Number(batch["episode"]),
|
|
@@ -567,6 +400,8 @@ function writeBatchFailure(dir, batch, exc) {
|
|
|
567
400
|
line_range: batch["line_range"],
|
|
568
401
|
error_type: err?.name || "Error",
|
|
569
402
|
message: (err?.message || err?.name || "Error").slice(0, 500),
|
|
403
|
+
terminal,
|
|
404
|
+
input_hash: inputHash,
|
|
570
405
|
failed_at: checkpointTimestamp(),
|
|
571
406
|
};
|
|
572
407
|
if (exc instanceof CliError) {
|
|
@@ -580,7 +415,16 @@ function writeBatchFailure(dir, batch, exc) {
|
|
|
580
415
|
const resultPath = batchResultPath(dir, batch);
|
|
581
416
|
if (exists(resultPath))
|
|
582
417
|
deletePath(resultPath);
|
|
583
|
-
|
|
418
|
+
if (terminal) {
|
|
419
|
+
writeUnitMeta(batchMetaPath(dir, batch), {
|
|
420
|
+
schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
|
|
421
|
+
input_hash: inputHash, provenance: "extracted", status: "terminal",
|
|
422
|
+
provider: providerName, model, extracted_at: checkpointTimestamp(),
|
|
423
|
+
});
|
|
424
|
+
}
|
|
425
|
+
else {
|
|
426
|
+
removeUnitMeta(batchMetaPath(dir, batch));
|
|
427
|
+
}
|
|
584
428
|
writeJson(batchErrorPath(dir, batch), error);
|
|
585
429
|
return error;
|
|
586
430
|
}
|
|
@@ -635,6 +479,9 @@ export async function commandInit(opts) {
|
|
|
635
479
|
const workspace = strOf(opts["workspace_path"] || "workspace");
|
|
636
480
|
const providerName = strOf(opts["provider"] || DEFAULT_PROVIDER);
|
|
637
481
|
const model = strOf(opts["model"] || process.env.SCRIPTCTL_ANTHROPIC_MODEL || DEFAULT_MODEL);
|
|
482
|
+
// When set, retry batches a prior run marked terminal (content-filtered)
|
|
483
|
+
// instead of skipping them — e.g. after the provider's filter was adjusted.
|
|
484
|
+
const retryTerminal = Boolean(opts["retry_terminal"]);
|
|
638
485
|
let concurrency;
|
|
639
486
|
try {
|
|
640
487
|
concurrency = parseInt(strOf(opts["concurrency"] || DEFAULT_CONCURRENCY), 10);
|
|
@@ -719,7 +566,6 @@ export async function commandInit(opts) {
|
|
|
719
566
|
}
|
|
720
567
|
const dd = directDir(workspace);
|
|
721
568
|
fs.mkdirSync(dd, { recursive: true });
|
|
722
|
-
const previousStateBeforeInit = readRunState(workspace);
|
|
723
569
|
updateRunState(workspace, {
|
|
724
570
|
status: "init_running",
|
|
725
571
|
command: "direct init",
|
|
@@ -832,17 +678,6 @@ export async function commandInit(opts) {
|
|
|
832
678
|
nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
|
|
833
679
|
});
|
|
834
680
|
}
|
|
835
|
-
const checkpoint = initCheckpoint(sourceText, plan);
|
|
836
|
-
const batchCheckpoint = initBatchCheckpoint(sourceText, batchPlan);
|
|
837
|
-
const previousState = previousStateBeforeInit;
|
|
838
|
-
const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
|
|
839
|
-
const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
|
|
840
|
-
const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
|
|
841
|
-
const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
|
|
842
|
-
if (!checkpointReused)
|
|
843
|
-
resetInitOutputs(dd);
|
|
844
|
-
else if (!batchCheckpointReused)
|
|
845
|
-
resetBatchOutputs(dd);
|
|
846
681
|
writeJson(path.join(dd, "source_manifest.json"), manifest);
|
|
847
682
|
writeJson(path.join(dd, "episode_plan.json"), plan);
|
|
848
683
|
writeJson(path.join(dd, "batch_plan.json"), batchPlan);
|
|
@@ -850,13 +685,15 @@ export async function commandInit(opts) {
|
|
|
850
685
|
const batchResultsDir = path.join(dd, "batch_results");
|
|
851
686
|
fs.mkdirSync(episodeResultsDir, { recursive: true });
|
|
852
687
|
fs.mkdirSync(batchResultsDir, { recursive: true });
|
|
688
|
+
// Non-destructive GC: drop result/meta/error/md files for units the current
|
|
689
|
+
// plan no longer references (e.g. the source shed an episode). Pure function
|
|
690
|
+
// of the plan — it never touches a unit the plan still references, and retires
|
|
691
|
+
// the legacy v3 index.json. There is no whole-directory reset any more.
|
|
692
|
+
gcOrphanUnits(episodeResultsDir, new Set(asList(plan["episodes"]).map((ep) => episodeResultKey(ep))));
|
|
693
|
+
gcOrphanUnits(batchResultsDir, new Set(asList(batchPlan["batches"]).map((b) => batchResultKey(b))));
|
|
853
694
|
updateRunState(workspace, {
|
|
854
695
|
status: "init_running",
|
|
855
696
|
init_stage: "batch_extract",
|
|
856
|
-
checkpoint,
|
|
857
|
-
batch_checkpoint: batchCheckpoint,
|
|
858
|
-
checkpoint_reused: checkpointReused,
|
|
859
|
-
batch_checkpoint_reused: batchCheckpointReused,
|
|
860
697
|
batch_mode: batchMode,
|
|
861
698
|
batch_target_lines: batchTargetLines,
|
|
862
699
|
batch_max_chars: batchMaxChars,
|
|
@@ -875,11 +712,11 @@ export async function commandInit(opts) {
|
|
|
875
712
|
batchesByEpisode.set(epNum, []);
|
|
876
713
|
batchesByEpisode.get(epNum).push(batch);
|
|
877
714
|
}
|
|
878
|
-
|
|
715
|
+
// Per-unit reuse: each episode is judged independently by its own input hash,
|
|
716
|
+
// so a source edit to one episode invalidates only that episode — not all 33.
|
|
879
717
|
for (const episode of asList(plan["episodes"])) {
|
|
880
|
-
const
|
|
881
|
-
|
|
882
|
-
: null;
|
|
718
|
+
const epHash = computeUnitHash(sourceText, episode, providerName, model);
|
|
719
|
+
const cached = loadCachedEpisode(sourceText, episodeResultsDir, episode, epHash);
|
|
883
720
|
if (cached !== null) {
|
|
884
721
|
results.push(cached);
|
|
885
722
|
skipped.push(Number(episode["episode"]));
|
|
@@ -889,7 +726,7 @@ export async function commandInit(opts) {
|
|
|
889
726
|
if (!exists(batchResultPath(batchResultsDir, cachedBatch))) {
|
|
890
727
|
const backfilled = recoverBatchFromSource(sourceText, cachedBatch);
|
|
891
728
|
persistBatchResult(batchResultsDir, cachedBatch, backfilled);
|
|
892
|
-
|
|
729
|
+
stampBatchMeta(batchResultsDir, cachedBatch, computeUnitHash(sourceText, cachedBatch, providerName, model), "recovered", providerName, model);
|
|
893
730
|
}
|
|
894
731
|
const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
|
|
895
732
|
if (exists(errorPath))
|
|
@@ -902,11 +739,19 @@ export async function commandInit(opts) {
|
|
|
902
739
|
}
|
|
903
740
|
const batchResults = [];
|
|
904
741
|
const skippedBatches = [];
|
|
742
|
+
const terminalSkipped = [];
|
|
905
743
|
const pending = [];
|
|
906
744
|
for (const batch of pendingBatches) {
|
|
907
|
-
const
|
|
908
|
-
|
|
909
|
-
|
|
745
|
+
const bHash = computeUnitHash(sourceText, batch, providerName, model);
|
|
746
|
+
// A terminal failure (content filter) with the same input hash will fail the
|
|
747
|
+
// same way — skip it instead of re-calling the provider, unless --retry-terminal
|
|
748
|
+
// or the source/provider changed (which rotates the hash).
|
|
749
|
+
const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
|
|
750
|
+
if (!retryTerminal && meta && meta["status"] === "terminal" && meta["input_hash"] === bHash) {
|
|
751
|
+
terminalSkipped.push(batchResultKey(batch));
|
|
752
|
+
continue;
|
|
753
|
+
}
|
|
754
|
+
const cachedBatch = loadCachedBatch(sourceText, batchResultsDir, batch, bHash);
|
|
910
755
|
if (cachedBatch !== null) {
|
|
911
756
|
cachedBatch["_batch_id"] = batchResultKey(batch);
|
|
912
757
|
cachedBatch["_batch_part"] = Number(batch["part"]);
|
|
@@ -933,12 +778,12 @@ export async function commandInit(opts) {
|
|
|
933
778
|
result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
934
779
|
batchResults.push(result);
|
|
935
780
|
persistBatchResult(batchResultsDir, batch, result);
|
|
936
|
-
|
|
781
|
+
stampBatchMeta(batchResultsDir, batch, computeUnitHash(sourceText, batch, providerName, model), "extracted", providerName, model);
|
|
937
782
|
if (exists(errorPath))
|
|
938
783
|
deletePath(errorPath);
|
|
939
784
|
}
|
|
940
785
|
else {
|
|
941
|
-
failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
|
|
786
|
+
failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error, computeUnitHash(sourceText, batch, providerName, model), providerName, model));
|
|
942
787
|
}
|
|
943
788
|
}
|
|
944
789
|
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
@@ -957,82 +802,94 @@ export async function commandInit(opts) {
|
|
|
957
802
|
return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
|
|
958
803
|
});
|
|
959
804
|
const completedBatches = skippedEpisodeBatchCount + batchResults.length;
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
: "INIT INCOMPLETE: Batch extraction failed";
|
|
976
|
-
const nextSteps = sameFailuresRepeated
|
|
977
|
-
? [
|
|
978
|
-
"Run direct inspect --target issue to read failed batch details.",
|
|
979
|
-
"Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
|
|
980
|
-
]
|
|
981
|
-
: [
|
|
982
|
-
"Run direct inspect --target issue to review failed batches.",
|
|
983
|
-
"Rerun the same init once if failures look transient; completed checkpoints will be reused.",
|
|
984
|
-
];
|
|
985
|
-
const failedEpisodeSet = new Set(failedEpisodes);
|
|
986
|
-
const skippedSet = new Set(skipped);
|
|
987
|
-
const batchResultsByEpisode = new Map();
|
|
988
|
-
for (const result of batchResults) {
|
|
989
|
-
const ep = Number(result["episode"] ?? 0);
|
|
990
|
-
if (!batchResultsByEpisode.has(ep))
|
|
991
|
-
batchResultsByEpisode.set(ep, []);
|
|
992
|
-
batchResultsByEpisode.get(ep).push(result);
|
|
993
|
-
}
|
|
805
|
+
const transientFailures = failures.filter((it) => !it["terminal"]);
|
|
806
|
+
const terminalFailures = failures.filter((it) => Boolean(it["terminal"]));
|
|
807
|
+
const skippedSet = new Set(skipped);
|
|
808
|
+
// Merge every fully-completed, non-cached episode into an episode_results
|
|
809
|
+
// checkpoint. Episodes still missing a batch (a failure this run, or a batch
|
|
810
|
+
// a prior run marked terminal and we skipped) are left unmerged so a rerun or
|
|
811
|
+
// an override can complete them.
|
|
812
|
+
const batchResultsByEpisode = new Map();
|
|
813
|
+
for (const result of batchResults) {
|
|
814
|
+
const ep = Number(result["episode"] ?? 0);
|
|
815
|
+
if (!batchResultsByEpisode.has(ep))
|
|
816
|
+
batchResultsByEpisode.set(ep, []);
|
|
817
|
+
batchResultsByEpisode.get(ep).push(result);
|
|
818
|
+
}
|
|
819
|
+
try {
|
|
994
820
|
for (const episode of asList(plan["episodes"])) {
|
|
995
821
|
const episodeNum = Number(episode["episode"]);
|
|
996
|
-
if (skippedSet.has(episodeNum)
|
|
822
|
+
if (skippedSet.has(episodeNum))
|
|
997
823
|
continue;
|
|
998
824
|
const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
|
|
999
|
-
if (expectedBatches
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
825
|
+
if (!expectedBatches || (batchResultsByEpisode.get(episodeNum) ?? []).length !== expectedBatches)
|
|
826
|
+
continue;
|
|
827
|
+
const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
|
|
828
|
+
validateEpisodeExtractionQuality(sourceText, episode, result);
|
|
829
|
+
results.push(result);
|
|
830
|
+
writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
|
|
831
|
+
stampEpisodeMeta(episodeResultsDir, episode, computeUnitHash(sourceText, episode, providerName, model), "extracted", providerName, model);
|
|
832
|
+
const errorPath = episodeErrorPath(episodeResultsDir, episode);
|
|
833
|
+
if (exists(errorPath))
|
|
834
|
+
deletePath(errorPath);
|
|
1006
835
|
}
|
|
836
|
+
}
|
|
837
|
+
catch (exc) {
|
|
838
|
+
const e = exc;
|
|
839
|
+
throw initFailedReport(workspace, {
|
|
840
|
+
title: "INIT FAILED: Episode merge failed",
|
|
841
|
+
stage: "episode_merge",
|
|
842
|
+
required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
|
|
843
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
844
|
+
nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
|
|
845
|
+
updates: { batch_completed: completedBatches },
|
|
846
|
+
});
|
|
847
|
+
}
|
|
848
|
+
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
849
|
+
// Classify episodes that could not be assembled. An episode blocked by ANY
|
|
850
|
+
// transient batch (timeout/5xx) can still complete on rerun → it blocks init.
|
|
851
|
+
// An episode blocked only by terminal (content-filtered) batches is held out:
|
|
852
|
+
// the rest of the script ships, and the operator overrides the blocked unit.
|
|
853
|
+
const completedEpisodeNums = new Set(results.map((r) => Number(r["episode"])));
|
|
854
|
+
const transientEpisodeSet = new Set(transientFailures.map((it) => Number(it["episode"])));
|
|
855
|
+
const incompleteEpisodes = asList(plan["episodes"]).map((ep) => Number(ep["episode"])).filter((n) => !completedEpisodeNums.has(n));
|
|
856
|
+
const transientBlocked = incompleteEpisodes.filter((n) => transientEpisodeSet.has(n)).sort((a, b) => a - b);
|
|
857
|
+
const heldOutEpisodes = incompleteEpisodes.filter((n) => !transientEpisodeSet.has(n)).sort((a, b) => a - b);
|
|
858
|
+
if (transientBlocked.length > 0) {
|
|
1007
859
|
updateRunState(workspace, {
|
|
1008
|
-
status:
|
|
860
|
+
status: "init_incomplete",
|
|
1009
861
|
init_stage: "batch_extract",
|
|
1010
|
-
checkpoint,
|
|
1011
|
-
batch_checkpoint: batchCheckpoint,
|
|
1012
862
|
episode_total: asList(plan["episodes"]).length,
|
|
1013
863
|
episode_completed: results.length,
|
|
1014
864
|
episode_reused: skipped.length,
|
|
1015
|
-
episode_failed:
|
|
1016
|
-
failed_episodes:
|
|
865
|
+
episode_failed: incompleteEpisodes.length,
|
|
866
|
+
failed_episodes: transientBlocked,
|
|
867
|
+
held_out_episodes: heldOutEpisodes,
|
|
1017
868
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1018
869
|
batch_completed: completedBatches,
|
|
1019
870
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1020
871
|
batch_failed: failures.length,
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
last_error: { title:
|
|
1025
|
-
exportable: false,
|
|
872
|
+
batch_terminal: terminalFailures.length,
|
|
873
|
+
transient_failed_batches: transientFailures.map((it) => strOf(it["batch_id"])),
|
|
874
|
+
terminal_failed_batches: terminalFailures.map((it) => strOf(it["batch_id"])),
|
|
875
|
+
last_error: { title: "INIT INCOMPLETE: Batch extraction failed", failed_at: checkpointTimestamp() },
|
|
1026
876
|
});
|
|
1027
|
-
const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
|
|
877
|
+
const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]} [${it["terminal"] ? "terminal" : "transient"}]: ${it["error_type"]} - ${it["message"]}`);
|
|
878
|
+
const next = [
|
|
879
|
+
"Run direct inspect --target issue to review failed batches.",
|
|
880
|
+
"Rerun the same init to retry transient failures; completed units are reused.",
|
|
881
|
+
];
|
|
882
|
+
if (terminalFailures.length > 0) {
|
|
883
|
+
next.push("Terminal (content-filtered) batches will not clear on retry — use `direct override <unit> --from <file>` or soften the source.");
|
|
884
|
+
}
|
|
1028
885
|
const report = {
|
|
1029
|
-
title:
|
|
886
|
+
title: "INIT INCOMPLETE: Batch extraction failed",
|
|
1030
887
|
result: [
|
|
1031
888
|
`episodes total: ${asList(plan["episodes"]).length}`,
|
|
1032
889
|
`completed: ${results.length}`,
|
|
1033
890
|
`reused: ${skipped.length}`,
|
|
1034
|
-
`
|
|
1035
|
-
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${
|
|
891
|
+
`held out (terminal): ${heldOutEpisodes.length}`,
|
|
892
|
+
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${transientFailures.length} transient, ${terminalFailures.length} terminal`,
|
|
1036
893
|
`provider: ${providerName}`,
|
|
1037
894
|
],
|
|
1038
895
|
artifacts: [
|
|
@@ -1045,81 +902,49 @@ export async function commandInit(opts) {
|
|
|
1045
902
|
path.join(dd, "run_state.json"),
|
|
1046
903
|
],
|
|
1047
904
|
issues,
|
|
1048
|
-
next
|
|
905
|
+
next,
|
|
1049
906
|
};
|
|
1050
907
|
return [report, EXIT_RUNTIME];
|
|
1051
908
|
}
|
|
1052
909
|
updateRunState(workspace, {
|
|
1053
910
|
status: "init_running",
|
|
1054
911
|
init_stage: "episode_merge",
|
|
1055
|
-
checkpoint,
|
|
1056
|
-
batch_checkpoint: batchCheckpoint,
|
|
1057
912
|
episode_total: asList(plan["episodes"]).length,
|
|
1058
913
|
episode_completed: results.length,
|
|
1059
914
|
episode_reused: skipped.length,
|
|
1060
915
|
episode_failed: 0,
|
|
1061
916
|
failed_episodes: [],
|
|
917
|
+
held_out_episodes: heldOutEpisodes,
|
|
1062
918
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1063
919
|
batch_completed: completedBatches,
|
|
1064
920
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1065
|
-
batch_failed:
|
|
1066
|
-
|
|
1067
|
-
failure_signature: [],
|
|
1068
|
-
failure_streak: 0,
|
|
921
|
+
batch_failed: terminalFailures.length,
|
|
922
|
+
batch_terminal: terminalFailures.length,
|
|
1069
923
|
last_error: null,
|
|
1070
924
|
});
|
|
925
|
+
// Drop transient/cleared error markers, but KEEP terminal ones so `direct
|
|
926
|
+
// status` and export gating can see which episodes are held out.
|
|
1071
927
|
for (const dir of [batchResultsDir, episodeResultsDir]) {
|
|
1072
928
|
if (!exists(dir))
|
|
1073
929
|
continue;
|
|
1074
930
|
for (const name of fs.readdirSync(dir)) {
|
|
1075
|
-
if (name.endsWith(".error.json"))
|
|
1076
|
-
try {
|
|
1077
|
-
deletePath(path.join(dir, name));
|
|
1078
|
-
}
|
|
1079
|
-
catch {
|
|
1080
|
-
// ignore
|
|
1081
|
-
}
|
|
1082
|
-
}
|
|
1083
|
-
}
|
|
1084
|
-
}
|
|
1085
|
-
try {
|
|
1086
|
-
const batchResultsByEpisode = new Map();
|
|
1087
|
-
for (const result of batchResults) {
|
|
1088
|
-
const ep = Number(result["episode"] ?? 0);
|
|
1089
|
-
if (!batchResultsByEpisode.has(ep))
|
|
1090
|
-
batchResultsByEpisode.set(ep, []);
|
|
1091
|
-
batchResultsByEpisode.get(ep).push(result);
|
|
1092
|
-
}
|
|
1093
|
-
const skippedSet = new Set(skipped);
|
|
1094
|
-
for (const episode of asList(plan["episodes"])) {
|
|
1095
|
-
const episodeNum = Number(episode["episode"]);
|
|
1096
|
-
if (skippedSet.has(episodeNum))
|
|
931
|
+
if (!name.endsWith(".error.json"))
|
|
1097
932
|
continue;
|
|
1098
|
-
const
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
deletePath(
|
|
933
|
+
const errPath = path.join(dir, name);
|
|
934
|
+
try {
|
|
935
|
+
const err = readJson(errPath);
|
|
936
|
+
if (!isDict(err) || !err["terminal"])
|
|
937
|
+
deletePath(errPath);
|
|
938
|
+
}
|
|
939
|
+
catch {
|
|
940
|
+
deletePath(errPath);
|
|
941
|
+
}
|
|
1106
942
|
}
|
|
1107
943
|
}
|
|
1108
|
-
catch (exc) {
|
|
1109
|
-
const e = exc;
|
|
1110
|
-
throw initFailedReport(workspace, {
|
|
1111
|
-
title: "INIT FAILED: Episode merge failed",
|
|
1112
|
-
stage: "episode_merge",
|
|
1113
|
-
required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
|
|
1114
|
-
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1115
|
-
nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
|
|
1116
|
-
updates: { checkpoint, batch_checkpoint: batchCheckpoint, batch_completed: completedBatches },
|
|
1117
|
-
});
|
|
1118
|
-
}
|
|
1119
944
|
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
1120
945
|
let script;
|
|
1121
946
|
try {
|
|
1122
|
-
updateRunState(workspace, { status: "init_running", init_stage: "script_merge"
|
|
947
|
+
updateRunState(workspace, { status: "init_running", init_stage: "script_merge" });
|
|
1123
948
|
script = mergeEpisodeResults(results, strOf(info["projectName"]) || path.basename(source, path.extname(source)));
|
|
1124
949
|
}
|
|
1125
950
|
catch (exc) {
|
|
@@ -1130,11 +955,11 @@ export async function commandInit(opts) {
|
|
|
1130
955
|
required: ["complete episode_results/*.json"],
|
|
1131
956
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1132
957
|
nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
|
|
1133
|
-
updates: {
|
|
958
|
+
updates: { episode_completed: results.length },
|
|
1134
959
|
});
|
|
1135
960
|
}
|
|
1136
961
|
try {
|
|
1137
|
-
updateRunState(workspace, { status: "init_running", init_stage: "asset_curation"
|
|
962
|
+
updateRunState(workspace, { status: "init_running", init_stage: "asset_curation" });
|
|
1138
963
|
const rawCuration = await providerExtractAssetCurationLocal(provider, sourceText, script);
|
|
1139
964
|
const curation = curateScriptAssets(script, rawCuration);
|
|
1140
965
|
writeJson(path.join(dd, "asset_curation.json"), curation);
|
|
@@ -1148,7 +973,7 @@ export async function commandInit(opts) {
|
|
|
1148
973
|
required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
|
|
1149
974
|
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1150
975
|
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1151
|
-
updates: {
|
|
976
|
+
updates: { episode_completed: results.length },
|
|
1152
977
|
});
|
|
1153
978
|
}
|
|
1154
979
|
const e = exc;
|
|
@@ -1158,11 +983,11 @@ export async function commandInit(opts) {
|
|
|
1158
983
|
required: ["provider location merge decisions and deterministic asset reuse curation"],
|
|
1159
984
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1160
985
|
nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1161
|
-
updates: {
|
|
986
|
+
updates: { episode_completed: results.length },
|
|
1162
987
|
});
|
|
1163
988
|
}
|
|
1164
989
|
try {
|
|
1165
|
-
updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract"
|
|
990
|
+
updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract" });
|
|
1166
991
|
let metadata = provider.extractMetadata ? await provider.extractMetadata(sourceText, script) : {};
|
|
1167
992
|
if (!isDict(metadata))
|
|
1168
993
|
metadata = {};
|
|
@@ -1178,7 +1003,7 @@ export async function commandInit(opts) {
|
|
|
1178
1003
|
required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
|
|
1179
1004
|
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1180
1005
|
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1181
|
-
updates: {
|
|
1006
|
+
updates: { episode_completed: results.length },
|
|
1182
1007
|
});
|
|
1183
1008
|
}
|
|
1184
1009
|
const e = exc;
|
|
@@ -1188,12 +1013,12 @@ export async function commandInit(opts) {
|
|
|
1188
1013
|
required: ["provider metadata for worldview, role_type, and asset descriptions"],
|
|
1189
1014
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1190
1015
|
nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1191
|
-
updates: {
|
|
1016
|
+
updates: { episode_completed: results.length },
|
|
1192
1017
|
});
|
|
1193
1018
|
}
|
|
1194
1019
|
const scriptPath = path.join(dd, "script.initial.json");
|
|
1195
1020
|
writeJson(scriptPath, script);
|
|
1196
|
-
updateRunState(workspace, { status: "init_running", init_stage: "validate"
|
|
1021
|
+
updateRunState(workspace, { status: "init_running", init_stage: "validate" });
|
|
1197
1022
|
let validation;
|
|
1198
1023
|
try {
|
|
1199
1024
|
validation = validateScript(workspace, scriptPath);
|
|
@@ -1206,7 +1031,7 @@ export async function commandInit(opts) {
|
|
|
1206
1031
|
required: ["script.initial.json that can be validated"],
|
|
1207
1032
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1208
1033
|
nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
|
|
1209
|
-
updates: {
|
|
1034
|
+
updates: { script_path: scriptPath },
|
|
1210
1035
|
});
|
|
1211
1036
|
}
|
|
1212
1037
|
const passed = Boolean(validation["passed"]);
|
|
@@ -1215,10 +1040,6 @@ export async function commandInit(opts) {
|
|
|
1215
1040
|
status,
|
|
1216
1041
|
command: "direct init",
|
|
1217
1042
|
init_stage: "complete",
|
|
1218
|
-
checkpoint,
|
|
1219
|
-
batch_checkpoint: batchCheckpoint,
|
|
1220
|
-
checkpoint_reused: checkpointReused,
|
|
1221
|
-
batch_checkpoint_reused: batchCheckpointReused,
|
|
1222
1043
|
provider: providerName,
|
|
1223
1044
|
model,
|
|
1224
1045
|
concurrency,
|
|
@@ -1234,19 +1055,17 @@ export async function commandInit(opts) {
|
|
|
1234
1055
|
episode_reused: skipped.length,
|
|
1235
1056
|
episode_failed: 0,
|
|
1236
1057
|
failed_episodes: [],
|
|
1058
|
+
held_out_episodes: heldOutEpisodes,
|
|
1237
1059
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1238
1060
|
batch_completed: completedBatches,
|
|
1239
1061
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1240
|
-
batch_failed:
|
|
1241
|
-
|
|
1242
|
-
failure_signature: [],
|
|
1243
|
-
failure_streak: 0,
|
|
1062
|
+
batch_failed: terminalFailures.length,
|
|
1063
|
+
batch_terminal: terminalFailures.length,
|
|
1244
1064
|
last_error: null,
|
|
1245
1065
|
review_status: "pending",
|
|
1246
1066
|
review_missing: [...REVIEW_TARGETS],
|
|
1247
1067
|
inspected_targets: [],
|
|
1248
1068
|
patch_count: 0,
|
|
1249
|
-
exportable: providerName !== "mock",
|
|
1250
1069
|
});
|
|
1251
1070
|
const title = passed
|
|
1252
1071
|
? "INIT COMPLETE: Initial script ready"
|
|
@@ -1260,9 +1079,9 @@ export async function commandInit(opts) {
|
|
|
1260
1079
|
`actions: ${stats["actions"] ?? 0}`,
|
|
1261
1080
|
`validation: ${passed ? "passed" : "needs repair"}`,
|
|
1262
1081
|
`provider: ${providerName}`,
|
|
1263
|
-
`
|
|
1082
|
+
`episodes reused: ${skipped.length}`,
|
|
1264
1083
|
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed`,
|
|
1265
|
-
`
|
|
1084
|
+
`batches reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
|
|
1266
1085
|
"agent_review: pending",
|
|
1267
1086
|
],
|
|
1268
1087
|
artifacts: [
|
|
@@ -1288,277 +1107,217 @@ export async function commandInit(opts) {
|
|
|
1288
1107
|
};
|
|
1289
1108
|
return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
|
|
1290
1109
|
}
|
|
1291
|
-
export function summarizeIssues(issues) {
|
|
1292
|
-
if (issues.length === 0)
|
|
1293
|
-
return [];
|
|
1294
|
-
const counts = {};
|
|
1295
|
-
for (const item of issues) {
|
|
1296
|
-
const sev = strOf(item["severity"]);
|
|
1297
|
-
counts[sev] = (counts[sev] ?? 0) + 1;
|
|
1298
|
-
}
|
|
1299
|
-
const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
|
|
1300
|
-
const first = issues[0];
|
|
1301
|
-
return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
|
|
1302
|
-
}
|
|
1303
1110
|
// ---------------------------------------------------------------------------
|
|
1304
|
-
//
|
|
1305
|
-
//
|
|
1306
|
-
//
|
|
1307
|
-
//
|
|
1308
|
-
//
|
|
1309
|
-
// 人物.md / 场景.md / 道具.md / 发声源.md (+ optional 梗概.md for the whole-script
|
|
1310
|
-
// synopsis). It assembles the same script.initial.json and hands off to the
|
|
1311
|
-
// existing direct inspect/validate/export downstream (zero changes there).
|
|
1111
|
+
// command_override — inject a human extraction for a unit the provider can't
|
|
1112
|
+
// produce (content-filtered). The override is content-addressed exactly like a
|
|
1113
|
+
// provider result, so init reuses it and never re-calls the provider, and the
|
|
1114
|
+
// non-destructive GC never deletes it. We compute the input_hash from the plan
|
|
1115
|
+
// ourselves, so the operator never hand-edits source_span.
|
|
1312
1116
|
// ---------------------------------------------------------------------------
|
|
1313
|
-
|
|
1314
|
-
const
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
];
|
|
1320
|
-
const
|
|
1321
|
-
const
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
continue;
|
|
1330
|
-
const key = m[1].toLowerCase();
|
|
1331
|
-
const val = m[2].trim();
|
|
1332
|
-
if (!val)
|
|
1333
|
-
continue;
|
|
1334
|
-
if (key === "title" || key === "标题")
|
|
1335
|
-
out.title = val;
|
|
1336
|
-
else if (key === "worldview" || key === "世界观")
|
|
1337
|
-
out.worldview = val;
|
|
1338
|
-
else if (key === "style" || key === "风格")
|
|
1339
|
-
out.style = val;
|
|
1340
|
-
else if (key === "protagonists" || key === "主角" || key === "主角列表") {
|
|
1341
|
-
out.protagonists = val.split(/[,,、]/).map((s) => s.trim()).filter(Boolean);
|
|
1342
|
-
}
|
|
1117
|
+
/**
 * `direct override <unit> --from <file>`: inject an operator-supplied
 * extraction for a single plan unit (episode `ep_NNN` or batch `bat_NNNN`).
 *
 * The result is written into the same results directory, with the same
 * content hash (`computeUnitHash`) that a provider result would carry, and is
 * stamped with provenance "override". Any error sidecar for the unit is
 * removed.
 *
 * @param {object} opts Parsed CLI options. Reads `workspace_path`, `_args[0]`
 *   (the unit key), `from` (path of the JSON extraction), and optional
 *   `provider` / `model` (falling back to run state, then the defaults).
 * @returns {[object, number]} `[report, EXIT_OK]` on success.
 * @throws {CliError} EXIT_USAGE for a malformed unit key, an episode mismatch,
 *   or an extraction that fails quality validation; EXIT_INPUT when the
 *   `--from` file, the plan, the unit, or `source.txt` cannot be found or the
 *   `--from` file cannot be read as JSON.
 */
export function commandOverride(opts) {
    // Anything can be thrown, not only Error instances; never dereference
    // `.message` blindly or the real failure is replaced by a TypeError.
    const errorText = (exc) => String(exc?.message ?? exc);
    const workspace = strOf(opts["workspace_path"] || "workspace");
    const unit = strOf(asList(opts["_args"])[0]).trim();
    const fromPath = strOf(opts["from"]).trim();
    const dd = directDir(workspace);
    const state = readRunState(workspace);
    const providerName = strOf(opts["provider"] || state["provider"] || DEFAULT_PROVIDER);
    const model = strOf(opts["model"] || state["model"] || DEFAULT_MODEL);
    const isEpisode = /^ep_\d+$/.test(unit);
    if (!isEpisode && !/^bat_\d+$/.test(unit)) {
        throw new CliError("OVERRIDE BLOCKED: Invalid unit", "Invalid unit key.", {
            exitCode: EXIT_USAGE,
            required: ["<unit>: ep_NNN or bat_NNNN"],
            received: [`<unit>: ${unit || "<empty>"}`],
            nextSteps: ["Pass an episode (ep_007) or batch (bat_0012) key shown by direct status."],
        });
    }
    if (!fromPath || !exists(fromPath)) {
        throw new CliError("OVERRIDE BLOCKED: --from not found", "Override source file not found.", {
            exitCode: EXIT_INPUT,
            required: ["--from <path>: readable JSON extraction for the unit"],
            received: [`--from: ${fromPath || "<missing>"}`],
            nextSteps: ["Provide a JSON file with scenes/actions for the unit."],
        });
    }
    const planName = isEpisode ? "episode_plan.json" : "batch_plan.json";
    const planPath = path.join(dd, planName);
    if (!exists(planPath)) {
        throw new CliError("OVERRIDE BLOCKED: Plan not found", "Plan not found.", {
            exitCode: EXIT_INPUT,
            required: [planName],
            received: [planPath],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const plan = readJson(planPath);
    const planUnits = asList(plan[isEpisode ? "episodes" : "batches"]);
    // The unit must belong to the current plan: the hash below is derived
    // from the plan item, so the operator never supplies it by hand.
    const planItem = planUnits.find((u) => (isEpisode ? episodeResultKey(u) : batchResultKey(u)) === unit) ?? null;
    if (!planItem) {
        throw new CliError("OVERRIDE BLOCKED: Unit not in current plan", "Unit not in current plan.", {
            exitCode: EXIT_INPUT,
            required: [`${unit} present in ${planName}`],
            received: [`${unit}: not found among ${planUnits.length} units`],
            nextSteps: ["Use a unit key from direct status; rerun init if the plan changed."],
        });
    }
    const sourceTextPath = path.join(workspace, "source.txt");
    if (!exists(sourceTextPath)) {
        throw new CliError("OVERRIDE BLOCKED: source.txt missing", "source.txt missing.", {
            exitCode: EXIT_INPUT,
            required: [sourceTextPath],
            received: ["<missing>"],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const sourceText = readText(sourceTextPath);
    let data;
    try {
        data = readJson(fromPath);
    }
    catch (exc) {
        throw new CliError("OVERRIDE BLOCKED: --from invalid JSON", "Override JSON invalid.", {
            exitCode: EXIT_INPUT,
            required: ["valid extraction JSON"],
            received: [`${fromPath}: ${errorText(exc)}`],
            nextSteps: ["Fix the JSON and retry."],
        });
    }
    const result = normalizeEpisodeResult(data, planItem);
    if (Number(result["episode"]) !== Number(planItem["episode"])) {
        throw new CliError("OVERRIDE BLOCKED: Episode mismatch", "Episode mismatch.", {
            exitCode: EXIT_USAGE,
            required: [`episode ${Number(planItem["episode"])}`],
            received: [`episode ${Number(result["episode"])}`],
            nextSteps: ["Provide an extraction for the correct episode."],
        });
    }
    try {
        if (isEpisode)
            validateEpisodeExtractionQuality(sourceText, planItem, result);
        else
            validateBatchExtractionQuality(sourceText, planItem, result);
    }
    catch (exc) {
        // Validators that already raise a CliError carry their own report.
        if (exc instanceof CliError)
            throw exc;
        throw new CliError("OVERRIDE BLOCKED: Extraction invalid", "Extraction invalid.", {
            exitCode: EXIT_USAGE,
            required: ["valid action types (dialogue/inner_thought/action)"],
            received: [errorText(exc).slice(0, 160)],
            nextSteps: ["Fix the override extraction and retry."],
        });
    }
    const dir = path.join(dd, isEpisode ? "episode_results" : "batch_results");
    fs.mkdirSync(dir, { recursive: true });
    const hash = computeUnitHash(sourceText, planItem, providerName, model);
    if (isEpisode) {
        writeJson(episodeResultPath(dir, planItem), compactEpisodeResult(result));
        stampEpisodeMeta(dir, planItem, hash, "override", providerName, model);
        const errPath = episodeErrorPath(dir, planItem);
        if (exists(errPath))
            deletePath(errPath);
    }
    else {
        persistBatchResult(dir, planItem, result);
        stampBatchMeta(dir, planItem, hash, "override", providerName, model);
        const errPath = batchErrorPath(dir, planItem);
        if (exists(errPath))
            deletePath(errPath);
    }
    const report = {
        title: "OVERRIDE COMPLETE: Unit extraction injected",
        result: [
            `unit: ${unit}`,
            `kind: ${isEpisode ? "episode" : "batch"}`,
            `provenance: override`,
            `provider/model: ${providerName} / ${model}`,
            `scenes: ${asList(result["scenes"]).length}`,
        ],
        artifacts: [dir, path.join(dd, "run_state.json")],
        next: ["Rerun scriptctl direct init — the override is reused without re-calling the provider."],
    };
    return [report, EXIT_OK];
}
|
|
1240
|
+
// ---------------------------------------------------------------------------
|
|
1241
|
+
// command_status — rebuild the progress view from on-disk meta/error sidecars.
|
|
1242
|
+
// run_state is just a cache of this; deleting it loses nothing.
|
|
1243
|
+
// ---------------------------------------------------------------------------
|
|
1244
|
+
/**
 * `direct status`: rebuild the progress view from the plan files and the
 * per-unit meta sidecars on disk.
 *
 * Batches are bucketed as missing (no meta), terminal (meta status
 * "terminal") or ok (any other meta); ok batches are additionally counted by
 * provenance ("override" / "recovered"). An episode is complete when its meta
 * status is "ok"; it is held out when it has at least one terminal batch and
 * is not complete.
 *
 * @param {object} opts Parsed CLI options. Reads `workspace_path`.
 * @returns {[object, number]} `[report, EXIT_OK]`.
 * @throws {CliError} EXIT_INPUT when either plan file is missing.
 */
export function commandStatus(opts) {
    const workspace = strOf(opts["workspace_path"] || "workspace");
    const dd = directDir(workspace);
    const episodePlanPath = path.join(dd, "episode_plan.json");
    const batchPlanPath = path.join(dd, "batch_plan.json");
    const hasEpisodePlan = exists(episodePlanPath);
    const hasBatchPlan = exists(batchPlanPath);
    if (!hasEpisodePlan || !hasBatchPlan) {
        throw new CliError("STATUS BLOCKED: Plan not found", "Plan not found.", {
            exitCode: EXIT_INPUT,
            required: ["episode_plan.json and batch_plan.json"],
            // Report both files so the message is accurate whichever one is absent.
            received: [
                hasEpisodePlan ? "episode_plan.json ok" : "episode_plan.json missing",
                hasBatchPlan ? "batch_plan.json ok" : "batch_plan.json missing",
            ],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const episodes = asList(readJson(episodePlanPath)["episodes"]);
    const batches = asList(readJson(batchPlanPath)["batches"]);
    const episodeResultsDir = path.join(dd, "episode_results");
    const batchResultsDir = path.join(dd, "batch_results");
    // Episodes first: the batch pass needs to know which episodes are complete.
    const completedEpisodes = [];
    for (const ep of episodes) {
        const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
        if (meta && meta["status"] === "ok")
            completedEpisodes.push(Number(ep["episode"]));
    }
    const completedSet = new Set(completedEpisodes);
    // Single pass over the batches: each meta sidecar is read exactly once.
    const count = { ok: 0, override: 0, recovered: 0, terminal: 0, missing: 0 };
    const heldOut = new Set();
    for (const batch of batches) {
        const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
        if (!meta) {
            count.missing++;
            continue;
        }
        if (meta["status"] === "terminal") {
            count.terminal++;
            // Held out = at least one terminal batch and no episode result.
            const epNum = Number(batch["episode"]);
            if (!completedSet.has(epNum))
                heldOut.add(epNum);
            continue;
        }
        count.ok++;
        if (meta["provenance"] === "override")
            count.override++;
        else if (meta["provenance"] === "recovered")
            count.recovered++;
    }
    const heldOutEpisodes = [...heldOut].sort((a, b) => a - b);
    let next;
    if (heldOutEpisodes.length > 0) {
        // Quote the real progress instead of a fixed example figure.
        next = [`Override held-out episodes with direct override, or export ${completedEpisodes.length}/${episodes.length} with direct export --allow-incomplete.`];
    }
    else if (count.missing > 0 || completedEpisodes.length < episodes.length) {
        next = ["Rerun scriptctl direct init to finish pending batches and episodes; completed units are reused."];
    }
    else {
        next = ["All units accounted for."];
    }
    const report = {
        title: "DIRECT STATUS",
        result: [
            `episodes: ${completedEpisodes.length}/${episodes.length} complete`,
            `batches: ${count.ok}/${batches.length} ok (override ${count.override}, recovered ${count.recovered})`,
            `terminal batches: ${count.terminal}`,
            `pending batches: ${count.missing}`,
            `held out episodes: ${heldOutEpisodes.length === 0 ? "-" : heldOutEpisodes.join(", ")}`,
        ],
        artifacts: [batchResultsDir, episodeResultsDir, path.join(dd, "run_state.json")],
        next,
    };
    return [report, EXIT_OK];
}
|
|
1310
|
+
/**
 * Condense a list of issue records into at most two human-readable lines.
 *
 * @param {Array<Object>} issues - Issue records; the `severity`, `code` and
 *   `summary` fields are read.
 * @returns {string[]} An empty array when there are no issues; otherwise
 *   `["<severity>: <count>; ...", "first: <code> - <summary>"]`, with the
 *   severity tallies ordered by severity name.
 */
export function summarizeIssues(issues) {
    if (issues.length === 0) {
        return [];
    }
    // Tally in a Map rather than a plain object: severity labels come from
    // data, and a label such as "constructor" or "__proto__" would otherwise
    // pick up an inherited Object.prototype member instead of starting at 0.
    const counts = new Map();
    for (const item of issues) {
        const sev = strOf(item["severity"]);
        counts.set(sev, (counts.get(sev) ?? 0) + 1);
    }
    const parts = [...counts.entries()]
        .sort(([a], [b]) => a.localeCompare(b))
        .map(([sev, c]) => `${sev}: ${c}`);
    // Only the first issue is spelled out; the rest are represented by the tallies.
    const first = issues[0];
    return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
}
|
|
1563
1322
|
// ---------------------------------------------------------------------------
|
|
1564
1323
|
// command_validate
|