@lingjingai/scriptctl 0.11.3 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +17 -2
- package/dist/cli.js.map +1 -1
- package/dist/common.d.ts +1 -1
- package/dist/common.js +29 -14
- package/dist/common.js.map +1 -1
- package/dist/domain/direct-core.d.ts +13 -8
- package/dist/domain/direct-core.js +53 -13
- package/dist/domain/direct-core.js.map +1 -1
- package/dist/domain/script-core.d.ts +1 -0
- package/dist/domain/script-core.js +11 -2
- package/dist/domain/script-core.js.map +1 -1
- package/dist/help-text.js +2 -2
- package/dist/infra/providers.js +16 -0
- package/dist/infra/providers.js.map +1 -1
- package/dist/usecases/direct.d.ts +5 -3
- package/dist/usecases/direct.js +416 -566
- package/dist/usecases/direct.js.map +1 -1
- package/dist/usecases/parse.d.ts +15 -0
- package/dist/usecases/parse.js +324 -0
- package/dist/usecases/parse.js.map +1 -0
- package/dist/usecases/script.js +25 -3
- package/dist/usecases/script.js.map +1 -1
- package/package.json +1 -1
package/dist/usecases/direct.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
-
import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE,
|
|
4
|
-
import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt,
|
|
3
|
+
import { CliError, DEFAULT_BATCH_MAX_CHARS, DEFAULT_BATCH_MIN_LINES, DEFAULT_BATCH_MODE, DEFAULT_BATCH_TARGET_LINES, DEFAULT_CONCURRENCY, DEFAULT_MODEL, DEFAULT_PROVIDER, DIRECT_CONTRACT_VERSION, EXIT_INPUT, EXIT_NEEDS_AGENT, EXIT_OK, EXIT_RUNTIME, EXIT_USAGE, REVIEW_TARGETS, SUPPORTED_EXTS, deletePath, directDir, exists, fmtId, readJson, readText, sha256Text, writeJson, } from "../common.js";
|
|
4
|
+
import { compactBatchResult, compactEpisodeResult, buildBatchPlan, buildEpisodePlan, classifyProviderError, enrichEpisodePlanTitles, extractBatchWithRecovery, mergeEpisodeResults, normalizeEpisodeResult, normalizeInt, recoverBatchFromSource, uniqueAdd, validateBatchExtractionQuality, validateEpisodeExtractionQuality, _md_push_asset, curateScriptAssets, applyMetadataToScript, } from "../domain/direct-core.js";
|
|
5
5
|
import { validateScript } from "../domain/script-core.js";
|
|
6
6
|
import { makeProvider } from "../infra/providers.js";
|
|
7
7
|
import { makeSourceManifest, prepareSource, } from "../infra/converters.js";
|
|
@@ -54,18 +54,6 @@ export function readRunState(workspace) {
|
|
|
54
54
|
return {};
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
-
function failureSignature(items) {
|
|
58
|
-
if (!isList(items))
|
|
59
|
-
return [];
|
|
60
|
-
const out = [];
|
|
61
|
-
for (const item of items) {
|
|
62
|
-
const s = strOf(item).trim();
|
|
63
|
-
if (s)
|
|
64
|
-
out.push(s);
|
|
65
|
-
}
|
|
66
|
-
out.sort();
|
|
67
|
-
return out;
|
|
68
|
-
}
|
|
69
57
|
export function addInspectedTarget(workspace, target) {
|
|
70
58
|
const state = readRunState(workspace);
|
|
71
59
|
const targets = [];
|
|
@@ -147,9 +135,6 @@ function episodeErrorPath(dir, ep) {
|
|
|
147
135
|
// Build the file-name stem used for an episode's artifacts: the literal prefix
// "ep_" followed by the episode number run through pad3.
function episodeResultKey(ep) {
    const episodeNumber = Number(ep["episode"]);
    const padded = pad3(episodeNumber);
    return `ep_${padded}`;
}
|
|
150
|
-
function episodeResultsIndexPath(dir) {
|
|
151
|
-
return path.join(dir, "index.json");
|
|
152
|
-
}
|
|
153
138
|
function batchResultKey(batch) {
|
|
154
139
|
const bid = strOf(batch["batch_id"]).trim();
|
|
155
140
|
if (bid)
|
|
@@ -165,9 +150,6 @@ function batchMarkdownPath(dir, batch) {
|
|
|
165
150
|
// Location of the error report for one batch: "<dir>/<batch key>.error.json".
function batchErrorPath(dir, batch) {
    const fileName = `${batchResultKey(batch)}.error.json`;
    return path.join(dir, fileName);
}
|
|
168
|
-
function batchResultsIndexPath(dir) {
|
|
169
|
-
return path.join(dir, "index.json");
|
|
170
|
-
}
|
|
171
153
|
function persistBatchResult(dir, batch, result) {
|
|
172
154
|
const rawMd = result["_raw_markdown"];
|
|
173
155
|
delete result["_raw_markdown"];
|
|
@@ -181,130 +163,81 @@ function persistBatchResult(dir, batch, result) {
|
|
|
181
163
|
deletePath(mdPath);
|
|
182
164
|
}
|
|
183
165
|
}
|
|
184
|
-
function
|
|
185
|
-
|
|
186
|
-
if (!exists(p))
|
|
187
|
-
return { version: 1, batches: {} };
|
|
188
|
-
let data;
|
|
189
|
-
try {
|
|
190
|
-
data = readJson(p);
|
|
191
|
-
}
|
|
192
|
-
catch {
|
|
193
|
-
return { version: 1, batches: {} };
|
|
194
|
-
}
|
|
195
|
-
if (!isDict(data))
|
|
196
|
-
return { version: 1, batches: {} };
|
|
197
|
-
if (!isDict(data["batches"]))
|
|
198
|
-
data["batches"] = {};
|
|
199
|
-
if (!("version" in data))
|
|
200
|
-
data["version"] = 1;
|
|
201
|
-
return data;
|
|
202
|
-
}
|
|
203
|
-
function writeBatchResultsIndex(dir, index) {
|
|
204
|
-
writeJson(batchResultsIndexPath(dir), index);
|
|
205
|
-
}
|
|
206
|
-
function updateBatchResultMetadata(dir, batch, providerName, model) {
|
|
207
|
-
const index = readBatchResultsIndex(dir);
|
|
208
|
-
const batches = index["batches"] ?? {};
|
|
209
|
-
batches[batchResultKey(batch)] = {
|
|
210
|
-
episode: Number(batch["episode"]),
|
|
211
|
-
part: Number(batch["part"]),
|
|
212
|
-
provider: providerName,
|
|
213
|
-
model,
|
|
214
|
-
extracted_at: checkpointTimestamp(),
|
|
215
|
-
};
|
|
216
|
-
index["batches"] = batches;
|
|
217
|
-
writeBatchResultsIndex(dir, index);
|
|
166
|
+
// Location of the sidecar metadata file for one episode:
// "<dir>/<episode key>.meta.json".
function episodeMetaPath(dir, ep) {
    const fileName = `${episodeResultKey(ep)}.meta.json`;
    return path.join(dir, fileName);
}
|
|
219
|
-
function
|
|
220
|
-
|
|
221
|
-
const batches = index["batches"] ?? {};
|
|
222
|
-
const key = batchResultKey(batch);
|
|
223
|
-
if (key in batches) {
|
|
224
|
-
delete batches[key];
|
|
225
|
-
index["batches"] = batches;
|
|
226
|
-
writeBatchResultsIndex(dir, index);
|
|
227
|
-
}
|
|
169
|
+
// Location of the sidecar metadata file for one batch:
// "<dir>/<batch key>.meta.json".
function batchMetaPath(dir, batch) {
    const fileName = `${batchResultKey(batch)}.meta.json`;
    return path.join(dir, fileName);
}
|
|
229
|
-
function
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
return { version: 1, episodes: {} };
|
|
233
|
-
let data;
|
|
172
|
+
// Read a unit's sidecar metadata file.
// Returns the parsed object when the file exists, parses, and passes isDict;
// returns null in every other case (missing file, read/parse error, non-dict
// payload). Never throws for those cases — callers treat null as "no usable
// metadata".
function readUnitMeta(metaPath) {
    if (exists(metaPath)) {
        try {
            const parsed = readJson(metaPath);
            if (isDict(parsed)) {
                return parsed;
            }
        }
        catch {
            // Unreadable or malformed metadata is treated the same as absent.
        }
    }
    return null;
}
|
|
248
|
-
function
|
|
249
|
-
|
|
183
|
+
// Persist a unit's sidecar metadata, creating the parent directory (and any
// missing ancestors) first so the write cannot fail on a missing folder.
function writeUnitMeta(metaPath, meta) {
    const parentDir = path.dirname(metaPath);
    fs.mkdirSync(parentDir, { recursive: true });
    writeJson(metaPath, meta);
}
|
|
251
|
-
function
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
episodes[episodeResultKey(ep)] = {
|
|
255
|
-
provider: providerName,
|
|
256
|
-
model,
|
|
257
|
-
extracted_at: checkpointTimestamp(),
|
|
258
|
-
};
|
|
259
|
-
index["episodes"] = episodes;
|
|
260
|
-
writeEpisodeResultsIndex(dir, index);
|
|
187
|
+
// Delete a unit's sidecar metadata file if it is present; a missing file is
// not an error.
function removeUnitMeta(metaPath) {
    if (!exists(metaPath)) {
        return;
    }
    deletePath(metaPath);
}
|
|
262
|
-
function
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
delete episodes[key];
|
|
268
|
-
index["episodes"] = episodes;
|
|
269
|
-
writeEpisodeResultsIndex(dir, index);
|
|
270
|
-
}
|
|
191
|
+
// Write the sidecar metadata for a successfully produced episode result.
// The record stores the unit key, episode number, the input hash the result
// was produced from, its provenance label, status "ok", the provider/model
// pair, and a timestamp from checkpointTimestamp().
function stampEpisodeMeta(dir, ep, inputHash, provenance, providerName, model) {
    const metaPath = episodeMetaPath(dir, ep);
    const record = {
        schema: 1,
        key: episodeResultKey(ep),
        episode: Number(ep["episode"]),
        input_hash: inputHash,
        provenance,
        status: "ok",
        provider: providerName,
        model,
        extracted_at: checkpointTimestamp(),
    };
    writeUnitMeta(metaPath, record);
}
|
|
272
|
-
function
|
|
273
|
-
|
|
274
|
-
|
|
197
|
+
// Write the sidecar metadata for a successfully produced batch result.
// Same layout as the episode record, plus the batch's "part" number.
function stampBatchMeta(dir, batch, inputHash, provenance, providerName, model) {
    const metaPath = batchMetaPath(dir, batch);
    const record = {
        schema: 1,
        key: batchResultKey(batch),
        episode: Number(batch["episode"]),
        part: Number(batch["part"]),
        input_hash: inputHash,
        provenance,
        status: "ok",
        provider: providerName,
        model,
        extracted_at: checkpointTimestamp(),
    };
    writeUnitMeta(metaPath, record);
}
|
|
203
|
+
// Compute the content address of one plan unit (an episode or a batch).
// The digest covers, in order and NUL-separated: the direct contract version,
// the slice of sourceText selected by the unit's source_span, the unit itself
// serialized through checkpointReplacer(), the provider name, and the model.
// A missing/non-dict source_span is treated as an empty span (start/end 0);
// a nullish provider or model contributes an empty string.
export function computeUnitHash(sourceText, unit, providerName, model) {
    const rawSpan = unit["source_span"];
    const span = isDict(rawSpan) ? rawSpan : {};
    const spanStart = Number(span["start"] ?? 0);
    const spanEnd = Number(span["end"] ?? 0);
    const parts = [
        String(DIRECT_CONTRACT_VERSION),
        sourceText.slice(spanStart, spanEnd),
        JSON.stringify(unit, checkpointReplacer()),
        providerName ?? "",
        model ?? "",
    ];
    return sha256Text(parts.join("\u0000"));
}
|
|
214
|
+
// Garbage-collect unit artifacts that the current plan no longer references.
// Scans `dir` and deletes:
//   - "index.json" unconditionally, and
//   - any entry ending in .meta.json / .error.json / .json / .md whose stem
//     (name minus that suffix) is not in `liveKeys`.
// Entries with any other suffix are left alone. The decision uses only file
// names and `liveKeys`; file contents are never read.
// Returns the list of entry names that were deleted (empty when `dir` does not
// exist).
function gcOrphanUnits(dir, liveKeys) {
    const removed = [];
    if (!exists(dir)) {
        return removed;
    }
    const artifactSuffix = /\.(meta\.json|error\.json|json|md)$/;
    for (const name of fs.readdirSync(dir)) {
        let orphaned;
        if (name === "index.json") {
            orphaned = true;
        }
        else {
            const key = name.replace(artifactSuffix, "");
            // key === name means the suffix did not match: not a unit artifact.
            orphaned = key !== name && !liveKeys.has(key);
        }
        if (orphaned) {
            deletePath(path.join(dir, name));
            removed.push(name);
        }
    }
    return removed;
}
|
|
304
238
|
// Title fields are LLM-mutated downstream by enrichEpisodePlanTitles, so they
|
|
305
|
-
// must be excluded from
|
|
306
|
-
//
|
|
307
|
-
// re-extracts from scratch.
|
|
239
|
+
// must be excluded from unit hashes — otherwise every rerun gets a fresh SHA,
|
|
240
|
+
// the cached unit never matches, and that unit re-extracts from scratch.
|
|
308
241
|
const CHECKPOINT_UNSTABLE_KEYS = new Set(["title", "generated_title", "title_status", "title_source"]);
|
|
309
242
|
function checkpointReplacer() {
|
|
310
243
|
// Python's json.dumps(sort_keys=True) sorts keys recursively. Replicate by walking and sorting.
|
|
@@ -323,121 +256,47 @@ function checkpointReplacer() {
|
|
|
323
256
|
return value;
|
|
324
257
|
};
|
|
325
258
|
}
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
function
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
function resetInitOutputs(dd) {
|
|
339
|
-
for (const dirname of ["episode_results", "batch_results"]) {
|
|
340
|
-
const target = path.join(dd, dirname);
|
|
341
|
-
if (exists(target))
|
|
342
|
-
deleteTree(target);
|
|
343
|
-
}
|
|
344
|
-
for (const name of ["script.initial.json", "validation.json", "batch_plan.json", "asset_curation.json", "asset_metadata.json"]) {
|
|
345
|
-
const p = path.join(dd, name);
|
|
346
|
-
if (exists(p))
|
|
347
|
-
deletePath(p);
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
function resetBatchOutputs(dd) {
|
|
351
|
-
const batchResultsDir = path.join(dd, "batch_results");
|
|
352
|
-
if (exists(batchResultsDir))
|
|
353
|
-
deleteTree(batchResultsDir);
|
|
354
|
-
}
|
|
355
|
-
function loadCheckpointedEpisode(sourceText, episodeResultsDir, ep, providerName, model, previousProvider) {
|
|
259
|
+
// Try to reuse a previously stored episode result without modifying anything
// on disk.
// The cached result is returned only when ALL of the following hold:
//   - the episode's sidecar meta exists and its input_hash equals expectedHash,
//   - the meta status is not "terminal",
//   - the result file exists, and
//   - the file reads, normalizes, and passes validateEpisodeExtractionQuality.
// Otherwise returns null so the caller can re-extract and overwrite. This
// function never deletes or rewrites the cached files.
export function loadCachedEpisode(sourceText, episodeResultsDir, ep, expectedHash) {
    const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
    const hashMatches = Boolean(meta) && meta["input_hash"] === expectedHash;
    if (!hashMatches || meta["status"] === "terminal") {
        return null;
    }
    const resultPath = episodeResultPath(episodeResultsDir, ep);
    if (!exists(resultPath)) {
        return null;
    }
    try {
        const cached = normalizeEpisodeResult(readJson(resultPath), ep);
        validateEpisodeExtractionQuality(sourceText, ep, cached);
        return cached;
    }
    catch {
        // Unreadable or low-quality cache: report a miss, leave the file alone.
        return null;
    }
}
|
|
283
|
+
// Batch counterpart of the episode cache lookup.
// Returns the stored batch result when its sidecar meta carries the expected
// input_hash, is not marked "terminal", and the result file exists, loads,
// normalizes, and passes validateBatchExtractionQuality. Any miss or failure
// yields null; nothing on disk is deleted or rewritten here.
export function loadCachedBatch(sourceText, batchResultsDir, batch, expectedHash) {
    const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
    if (meta === null || meta === undefined || meta["input_hash"] !== expectedHash) {
        return null;
    }
    if (meta["status"] === "terminal") {
        return null;
    }
    const resultPath = batchResultPath(batchResultsDir, batch);
    if (exists(resultPath)) {
        try {
            const raw = readJson(resultPath);
            const cached = normalizeEpisodeResult(raw, batch);
            validateBatchExtractionQuality(sourceText, batch, cached);
            return cached;
        }
        catch {
            // Treat an unreadable or invalid cached batch as a cache miss.
            return null;
        }
    }
    return null;
}
|
|
442
301
|
function mergeScene(target, source) {
|
|
443
302
|
if ((target["location_name"] === "" || target["location_name"] === "未知场景" || target["location_name"] === null || target["location_name"] === undefined) &&
|
|
@@ -530,33 +389,9 @@ async function providerExtractAssetCurationLocal(provider, sourceText, script) {
|
|
|
530
389
|
}
|
|
531
390
|
return {};
|
|
532
391
|
}
|
|
533
|
-
function
|
|
534
|
-
const err = exc;
|
|
535
|
-
const error = {
|
|
536
|
-
episode: Number(ep["episode"]),
|
|
537
|
-
title: ep["title"],
|
|
538
|
-
source_span: ep["source_span"],
|
|
539
|
-
error_type: err?.name || "Error",
|
|
540
|
-
message: (err?.message || err?.name || "Error").slice(0, 500),
|
|
541
|
-
failed_at: checkpointTimestamp(),
|
|
542
|
-
};
|
|
543
|
-
if (exc instanceof CliError) {
|
|
544
|
-
if (exc.required.length > 0)
|
|
545
|
-
error["required"] = exc.required;
|
|
546
|
-
if (exc.received.length > 0)
|
|
547
|
-
error["received"] = exc.received;
|
|
548
|
-
if (exc.nextSteps.length > 0)
|
|
549
|
-
error["next"] = exc.nextSteps;
|
|
550
|
-
}
|
|
551
|
-
const resultPath = episodeResultPath(dir, ep);
|
|
552
|
-
if (exists(resultPath))
|
|
553
|
-
deletePath(resultPath);
|
|
554
|
-
removeEpisodeResultMetadata(dir, ep);
|
|
555
|
-
writeJson(episodeErrorPath(dir, ep), error);
|
|
556
|
-
return error;
|
|
557
|
-
}
|
|
558
|
-
function writeBatchFailure(dir, batch, exc) {
|
|
392
|
+
function writeBatchFailure(dir, batch, exc, inputHash, providerName, model) {
|
|
559
393
|
const err = exc;
|
|
394
|
+
const terminal = classifyProviderError(exc) === "terminal";
|
|
560
395
|
const error = {
|
|
561
396
|
batch_id: batchResultKey(batch),
|
|
562
397
|
episode: Number(batch["episode"]),
|
|
@@ -565,6 +400,8 @@ function writeBatchFailure(dir, batch, exc) {
|
|
|
565
400
|
line_range: batch["line_range"],
|
|
566
401
|
error_type: err?.name || "Error",
|
|
567
402
|
message: (err?.message || err?.name || "Error").slice(0, 500),
|
|
403
|
+
terminal,
|
|
404
|
+
input_hash: inputHash,
|
|
568
405
|
failed_at: checkpointTimestamp(),
|
|
569
406
|
};
|
|
570
407
|
if (exc instanceof CliError) {
|
|
@@ -578,7 +415,16 @@ function writeBatchFailure(dir, batch, exc) {
|
|
|
578
415
|
const resultPath = batchResultPath(dir, batch);
|
|
579
416
|
if (exists(resultPath))
|
|
580
417
|
deletePath(resultPath);
|
|
581
|
-
|
|
418
|
+
if (terminal) {
|
|
419
|
+
writeUnitMeta(batchMetaPath(dir, batch), {
|
|
420
|
+
schema: 1, key: batchResultKey(batch), episode: Number(batch["episode"]), part: Number(batch["part"]),
|
|
421
|
+
input_hash: inputHash, provenance: "extracted", status: "terminal",
|
|
422
|
+
provider: providerName, model, extracted_at: checkpointTimestamp(),
|
|
423
|
+
});
|
|
424
|
+
}
|
|
425
|
+
else {
|
|
426
|
+
removeUnitMeta(batchMetaPath(dir, batch));
|
|
427
|
+
}
|
|
582
428
|
writeJson(batchErrorPath(dir, batch), error);
|
|
583
429
|
return error;
|
|
584
430
|
}
|
|
@@ -633,6 +479,9 @@ export async function commandInit(opts) {
|
|
|
633
479
|
const workspace = strOf(opts["workspace_path"] || "workspace");
|
|
634
480
|
const providerName = strOf(opts["provider"] || DEFAULT_PROVIDER);
|
|
635
481
|
const model = strOf(opts["model"] || process.env.SCRIPTCTL_ANTHROPIC_MODEL || DEFAULT_MODEL);
|
|
482
|
+
// When set, retry batches a prior run marked terminal (content-filtered)
|
|
483
|
+
// instead of skipping them — e.g. after the provider's filter was adjusted.
|
|
484
|
+
const retryTerminal = Boolean(opts["retry_terminal"]);
|
|
636
485
|
let concurrency;
|
|
637
486
|
try {
|
|
638
487
|
concurrency = parseInt(strOf(opts["concurrency"] || DEFAULT_CONCURRENCY), 10);
|
|
@@ -717,7 +566,6 @@ export async function commandInit(opts) {
|
|
|
717
566
|
}
|
|
718
567
|
const dd = directDir(workspace);
|
|
719
568
|
fs.mkdirSync(dd, { recursive: true });
|
|
720
|
-
const previousStateBeforeInit = readRunState(workspace);
|
|
721
569
|
updateRunState(workspace, {
|
|
722
570
|
status: "init_running",
|
|
723
571
|
command: "direct init",
|
|
@@ -830,17 +678,6 @@ export async function commandInit(opts) {
|
|
|
830
678
|
nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
|
|
831
679
|
});
|
|
832
680
|
}
|
|
833
|
-
const checkpoint = initCheckpoint(sourceText, plan);
|
|
834
|
-
const batchCheckpoint = initBatchCheckpoint(sourceText, batchPlan);
|
|
835
|
-
const previousState = previousStateBeforeInit;
|
|
836
|
-
const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
|
|
837
|
-
const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
|
|
838
|
-
const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
|
|
839
|
-
const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
|
|
840
|
-
if (!checkpointReused)
|
|
841
|
-
resetInitOutputs(dd);
|
|
842
|
-
else if (!batchCheckpointReused)
|
|
843
|
-
resetBatchOutputs(dd);
|
|
844
681
|
writeJson(path.join(dd, "source_manifest.json"), manifest);
|
|
845
682
|
writeJson(path.join(dd, "episode_plan.json"), plan);
|
|
846
683
|
writeJson(path.join(dd, "batch_plan.json"), batchPlan);
|
|
@@ -848,13 +685,15 @@ export async function commandInit(opts) {
|
|
|
848
685
|
const batchResultsDir = path.join(dd, "batch_results");
|
|
849
686
|
fs.mkdirSync(episodeResultsDir, { recursive: true });
|
|
850
687
|
fs.mkdirSync(batchResultsDir, { recursive: true });
|
|
688
|
+
// Non-destructive GC: drop result/meta/error/md files for units the current
|
|
689
|
+
// plan no longer references (e.g. the source shed an episode). Pure function
|
|
690
|
+
// of the plan — it never touches a unit the plan still references, and retires
|
|
691
|
+
// the legacy v3 index.json. There is no whole-directory reset any more.
|
|
692
|
+
gcOrphanUnits(episodeResultsDir, new Set(asList(plan["episodes"]).map((ep) => episodeResultKey(ep))));
|
|
693
|
+
gcOrphanUnits(batchResultsDir, new Set(asList(batchPlan["batches"]).map((b) => batchResultKey(b))));
|
|
851
694
|
updateRunState(workspace, {
|
|
852
695
|
status: "init_running",
|
|
853
696
|
init_stage: "batch_extract",
|
|
854
|
-
checkpoint,
|
|
855
|
-
batch_checkpoint: batchCheckpoint,
|
|
856
|
-
checkpoint_reused: checkpointReused,
|
|
857
|
-
batch_checkpoint_reused: batchCheckpointReused,
|
|
858
697
|
batch_mode: batchMode,
|
|
859
698
|
batch_target_lines: batchTargetLines,
|
|
860
699
|
batch_max_chars: batchMaxChars,
|
|
@@ -873,11 +712,11 @@ export async function commandInit(opts) {
|
|
|
873
712
|
batchesByEpisode.set(epNum, []);
|
|
874
713
|
batchesByEpisode.get(epNum).push(batch);
|
|
875
714
|
}
|
|
876
|
-
|
|
715
|
+
// Per-unit reuse: each episode is judged independently by its own input hash,
|
|
716
|
+
// so a source edit to one episode invalidates only that episode — not all 33.
|
|
877
717
|
for (const episode of asList(plan["episodes"])) {
|
|
878
|
-
const
|
|
879
|
-
|
|
880
|
-
: null;
|
|
718
|
+
const epHash = computeUnitHash(sourceText, episode, providerName, model);
|
|
719
|
+
const cached = loadCachedEpisode(sourceText, episodeResultsDir, episode, epHash);
|
|
881
720
|
if (cached !== null) {
|
|
882
721
|
results.push(cached);
|
|
883
722
|
skipped.push(Number(episode["episode"]));
|
|
@@ -887,7 +726,7 @@ export async function commandInit(opts) {
|
|
|
887
726
|
if (!exists(batchResultPath(batchResultsDir, cachedBatch))) {
|
|
888
727
|
const backfilled = recoverBatchFromSource(sourceText, cachedBatch);
|
|
889
728
|
persistBatchResult(batchResultsDir, cachedBatch, backfilled);
|
|
890
|
-
|
|
729
|
+
stampBatchMeta(batchResultsDir, cachedBatch, computeUnitHash(sourceText, cachedBatch, providerName, model), "recovered", providerName, model);
|
|
891
730
|
}
|
|
892
731
|
const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
|
|
893
732
|
if (exists(errorPath))
|
|
@@ -900,11 +739,19 @@ export async function commandInit(opts) {
|
|
|
900
739
|
}
|
|
901
740
|
const batchResults = [];
|
|
902
741
|
const skippedBatches = [];
|
|
742
|
+
const terminalSkipped = [];
|
|
903
743
|
const pending = [];
|
|
904
744
|
for (const batch of pendingBatches) {
|
|
905
|
-
const
|
|
906
|
-
|
|
907
|
-
|
|
745
|
+
const bHash = computeUnitHash(sourceText, batch, providerName, model);
|
|
746
|
+
// A terminal failure (content filter) with the same input hash will fail the
|
|
747
|
+
// same way — skip it instead of re-calling the provider, unless --retry-terminal
|
|
748
|
+
// or the source/provider changed (which rotates the hash).
|
|
749
|
+
const meta = readUnitMeta(batchMetaPath(batchResultsDir, batch));
|
|
750
|
+
if (!retryTerminal && meta && meta["status"] === "terminal" && meta["input_hash"] === bHash) {
|
|
751
|
+
terminalSkipped.push(batchResultKey(batch));
|
|
752
|
+
continue;
|
|
753
|
+
}
|
|
754
|
+
const cachedBatch = loadCachedBatch(sourceText, batchResultsDir, batch, bHash);
|
|
908
755
|
if (cachedBatch !== null) {
|
|
909
756
|
cachedBatch["_batch_id"] = batchResultKey(batch);
|
|
910
757
|
cachedBatch["_batch_part"] = Number(batch["part"]);
|
|
@@ -931,12 +778,12 @@ export async function commandInit(opts) {
|
|
|
931
778
|
result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
932
779
|
batchResults.push(result);
|
|
933
780
|
persistBatchResult(batchResultsDir, batch, result);
|
|
934
|
-
|
|
781
|
+
stampBatchMeta(batchResultsDir, batch, computeUnitHash(sourceText, batch, providerName, model), "extracted", providerName, model);
|
|
935
782
|
if (exists(errorPath))
|
|
936
783
|
deletePath(errorPath);
|
|
937
784
|
}
|
|
938
785
|
else {
|
|
939
|
-
failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
|
|
786
|
+
failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error, computeUnitHash(sourceText, batch, providerName, model), providerName, model));
|
|
940
787
|
}
|
|
941
788
|
}
|
|
942
789
|
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
@@ -955,82 +802,94 @@ export async function commandInit(opts) {
|
|
|
955
802
|
return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
|
|
956
803
|
});
|
|
957
804
|
const completedBatches = skippedEpisodeBatchCount + batchResults.length;
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
: "INIT INCOMPLETE: Batch extraction failed";
|
|
974
|
-
const nextSteps = sameFailuresRepeated
|
|
975
|
-
? [
|
|
976
|
-
"Run direct inspect --target issue to read failed batch details.",
|
|
977
|
-
"Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
|
|
978
|
-
]
|
|
979
|
-
: [
|
|
980
|
-
"Run direct inspect --target issue to review failed batches.",
|
|
981
|
-
"Rerun the same init once if failures look transient; completed checkpoints will be reused.",
|
|
982
|
-
];
|
|
983
|
-
const failedEpisodeSet = new Set(failedEpisodes);
|
|
984
|
-
const skippedSet = new Set(skipped);
|
|
985
|
-
const batchResultsByEpisode = new Map();
|
|
986
|
-
for (const result of batchResults) {
|
|
987
|
-
const ep = Number(result["episode"] ?? 0);
|
|
988
|
-
if (!batchResultsByEpisode.has(ep))
|
|
989
|
-
batchResultsByEpisode.set(ep, []);
|
|
990
|
-
batchResultsByEpisode.get(ep).push(result);
|
|
991
|
-
}
|
|
805
|
+
const transientFailures = failures.filter((it) => !it["terminal"]);
|
|
806
|
+
const terminalFailures = failures.filter((it) => Boolean(it["terminal"]));
|
|
807
|
+
const skippedSet = new Set(skipped);
|
|
808
|
+
// Merge every fully-completed, non-cached episode into an episode_results
|
|
809
|
+
// checkpoint. Episodes still missing a batch (a failure this run, or a batch
|
|
810
|
+
// a prior run marked terminal and we skipped) are left unmerged so a rerun or
|
|
811
|
+
// an override can complete them.
|
|
812
|
+
const batchResultsByEpisode = new Map();
|
|
813
|
+
for (const result of batchResults) {
|
|
814
|
+
const ep = Number(result["episode"] ?? 0);
|
|
815
|
+
if (!batchResultsByEpisode.has(ep))
|
|
816
|
+
batchResultsByEpisode.set(ep, []);
|
|
817
|
+
batchResultsByEpisode.get(ep).push(result);
|
|
818
|
+
}
|
|
819
|
+
try {
|
|
992
820
|
for (const episode of asList(plan["episodes"])) {
|
|
993
821
|
const episodeNum = Number(episode["episode"]);
|
|
994
|
-
if (skippedSet.has(episodeNum)
|
|
822
|
+
if (skippedSet.has(episodeNum))
|
|
995
823
|
continue;
|
|
996
824
|
const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
|
|
997
|
-
if (expectedBatches
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
825
|
+
if (!expectedBatches || (batchResultsByEpisode.get(episodeNum) ?? []).length !== expectedBatches)
|
|
826
|
+
continue;
|
|
827
|
+
const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
|
|
828
|
+
validateEpisodeExtractionQuality(sourceText, episode, result);
|
|
829
|
+
results.push(result);
|
|
830
|
+
writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
|
|
831
|
+
stampEpisodeMeta(episodeResultsDir, episode, computeUnitHash(sourceText, episode, providerName, model), "extracted", providerName, model);
|
|
832
|
+
const errorPath = episodeErrorPath(episodeResultsDir, episode);
|
|
833
|
+
if (exists(errorPath))
|
|
834
|
+
deletePath(errorPath);
|
|
1004
835
|
}
|
|
836
|
+
}
|
|
837
|
+
catch (exc) {
|
|
838
|
+
const e = exc;
|
|
839
|
+
throw initFailedReport(workspace, {
|
|
840
|
+
title: "INIT FAILED: Episode merge failed",
|
|
841
|
+
stage: "episode_merge",
|
|
842
|
+
required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
|
|
843
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
844
|
+
nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
|
|
845
|
+
updates: { batch_completed: completedBatches },
|
|
846
|
+
});
|
|
847
|
+
}
|
|
848
|
+
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
849
|
+
// Classify episodes that could not be assembled. An episode blocked by ANY
|
|
850
|
+
// transient batch (timeout/5xx) can still complete on rerun → it blocks init.
|
|
851
|
+
// An episode blocked only by terminal (content-filtered) batches is held out:
|
|
852
|
+
// the rest of the script ships, and the operator overrides the blocked unit.
|
|
853
|
+
const completedEpisodeNums = new Set(results.map((r) => Number(r["episode"])));
|
|
854
|
+
const transientEpisodeSet = new Set(transientFailures.map((it) => Number(it["episode"])));
|
|
855
|
+
const incompleteEpisodes = asList(plan["episodes"]).map((ep) => Number(ep["episode"])).filter((n) => !completedEpisodeNums.has(n));
|
|
856
|
+
const transientBlocked = incompleteEpisodes.filter((n) => transientEpisodeSet.has(n)).sort((a, b) => a - b);
|
|
857
|
+
const heldOutEpisodes = incompleteEpisodes.filter((n) => !transientEpisodeSet.has(n)).sort((a, b) => a - b);
|
|
858
|
+
if (transientBlocked.length > 0) {
|
|
1005
859
|
updateRunState(workspace, {
|
|
1006
|
-
status:
|
|
860
|
+
status: "init_incomplete",
|
|
1007
861
|
init_stage: "batch_extract",
|
|
1008
|
-
checkpoint,
|
|
1009
|
-
batch_checkpoint: batchCheckpoint,
|
|
1010
862
|
episode_total: asList(plan["episodes"]).length,
|
|
1011
863
|
episode_completed: results.length,
|
|
1012
864
|
episode_reused: skipped.length,
|
|
1013
|
-
episode_failed:
|
|
1014
|
-
failed_episodes:
|
|
865
|
+
episode_failed: incompleteEpisodes.length,
|
|
866
|
+
failed_episodes: transientBlocked,
|
|
867
|
+
held_out_episodes: heldOutEpisodes,
|
|
1015
868
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1016
869
|
batch_completed: completedBatches,
|
|
1017
870
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1018
871
|
batch_failed: failures.length,
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
last_error: { title:
|
|
1023
|
-
exportable: false,
|
|
872
|
+
batch_terminal: terminalFailures.length,
|
|
873
|
+
transient_failed_batches: transientFailures.map((it) => strOf(it["batch_id"])),
|
|
874
|
+
terminal_failed_batches: terminalFailures.map((it) => strOf(it["batch_id"])),
|
|
875
|
+
last_error: { title: "INIT INCOMPLETE: Batch extraction failed", failed_at: checkpointTimestamp() },
|
|
1024
876
|
});
|
|
1025
|
-
const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
|
|
877
|
+
const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]} [${it["terminal"] ? "terminal" : "transient"}]: ${it["error_type"]} - ${it["message"]}`);
|
|
878
|
+
const next = [
|
|
879
|
+
"Run direct inspect --target issue to review failed batches.",
|
|
880
|
+
"Rerun the same init to retry transient failures; completed units are reused.",
|
|
881
|
+
];
|
|
882
|
+
if (terminalFailures.length > 0) {
|
|
883
|
+
next.push("Terminal (content-filtered) batches will not clear on retry — use `direct override <unit> --from <file>` or soften the source.");
|
|
884
|
+
}
|
|
1026
885
|
const report = {
|
|
1027
|
-
title:
|
|
886
|
+
title: "INIT INCOMPLETE: Batch extraction failed",
|
|
1028
887
|
result: [
|
|
1029
888
|
`episodes total: ${asList(plan["episodes"]).length}`,
|
|
1030
889
|
`completed: ${results.length}`,
|
|
1031
890
|
`reused: ${skipped.length}`,
|
|
1032
|
-
`
|
|
1033
|
-
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${
|
|
891
|
+
`held out (terminal): ${heldOutEpisodes.length}`,
|
|
892
|
+
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed, ${transientFailures.length} transient, ${terminalFailures.length} terminal`,
|
|
1034
893
|
`provider: ${providerName}`,
|
|
1035
894
|
],
|
|
1036
895
|
artifacts: [
|
|
@@ -1043,81 +902,49 @@ export async function commandInit(opts) {
|
|
|
1043
902
|
path.join(dd, "run_state.json"),
|
|
1044
903
|
],
|
|
1045
904
|
issues,
|
|
1046
|
-
next
|
|
905
|
+
next,
|
|
1047
906
|
};
|
|
1048
907
|
return [report, EXIT_RUNTIME];
|
|
1049
908
|
}
|
|
1050
909
|
updateRunState(workspace, {
|
|
1051
910
|
status: "init_running",
|
|
1052
911
|
init_stage: "episode_merge",
|
|
1053
|
-
checkpoint,
|
|
1054
|
-
batch_checkpoint: batchCheckpoint,
|
|
1055
912
|
episode_total: asList(plan["episodes"]).length,
|
|
1056
913
|
episode_completed: results.length,
|
|
1057
914
|
episode_reused: skipped.length,
|
|
1058
915
|
episode_failed: 0,
|
|
1059
916
|
failed_episodes: [],
|
|
917
|
+
held_out_episodes: heldOutEpisodes,
|
|
1060
918
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1061
919
|
batch_completed: completedBatches,
|
|
1062
920
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1063
|
-
batch_failed:
|
|
1064
|
-
|
|
1065
|
-
failure_signature: [],
|
|
1066
|
-
failure_streak: 0,
|
|
921
|
+
batch_failed: terminalFailures.length,
|
|
922
|
+
batch_terminal: terminalFailures.length,
|
|
1067
923
|
last_error: null,
|
|
1068
924
|
});
|
|
925
|
+
// Drop transient/cleared error markers, but KEEP terminal ones so `direct
|
|
926
|
+
// status` and export gating can see which episodes are held out.
|
|
1069
927
|
for (const dir of [batchResultsDir, episodeResultsDir]) {
|
|
1070
928
|
if (!exists(dir))
|
|
1071
929
|
continue;
|
|
1072
930
|
for (const name of fs.readdirSync(dir)) {
|
|
1073
|
-
if (name.endsWith(".error.json"))
|
|
1074
|
-
try {
|
|
1075
|
-
deletePath(path.join(dir, name));
|
|
1076
|
-
}
|
|
1077
|
-
catch {
|
|
1078
|
-
// ignore
|
|
1079
|
-
}
|
|
1080
|
-
}
|
|
1081
|
-
}
|
|
1082
|
-
}
|
|
1083
|
-
try {
|
|
1084
|
-
const batchResultsByEpisode = new Map();
|
|
1085
|
-
for (const result of batchResults) {
|
|
1086
|
-
const ep = Number(result["episode"] ?? 0);
|
|
1087
|
-
if (!batchResultsByEpisode.has(ep))
|
|
1088
|
-
batchResultsByEpisode.set(ep, []);
|
|
1089
|
-
batchResultsByEpisode.get(ep).push(result);
|
|
1090
|
-
}
|
|
1091
|
-
const skippedSet = new Set(skipped);
|
|
1092
|
-
for (const episode of asList(plan["episodes"])) {
|
|
1093
|
-
const episodeNum = Number(episode["episode"]);
|
|
1094
|
-
if (skippedSet.has(episodeNum))
|
|
931
|
+
if (!name.endsWith(".error.json"))
|
|
1095
932
|
continue;
|
|
1096
|
-
const
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
deletePath(
|
|
933
|
+
const errPath = path.join(dir, name);
|
|
934
|
+
try {
|
|
935
|
+
const err = readJson(errPath);
|
|
936
|
+
if (!isDict(err) || !err["terminal"])
|
|
937
|
+
deletePath(errPath);
|
|
938
|
+
}
|
|
939
|
+
catch {
|
|
940
|
+
deletePath(errPath);
|
|
941
|
+
}
|
|
1104
942
|
}
|
|
1105
943
|
}
|
|
1106
|
-
catch (exc) {
|
|
1107
|
-
const e = exc;
|
|
1108
|
-
throw initFailedReport(workspace, {
|
|
1109
|
-
title: "INIT FAILED: Episode merge failed",
|
|
1110
|
-
stage: "episode_merge",
|
|
1111
|
-
required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
|
|
1112
|
-
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1113
|
-
nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
|
|
1114
|
-
updates: { checkpoint, batch_checkpoint: batchCheckpoint, batch_completed: completedBatches },
|
|
1115
|
-
});
|
|
1116
|
-
}
|
|
1117
944
|
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
1118
945
|
let script;
|
|
1119
946
|
try {
|
|
1120
|
-
updateRunState(workspace, { status: "init_running", init_stage: "script_merge"
|
|
947
|
+
updateRunState(workspace, { status: "init_running", init_stage: "script_merge" });
|
|
1121
948
|
script = mergeEpisodeResults(results, strOf(info["projectName"]) || path.basename(source, path.extname(source)));
|
|
1122
949
|
}
|
|
1123
950
|
catch (exc) {
|
|
@@ -1128,11 +955,11 @@ export async function commandInit(opts) {
|
|
|
1128
955
|
required: ["complete episode_results/*.json"],
|
|
1129
956
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1130
957
|
nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
|
|
1131
|
-
updates: {
|
|
958
|
+
updates: { episode_completed: results.length },
|
|
1132
959
|
});
|
|
1133
960
|
}
|
|
1134
961
|
try {
|
|
1135
|
-
updateRunState(workspace, { status: "init_running", init_stage: "asset_curation"
|
|
962
|
+
updateRunState(workspace, { status: "init_running", init_stage: "asset_curation" });
|
|
1136
963
|
const rawCuration = await providerExtractAssetCurationLocal(provider, sourceText, script);
|
|
1137
964
|
const curation = curateScriptAssets(script, rawCuration);
|
|
1138
965
|
writeJson(path.join(dd, "asset_curation.json"), curation);
|
|
@@ -1146,7 +973,7 @@ export async function commandInit(opts) {
|
|
|
1146
973
|
required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
|
|
1147
974
|
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1148
975
|
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1149
|
-
updates: {
|
|
976
|
+
updates: { episode_completed: results.length },
|
|
1150
977
|
});
|
|
1151
978
|
}
|
|
1152
979
|
const e = exc;
|
|
@@ -1156,11 +983,11 @@ export async function commandInit(opts) {
|
|
|
1156
983
|
required: ["provider location merge decisions and deterministic asset reuse curation"],
|
|
1157
984
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1158
985
|
nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1159
|
-
updates: {
|
|
986
|
+
updates: { episode_completed: results.length },
|
|
1160
987
|
});
|
|
1161
988
|
}
|
|
1162
989
|
try {
|
|
1163
|
-
updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract"
|
|
990
|
+
updateRunState(workspace, { status: "init_running", init_stage: "metadata_extract" });
|
|
1164
991
|
let metadata = provider.extractMetadata ? await provider.extractMetadata(sourceText, script) : {};
|
|
1165
992
|
if (!isDict(metadata))
|
|
1166
993
|
metadata = {};
|
|
@@ -1176,7 +1003,7 @@ export async function commandInit(opts) {
|
|
|
1176
1003
|
required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
|
|
1177
1004
|
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1178
1005
|
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1179
|
-
updates: {
|
|
1006
|
+
updates: { episode_completed: results.length },
|
|
1180
1007
|
});
|
|
1181
1008
|
}
|
|
1182
1009
|
const e = exc;
|
|
@@ -1186,12 +1013,12 @@ export async function commandInit(opts) {
|
|
|
1186
1013
|
required: ["provider metadata for worldview, role_type, and asset descriptions"],
|
|
1187
1014
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1188
1015
|
nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1189
|
-
updates: {
|
|
1016
|
+
updates: { episode_completed: results.length },
|
|
1190
1017
|
});
|
|
1191
1018
|
}
|
|
1192
1019
|
const scriptPath = path.join(dd, "script.initial.json");
|
|
1193
1020
|
writeJson(scriptPath, script);
|
|
1194
|
-
updateRunState(workspace, { status: "init_running", init_stage: "validate"
|
|
1021
|
+
updateRunState(workspace, { status: "init_running", init_stage: "validate" });
|
|
1195
1022
|
let validation;
|
|
1196
1023
|
try {
|
|
1197
1024
|
validation = validateScript(workspace, scriptPath);
|
|
@@ -1204,7 +1031,7 @@ export async function commandInit(opts) {
|
|
|
1204
1031
|
required: ["script.initial.json that can be validated"],
|
|
1205
1032
|
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1206
1033
|
nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
|
|
1207
|
-
updates: {
|
|
1034
|
+
updates: { script_path: scriptPath },
|
|
1208
1035
|
});
|
|
1209
1036
|
}
|
|
1210
1037
|
const passed = Boolean(validation["passed"]);
|
|
@@ -1213,10 +1040,6 @@ export async function commandInit(opts) {
|
|
|
1213
1040
|
status,
|
|
1214
1041
|
command: "direct init",
|
|
1215
1042
|
init_stage: "complete",
|
|
1216
|
-
checkpoint,
|
|
1217
|
-
batch_checkpoint: batchCheckpoint,
|
|
1218
|
-
checkpoint_reused: checkpointReused,
|
|
1219
|
-
batch_checkpoint_reused: batchCheckpointReused,
|
|
1220
1043
|
provider: providerName,
|
|
1221
1044
|
model,
|
|
1222
1045
|
concurrency,
|
|
@@ -1232,19 +1055,17 @@ export async function commandInit(opts) {
|
|
|
1232
1055
|
episode_reused: skipped.length,
|
|
1233
1056
|
episode_failed: 0,
|
|
1234
1057
|
failed_episodes: [],
|
|
1058
|
+
held_out_episodes: heldOutEpisodes,
|
|
1235
1059
|
batch_total: asList(batchPlan["batches"]).length,
|
|
1236
1060
|
batch_completed: completedBatches,
|
|
1237
1061
|
batch_reused: skippedEpisodeBatchCount + skippedBatches.length,
|
|
1238
|
-
batch_failed:
|
|
1239
|
-
|
|
1240
|
-
failure_signature: [],
|
|
1241
|
-
failure_streak: 0,
|
|
1062
|
+
batch_failed: terminalFailures.length,
|
|
1063
|
+
batch_terminal: terminalFailures.length,
|
|
1242
1064
|
last_error: null,
|
|
1243
1065
|
review_status: "pending",
|
|
1244
1066
|
review_missing: [...REVIEW_TARGETS],
|
|
1245
1067
|
inspected_targets: [],
|
|
1246
1068
|
patch_count: 0,
|
|
1247
|
-
exportable: providerName !== "mock",
|
|
1248
1069
|
});
|
|
1249
1070
|
const title = passed
|
|
1250
1071
|
? "INIT COMPLETE: Initial script ready"
|
|
@@ -1258,9 +1079,9 @@ export async function commandInit(opts) {
|
|
|
1258
1079
|
`actions: ${stats["actions"] ?? 0}`,
|
|
1259
1080
|
`validation: ${passed ? "passed" : "needs repair"}`,
|
|
1260
1081
|
`provider: ${providerName}`,
|
|
1261
|
-
`
|
|
1082
|
+
`episodes reused: ${skipped.length}`,
|
|
1262
1083
|
`batches: ${completedBatches}/${asList(batchPlan["batches"]).length} completed`,
|
|
1263
|
-
`
|
|
1084
|
+
`batches reused: ${skippedEpisodeBatchCount + skippedBatches.length}`,
|
|
1264
1085
|
"agent_review: pending",
|
|
1265
1086
|
],
|
|
1266
1087
|
artifacts: [
|
|
@@ -1286,188 +1107,217 @@ export async function commandInit(opts) {
|
|
|
1286
1107
|
};
|
|
1287
1108
|
return [report, passed ? EXIT_OK : EXIT_NEEDS_AGENT];
|
|
1288
1109
|
}
|
|
1289
|
-
export function summarizeIssues(issues) {
|
|
1290
|
-
if (issues.length === 0)
|
|
1291
|
-
return [];
|
|
1292
|
-
const counts = {};
|
|
1293
|
-
for (const item of issues) {
|
|
1294
|
-
const sev = strOf(item["severity"]);
|
|
1295
|
-
counts[sev] = (counts[sev] ?? 0) + 1;
|
|
1296
|
-
}
|
|
1297
|
-
const parts = Object.entries(counts).sort(([a], [b]) => a.localeCompare(b)).map(([sev, c]) => `${sev}: ${c}`);
|
|
1298
|
-
const first = issues[0];
|
|
1299
|
-
return [parts.join("; "), `first: ${first["code"]} - ${first["summary"]}`];
|
|
1300
|
-
}
|
|
1301
1110
|
// ---------------------------------------------------------------------------
|
|
1302
|
-
//
|
|
1303
|
-
//
|
|
1304
|
-
//
|
|
1305
|
-
//
|
|
1306
|
-
//
|
|
1307
|
-
// 人物.md / 场景.md / 道具.md / 发声源.md (+ optional 梗概.md for the whole-script
|
|
1308
|
-
// synopsis). It assembles the same script.initial.json and hands off to the
|
|
1309
|
-
// existing direct inspect/validate/export downstream (zero changes there).
|
|
1111
|
+
// command_override — inject a human extraction for a unit the provider can't
|
|
1112
|
+
// produce (content-filtered). The override is content-addressed exactly like a
|
|
1113
|
+
// provider result, so init reuses it and never re-calls the provider, and the
|
|
1114
|
+
// non-destructive GC never deletes it. We compute the input_hash from the plan
|
|
1115
|
+
// ourselves, so the operator never hand-edits source_span.
|
|
1310
1116
|
// ---------------------------------------------------------------------------
|
|
1311
|
-
|
|
1312
|
-
const
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
];
|
|
1318
|
-
const
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
}
|
|
1327
|
-
function collectEpisodeMdFiles(dir) {
|
|
1328
|
-
if (!exists(dir) || !fs.statSync(dir).isDirectory())
|
|
1329
|
-
return [];
|
|
1330
|
-
const out = [];
|
|
1331
|
-
for (const name of fs.readdirSync(dir)) {
|
|
1332
|
-
const m = _EP_FILE_RE.exec(name);
|
|
1333
|
-
if (!m)
|
|
1334
|
-
continue;
|
|
1335
|
-
const full = path.join(dir, name);
|
|
1336
|
-
if (!fs.statSync(full).isFile())
|
|
1337
|
-
continue;
|
|
1338
|
-
out.push({ path: full, episode: parseInt(m[1], 10) });
|
|
1117
|
+
/**
 * `direct override <unit> --from <file>` — inject a hand-written extraction
 * for a unit (episode `ep_NNN` or batch `bat_NNNN`) in place of a provider
 * result.
 *
 * The override is written to the same result path as a provider result and
 * stamped with the unit hash computed from the current plan, provider and
 * model, with provenance "override". Any error sidecar for the unit is
 * removed.
 *
 * @param {object} opts Parsed CLI options. Reads `workspace_path`, `_args[0]`
 *   (the unit key), `from`, and optionally `provider` / `model` (falling back
 *   to the stored run state, then to the defaults).
 * @returns {[object, number]} `[report, EXIT_OK]` on success.
 * @throws {CliError} When the unit key is malformed, `--from` is missing or
 *   not valid JSON, the plan or `source.txt` is missing, the unit is not in
 *   the plan, the episode number does not match, or validation fails.
 */
export function commandOverride(opts) {
    // Render any thrown value as text. A non-Error throw (null, string, plain
    // object) must still surface as a structured CliError instead of crashing
    // on `.message`.
    const describeError = (exc) => String(exc?.message ?? exc);
    const workspace = strOf(opts["workspace_path"] || "workspace");
    const unit = strOf(asList(opts["_args"])[0]).trim();
    const fromPath = strOf(opts["from"]).trim();
    const dd = directDir(workspace);
    const state = readRunState(workspace);
    const providerName = strOf(opts["provider"] || state["provider"] || DEFAULT_PROVIDER);
    const model = strOf(opts["model"] || state["model"] || DEFAULT_MODEL);
    const isEpisode = /^ep_\d+$/.test(unit);
    if (!isEpisode && !/^bat_\d+$/.test(unit)) {
        throw new CliError("OVERRIDE BLOCKED: Invalid unit", "Invalid unit key.", {
            exitCode: EXIT_USAGE,
            required: ["<unit>: ep_NNN or bat_NNNN"],
            received: [`<unit>: ${unit || "<empty>"}`],
            nextSteps: ["Pass an episode (ep_007) or batch (bat_0012) key shown by direct status."],
        });
    }
    if (!fromPath || !exists(fromPath)) {
        throw new CliError("OVERRIDE BLOCKED: --from not found", "Override source file not found.", {
            exitCode: EXIT_INPUT,
            required: ["--from <path>: readable JSON extraction for the unit"],
            received: [`--from: ${fromPath || "<missing>"}`],
            nextSteps: ["Provide a JSON file with scenes/actions for the unit."],
        });
    }
    const planName = isEpisode ? "episode_plan.json" : "batch_plan.json";
    const planPath = path.join(dd, planName);
    if (!exists(planPath)) {
        throw new CliError("OVERRIDE BLOCKED: Plan not found", "Plan not found.", {
            exitCode: EXIT_INPUT,
            required: [planName],
            received: [planPath],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const plan = readJson(planPath);
    const planUnits = asList(plan[isEpisode ? "episodes" : "batches"]);
    const planItem = planUnits.find((u) => (isEpisode ? episodeResultKey(u) : batchResultKey(u)) === unit) ?? null;
    if (!planItem) {
        throw new CliError("OVERRIDE BLOCKED: Unit not in current plan", "Unit not in current plan.", {
            exitCode: EXIT_INPUT,
            required: [`${unit} present in ${planName}`],
            received: [`${unit}: not found among ${planUnits.length} units`],
            nextSteps: ["Use a unit key from direct status; rerun init if the plan changed."],
        });
    }
    const sourceTextPath = path.join(workspace, "source.txt");
    if (!exists(sourceTextPath)) {
        throw new CliError("OVERRIDE BLOCKED: source.txt missing", "source.txt missing.", {
            exitCode: EXIT_INPUT,
            required: [sourceTextPath],
            received: ["<missing>"],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const sourceText = readText(sourceTextPath);
    let data;
    try {
        data = readJson(fromPath);
    }
    catch (exc) {
        throw new CliError("OVERRIDE BLOCKED: --from invalid JSON", "Override JSON invalid.", {
            exitCode: EXIT_INPUT,
            required: ["valid extraction JSON"],
            received: [`${fromPath}: ${describeError(exc)}`],
            nextSteps: ["Fix the JSON and retry."],
        });
    }
    const result = normalizeEpisodeResult(data, planItem);
    if (Number(result["episode"]) !== Number(planItem["episode"])) {
        throw new CliError("OVERRIDE BLOCKED: Episode mismatch", "Episode mismatch.", {
            exitCode: EXIT_USAGE,
            required: [`episode ${Number(planItem["episode"])}`],
            received: [`episode ${Number(result["episode"])}`],
            nextSteps: ["Provide an extraction for the correct episode."],
        });
    }
    try {
        if (isEpisode)
            validateEpisodeExtractionQuality(sourceText, planItem, result);
        else
            validateBatchExtractionQuality(sourceText, planItem, result);
    }
    catch (exc) {
        // Structured errors from the validators pass through untouched.
        if (exc instanceof CliError)
            throw exc;
        throw new CliError("OVERRIDE BLOCKED: Extraction invalid", "Extraction invalid.", {
            exitCode: EXIT_USAGE,
            required: ["valid action types (dialogue/inner_thought/action)"],
            received: [describeError(exc).slice(0, 160)],
            nextSteps: ["Fix the override extraction and retry."],
        });
    }
    const dir = path.join(dd, isEpisode ? "episode_results" : "batch_results");
    fs.mkdirSync(dir, { recursive: true });
    // Stamp the override with the unit hash computed from the current plan,
    // provider and model.
    const hash = computeUnitHash(sourceText, planItem, providerName, model);
    if (isEpisode) {
        writeJson(episodeResultPath(dir, planItem), compactEpisodeResult(result));
        stampEpisodeMeta(dir, planItem, hash, "override", providerName, model);
        const errPath = episodeErrorPath(dir, planItem);
        if (exists(errPath))
            deletePath(errPath);
    }
    else {
        persistBatchResult(dir, planItem, result);
        stampBatchMeta(dir, planItem, hash, "override", providerName, model);
        const errPath = batchErrorPath(dir, planItem);
        if (exists(errPath))
            deletePath(errPath);
    }
    const report = {
        title: "OVERRIDE COMPLETE: Unit extraction injected",
        result: [
            `unit: ${unit}`,
            `kind: ${isEpisode ? "episode" : "batch"}`,
            `provenance: override`,
            `provider/model: ${providerName} / ${model}`,
            `scenes: ${asList(result["scenes"]).length}`,
        ],
        artifacts: [dir, path.join(dd, "run_state.json")],
        next: ["Rerun scriptctl direct init — the override is reused without re-calling the provider."],
    };
    return [report, EXIT_OK];
}
|
|
1240
|
+
// ---------------------------------------------------------------------------
|
|
1241
|
+
// command_status — rebuild the progress view from on-disk meta/error sidecars.
|
|
1242
|
+
// run_state is just a cache of this; deleting it loses nothing.
|
|
1243
|
+
// ---------------------------------------------------------------------------
|
|
1244
|
+
/**
 * `direct status` — rebuild the progress view from the on-disk meta sidecars
 * of every planned batch and episode.
 *
 * A batch with no meta counts as pending, one whose meta status is
 * "terminal" counts as terminal, and any other meta counts as ok (with
 * override/recovered provenance tallied separately). An episode is complete
 * when its meta status is "ok". An episode is held out when it has at least
 * one terminal batch and no completed episode result.
 *
 * @param {object} opts Parsed CLI options. Reads `workspace_path`.
 * @returns {[object, number]} `[report, EXIT_OK]`.
 * @throws {CliError} When `episode_plan.json` or `batch_plan.json` is missing.
 */
export function commandStatus(opts) {
    const workspace = strOf(opts["workspace_path"] || "workspace");
    const dd = directDir(workspace);
    const episodePlanPath = path.join(dd, "episode_plan.json");
    const batchPlanPath = path.join(dd, "batch_plan.json");
    const hasEpisodePlan = exists(episodePlanPath);
    const hasBatchPlan = exists(batchPlanPath);
    if (!hasEpisodePlan || !hasBatchPlan) {
        throw new CliError("STATUS BLOCKED: Plan not found", "Plan not found.", {
            exitCode: EXIT_INPUT,
            required: ["episode_plan.json and batch_plan.json"],
            // Report both files so the operator can see which one is absent.
            received: [
                hasEpisodePlan ? "episode_plan.json ok" : "episode_plan.json missing",
                hasBatchPlan ? "batch_plan.json ok" : "batch_plan.json missing",
            ],
            nextSteps: ["Run scriptctl direct init first."],
        });
    }
    const episodes = asList(readJson(episodePlanPath)["episodes"]);
    const batches = asList(readJson(batchPlanPath)["batches"]);
    const episodeResultsDir = path.join(dd, "episode_results");
    const batchResultsDir = path.join(dd, "batch_results");
    // Read every batch sidecar exactly once; both the tally and the held-out
    // classification below work from this snapshot.
    const batchMetas = batches.map((batch) => readUnitMeta(batchMetaPath(batchResultsDir, batch)));
    const count = { ok: 0, override: 0, recovered: 0, terminal: 0, missing: 0 };
    for (const meta of batchMetas) {
        if (!meta) {
            count.missing++;
            continue;
        }
        if (meta["status"] === "terminal") {
            count.terminal++;
            continue;
        }
        count.ok++;
        if (meta["provenance"] === "override")
            count.override++;
        else if (meta["provenance"] === "recovered")
            count.recovered++;
    }
    const completedEpisodes = [];
    for (const ep of episodes) {
        const meta = readUnitMeta(episodeMetaPath(episodeResultsDir, ep));
        if (meta && meta["status"] === "ok")
            completedEpisodes.push(Number(ep["episode"]));
    }
    // Held out = episodes with at least one terminal batch and no episode result.
    const completedSet = new Set(completedEpisodes);
    const heldOut = new Set();
    batches.forEach((batch, index) => {
        const meta = batchMetas[index];
        const epNum = Number(batch["episode"]);
        if (meta && meta["status"] === "terminal" && !completedSet.has(epNum))
            heldOut.add(epNum);
    });
    const heldOutEpisodes = [...heldOut].sort((a, b) => a - b);
    const report = {
        title: "DIRECT STATUS",
        result: [
            `episodes: ${completedEpisodes.length}/${episodes.length} complete`,
            `batches: ${count.ok}/${batches.length} ok (override ${count.override}, recovered ${count.recovered})`,
            `terminal batches: ${count.terminal}`,
            `pending batches: ${count.missing}`,
            `held out episodes: ${heldOutEpisodes.length === 0 ? "-" : heldOutEpisodes.join(", ")}`,
        ],
        artifacts: [batchResultsDir, episodeResultsDir, path.join(dd, "run_state.json")],
        next: heldOutEpisodes.length > 0
            // Quote the real completed/total episode counts for this workspace.
            ? [`Override held-out episodes with direct override, or export ${completedEpisodes.length}/${episodes.length} with direct export --allow-incomplete.`]
            : ["All units accounted for."],
    };
    return [report, EXIT_OK];
}
|
|
1310
|
+
/**
 * Condense a list of validation issues into two report lines.
 *
 * @param {Array<object>} issues Issue records; reads `severity`, `code` and
 *   `summary` from each.
 * @returns {string[]} `[]` for an empty list, otherwise
 *   `["<sev>: <n>; ...", "first: <code> - <summary>"]`, with severities
 *   ordered by `localeCompare`.
 */
export function summarizeIssues(issues) {
    if (issues.length === 0) {
        return [];
    }
    // Tally issues per severity label.
    const tally = {};
    for (const issue of issues) {
        const severity = strOf(issue["severity"]);
        const seen = tally[severity] ?? 0;
        tally[severity] = seen + 1;
    }
    const breakdown = Object.keys(tally)
        .sort((left, right) => left.localeCompare(right))
        .map((severity) => `${severity}: ${tally[severity]}`)
        .join("; ");
    // The first issue is quoted verbatim as the headline example.
    const { code, summary } = issues[0];
    return [breakdown, `first: ${code} - ${summary}`];
}
|
|
1472
1322
|
// ---------------------------------------------------------------------------
|
|
1473
1323
|
// command_validate
|