@lingjingai/scriptctl 0.9.6 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/usecases/direct.js +680 -553
- package/dist/usecases/direct.js.map +1 -1
- package/package.json +1 -1
package/dist/usecases/direct.js
CHANGED
|
@@ -626,6 +626,12 @@ async function pMapWithConcurrency(items, concurrency, worker) {
|
|
|
626
626
|
// command_init
|
|
627
627
|
// ---------------------------------------------------------------------------
|
|
628
628
|
export async function commandInit(opts) {
|
|
629
|
+
const ctx = makeInitCtx(validateInitOpts(opts));
|
|
630
|
+
fs.mkdirSync(ctx.dd, { recursive: true });
|
|
631
|
+
ctx.previousState = readRunState(ctx.workspace);
|
|
632
|
+
return runInitPipeline(INIT_STEPS, ctx);
|
|
633
|
+
}
|
|
634
|
+
function validateInitOpts(opts) {
|
|
629
635
|
const sourcePathArg = strOf(opts["source_path"]);
|
|
630
636
|
const source = sourcePathArg.startsWith("~")
|
|
631
637
|
? path.join(process.env.HOME ?? "", sourcePathArg.slice(1))
|
|
@@ -715,577 +721,698 @@ export async function commandInit(opts) {
|
|
|
715
721
|
nextSteps: ["Use a supported source file and rerun init."],
|
|
716
722
|
});
|
|
717
723
|
}
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
724
|
+
return { source, workspace, providerName, model, concurrency, batchMode, batchTargetLines, batchMaxChars, batchMinLines };
|
|
725
|
+
}
|
|
726
|
+
function makeInitCtx(inputs) {
|
|
727
|
+
return {
|
|
728
|
+
...inputs,
|
|
729
|
+
dd: directDir(inputs.workspace),
|
|
730
|
+
previousState: {},
|
|
731
|
+
info: {},
|
|
732
|
+
sourceText: "",
|
|
733
|
+
manifest: {},
|
|
734
|
+
plan: {},
|
|
735
|
+
provider: undefined,
|
|
736
|
+
batchPlan: {},
|
|
737
|
+
checkpoint: {},
|
|
738
|
+
batchCheckpoint: {},
|
|
739
|
+
checkpointReused: false,
|
|
740
|
+
batchCheckpointReused: false,
|
|
741
|
+
episodeResultsDir: "",
|
|
742
|
+
batchResultsDir: "",
|
|
743
|
+
results: [],
|
|
744
|
+
skipped: [],
|
|
745
|
+
skippedEpisodeBatchCount: 0,
|
|
746
|
+
skippedBatches: [],
|
|
747
|
+
batchResults: [],
|
|
748
|
+
completedBatches: 0,
|
|
749
|
+
script: {},
|
|
750
|
+
};
|
|
751
|
+
}
|
|
752
|
+
async function runInitPipeline(steps, ctx) {
|
|
753
|
+
for (const step of steps) {
|
|
754
|
+
if (step.enter) {
|
|
755
|
+
const updates = step.enter(ctx);
|
|
756
|
+
if (updates)
|
|
757
|
+
updateRunState(ctx.workspace, updates);
|
|
758
|
+
}
|
|
759
|
+
let result;
|
|
760
|
+
try {
|
|
761
|
+
result = await step.run(ctx);
|
|
762
|
+
}
|
|
763
|
+
catch (exc) {
|
|
764
|
+
if (step.onError)
|
|
765
|
+
step.onError(ctx, exc);
|
|
741
766
|
throw exc;
|
|
742
767
|
}
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
updateRunState(workspace, {
|
|
746
|
-
status: "init_failed",
|
|
747
|
-
init_stage: "source_prepare",
|
|
748
|
-
last_error: { title: "INIT BLOCKED: Source preparation failed", received: [receivedError], failed_at: checkpointTimestamp() },
|
|
749
|
-
});
|
|
750
|
-
throw new CliError("INIT BLOCKED: Source preparation failed", "Source preparation failed.", {
|
|
751
|
-
exitCode: EXIT_INPUT,
|
|
752
|
-
required: ["readable source file that can be converted to source.txt"],
|
|
753
|
-
received: [receivedError],
|
|
754
|
-
nextSteps: ["Fix or re-export the source file, then rerun init."],
|
|
755
|
-
});
|
|
768
|
+
if (result.kind === "halt")
|
|
769
|
+
return [result.report, result.exitCode];
|
|
756
770
|
}
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
771
|
+
// The validate step always halts; reaching here means the step list is malformed.
|
|
772
|
+
throw new Error("init pipeline ended without halting");
|
|
773
|
+
}
|
|
774
|
+
function groupResultsByEpisode(batchResults) {
|
|
775
|
+
const byEpisode = new Map();
|
|
776
|
+
for (const result of batchResults) {
|
|
777
|
+
const ep = Number(result["episode"] ?? 0);
|
|
778
|
+
if (!byEpisode.has(ep))
|
|
779
|
+
byEpisode.set(ep, []);
|
|
780
|
+
byEpisode.get(ep).push(result);
|
|
781
|
+
}
|
|
782
|
+
return byEpisode;
|
|
783
|
+
}
|
|
784
|
+
// Merge one episode's batch results, validate, and persist its episode_results
|
|
785
|
+
// file + index metadata. Shared by the success path (episode_merge) and the
|
|
786
|
+
// failure path's partial merge so the two stay in lockstep.
|
|
787
|
+
function mergeEpisodeAndPersist(sourceText, episodeResultsDir, episode, batchResultsForEpisode, providerName, model) {
|
|
788
|
+
const result = mergeBatchResultsForEpisode(episode, batchResultsForEpisode);
|
|
789
|
+
validateEpisodeExtractionQuality(sourceText, episode, result);
|
|
790
|
+
writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
|
|
791
|
+
updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
|
|
792
|
+
return result;
|
|
793
|
+
}
|
|
794
|
+
const INIT_STEPS = [
|
|
795
|
+
{
|
|
796
|
+
stage: "source_prepare",
|
|
797
|
+
enter: (ctx) => ({
|
|
798
|
+
status: "init_running",
|
|
799
|
+
command: "direct init",
|
|
800
|
+
init_stage: "source_prepare",
|
|
801
|
+
provider: ctx.providerName,
|
|
802
|
+
model: ctx.model,
|
|
803
|
+
concurrency: ctx.concurrency,
|
|
804
|
+
source_path: path.resolve(ctx.source),
|
|
805
|
+
}),
|
|
806
|
+
// Handles errors inline: distinguishes CliError vs raw error and writes a
|
|
807
|
+
// bespoke last_error.received before failing, so it omits onError.
|
|
808
|
+
run: async (ctx) => {
|
|
809
|
+
let info;
|
|
810
|
+
try {
|
|
811
|
+
info = await prepareSource(ctx.source, ctx.workspace);
|
|
812
|
+
}
|
|
813
|
+
catch (exc) {
|
|
814
|
+
if (exc instanceof CliError) {
|
|
815
|
+
updateRunState(ctx.workspace, {
|
|
816
|
+
status: "init_failed",
|
|
817
|
+
init_stage: "source_prepare",
|
|
818
|
+
last_error: { title: exc.title, received: exc.received, failed_at: checkpointTimestamp() },
|
|
819
|
+
});
|
|
820
|
+
throw exc;
|
|
821
|
+
}
|
|
822
|
+
const e = exc;
|
|
823
|
+
const receivedError = `${ctx.source}: ${e?.name ?? "Error"}${e?.message ? `: ${e.message}` : ""}`;
|
|
824
|
+
updateRunState(ctx.workspace, {
|
|
825
|
+
status: "init_failed",
|
|
826
|
+
init_stage: "source_prepare",
|
|
827
|
+
last_error: { title: "INIT BLOCKED: Source preparation failed", received: [receivedError], failed_at: checkpointTimestamp() },
|
|
828
|
+
});
|
|
829
|
+
throw new CliError("INIT BLOCKED: Source preparation failed", "Source preparation failed.", {
|
|
830
|
+
exitCode: EXIT_INPUT,
|
|
831
|
+
required: ["readable source file that can be converted to source.txt"],
|
|
832
|
+
received: [receivedError],
|
|
833
|
+
nextSteps: ["Fix or re-export the source file, then rerun init."],
|
|
834
|
+
});
|
|
835
|
+
}
|
|
836
|
+
ctx.info = info;
|
|
837
|
+
const sourceTextPath = strOf(info["sourceTextPath"]);
|
|
838
|
+
ctx.sourceText = readText(sourceTextPath);
|
|
839
|
+
ctx.manifest = makeSourceManifest(ctx.source, sourceTextPath, info);
|
|
840
|
+
return { kind: "continue" };
|
|
841
|
+
},
|
|
842
|
+
},
|
|
843
|
+
{
|
|
844
|
+
stage: "episode_plan",
|
|
845
|
+
enter: () => ({ status: "init_running", init_stage: "episode_plan" }),
|
|
846
|
+
run: (ctx) => {
|
|
847
|
+
ctx.plan = buildEpisodePlan(ctx.sourceText);
|
|
848
|
+
return { kind: "continue" };
|
|
849
|
+
},
|
|
850
|
+
onError: (ctx, exc) => {
|
|
851
|
+
const e = exc;
|
|
852
|
+
throw initFailedReport(ctx.workspace, {
|
|
853
|
+
title: "INIT FAILED: Episode planning failed",
|
|
854
|
+
stage: "episode_plan",
|
|
855
|
+
required: ["source.txt that can be split into episodes"],
|
|
856
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
857
|
+
nextSteps: ["Inspect workspace/source.txt, fix the source file, and rerun init."],
|
|
786
858
|
});
|
|
787
|
-
}
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
859
|
+
},
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
stage: "provider",
|
|
863
|
+
enter: () => ({ status: "init_running", init_stage: "provider" }),
|
|
864
|
+
// Handles errors inline: writes init_failed run_state only for CliError,
|
|
865
|
+
// then rethrows the original error unchanged, so it omits onError.
|
|
866
|
+
run: (ctx) => {
|
|
867
|
+
try {
|
|
868
|
+
ctx.provider = makeProvider(ctx.providerName, ctx.model);
|
|
869
|
+
}
|
|
870
|
+
catch (exc) {
|
|
871
|
+
if (exc instanceof CliError) {
|
|
872
|
+
updateRunState(ctx.workspace, {
|
|
873
|
+
status: "init_failed",
|
|
874
|
+
init_stage: "provider",
|
|
875
|
+
last_error: { title: exc.title, received: exc.received, failed_at: checkpointTimestamp() },
|
|
876
|
+
});
|
|
877
|
+
}
|
|
878
|
+
throw exc;
|
|
879
|
+
}
|
|
880
|
+
return { kind: "continue" };
|
|
881
|
+
},
|
|
882
|
+
},
|
|
883
|
+
{
|
|
884
|
+
stage: "episode_titles",
|
|
885
|
+
enter: () => ({ status: "init_running", init_stage: "episode_titles" }),
|
|
886
|
+
run: async (ctx) => {
|
|
887
|
+
ctx.plan = await enrichEpisodePlanTitles(ctx.sourceText, ctx.plan, ctx.provider);
|
|
888
|
+
return { kind: "continue" };
|
|
889
|
+
},
|
|
890
|
+
onError: (ctx, exc) => {
|
|
891
|
+
if (exc instanceof CliError) {
|
|
892
|
+
throw initFailedReport(ctx.workspace, {
|
|
893
|
+
title: exc.title,
|
|
894
|
+
stage: "episode_titles",
|
|
895
|
+
exitCode: exc.exitCode,
|
|
896
|
+
required: exc.required.length > 0 ? exc.required : ["episode titles generated from source text"],
|
|
897
|
+
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
898
|
+
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init after checking source episode headers."],
|
|
899
|
+
});
|
|
900
|
+
}
|
|
901
|
+
const e = exc;
|
|
902
|
+
throw initFailedReport(ctx.workspace, {
|
|
903
|
+
title: "INIT FAILED: Episode title planning failed",
|
|
798
904
|
stage: "episode_titles",
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init after checking source episode headers."],
|
|
905
|
+
required: ["episode titles generated from source text"],
|
|
906
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
907
|
+
nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
|
|
803
908
|
});
|
|
804
|
-
}
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
const
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
909
|
+
},
|
|
910
|
+
},
|
|
911
|
+
{
|
|
912
|
+
stage: "batch_plan",
|
|
913
|
+
run: (ctx) => {
|
|
914
|
+
ctx.batchPlan = buildBatchPlan(ctx.sourceText, ctx.plan, {
|
|
915
|
+
targetLines: ctx.batchTargetLines,
|
|
916
|
+
maxChars: ctx.batchMaxChars,
|
|
917
|
+
minLines: ctx.batchMinLines,
|
|
918
|
+
mode: ctx.batchMode,
|
|
919
|
+
});
|
|
920
|
+
return { kind: "continue" };
|
|
921
|
+
},
|
|
922
|
+
onError: (ctx, exc) => {
|
|
923
|
+
const e = exc;
|
|
924
|
+
throw initFailedReport(ctx.workspace, {
|
|
925
|
+
title: "INIT FAILED: Batch planning failed",
|
|
926
|
+
stage: "batch_plan",
|
|
927
|
+
required: ["episode_plan.json that can be split into natural paragraph batches"],
|
|
928
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
929
|
+
nextSteps: ["Inspect workspace/source.txt and episode_plan.json, then rerun init."],
|
|
930
|
+
});
|
|
931
|
+
},
|
|
932
|
+
},
|
|
933
|
+
{
|
|
934
|
+
stage: "checkpoint_setup",
|
|
935
|
+
run: (ctx) => {
|
|
936
|
+
const checkpoint = initCheckpoint(ctx.sourceText, ctx.plan);
|
|
937
|
+
const batchCheckpoint = initBatchCheckpoint(ctx.sourceText, ctx.batchPlan);
|
|
938
|
+
const previousState = ctx.previousState;
|
|
939
|
+
const previousCheckpoint = isDict(previousState["checkpoint"]) ? previousState["checkpoint"] : {};
|
|
940
|
+
const previousBatchCheckpoint = isDict(previousState["batch_checkpoint"]) ? previousState["batch_checkpoint"] : {};
|
|
941
|
+
const checkpointReused = checkpointSourceMatches(previousCheckpoint, checkpoint);
|
|
942
|
+
const batchCheckpointReused = checkpointReused && batchCheckpointMatches(previousBatchCheckpoint, batchCheckpoint);
|
|
943
|
+
if (!checkpointReused)
|
|
944
|
+
resetInitOutputs(ctx.dd);
|
|
945
|
+
else if (!batchCheckpointReused)
|
|
946
|
+
resetBatchOutputs(ctx.dd);
|
|
947
|
+
writeJson(path.join(ctx.dd, "source_manifest.json"), ctx.manifest);
|
|
948
|
+
writeJson(path.join(ctx.dd, "episode_plan.json"), ctx.plan);
|
|
949
|
+
writeJson(path.join(ctx.dd, "batch_plan.json"), ctx.batchPlan);
|
|
950
|
+
const episodeResultsDir = path.join(ctx.dd, "episode_results");
|
|
951
|
+
const batchResultsDir = path.join(ctx.dd, "batch_results");
|
|
952
|
+
fs.mkdirSync(episodeResultsDir, { recursive: true });
|
|
953
|
+
fs.mkdirSync(batchResultsDir, { recursive: true });
|
|
954
|
+
ctx.checkpoint = checkpoint;
|
|
955
|
+
ctx.batchCheckpoint = batchCheckpoint;
|
|
956
|
+
ctx.checkpointReused = checkpointReused;
|
|
957
|
+
ctx.batchCheckpointReused = batchCheckpointReused;
|
|
958
|
+
ctx.episodeResultsDir = episodeResultsDir;
|
|
959
|
+
ctx.batchResultsDir = batchResultsDir;
|
|
960
|
+
updateRunState(ctx.workspace, {
|
|
961
|
+
status: "init_running",
|
|
962
|
+
init_stage: "batch_extract",
|
|
963
|
+
checkpoint,
|
|
964
|
+
batch_checkpoint: batchCheckpoint,
|
|
965
|
+
checkpoint_reused: checkpointReused,
|
|
966
|
+
batch_checkpoint_reused: batchCheckpointReused,
|
|
967
|
+
batch_mode: ctx.batchMode,
|
|
968
|
+
batch_target_lines: ctx.batchTargetLines,
|
|
969
|
+
batch_max_chars: ctx.batchMaxChars,
|
|
970
|
+
batch_min_lines: ctx.batchMinLines,
|
|
971
|
+
episode_total: asList(ctx.plan["episodes"]).length,
|
|
972
|
+
batch_total: asList(ctx.batchPlan["batches"]).length,
|
|
973
|
+
});
|
|
974
|
+
return { kind: "continue" };
|
|
975
|
+
},
|
|
976
|
+
},
|
|
977
|
+
{
|
|
978
|
+
stage: "extract_episodes",
|
|
979
|
+
run: async (ctx) => {
|
|
980
|
+
const pendingBatches = [];
|
|
981
|
+
const batchesByEpisode = new Map();
|
|
982
|
+
for (const batch of asList(ctx.batchPlan["batches"])) {
|
|
983
|
+
const epNum = Number(batch["episode"]);
|
|
984
|
+
if (!batchesByEpisode.has(epNum))
|
|
985
|
+
batchesByEpisode.set(epNum, []);
|
|
986
|
+
batchesByEpisode.get(epNum).push(batch);
|
|
987
|
+
}
|
|
988
|
+
const previousProvider = strOf(ctx.previousState["provider"]).trim() || null;
|
|
989
|
+
for (const episode of asList(ctx.plan["episodes"])) {
|
|
990
|
+
const cached = ctx.checkpointReused
|
|
991
|
+
? loadCheckpointedEpisode(ctx.sourceText, ctx.episodeResultsDir, episode, ctx.providerName, ctx.model, previousProvider)
|
|
992
|
+
: null;
|
|
993
|
+
if (cached !== null) {
|
|
994
|
+
ctx.results.push(cached);
|
|
995
|
+
ctx.skipped.push(Number(episode["episode"]));
|
|
996
|
+
const cachedBatches = batchesByEpisode.get(Number(episode["episode"])) ?? [];
|
|
997
|
+
ctx.skippedEpisodeBatchCount += cachedBatches.length;
|
|
998
|
+
for (const cachedBatch of cachedBatches) {
|
|
999
|
+
if (!exists(batchResultPath(ctx.batchResultsDir, cachedBatch))) {
|
|
1000
|
+
const backfilled = recoverBatchFromSource(ctx.sourceText, cachedBatch);
|
|
1001
|
+
persistBatchResult(ctx.batchResultsDir, cachedBatch, backfilled);
|
|
1002
|
+
updateBatchResultMetadata(ctx.batchResultsDir, cachedBatch, ctx.providerName, ctx.model);
|
|
1003
|
+
}
|
|
1004
|
+
const errorPath = batchErrorPath(ctx.batchResultsDir, cachedBatch);
|
|
1005
|
+
if (exists(errorPath))
|
|
1006
|
+
deletePath(errorPath);
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
else {
|
|
1010
|
+
pendingBatches.push(...(batchesByEpisode.get(Number(episode["episode"])) ?? []));
|
|
891
1011
|
}
|
|
892
|
-
const errorPath = batchErrorPath(batchResultsDir, cachedBatch);
|
|
893
|
-
if (exists(errorPath))
|
|
894
|
-
deletePath(errorPath);
|
|
895
1012
|
}
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
cachedBatch["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
912
|
-
batchResults.push(cachedBatch);
|
|
913
|
-
skippedBatches.push(batchResultKey(batch));
|
|
914
|
-
}
|
|
915
|
-
else {
|
|
916
|
-
pending.push(batch);
|
|
917
|
-
}
|
|
918
|
-
}
|
|
919
|
-
const failures = [];
|
|
920
|
-
const outcomes = await pMapWithConcurrency(pending, concurrency, async (batch) => {
|
|
921
|
-
return await extractBatchWithRecovery(provider, sourceText, batch);
|
|
922
|
-
});
|
|
923
|
-
for (let i = 0; i < outcomes.length; i++) {
|
|
924
|
-
const outcome = outcomes[i];
|
|
925
|
-
const batch = pending[i];
|
|
926
|
-
const errorPath = batchErrorPath(batchResultsDir, batch);
|
|
927
|
-
if (outcome.ok) {
|
|
928
|
-
const result = outcome.value;
|
|
929
|
-
result["_batch_id"] = batchResultKey(batch);
|
|
930
|
-
result["_batch_part"] = Number(batch["part"]);
|
|
931
|
-
result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
932
|
-
batchResults.push(result);
|
|
933
|
-
persistBatchResult(batchResultsDir, batch, result);
|
|
934
|
-
updateBatchResultMetadata(batchResultsDir, batch, providerName, model);
|
|
935
|
-
if (exists(errorPath))
|
|
936
|
-
deletePath(errorPath);
|
|
937
|
-
}
|
|
938
|
-
else {
|
|
939
|
-
failures.push(writeBatchFailure(batchResultsDir, batch, outcome.error));
|
|
940
|
-
}
|
|
941
|
-
}
|
|
942
|
-
results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
943
|
-
batchResults.sort((a, b) => {
|
|
944
|
-
const ea = Number(a["episode"] ?? 0);
|
|
945
|
-
const eb = Number(b["episode"] ?? 0);
|
|
946
|
-
if (ea !== eb)
|
|
947
|
-
return ea - eb;
|
|
948
|
-
return Number(a["_batch_part"] ?? 0) - Number(b["_batch_part"] ?? 0);
|
|
949
|
-
});
|
|
950
|
-
failures.sort((a, b) => {
|
|
951
|
-
const ea = Number(a["episode"] ?? 0);
|
|
952
|
-
const eb = Number(b["episode"] ?? 0);
|
|
953
|
-
if (ea !== eb)
|
|
954
|
-
return ea - eb;
|
|
955
|
-
return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
|
|
956
|
-
});
|
|
957
|
-
const completedBatches = skippedEpisodeBatchCount + batchResults.length;
|
|
958
|
-
if (failures.length > 0) {
|
|
959
|
-
const failedEpisodes = [...new Set(failures.map((it) => Number(it["episode"])))].sort((a, b) => a - b);
|
|
960
|
-
const failedBatches = failures.map((it) => strOf(it["batch_id"]));
|
|
961
|
-
const currentFailureSignature = failureSignature(failedBatches);
|
|
962
|
-
const previousFailureSignature = failureSignature(previousState["failed_batches"]);
|
|
963
|
-
const sameFailuresRepeated = checkpointReused &&
|
|
964
|
-
batchCheckpointReused &&
|
|
965
|
-
currentFailureSignature.length > 0 &&
|
|
966
|
-
currentFailureSignature.length === previousFailureSignature.length &&
|
|
967
|
-
currentFailureSignature.every((v, idx) => v === previousFailureSignature[idx]) &&
|
|
968
|
-
["init_incomplete", "init_stalled"].includes(strOf(previousState["status"]));
|
|
969
|
-
const previousFailureStreak = normalizeInt(previousState["failure_streak"], 0);
|
|
970
|
-
const failureStreak = sameFailuresRepeated ? previousFailureStreak + 1 : 1;
|
|
971
|
-
const failureTitle = sameFailuresRepeated
|
|
972
|
-
? "INIT STALLED: Same batches keep failing"
|
|
973
|
-
: "INIT INCOMPLETE: Batch extraction failed";
|
|
974
|
-
const nextSteps = sameFailuresRepeated
|
|
975
|
-
? [
|
|
976
|
-
"Run direct inspect --target issue to read failed batch details.",
|
|
977
|
-
"Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
|
|
978
|
-
]
|
|
979
|
-
: [
|
|
980
|
-
"Run direct inspect --target issue to review failed batches.",
|
|
981
|
-
"Rerun the same init once if failures look transient; completed checkpoints will be reused.",
|
|
982
|
-
];
|
|
983
|
-
const failedEpisodeSet = new Set(failedEpisodes);
|
|
984
|
-
const skippedSet = new Set(skipped);
|
|
985
|
-
const batchResultsByEpisode = new Map();
|
|
986
|
-
for (const result of batchResults) {
|
|
987
|
-
const ep = Number(result["episode"] ?? 0);
|
|
988
|
-
if (!batchResultsByEpisode.has(ep))
|
|
989
|
-
batchResultsByEpisode.set(ep, []);
|
|
990
|
-
batchResultsByEpisode.get(ep).push(result);
|
|
991
|
-
}
|
|
992
|
-
for (const episode of asList(plan["episodes"])) {
|
|
993
|
-
const episodeNum = Number(episode["episode"]);
|
|
994
|
-
if (skippedSet.has(episodeNum) || failedEpisodeSet.has(episodeNum))
|
|
995
|
-
continue;
|
|
996
|
-
const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
|
|
997
|
-
if (expectedBatches && (batchResultsByEpisode.get(episodeNum) ?? []).length === expectedBatches) {
|
|
998
|
-
const result = mergeBatchResultsForEpisode(episode, batchResultsByEpisode.get(episodeNum) ?? []);
|
|
999
|
-
validateEpisodeExtractionQuality(sourceText, episode, result);
|
|
1000
|
-
results.push(result);
|
|
1001
|
-
writeJson(episodeResultPath(episodeResultsDir, episode), compactEpisodeResult(result));
|
|
1002
|
-
updateEpisodeResultMetadata(episodeResultsDir, episode, providerName, model);
|
|
1013
|
+
const pending = [];
|
|
1014
|
+
for (const batch of pendingBatches) {
|
|
1015
|
+
const cachedBatch = ctx.batchCheckpointReused
|
|
1016
|
+
? loadCheckpointedBatch(ctx.sourceText, ctx.batchResultsDir, batch, ctx.providerName, ctx.model, previousProvider)
|
|
1017
|
+
: null;
|
|
1018
|
+
if (cachedBatch !== null) {
|
|
1019
|
+
cachedBatch["_batch_id"] = batchResultKey(batch);
|
|
1020
|
+
cachedBatch["_batch_part"] = Number(batch["part"]);
|
|
1021
|
+
cachedBatch["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
1022
|
+
ctx.batchResults.push(cachedBatch);
|
|
1023
|
+
ctx.skippedBatches.push(batchResultKey(batch));
|
|
1024
|
+
}
|
|
1025
|
+
else {
|
|
1026
|
+
pending.push(batch);
|
|
1027
|
+
}
|
|
1003
1028
|
}
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1029
|
+
const failures = [];
|
|
1030
|
+
const outcomes = await pMapWithConcurrency(pending, ctx.concurrency, async (batch) => {
|
|
1031
|
+
return await extractBatchWithRecovery(ctx.provider, ctx.sourceText, batch);
|
|
1032
|
+
});
|
|
1033
|
+
for (let i = 0; i < outcomes.length; i++) {
|
|
1034
|
+
const outcome = outcomes[i];
|
|
1035
|
+
const batch = pending[i];
|
|
1036
|
+
const errorPath = batchErrorPath(ctx.batchResultsDir, batch);
|
|
1037
|
+
if (outcome.ok) {
|
|
1038
|
+
const result = outcome.value;
|
|
1039
|
+
result["_batch_id"] = batchResultKey(batch);
|
|
1040
|
+
result["_batch_part"] = Number(batch["part"]);
|
|
1041
|
+
result["_starts_inside_scene"] = Boolean(batch["starts_inside_scene"]);
|
|
1042
|
+
ctx.batchResults.push(result);
|
|
1043
|
+
persistBatchResult(ctx.batchResultsDir, batch, result);
|
|
1044
|
+
updateBatchResultMetadata(ctx.batchResultsDir, batch, ctx.providerName, ctx.model);
|
|
1045
|
+
if (exists(errorPath))
|
|
1046
|
+
deletePath(errorPath);
|
|
1047
|
+
}
|
|
1048
|
+
else {
|
|
1049
|
+
failures.push(writeBatchFailure(ctx.batchResultsDir, batch, outcome.error));
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1052
|
+
ctx.results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
1053
|
+
ctx.batchResults.sort((a, b) => {
|
|
1054
|
+
const ea = Number(a["episode"] ?? 0);
|
|
1055
|
+
const eb = Number(b["episode"] ?? 0);
|
|
1056
|
+
if (ea !== eb)
|
|
1057
|
+
return ea - eb;
|
|
1058
|
+
return Number(a["_batch_part"] ?? 0) - Number(b["_batch_part"] ?? 0);
|
|
1059
|
+
});
|
|
1060
|
+
failures.sort((a, b) => {
|
|
1061
|
+
const ea = Number(a["episode"] ?? 0);
|
|
1062
|
+
const eb = Number(b["episode"] ?? 0);
|
|
1063
|
+
if (ea !== eb)
|
|
1064
|
+
return ea - eb;
|
|
1065
|
+
return Number(a["part"] ?? 0) - Number(b["part"] ?? 0);
|
|
1066
|
+
});
|
|
1067
|
+
const completedBatches = ctx.skippedEpisodeBatchCount + ctx.batchResults.length;
|
|
1068
|
+
ctx.completedBatches = completedBatches;
|
|
1069
|
+
if (failures.length > 0) {
|
|
1070
|
+
const failedEpisodes = [...new Set(failures.map((it) => Number(it["episode"])))].sort((a, b) => a - b);
|
|
1071
|
+
const failedBatches = failures.map((it) => strOf(it["batch_id"]));
|
|
1072
|
+
const currentFailureSignature = failureSignature(failedBatches);
|
|
1073
|
+
const previousFailureSignature = failureSignature(ctx.previousState["failed_batches"]);
|
|
1074
|
+
const sameFailuresRepeated = ctx.checkpointReused &&
|
|
1075
|
+
ctx.batchCheckpointReused &&
|
|
1076
|
+
currentFailureSignature.length > 0 &&
|
|
1077
|
+
currentFailureSignature.length === previousFailureSignature.length &&
|
|
1078
|
+
currentFailureSignature.every((v, idx) => v === previousFailureSignature[idx]) &&
|
|
1079
|
+
["init_incomplete", "init_stalled"].includes(strOf(ctx.previousState["status"]));
|
|
1080
|
+
const previousFailureStreak = normalizeInt(ctx.previousState["failure_streak"], 0);
|
|
1081
|
+
const failureStreak = sameFailuresRepeated ? previousFailureStreak + 1 : 1;
|
|
1082
|
+
const failureTitle = sameFailuresRepeated
|
|
1083
|
+
? "INIT STALLED: Same batches keep failing"
|
|
1084
|
+
: "INIT INCOMPLETE: Batch extraction failed";
|
|
1085
|
+
const nextSteps = sameFailuresRepeated
|
|
1086
|
+
? [
|
|
1087
|
+
"Run direct inspect --target issue to read failed batch details.",
|
|
1088
|
+
"Do not rerun the same init command again until source, batch options, provider, or failed content has changed.",
|
|
1089
|
+
]
|
|
1090
|
+
: [
|
|
1091
|
+
"Run direct inspect --target issue to review failed batches.",
|
|
1092
|
+
"Rerun the same init once if failures look transient; completed checkpoints will be reused.",
|
|
1093
|
+
];
|
|
1094
|
+
const failedEpisodeSet = new Set(failedEpisodes);
|
|
1095
|
+
const skippedSet = new Set(ctx.skipped);
|
|
1096
|
+
const batchResultsByEpisode = groupResultsByEpisode(ctx.batchResults);
|
|
1097
|
+
for (const episode of asList(ctx.plan["episodes"])) {
|
|
1098
|
+
const episodeNum = Number(episode["episode"]);
|
|
1099
|
+
if (skippedSet.has(episodeNum) || failedEpisodeSet.has(episodeNum))
|
|
1100
|
+
continue;
|
|
1101
|
+
const expectedBatches = (batchesByEpisode.get(episodeNum) ?? []).length;
|
|
1102
|
+
if (expectedBatches && (batchResultsByEpisode.get(episodeNum) ?? []).length === expectedBatches) {
|
|
1103
|
+
ctx.results.push(mergeEpisodeAndPersist(ctx.sourceText, ctx.episodeResultsDir, episode, batchResultsByEpisode.get(episodeNum) ?? [], ctx.providerName, ctx.model));
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
updateRunState(ctx.workspace, {
|
|
1107
|
+
status: sameFailuresRepeated ? "init_stalled" : "init_incomplete",
|
|
1108
|
+
init_stage: "batch_extract",
|
|
1109
|
+
checkpoint: ctx.checkpoint,
|
|
1110
|
+
batch_checkpoint: ctx.batchCheckpoint,
|
|
1111
|
+
episode_total: asList(ctx.plan["episodes"]).length,
|
|
1112
|
+
episode_completed: ctx.results.length,
|
|
1113
|
+
episode_reused: ctx.skipped.length,
|
|
1114
|
+
episode_failed: failedEpisodes.length,
|
|
1115
|
+
failed_episodes: failedEpisodes,
|
|
1116
|
+
batch_total: asList(ctx.batchPlan["batches"]).length,
|
|
1117
|
+
batch_completed: completedBatches,
|
|
1118
|
+
batch_reused: ctx.skippedEpisodeBatchCount + ctx.skippedBatches.length,
|
|
1119
|
+
batch_failed: failures.length,
|
|
1120
|
+
failed_batches: failedBatches,
|
|
1121
|
+
failure_signature: currentFailureSignature,
|
|
1122
|
+
failure_streak: failureStreak,
|
|
1123
|
+
last_error: { title: failureTitle, failed_at: checkpointTimestamp() },
|
|
1124
|
+
exportable: false,
|
|
1125
|
+
});
|
|
1126
|
+
const issues = failures.slice(0, 5).map((it) => `${it["batch_id"]} episode ${it["episode"]} part ${it["part"]}: ${it["error_type"]} - ${it["message"]}`);
|
|
1127
|
+
const report = {
|
|
1128
|
+
title: failureTitle,
|
|
1129
|
+
result: [
|
|
1130
|
+
`episodes total: ${asList(ctx.plan["episodes"]).length}`,
|
|
1131
|
+
`completed: ${ctx.results.length}`,
|
|
1132
|
+
`reused: ${ctx.skipped.length}`,
|
|
1133
|
+
`failed episodes: ${failedEpisodes.length}`,
|
|
1134
|
+
`batches: ${completedBatches}/${asList(ctx.batchPlan["batches"]).length} completed, ${failures.length} failed`,
|
|
1135
|
+
`provider: ${ctx.providerName}`,
|
|
1136
|
+
],
|
|
1137
|
+
artifacts: [
|
|
1138
|
+
path.join(ctx.workspace, "source.txt"),
|
|
1139
|
+
path.join(ctx.dd, "source_manifest.json"),
|
|
1140
|
+
path.join(ctx.dd, "episode_plan.json"),
|
|
1141
|
+
path.join(ctx.dd, "batch_plan.json"),
|
|
1142
|
+
ctx.batchResultsDir,
|
|
1143
|
+
ctx.episodeResultsDir,
|
|
1144
|
+
path.join(ctx.dd, "run_state.json"),
|
|
1145
|
+
],
|
|
1146
|
+
issues,
|
|
1147
|
+
next: nextSteps,
|
|
1148
|
+
};
|
|
1149
|
+
return { kind: "halt", report, exitCode: EXIT_RUNTIME };
|
|
1150
|
+
}
|
|
1151
|
+
return { kind: "continue" };
|
|
1152
|
+
},
|
|
1153
|
+
},
|
|
1154
|
+
{
|
|
1155
|
+
stage: "episode_merge",
|
|
1156
|
+
enter: (ctx) => ({
|
|
1157
|
+
status: "init_running",
|
|
1158
|
+
init_stage: "episode_merge",
|
|
1159
|
+
checkpoint: ctx.checkpoint,
|
|
1160
|
+
batch_checkpoint: ctx.batchCheckpoint,
|
|
1161
|
+
episode_total: asList(ctx.plan["episodes"]).length,
|
|
1162
|
+
episode_completed: ctx.results.length,
|
|
1163
|
+
episode_reused: ctx.skipped.length,
|
|
1164
|
+
episode_failed: 0,
|
|
1165
|
+
failed_episodes: [],
|
|
1166
|
+
batch_total: asList(ctx.batchPlan["batches"]).length,
|
|
1167
|
+
batch_completed: ctx.completedBatches,
|
|
1168
|
+
batch_reused: ctx.skippedEpisodeBatchCount + ctx.skippedBatches.length,
|
|
1169
|
+
batch_failed: 0,
|
|
1170
|
+
failed_batches: [],
|
|
1171
|
+
failure_signature: [],
|
|
1172
|
+
failure_streak: 0,
|
|
1173
|
+
last_error: null,
|
|
1174
|
+
}),
|
|
1175
|
+
run: (ctx) => {
|
|
1176
|
+
// Error-file cleanup is best-effort cosmetics: a readdir hiccup must never
|
|
1177
|
+
// abort an otherwise-successful merge nor get misreported as a merge failure.
|
|
1178
|
+
for (const dir of [ctx.batchResultsDir, ctx.episodeResultsDir]) {
|
|
1179
|
+
if (!exists(dir))
|
|
1180
|
+
continue;
|
|
1181
|
+
let names;
|
|
1074
1182
|
try {
|
|
1075
|
-
|
|
1183
|
+
names = fs.readdirSync(dir);
|
|
1076
1184
|
}
|
|
1077
1185
|
catch {
|
|
1078
|
-
|
|
1186
|
+
continue;
|
|
1187
|
+
}
|
|
1188
|
+
for (const name of names) {
|
|
1189
|
+
if (name.endsWith(".error.json")) {
|
|
1190
|
+
try {
|
|
1191
|
+
deletePath(path.join(dir, name));
|
|
1192
|
+
}
|
|
1193
|
+
catch {
|
|
1194
|
+
// ignore
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1079
1197
|
}
|
|
1080
1198
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
const
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1199
|
+
const batchResultsByEpisode = groupResultsByEpisode(ctx.batchResults);
|
|
1200
|
+
const skippedSet = new Set(ctx.skipped);
|
|
1201
|
+
for (const episode of asList(ctx.plan["episodes"])) {
|
|
1202
|
+
const episodeNum = Number(episode["episode"]);
|
|
1203
|
+
if (skippedSet.has(episodeNum))
|
|
1204
|
+
continue;
|
|
1205
|
+
ctx.results.push(mergeEpisodeAndPersist(ctx.sourceText, ctx.episodeResultsDir, episode, batchResultsByEpisode.get(episodeNum) ?? [], ctx.providerName, ctx.model));
|
|
1206
|
+
const errorPath = episodeErrorPath(ctx.episodeResultsDir, episode);
|
|
1207
|
+
if (exists(errorPath))
|
|
1208
|
+
deletePath(errorPath);
|
|
1209
|
+
}
|
|
1210
|
+
ctx.results.sort((a, b) => Number(a["episode"] ?? 0) - Number(b["episode"] ?? 0));
|
|
1211
|
+
return { kind: "continue" };
|
|
1212
|
+
},
|
|
1213
|
+
onError: (ctx, exc) => {
|
|
1214
|
+
const e = exc;
|
|
1215
|
+
throw initFailedReport(ctx.workspace, {
|
|
1216
|
+
title: "INIT FAILED: Episode merge failed",
|
|
1217
|
+
stage: "episode_merge",
|
|
1218
|
+
required: ["complete batch_results/*.json that can merge into episode_results/*.json"],
|
|
1219
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1220
|
+
nextSteps: ["Rerun init; completed batch checkpoints will be reused and episode merge will retry."],
|
|
1221
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, batch_completed: ctx.completedBatches },
|
|
1222
|
+
});
|
|
1223
|
+
},
|
|
1224
|
+
},
|
|
1225
|
+
{
|
|
1226
|
+
stage: "script_merge",
|
|
1227
|
+
enter: (ctx) => ({ status: "init_running", init_stage: "script_merge", checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint }),
|
|
1228
|
+
run: (ctx) => {
|
|
1229
|
+
ctx.script = mergeEpisodeResults(ctx.results, strOf(ctx.info["projectName"]) || path.basename(ctx.source, path.extname(ctx.source)));
|
|
1230
|
+
return { kind: "continue" };
|
|
1231
|
+
},
|
|
1232
|
+
onError: (ctx, exc) => {
|
|
1233
|
+
const e = exc;
|
|
1234
|
+
throw initFailedReport(ctx.workspace, {
|
|
1235
|
+
title: "INIT FAILED: Merge failed",
|
|
1236
|
+
stage: "script_merge",
|
|
1237
|
+
required: ["complete episode_results/*.json"],
|
|
1238
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1239
|
+
nextSteps: ["Rerun init; completed episode extraction checkpoints will be reused and merge will retry."],
|
|
1240
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, episode_completed: ctx.results.length },
|
|
1241
|
+
});
|
|
1242
|
+
},
|
|
1243
|
+
},
|
|
1244
|
+
{
|
|
1245
|
+
stage: "asset_curation",
|
|
1246
|
+
enter: (ctx) => ({ status: "init_running", init_stage: "asset_curation", checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint }),
|
|
1247
|
+
run: async (ctx) => {
|
|
1248
|
+
const rawCuration = await providerExtractAssetCurationLocal(ctx.provider, ctx.sourceText, ctx.script);
|
|
1249
|
+
const curation = curateScriptAssets(ctx.script, rawCuration);
|
|
1250
|
+
writeJson(path.join(ctx.dd, "asset_curation.json"), curation);
|
|
1251
|
+
return { kind: "continue" };
|
|
1252
|
+
},
|
|
1253
|
+
onError: (ctx, exc) => {
|
|
1254
|
+
if (exc instanceof CliError) {
|
|
1255
|
+
throw initFailedReport(ctx.workspace, {
|
|
1256
|
+
title: exc.title,
|
|
1257
|
+
stage: "asset_curation",
|
|
1258
|
+
exitCode: exc.exitCode,
|
|
1259
|
+
required: exc.required.length > 0 ? exc.required : ["asset curation JSON matching final script contract"],
|
|
1260
|
+
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1261
|
+
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1262
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, episode_completed: ctx.results.length },
|
|
1263
|
+
});
|
|
1264
|
+
}
|
|
1265
|
+
const e = exc;
|
|
1266
|
+
throw initFailedReport(ctx.workspace, {
|
|
1267
|
+
title: "INIT FAILED: Asset curation failed",
|
|
1144
1268
|
stage: "asset_curation",
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
|
|
1269
|
+
required: ["provider location merge decisions and deterministic asset reuse curation"],
|
|
1270
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1271
|
+
nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
|
|
1272
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, episode_completed: ctx.results.length },
|
|
1150
1273
|
});
|
|
1151
|
-
}
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1274
|
+
},
|
|
1275
|
+
},
|
|
1276
|
+
{
|
|
1277
|
+
stage: "metadata_extract",
|
|
1278
|
+
enter: (ctx) => ({ status: "init_running", init_stage: "metadata_extract", checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint }),
|
|
1279
|
+
run: async (ctx) => {
|
|
1280
|
+
let metadata = ctx.provider.extractMetadata ? await ctx.provider.extractMetadata(ctx.sourceText, ctx.script) : {};
|
|
1281
|
+
if (!isDict(metadata))
|
|
1282
|
+
metadata = {};
|
|
1283
|
+
writeJson(path.join(ctx.dd, "asset_metadata.json"), metadata);
|
|
1284
|
+
applyMetadataToScript(ctx.script, metadata);
|
|
1285
|
+
return { kind: "continue" };
|
|
1286
|
+
},
|
|
1287
|
+
onError: (ctx, exc) => {
|
|
1288
|
+
if (exc instanceof CliError) {
|
|
1289
|
+
throw initFailedReport(ctx.workspace, {
|
|
1290
|
+
title: exc.title,
|
|
1291
|
+
stage: "metadata_extract",
|
|
1292
|
+
exitCode: exc.exitCode,
|
|
1293
|
+
required: exc.required.length > 0 ? exc.required : ["metadata JSON matching final script contract"],
|
|
1294
|
+
received: exc.received.length > 0 ? exc.received : [String(exc.message).slice(0, 160)],
|
|
1295
|
+
nextSteps: exc.nextSteps.length > 0 ? exc.nextSteps : ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1296
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, episode_completed: ctx.results.length },
|
|
1297
|
+
});
|
|
1298
|
+
}
|
|
1299
|
+
const e = exc;
|
|
1300
|
+
throw initFailedReport(ctx.workspace, {
|
|
1301
|
+
title: "INIT FAILED: Metadata extraction failed",
|
|
1174
1302
|
stage: "metadata_extract",
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
updates: { checkpoint, batch_checkpoint: batchCheckpoint, episode_completed: results.length },
|
|
1303
|
+
required: ["provider metadata for worldview, role_type, and asset descriptions"],
|
|
1304
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1305
|
+
nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
|
|
1306
|
+
updates: { checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint, episode_completed: ctx.results.length },
|
|
1180
1307
|
});
|
|
1181
|
-
}
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
:
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1308
|
+
},
|
|
1309
|
+
},
|
|
1310
|
+
{
|
|
1311
|
+
stage: "validate",
|
|
1312
|
+
// Handles errors inline: script.initial.json must be written and the
|
|
1313
|
+
// validate run_state written before validateScript runs, so a uniform
|
|
1314
|
+
// onError mapping cannot reproduce the ordering.
|
|
1315
|
+
run: (ctx) => {
|
|
1316
|
+
const { workspace, providerName, model, concurrency, source } = ctx;
|
|
1317
|
+
const scriptPath = path.join(ctx.dd, "script.initial.json");
|
|
1318
|
+
writeJson(scriptPath, ctx.script);
|
|
1319
|
+
updateRunState(workspace, { status: "init_running", init_stage: "validate", checkpoint: ctx.checkpoint, batch_checkpoint: ctx.batchCheckpoint });
|
|
1320
|
+
let validation;
|
|
1321
|
+
try {
|
|
1322
|
+
validation = validateScript(workspace, scriptPath);
|
|
1323
|
+
}
|
|
1324
|
+
catch (exc) {
|
|
1325
|
+
const e = exc;
|
|
1326
|
+
throw initFailedReport(workspace, {
|
|
1327
|
+
title: "INIT FAILED: Validation failed",
|
|
1328
|
+
stage: "validate",
|
|
1329
|
+
required: ["script.initial.json that can be validated"],
|
|
1330
|
+
received: [`${e?.name ?? "Error"}: ${(e?.message ?? "").slice(0, 160)}`],
|
|
1331
|
+
nextSteps: ["Rerun init to retry validation, or inspect script.initial.json if the failure persists."],
|
|
1332
|
+
updates: { checkpoint: ctx.checkpoint, script_path: scriptPath },
|
|
1333
|
+
});
|
|
1334
|
+
}
|
|
1335
|
+
const passed = Boolean(validation["passed"]);
|
|
1336
|
+
const status = passed ? "ready_for_agent" : "needs_agent_repair";
|
|
1337
|
+
updateRunState(workspace, {
|
|
1338
|
+
status,
|
|
1339
|
+
command: "direct init",
|
|
1340
|
+
init_stage: "complete",
|
|
1341
|
+
checkpoint: ctx.checkpoint,
|
|
1342
|
+
batch_checkpoint: ctx.batchCheckpoint,
|
|
1343
|
+
checkpoint_reused: ctx.checkpointReused,
|
|
1344
|
+
batch_checkpoint_reused: ctx.batchCheckpointReused,
|
|
1345
|
+
provider: providerName,
|
|
1346
|
+
model,
|
|
1347
|
+
concurrency,
|
|
1348
|
+
batch_mode: ctx.batchMode,
|
|
1349
|
+
batch_target_lines: ctx.batchTargetLines,
|
|
1350
|
+
batch_max_chars: ctx.batchMaxChars,
|
|
1351
|
+
batch_min_lines: ctx.batchMinLines,
|
|
1352
|
+
source_path: path.resolve(source),
|
|
1353
|
+
script_path: scriptPath,
|
|
1354
|
+
validation_path: path.join(ctx.dd, "validation.json"),
|
|
1355
|
+
episode_total: asList(ctx.plan["episodes"]).length,
|
|
1356
|
+
episode_completed: ctx.results.length,
|
|
1357
|
+
episode_reused: ctx.skipped.length,
|
|
1358
|
+
episode_failed: 0,
|
|
1359
|
+
failed_episodes: [],
|
|
1360
|
+
batch_total: asList(ctx.batchPlan["batches"]).length,
|
|
1361
|
+
batch_completed: ctx.completedBatches,
|
|
1362
|
+
batch_reused: ctx.skippedEpisodeBatchCount + ctx.skippedBatches.length,
|
|
1363
|
+
batch_failed: 0,
|
|
1364
|
+
failed_batches: [],
|
|
1365
|
+
failure_signature: [],
|
|
1366
|
+
failure_streak: 0,
|
|
1367
|
+
last_error: null,
|
|
1368
|
+
review_status: "pending",
|
|
1369
|
+
review_missing: [...REVIEW_TARGETS],
|
|
1370
|
+
inspected_targets: [],
|
|
1371
|
+
patch_count: 0,
|
|
1372
|
+
exportable: providerName !== "mock",
|
|
1373
|
+
});
|
|
1374
|
+
const title = passed
|
|
1375
|
+
? "INIT COMPLETE: Initial script ready"
|
|
1376
|
+
: "INIT NEEDS AGENT: Initial script written with repair issues";
|
|
1377
|
+
const stats = validation["stats"] ?? {};
|
|
1378
|
+
const report = {
|
|
1379
|
+
title,
|
|
1380
|
+
result: [
|
|
1381
|
+
`episodes: ${stats["episodes"] ?? 0}`,
|
|
1382
|
+
`scenes: ${stats["scenes"] ?? 0}`,
|
|
1383
|
+
`actions: ${stats["actions"] ?? 0}`,
|
|
1384
|
+
`validation: ${passed ? "passed" : "needs repair"}`,
|
|
1385
|
+
`provider: ${providerName}`,
|
|
1386
|
+
`episode checkpoint reused: ${ctx.skipped.length}`,
|
|
1387
|
+
`batches: ${ctx.completedBatches}/${asList(ctx.batchPlan["batches"]).length} completed`,
|
|
1388
|
+
`batch checkpoint reused: ${ctx.skippedEpisodeBatchCount + ctx.skippedBatches.length}`,
|
|
1389
|
+
"agent_review: pending",
|
|
1390
|
+
],
|
|
1391
|
+
artifacts: [
|
|
1392
|
+
path.join(workspace, "source.txt"),
|
|
1393
|
+
path.join(ctx.dd, "source_manifest.json"),
|
|
1394
|
+
path.join(ctx.dd, "episode_plan.json"),
|
|
1395
|
+
path.join(ctx.dd, "batch_plan.json"),
|
|
1396
|
+
ctx.batchResultsDir,
|
|
1397
|
+
ctx.episodeResultsDir,
|
|
1398
|
+
path.join(ctx.dd, "asset_curation.json"),
|
|
1399
|
+
path.join(ctx.dd, "asset_metadata.json"),
|
|
1400
|
+
scriptPath,
|
|
1401
|
+
path.join(ctx.dd, "validation.json"),
|
|
1402
|
+
path.join(ctx.dd, "run_state.json"),
|
|
1403
|
+
],
|
|
1404
|
+
issues: summarizeIssues(asList(validation["issues"])),
|
|
1405
|
+
next: providerName === "mock"
|
|
1406
|
+
? [
|
|
1407
|
+
"Run inspect for issue, episode, and asset; apply patches if needed; then validate/export.",
|
|
1408
|
+
"Do not export mock-provider results for delivery.",
|
|
1409
|
+
]
|
|
1410
|
+
: ["Run inspect for issue, episode, and asset; apply patches if needed; then validate/export."],
|
|
1411
|
+
};
|
|
1412
|
+
return { kind: "halt", report, exitCode: passed ? EXIT_OK : EXIT_NEEDS_AGENT };
|
|
1413
|
+
},
|
|
1414
|
+
},
|
|
1415
|
+
];
|
|
1289
1416
|
export function summarizeIssues(issues) {
|
|
1290
1417
|
if (issues.length === 0)
|
|
1291
1418
|
return [];
|