chainlesschain 0.47.6 → 0.47.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/assets/web-panel/.build-hash +1 -1
- package/src/assets/web-panel/assets/Analytics-BFI7jbwM.css +1 -0
- package/src/assets/web-panel/assets/Analytics-DQ135mAd.js +3 -0
- package/src/assets/web-panel/assets/AppLayout-6SPt_8Y_.js +1 -0
- package/src/assets/web-panel/assets/AppLayout-BFJ-Fofn.css +1 -0
- package/src/assets/web-panel/assets/{Backup-Ba9UybpT.js → Backup-DbVRG5vE.js} +1 -1
- package/src/assets/web-panel/assets/{Chat-BwXskT21.js → Chat-wVhrFK9C.js} +1 -1
- package/src/assets/web-panel/assets/{Cowork-UmOe7qvE.js → Cowork-lOC25IW2.js} +1 -1
- package/src/assets/web-panel/assets/{Cron-JHS-rc-4.js → Cron-3P0eVLTV.js} +1 -1
- package/src/assets/web-panel/assets/{Dashboard-B95cMCO7.js → Dashboard-Br7kCwKJ.js} +1 -1
- package/src/assets/web-panel/assets/{Git-CSYO0_zk.js → Git-CrDCcBig.js} +2 -2
- package/src/assets/web-panel/assets/{Logs-Hxw_K0km.js → Logs-BfTE8urP.js} +1 -1
- package/src/assets/web-panel/assets/{McpTools-DIE75TrB.js → McpTools-CsGIijNe.js} +1 -1
- package/src/assets/web-panel/assets/{Memory-C4KVnLlp.js → Memory-BXX_yMKJ.js} +1 -1
- package/src/assets/web-panel/assets/{Notes-DuzrHMAk.js → Notes-DU6Vf2cL.js} +1 -1
- package/src/assets/web-panel/assets/{Organization-DTq6uF82.js → Organization-Bny6yOPV.js} +4 -4
- package/src/assets/web-panel/assets/{P2P-C0hjlhsR.js → P2P-BxFZ1Bit.js} +2 -2
- package/src/assets/web-panel/assets/{Permissions-Ec0NH-xC.js → Permissions-B1j3Mtms.js} +3 -3
- package/src/assets/web-panel/assets/{Projects-U8D0asCS.js → Projects-D-CGscDu.js} +1 -1
- package/src/assets/web-panel/assets/{Providers-BngtTLvJ.js → Providers-r6NaBYMf.js} +1 -1
- package/src/assets/web-panel/assets/{RssFeed-B9NbwCKM.js → RssFeed-D7b68C5q.js} +1 -1
- package/src/assets/web-panel/assets/{Security-BL5Rkr1T.js → Security-MJfKv0EJ.js} +3 -3
- package/src/assets/web-panel/assets/{Services-D4MJzLld.js → Services-Yb_Q1V3d.js} +1 -1
- package/src/assets/web-panel/assets/{Skills-CQTOMDwF.js → Skills-DLTHcH5T.js} +1 -1
- package/src/assets/web-panel/assets/{Tasks-DepbJMnL.js → Tasks-CqycpPjS.js} +1 -1
- package/src/assets/web-panel/assets/{Templates-C24PVZPu.js → Templates-y01u2Zis.js} +1 -1
- package/src/assets/web-panel/assets/VideoEditing-BA1N-5kq.css +1 -0
- package/src/assets/web-panel/assets/VideoEditing-B_nPKw6B.js +1 -0
- package/src/assets/web-panel/assets/{Wallet-PQoSpN_P.js → Wallet-CsRgnjJY.js} +1 -1
- package/src/assets/web-panel/assets/{WebAuthn-BcuyQ4Lr.js → WebAuthn-DWoR5ADp.js} +1 -1
- package/src/assets/web-panel/assets/{WorkflowEditor-C-SvXbHW.js → WorkflowEditor-DBJhFPMN.js} +1 -1
- package/src/assets/web-panel/assets/{antd-DEjZPGMj.js → antd-Dh2t0vGq.js} +84 -84
- package/src/assets/web-panel/assets/index-tN-8TosE.js +2 -0
- package/src/assets/web-panel/assets/{markdown-CusdXFxb.js → markdown-CBnGGMzE.js} +1 -1
- package/src/assets/web-panel/index.html +2 -2
- package/src/commands/agent.js +20 -0
- package/src/commands/mcp.js +86 -4
- package/src/commands/memory.js +85 -4
- package/src/commands/sandbox.js +80 -6
- package/src/commands/serve.js +10 -0
- package/src/commands/session.js +250 -0
- package/src/commands/stream.js +75 -0
- package/src/commands/video.js +363 -0
- package/src/gateways/http/envelope-http-server.js +194 -0
- package/src/gateways/ws/message-dispatcher.js +123 -0
- package/src/gateways/ws/session-core-protocol.js +427 -0
- package/src/gateways/ws/session-protocol.js +42 -1
- package/src/gateways/ws/video-protocol.js +230 -0
- package/src/gateways/ws/ws-server.js +72 -0
- package/src/gateways/ws/ws-session-gateway.js +7 -3
- package/src/harness/jsonl-session-store.js +17 -9
- package/src/index.js +8 -0
- package/src/lib/agent-stream.js +63 -0
- package/src/lib/chat-core.js +183 -6
- package/src/lib/cowork/ab-comparator-cli.js +44 -23
- package/src/lib/cowork/agent-group-runner.js +145 -0
- package/src/lib/cowork/debate-review-cli.js +47 -25
- package/src/lib/cowork/project-style-analyzer-cli.js +34 -7
- package/src/lib/interaction-adapter.js +59 -1
- package/src/lib/jsonl-session-store.js +2 -0
- package/src/lib/memory-injection.js +90 -0
- package/src/lib/provider-stream.js +120 -0
- package/src/lib/sandbox-v2.js +198 -3
- package/src/lib/session-consolidator.js +125 -0
- package/src/lib/session-core-singletons.js +56 -0
- package/src/lib/session-tail.js +128 -0
- package/src/lib/session-usage.js +166 -0
- package/src/lib/shell-approval.js +96 -0
- package/src/lib/ws-chat-handler.js +3 -0
- package/src/repl/agent-repl.js +271 -6
- package/src/repl/chat-repl.js +87 -100
- package/src/runtime/agent-core.js +98 -15
- package/src/runtime/agent-runtime.js +105 -3
- package/src/runtime/policies/agent-policy.js +10 -0
- package/src/skills/video-editing/SKILL.md +46 -0
- package/src/skills/video-editing/beat-snap.js +127 -0
- package/src/skills/video-editing/extractors/audio-extractor.js +212 -0
- package/src/skills/video-editing/extractors/subtitle-extractor.js +90 -0
- package/src/skills/video-editing/extractors/video-extractor.js +137 -0
- package/src/skills/video-editing/parallel-orchestrator.js +212 -0
- package/src/skills/video-editing/pipeline.js +480 -0
- package/src/skills/video-editing/prompts/aesthetic-analysis.md +21 -0
- package/src/skills/video-editing/prompts/audio-segment.md +15 -0
- package/src/skills/video-editing/prompts/character-identify.md +19 -0
- package/src/skills/video-editing/prompts/dense-caption.md +20 -0
- package/src/skills/video-editing/prompts/editor-system.md +29 -0
- package/src/skills/video-editing/prompts/hook-dialogue.md +17 -0
- package/src/skills/video-editing/prompts/protagonist-detect.md +20 -0
- package/src/skills/video-editing/prompts/scene-caption.md +16 -0
- package/src/skills/video-editing/prompts/shot-caption.md +25 -0
- package/src/skills/video-editing/prompts/shot-plan.md +28 -0
- package/src/skills/video-editing/prompts/structure-proposal.md +16 -0
- package/src/skills/video-editing/prompts/vlog-scene-caption.md +18 -0
- package/src/skills/video-editing/render/audio-mix.js +128 -0
- package/src/skills/video-editing/render/ffmpeg-concat.js +45 -0
- package/src/skills/video-editing/render/ffmpeg-extract.js +67 -0
- package/src/skills/video-editing/reviewer.js +161 -0
- package/src/skills/video-editing/tools/commit.js +108 -0
- package/src/skills/video-editing/tools/review-clip.js +46 -0
- package/src/skills/video-editing/tools/semantic-retrieval.js +56 -0
- package/src/skills/video-editing/tools/shot-trimming.js +73 -0
- package/src/assets/web-panel/assets/Analytics-B4OM8S8X.css +0 -1
- package/src/assets/web-panel/assets/Analytics-DgypYeUB.js +0 -3
- package/src/assets/web-panel/assets/AppLayout-Bzf3mSZI.js +0 -1
- package/src/assets/web-panel/assets/AppLayout-DQyDwGut.css +0 -1
- package/src/assets/web-panel/assets/index-CwvzTTw_.js +0 -2
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline.js — 视频剪辑编排入口
|
|
3
|
+
*
|
|
4
|
+
* 四阶段:deconstruct → plan → assemble → render
|
|
5
|
+
* 通过 EventEmitter 吐统一进度事件(CLI --stream / Web stream.event 消费)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { EventEmitter } from "events";
import { promises as fs } from "fs";
import os from "os";
import path from "path";
import { createHash } from "crypto";
import { runVideoExtractor } from "./extractors/video-extractor.js";
import { runAudioExtractor } from "./extractors/audio-extractor.js";
import * as semanticRetrieval from "./tools/semantic-retrieval.js";
import * as shotTrimming from "./tools/shot-trimming.js";
import * as reviewClip from "./tools/review-clip.js";
import * as commitClip from "./tools/commit.js";
import { ParallelShotOrchestrator } from "./parallel-orchestrator.js";
import { reviewEntry, createQualityCheckPolicy } from "./reviewer.js";
import { snapToBeats } from "./beat-snap.js";
|
|
21
|
+
|
|
22
|
+
// The four tool modules imported above, exported as one ordered list:
// semantic retrieval, shot trimming, clip review, clip commit.
// NOTE(review): presumably consumed by an agent/tool registry elsewhere — confirm.
export const TOOLS = [semanticRetrieval, shotTrimming, reviewClip, commitClip];
|
|
23
|
+
|
|
24
|
+
/**
 * Compute the cache directory used for one (video, audio) input pair.
 *
 * The leaf directory name is the first 16 hex characters of a SHA-256 digest
 * fed with `videoPath` followed by `audioPath` (or "" when no audio is given),
 * so the same inputs always map to the same directory.
 *
 * Base directory:
 *   - `<APPDATA>/chainlesschain-desktop-vue/.chainlesschain` when the APPDATA
 *     environment variable is set;
 *   - `<home>/.chainlesschain` otherwise, where `<home>` is `$HOME` or, when
 *     that is unset, `os.homedir()`.
 *
 * @param {string} videoPath - Source video path; passed straight to
 *   `hash.update`, so a non-string/non-buffer value makes this throw.
 * @param {string} [audioPath] - Optional audio path.
 * @returns {string} `<base>/video-editing/<hash>`.
 */
export function getCacheDir(videoPath, audioPath) {
  const digest = createHash("sha256")
    .update(videoPath)
    .update(audioPath || "")
    .digest("hex")
    .slice(0, 16);

  const { APPDATA, HOME } = process.env;
  // path.join never expands "~", so a literal "~" fallback would create a
  // directory named "~" under the current working directory. Ask the OS for
  // the real home directory instead.
  const base = APPDATA
    ? path.join(APPDATA, "chainlesschain-desktop-vue", ".chainlesschain")
    : path.join(HOME || os.homedir(), ".chainlesschain");

  return path.join(base, "video-editing", digest);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Video-editing pipeline: deconstruct → plan → assemble (→ optional review)
 * → render.
 *
 * Every stage reports progress by emitting a single `"event"` on this emitter
 * whose payload is `{ type, ts, ...data }` (see `_emit`). Event types emitted
 * from this class are `phase.start`, `phase.progress`, `phase.end`,
 * `agent.tool_call` and `review.fail`; events coming from the parallel
 * orchestrator are re-emitted under their own `type`.
 */
export class VideoPipeline extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.videoPath] - Source video.
   * @param {string} [options.audioPath] - Optional audio track; when absent the
   *   audio extractor and the audio mix are skipped.
   * @param {string} [options.instruction=""] - Editing instruction forwarded to `llmCall`.
   * @param {string} [options.outputPath="./output.mp4"] - Final render target.
   * @param {Function|null} [options.llmCall=null] - Async LLM callback; when
   *   absent a default shot plan is built from the audio segments.
   * @param {*} [options.existingSrt=null] - Forwarded to the audio extractor.
   * @param {number} [options.fps=2] - Forwarded to the video extractor.
   * @param {string} [options.mainCharacter=""]
   * @param {boolean} [options.useMadmom=false] - Forwarded to the audio extractor.
   * @param {boolean} [options.snapBeats=false] - Snap the shot plan to `audio_beats.json`.
   * @param {boolean} [options.ducking=false] - Use the ducking audio mix when rendering.
   * @param {string} [options.cacheDir] - Defaults to `getCacheDir(videoPath, audioPath)`.
   */
  constructor(options = {}) {
    super();
    this.videoPath = options.videoPath;
    this.audioPath = options.audioPath;
    this.instruction = options.instruction || "";
    this.outputPath = options.outputPath || "./output.mp4";
    this.llmCall = options.llmCall || null;
    this.existingSrt = options.existingSrt || null;
    this.fps = options.fps || 2;
    this.mainCharacter = options.mainCharacter || "";
    this.useMadmom = options.useMadmom || false;
    this.snapBeats = options.snapBeats || false;
    this.ducking = options.ducking || false;
    this.cacheDir =
      options.cacheDir || getCacheDir(this.videoPath, this.audioPath);
  }

  /**
   * Emit one unified progress event on the `"event"` channel.
   *
   * @param {string} type - Event type, e.g. `"phase.start"`.
   * @param {object} [data] - Extra fields; spread last, so keys in `data`
   *   (including `type`/`ts`) override the defaults.
   * @returns {object} The emitted event object.
   */
  _emit(type, data = {}) {
    const ev = { type, ts: Date.now(), ...data };
    this.emit("event", ev);
    return ev;
  }

  /**
   * Read and parse a JSON file, returning `fallback` when the file cannot be
   * read or parsed.
   *
   * @param {string} filePath
   * @param {*} fallback
   * @returns {Promise<*>}
   */
  async _readJsonOr(filePath, fallback) {
    try {
      return JSON.parse(await fs.readFile(filePath, "utf-8"));
    } catch {
      return fallback;
    }
  }

  // ── Phase 1: Deconstruct ──────────────────────────────────

  /**
   * Run the video (and, if `audioPath` is set, audio) extractors into the
   * cache directory.
   *
   * `meta.json` is written only after extraction finishes; when an existing
   * `meta.json` records the same `videoPath` and `audioPath`, extraction is
   * skipped and the phase ends with `cached: true`. Only the two paths are
   * compared — other options such as `fps` do not invalidate the cache.
   *
   * @returns {Promise<string>} The cache directory.
   */
  async deconstruct() {
    this._emit("phase.start", { phase: "deconstruct" });
    await fs.mkdir(this.cacheDir, { recursive: true });

    const meta = { videoPath: this.videoPath, audioPath: this.audioPath };
    const metaPath = path.join(this.cacheDir, "meta.json");

    // A missing or unreadable meta.json simply means "no cache yet".
    const cachedMeta = await this._readJsonOr(metaPath, null);
    const cacheHit =
      cachedMeta != null &&
      cachedMeta.videoPath === this.videoPath &&
      cachedMeta.audioPath === this.audioPath;
    if (cacheHit) {
      this._emit("phase.progress", {
        phase: "deconstruct",
        pct: 1,
        message: "Using cache",
      });
      this._emit("phase.end", { phase: "deconstruct", cached: true });
      return this.cacheDir;
    }

    this._emit("phase.progress", {
      phase: "deconstruct",
      pct: 0.1,
      message: "Extracting frames",
    });
    await runVideoExtractor(this.videoPath, this.cacheDir, {
      fps: this.fps,
      llmCall: this.llmCall,
    });

    this._emit("phase.progress", {
      phase: "deconstruct",
      pct: 0.6,
      message: "Analyzing audio",
    });
    if (this.audioPath) {
      await runAudioExtractor(this.audioPath, this.cacheDir, {
        existingSrt: this.existingSrt,
        llmCall: this.llmCall,
        useMadmom: this.useMadmom,
      });
    }

    await fs.writeFile(metaPath, JSON.stringify(meta, null, 2));
    this._emit("phase.end", { phase: "deconstruct" });
    return this.cacheDir;
  }

  // ── Phase 2: Plan (Screenwriter) ─────────────────────────

  /**
   * Build the shot plan and persist it as `shot_plan.json`.
   *
   * Reads `scene.json` and `audio_caption.json` from the asset directory
   * (missing/invalid files count as empty lists). The plan comes from
   * `llmCall` when one is configured, otherwise from `_defaultShotPlan`.
   * With `snapBeats` enabled the plan is additionally passed through
   * `snapToBeats` using `audio_beats.json`; this step is best-effort.
   *
   * @param {string} [assetDir] - Defaults to the cache directory.
   * @returns {Promise<object>} The shot plan.
   */
  async plan(assetDir) {
    this._emit("phase.start", { phase: "plan" });
    const dir = assetDir || this.cacheDir;

    // Normalise once so both planners receive arrays.
    const sceneList = await this._loadScenes(dir);
    const audioCaption = await this._readJsonOr(
      path.join(dir, "audio_caption.json"),
      null,
    );
    const audioSegments = audioCaption?.segments || [];

    let shotPlan;
    if (this.llmCall) {
      shotPlan = await this.llmCall({
        type: "shot-plan",
        instruction: this.instruction,
        mainCharacter: this.mainCharacter,
        scenes: sceneList,
        audioSegments,
      });
    } else {
      shotPlan = this._defaultShotPlan(audioSegments, sceneList);
    }

    if (this.snapBeats) {
      try {
        const beatsData = JSON.parse(
          await fs.readFile(path.join(dir, "audio_beats.json"), "utf-8"),
        );
        if (beatsData.beats && beatsData.beats.length > 0) {
          shotPlan = snapToBeats(shotPlan, beatsData.beats);
          this._emit("phase.progress", {
            phase: "plan",
            pct: 0.9,
            message: "Beat-snapped",
          });
        }
      } catch (err) {
        // Beat snapping is optional: keep the un-snapped plan, but report why
        // the step was skipped instead of hiding the failure.
        this._emit("phase.progress", {
          phase: "plan",
          pct: 0.9,
          message: `Beat snap skipped: ${err?.message || err}`,
        });
      }
    }

    const planPath = path.join(dir, "shot_plan.json");
    await fs.writeFile(planPath, JSON.stringify(shotPlan, null, 2));
    this._emit("phase.end", { phase: "plan", output: planPath });
    return shotPlan;
  }

  /**
   * Fallback planner used when no `llmCall` is configured: one section per
   * audio segment, each holding a single shot as long as the segment.
   *
   * @param {Array<{start: number, end: number, label?: string}>} segments
   * @param {Array} scenes - Accepted for signature parity; not used.
   * @returns {{sections: Array<object>}}
   */
  _defaultShotPlan(segments, scenes) {
    return {
      sections: segments.map((seg, i) => ({
        section_idx: i,
        music_segment: { start: seg.start, end: seg.end, label: seg.label },
        shots: [
          {
            shot_idx: 0,
            target_duration: parseFloat((seg.end - seg.start).toFixed(3)),
            emotion: "neutral",
            visual_target: `Scene near segment ${i}`,
          },
        ],
      })),
    };
  }

  // ── Phase 3: Assemble (Editor ReAct) ─────────────────────

  /**
   * Assemble shots one after another.
   *
   * Resets `shot_point.json` to `[]`, runs the editor loop for every shot in
   * plan order, then returns whatever `shot_point.json` contains afterwards.
   * NOTE(review): this method never writes entries itself, so the commit tool
   * is presumably what appends to `shot_point.json` — confirm in tools/commit.js.
   *
   * @param {{sections?: Array<object>}} shotPlan
   * @param {string} [assetDir] - Defaults to the cache directory.
   * @returns {Promise<*>} Parsed contents of `shot_point.json`.
   */
  async assemble(shotPlan, assetDir) {
    this._emit("phase.start", { phase: "assemble" });
    const dir = assetDir || this.cacheDir;
    const shotPointPath = path.join(dir, "shot_point.json");
    await fs.writeFile(shotPointPath, "[]");

    const committedClips = [];
    const context = {
      assetDir: dir,
      committedClips,
      shotPointPath,
      llmCall: this.llmCall,
    };

    const sections = shotPlan.sections || [];
    const totalShots = sections.reduce(
      (sum, sec) => sum + (sec.shots?.length || 0),
      0,
    );
    let done = 0;

    for (const section of sections) {
      for (const shot of section.shots || []) {
        // Emitted before the shot runs, so `done` counts finished shots only.
        this._emit("phase.progress", {
          phase: "assemble",
          pct: parseFloat((done / Math.max(totalShots, 1)).toFixed(2)),
          message: `Section ${section.section_idx} Shot ${shot.shot_idx}`,
        });

        await this._runEditorLoop(section, shot, context);
        done++;
      }
    }

    this._emit("phase.end", { phase: "assemble", output: shotPointPath });

    return JSON.parse(await fs.readFile(shotPointPath, "utf-8"));
  }

  /**
   * Map a music segment onto an inclusive scene-index window.
   *
   * The start index scales `start / end` of the segment onto the scene count;
   * the window spans 30% of all scenes. Missing or non-numeric segment bounds
   * fall back to the first scene, and both indices are clamped so that
   * `0 <= sceneStart <= sceneEnd`.
   *
   * @param {{start?: number, end?: number}} musicSeg
   * @param {number} totalScenes
   * @returns {{sceneStart: number, sceneEnd: number}}
   */
  _sceneWindow(musicSeg, totalScenes) {
    const lastScene = Math.max(totalScenes - 1, 0);
    const rawStart = Math.floor(
      (musicSeg.start / (musicSeg.end || 1)) * totalScenes,
    );
    // `musicSeg.start` is undefined for sections without a music segment,
    // which would otherwise turn the whole window into NaN.
    const sceneStart = Number.isFinite(rawStart)
      ? Math.min(Math.max(rawStart, 0), lastScene)
      : 0;
    const sceneEnd = Math.max(
      sceneStart,
      Math.min(sceneStart + Math.ceil(totalScenes * 0.3), totalScenes - 1),
    );
    return { sceneStart, sceneEnd };
  }

  /**
   * Pick, review and commit one clip for a single shot.
   *
   * Steps: retrieve candidates for the scene window → take the first
   * candidate → check it against already committed clips → commit. Each tool
   * invocation is announced with an `agent.tool_call` event; a conflict is
   * reported with `review.fail` and the shot is skipped.
   *
   * @param {object} section - Plan section (`section_idx`, `music_segment`).
   * @param {object} shot - Plan shot (`shot_idx`, `target_duration`).
   * @param {object} context - Shared tool context (`assetDir`, `committedClips`, …).
   * @returns {Promise<{start: number, end: number}|null>} The clip submitted
   *   to the commit tool, or `null` when nothing was committed for this shot.
   */
  async _runEditorLoop(section, shot, context) {
    const musicSeg = section.music_segment || {};
    const scenes = await this._loadScenes(context.assetDir);
    const { sceneStart, sceneEnd } = this._sceneWindow(musicSeg, scenes.length);

    this._emit("agent.tool_call", {
      agent: "editor",
      tool: "video_semantic_retrieval",
      args: { scene_start: sceneStart, scene_end: sceneEnd },
    });
    const retrieval = await semanticRetrieval.execute(
      { scene_start: sceneStart, scene_end: sceneEnd },
      context,
    );

    const candidates = retrieval?.candidates || [];
    if (candidates.length === 0) return null;

    const picked = candidates[0];
    const startTime = picked.time || 0;
    const endTime = startTime + (shot.target_duration || 3);

    this._emit("agent.tool_call", {
      agent: "editor",
      tool: "video_review_clip",
      args: { start: startTime, end: endTime },
    });
    // Awaited like the other tools: harmless for a synchronous result, and it
    // keeps the conflict check working if the tool returns a promise.
    const review = await reviewClip.execute(
      { start: startTime, end: endTime },
      context,
    );

    if (review?.has_conflict) {
      this._emit("review.fail", {
        reason: review.suggestion,
        willRerun: false,
      });
      return null;
    }

    this._emit("agent.tool_call", {
      agent: "editor",
      tool: "video_commit_clip",
      args: {
        section_idx: section.section_idx,
        shot_idx: shot.shot_idx,
        clips: [{ start: startTime, end: endTime }],
      },
    });
    await commitClip.execute(
      {
        section_idx: section.section_idx,
        shot_idx: shot.shot_idx,
        clips: [{ start: startTime, end: endTime }],
      },
      context,
    );

    return { start: startTime, end: endTime };
  }

  /**
   * Load the scene list from `scene.json` in the asset directory.
   *
   * @param {string} assetDir
   * @returns {Promise<Array>} The `scenes` array, or `[]` when the file is
   *   missing, invalid, or has no `scenes` entry.
   */
  async _loadScenes(assetDir) {
    try {
      const raw = await fs.readFile(path.join(assetDir, "scene.json"), "utf-8");
      return JSON.parse(raw).scenes || [];
    } catch {
      return [];
    }
  }

  // ── Phase 3b: Assemble (Parallel) ─────────────────────────

  /**
   * Assemble shots through `ParallelShotOrchestrator`.
   *
   * The orchestrator's events are re-emitted on this pipeline, and its results
   * are written to `shot_point.json` (replacing the initial `[]`).
   *
   * @param {{sections?: Array<object>}} shotPlan
   * @param {string} [assetDir] - Defaults to the cache directory.
   * @param {object} [options]
   * @param {number} [options.maxConcurrency=4]
   * @param {number} [options.maxReruns=3]
   * @returns {Promise<Array>} Whatever `orchestrator.run` resolves to.
   */
  async assembleParallel(shotPlan, assetDir, options = {}) {
    this._emit("phase.start", { phase: "assemble" });
    const dir = assetDir || this.cacheDir;
    const shotPointPath = path.join(dir, "shot_point.json");
    await fs.writeFile(shotPointPath, "[]");

    const committedClips = [];
    const context = {
      assetDir: dir,
      committedClips,
      shotPointPath,
      llmCall: this.llmCall,
    };

    const orchestrator = new ParallelShotOrchestrator({
      maxConcurrency: options.maxConcurrency || 4,
      maxReruns: options.maxReruns || 3,
    });

    orchestrator.on("event", (ev) => this._emit(ev.type, ev));

    const runShot = async (section, shot, ctx) => {
      const mergedCtx = { ...context, ...ctx };
      // Use the clip this shot committed rather than the tail of the shared
      // `committedClips` array: the tail belongs to a different shot whenever
      // this one was skipped (no candidates / conflict) or another shot
      // committed in the meantime.
      const committed = await this._runEditorLoop(section, shot, mergedCtx);
      if (!committed) return null;
      return {
        section_idx: section.section_idx,
        shot_idx: shot.shot_idx,
        clips: [{ start: committed.start, end: committed.end }],
        total_duration: committed.end - committed.start,
        music_segment: section.music_segment,
      };
    };

    const results = await orchestrator.run(
      shotPlan.sections || [],
      runShot,
      context,
    );

    await fs.writeFile(shotPointPath, JSON.stringify(results, null, 2));
    this._emit("phase.end", { phase: "assemble", output: shotPointPath });
    return results;
  }

  // ── Phase 3c: Review (Quality Gate) ──────────────────────

  /**
   * Run the quality checkers over every shot point and split the entries
   * into approved and rejected lists. Each returned entry is a copy of the
   * input with the checker result attached as `review`.
   *
   * @param {Array<object>} shotPoints
   * @param {string} [assetDir] - Accepted for signature parity; not used.
   * @param {object} [options]
   * @param {string[]} [options.checkers=["vision-protagonist"]]
   * @param {*} [options.thresholds] - Forwarded to the checkers via context.
   * @returns {Promise<{approved: Array<object>, rejected: Array<object>}>}
   */
  async review(shotPoints, assetDir, options = {}) {
    this._emit("phase.start", { phase: "review" });
    const checkerNames = options.checkers || ["vision-protagonist"];
    const context = {
      llmCall: this.llmCall,
      mainCharacter: this.mainCharacter,
      thresholds: options.thresholds,
    };

    const approved = [];
    const rejected = [];
    let reviewed = 0;

    for (const entry of shotPoints) {
      const result = await reviewEntry(entry, checkerNames, context);
      // Progress is reported after the entry was reviewed, so count it too;
      // otherwise pct would stop at (n - 1) / n.
      reviewed += 1;
      const verdict = result.pass ? "pass" : "fail";
      const score =
        typeof result.aggregateScore === "number"
          ? result.aggregateScore.toFixed(2)
          : "n/a";
      this._emit("phase.progress", {
        phase: "review",
        pct: reviewed / shotPoints.length,
        message: `S${entry.section_idx}:${entry.shot_idx} ${verdict} (${score})`,
      });

      if (result.pass) {
        approved.push({ ...entry, review: result });
      } else {
        rejected.push({ ...entry, review: result });
        this._emit("review.fail", {
          section_idx: entry.section_idx,
          shot_idx: entry.shot_idx,
          checks: result.checks,
          willRerun: false,
        });
      }
    }

    this._emit("phase.end", {
      phase: "review",
      approved: approved.length,
      rejected: rejected.length,
    });

    return { approved, rejected };
  }

  // ── Phase 4: Render ──────────────────────────────────────

  /**
   * Extract the selected clips, concatenate them and produce the final file.
   *
   * With an `audioPath` the concatenated video is mixed with the audio track
   * (ducking variant when `ducking` is set) into `outputPath`; without one the
   * concatenated file is copied to `outputPath`.
   *
   * @param {Array<object>} shotPoints
   * @param {string} [assetDir] - Defaults to the cache directory.
   * @returns {Promise<string>} Path of the rendered file.
   */
  async render(shotPoints, assetDir) {
    this._emit("phase.start", { phase: "render" });
    const dir = assetDir || this.cacheDir;

    const { extractClips } = await import("./render/ffmpeg-extract.js");
    const { concatClips } = await import("./render/ffmpeg-concat.js");

    const clipPaths = await extractClips(this.videoPath, shotPoints, dir);
    this._emit("phase.progress", {
      phase: "render",
      pct: 0.4,
      message: "Clips extracted",
    });

    const concatPath = await concatClips(clipPaths, dir);
    this._emit("phase.progress", {
      phase: "render",
      pct: 0.7,
      message: "Concatenated",
    });

    let finalPath = concatPath;
    if (this.audioPath) {
      // Loaded once, and only when an audio track actually has to be mixed.
      const { mixAudio, mixAudioWithDucking } = await import(
        "./render/audio-mix.js"
      );
      const mix = this.ducking ? mixAudioWithDucking : mixAudio;
      finalPath = await mix(concatPath, this.audioPath, this.outputPath);
      this._emit("phase.progress", {
        phase: "render",
        pct: 0.95,
        message: "Audio mixed",
      });
    } else {
      await fs.copyFile(concatPath, this.outputPath);
      finalPath = this.outputPath;
    }

    this._emit("phase.end", { phase: "render", output: finalPath });
    return finalPath;
  }

  // ── Full Pipeline ────────────────────────────────────────

  /**
   * Shared front half of `run` / `runWithReview`:
   * deconstruct → plan → assemble (parallel when `options.parallel` is set).
   *
   * @param {object} options
   * @returns {Promise<{assetDir: string, shotPlan: object, shotPoints: *}>}
   */
  async _deconstructPlanAssemble(options) {
    const assetDir = await this.deconstruct();
    const shotPlan = await this.plan(assetDir);
    const shotPoints = options.parallel
      ? await this.assembleParallel(shotPlan, assetDir, options)
      : await this.assemble(shotPlan, assetDir);
    return { assetDir, shotPlan, shotPoints };
  }

  /**
   * Run the whole pipeline without the review gate.
   *
   * @param {object} [options] - `parallel` selects `assembleParallel`; the
   *   object is also forwarded to it (`maxConcurrency`, `maxReruns`).
   * @returns {Promise<{assetDir: string, shotPlan: object, shotPoints: *, outputPath: string}>}
   */
  async run(options = {}) {
    const { assetDir, shotPlan, shotPoints } =
      await this._deconstructPlanAssemble(options);

    const outputPath = await this.render(shotPoints, assetDir);
    return { assetDir, shotPlan, shotPoints, outputPath };
  }

  /**
   * Run the whole pipeline with the review gate: only approved shot points
   * are rendered, and the rejected ones are returned alongside.
   *
   * @param {object} [options] - As for `run`, plus the `review` options
   *   (`checkers`, `thresholds`).
   * @returns {Promise<{assetDir: string, shotPlan: object, shotPoints: Array<object>, rejected: Array<object>, outputPath: string}>}
   */
  async runWithReview(options = {}) {
    const { assetDir, shotPlan, shotPoints } =
      await this._deconstructPlanAssemble(options);

    const { approved, rejected } = await this.review(
      shotPoints,
      assetDir,
      options,
    );
    const outputPath = await this.render(approved, assetDir);
    return { assetDir, shotPlan, shotPoints: approved, rejected, outputPath };
  }
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Aesthetic Analysis
|
|
2
|
+
|
|
3
|
+
评估镜头的电影摄影质量。
|
|
4
|
+
|
|
5
|
+
**输入**: 视频片段帧序列
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"lighting": 0.0,
|
|
11
|
+
"color": 0.0,
|
|
12
|
+
"composition": 0.0,
|
|
13
|
+
"camera": 0.0,
|
|
14
|
+
"overall_aesthetic_score": 0.0,
|
|
15
|
+
"strengths": ["..."],
|
|
16
|
+
"weaknesses": ["..."],
|
|
17
|
+
"recommendation": "use | reject | conditional"
|
|
18
|
+
}
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
每项 1.0-5.0。只输出 JSON。
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Audio Segment Selection
|
|
2
|
+
|
|
3
|
+
挑选与剪辑指令匹配的音乐段。
|
|
4
|
+
|
|
5
|
+
**输入**: 音乐总览 + 可用段落列表 + 目标时长
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"section_idx": 2,
|
|
11
|
+
"justification": "<one sentence>"
|
|
12
|
+
}
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
**默认偏好**: 高能量节奏段(Chorus / Drop / Build-up),除非指令明确要求慢节奏。只输出 JSON。
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Character Identification
|
|
2
|
+
|
|
3
|
+
分析字幕识别说话人。
|
|
4
|
+
|
|
5
|
+
**输入**: 按 speaker 标签分组的对话样本 + 视频上下文
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"<SPEAKER_ID>": {
|
|
11
|
+
"name": "<character name>",
|
|
12
|
+
"confidence": "high | medium | low",
|
|
13
|
+
"evidence": "<which dialogue lines support this>",
|
|
14
|
+
"role": "protagonist | supporting | minor"
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
只输出 JSON,禁止 markdown 包裹或其他文本。
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Dense Caption (Film)
|
|
2
|
+
|
|
3
|
+
识别镜头边界并做情绪分析。Segments 必须有意义的时长,只保留显著的剪辑点。
|
|
4
|
+
|
|
5
|
+
**输入**: 视频片段 + 主角上下文
|
|
6
|
+
**输出**: 严格 JSON 数组
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
[
|
|
10
|
+
{
|
|
11
|
+
"start": "HH:MM:SS",
|
|
12
|
+
"end": "HH:MM:SS",
|
|
13
|
+
"mood": "tense | calm | uplifting | melancholy | action",
|
|
14
|
+
"protagonist_prominence": 0.0,
|
|
15
|
+
"summary": "<one sentence>"
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
protagonist_prominence 范围 0.0-1.0(主角在帧中占比 + 焦点程度)。只输出 JSON。
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Editor System (ReAct)
|
|
2
|
+
|
|
3
|
+
按 THINK → ACT → OBSERVE 循环为每个 shot 选定时间戳。
|
|
4
|
+
|
|
5
|
+
**目标**: 为当前 shot 找到画面达标且时长接近 `target_duration` 的片段,commit 后退出。
|
|
6
|
+
|
|
7
|
+
**主角约束**: 主角必须**清晰可见**且是镜头的**焦点**。
|
|
8
|
+
|
|
9
|
+
## 可用工具
|
|
10
|
+
|
|
11
|
+
| 工具 | 用途 |
|
|
12
|
+
| --------------------------------------- | -------------------------------------- |
|
|
13
|
+
| `video_semantic_retrieval(scene_range)` | 探索 scene 元数据,返回候选 shot 列表 |
|
|
14
|
+
| `video_shot_trimming(time_range)` | 抽帧 + VLM 分析,返回精细断点 + 可用性 |
|
|
15
|
+
| `video_review_clip(start, end)` | 检查与已 commit 片段的时间冲突 |
|
|
16
|
+
| `video_commit_clip(clips[])` | 提交(最多 3 段拼接成一个 shot) |
|
|
17
|
+
|
|
18
|
+
## 流程
|
|
19
|
+
|
|
20
|
+
1. THINK: 当前 shot 想要什么样的画面?
|
|
21
|
+
2. ACT: 调 retrieval 拿候选 → 调 trimming 收窄 → 调 review_clip 防冲突
|
|
22
|
+
3. OBSERVE: 不达标就回到 THINK
|
|
23
|
+
4. commit 一旦成功立即退出
|
|
24
|
+
|
|
25
|
+
## 提示
|
|
26
|
+
|
|
27
|
+
- 同一 shot 最多调 8 次工具,超出上限选当前最佳候选 commit
|
|
28
|
+
- 时长偏差 ±15% 内可接受
|
|
29
|
+
- review 失败时通过 `forbidden_time_ranges` 提示绕开
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Hook Dialogue Selection
|
|
2
|
+
|
|
3
|
+
为视频选开场对白片段。
|
|
4
|
+
|
|
5
|
+
**输入**: 角色上下文 + shot_plan + 带时间戳的字幕
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"selected_lines": [
|
|
11
|
+
{ "speaker": "...", "text": "...", "start": "HH:MM:SS", "end": "HH:MM:SS" }
|
|
12
|
+
],
|
|
13
|
+
"reasoning": "<为什么选这些>"
|
|
14
|
+
}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**时长规则**: 总时长必须在 `{{HOOK_MIN}}`-`{{HOOK_MAX}}` 秒之间,超出区间无效。只输出 JSON。
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Protagonist Detection (VLM)
|
|
2
|
+
|
|
3
|
+
逐帧检测主角是否出现。主角名: `{{MAIN_CHARACTER_NAME}}`。
|
|
4
|
+
|
|
5
|
+
**输入**: 帧图像 + 帧索引数组
|
|
6
|
+
**输出**: 严格 JSON 数组
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
[
|
|
10
|
+
{
|
|
11
|
+
"frame_idx": 0,
|
|
12
|
+
"protagonist_detected": true,
|
|
13
|
+
"bounding_box": [x1, y1, x2, y2],
|
|
14
|
+
"face_quality": "clear | partial | obscured",
|
|
15
|
+
"confidence": 0.0
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
bounding_box 不可见时设 null。confidence 范围 0.0-1.0。只输出 JSON。
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Scene Video Caption (Film)
|
|
2
|
+
|
|
3
|
+
分类叙事场景并打剪辑重要性分。
|
|
4
|
+
|
|
5
|
+
**输入**: 顺序帧序列 + 角色上下文
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"scene_type": "content | studio_logo | credits | transition",
|
|
11
|
+
"importance": 0,
|
|
12
|
+
"narrative_summary": "<one sentence>"
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
约束: 如果**第一帧**是 logo,scene_type 必须是 `studio_logo`,禁止 `content`。importance 范围 0-5。只输出 JSON。
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Shot Caption
|
|
2
|
+
|
|
3
|
+
为单个镜头存档结构化视觉数据,把对白与视觉元素客观关联。
|
|
4
|
+
|
|
5
|
+
**输入**: 单镜头转录 + 帧
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"spatio_temporal": {
|
|
11
|
+
"start": "HH:MM:SS",
|
|
12
|
+
"end": "HH:MM:SS",
|
|
13
|
+
"location": "..."
|
|
14
|
+
},
|
|
15
|
+
"entities": ["..."],
|
|
16
|
+
"actions": ["..."],
|
|
17
|
+
"cinematography": {
|
|
18
|
+
"shot_type": "wide | medium | close-up",
|
|
19
|
+
"movement": "static | pan | tracking"
|
|
20
|
+
},
|
|
21
|
+
"dialogue_visual_link": "<which line maps to which visual>"
|
|
22
|
+
}
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
只输出 JSON。
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Shot Plan
|
|
2
|
+
|
|
3
|
+
把音乐段映射到具体镜头,产出分镜表。
|
|
4
|
+
|
|
5
|
+
**输入**: 音乐段落 + 创作方向 + 视频区段信息
|
|
6
|
+
**输出**: 严格 JSON
|
|
7
|
+
|
|
8
|
+
```json
|
|
9
|
+
{
|
|
10
|
+
"sections": [
|
|
11
|
+
{
|
|
12
|
+
"section_idx": 0,
|
|
13
|
+
"music_segment": { "start": 0.0, "end": 12.4, "label": "intro" },
|
|
14
|
+
"shots": [
|
|
15
|
+
{
|
|
16
|
+
"shot_idx": 0,
|
|
17
|
+
"target_duration": 2.0,
|
|
18
|
+
"emotion": "tension",
|
|
19
|
+
"visual_target": "<想要的画面>",
|
|
20
|
+
"music_beat_alignment": "downbeat | offbeat"
|
|
21
|
+
}
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**核心原则**: 选择基于"可见、可剪、可上屏"的画面。只输出 JSON。
|