@lightcone-ai/daemon 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -348,7 +348,7 @@ server.tool(
|
|
|
348
348
|
// audio in production runs (Tasks #20/#25/#26), forcing re-records.
|
|
349
349
|
server.tool(
|
|
350
350
|
'record_url_narration',
|
|
351
|
-
'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nMUST be preceded by plan_video_segments in the same session — feed plan_video_segments\'s `segments` array as `plan.sections` so dwell_ms aligns mechanically with TTS audio_duration_ms (hand-written dwell_ms has drifted and forced re-records in production).\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
|
|
351
|
+
'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nMUST be preceded by plan_video_segments in the same session — feed plan_video_segments\'s `segments` array as `plan.sections` so dwell_ms aligns mechanically with TTS audio_duration_ms (hand-written dwell_ms has drifted and forced re-records in production).\n\nMULTI-SECTION OUTPUT (recommended for any URL with ≥2 sections): pass `output_paths` as an array with one path per plan.sections entry. The tool records the URL ONCE continuously (one browser session, one scrollTop, natural scroll flow through all sections), then slices the recording at section boundaries via ffmpeg. This avoids the per-segment scroll-back-to-top reset that happens when the agent splits N sections into N separate record_url_narration calls — that pattern reopens the browser and re-navigates for each segment, which looks visually disjointed even though the per-segment timing is correct.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
|
|
352
352
|
{
|
|
353
353
|
url: z.string().describe('Page URL to record'),
|
|
354
354
|
plan: z.record(z.any()).describe(
|
|
@@ -367,7 +367,8 @@ server.tool(
|
|
|
367
367
|
+ 'frag.short.recruitment_url_mode_policy). Pick a different target_y in the 标题/岗位 '
|
|
368
368
|
+ 'information area and rewrite that section.'
|
|
369
369
|
),
|
|
370
|
-
output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
|
|
370
|
+
output_path: z.string().optional().describe('Workspace-relative output mp4 path for the CONSOLIDATED master recording. Default tmp/wx3_video/recorded-{ts}.mp4. When output_paths is also provided, this still receives the full continuous recording for verification/debugging.'),
|
|
371
|
+
output_paths: z.array(z.string()).optional().describe('Multi-section output mode. Pass an array of N workspace-relative paths matching plan.sections length. The tool records ONCE continuously then slices the result into N mp4s at section boundaries (derived from phase_start / phase_end events). RECOMMENDED whenever a URL has ≥2 sections — keeps visual flow natural between sections instead of reopening the browser per segment.'),
|
|
371
372
|
events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
|
|
372
373
|
viewport: z.object({
|
|
373
374
|
width: z.number().optional(),
|
package/package.json
CHANGED
|
@@ -215,12 +215,128 @@ async function transcodeWebmToMp4({
|
|
|
215
215
|
});
|
|
216
216
|
}
|
|
217
217
|
|
|
218
|
+
// Frame-accurate slice of an mp4 — re-encodes to honour the exact start/end
// instead of snapping to the nearest keyframe (which `-c copy` would do, and
// can drift by several seconds with libx264's default ~250-frame GOP).
// Re-encoding short clips (≤30s) at preset=veryfast is fast (<1s typical),
// so we trade a bit of CPU for being able to align section cuts to the
// per-segment TTS the rest of the pipeline expects.
/**
 * Cut `[startMs, startMs + durationMs)` out of `inputPath` into `outputPath`
 * by spawning ffmpeg and re-encoding the video stream (audio is dropped).
 *
 * @param {object} [options]
 * @param {string} options.inputPath - Source mp4 handed to ffmpeg via `-i`.
 * @param {string} options.outputPath - Destination mp4 (overwritten, `-y`).
 * @param {number} options.startMs - Slice start in ms; negative or
 *   non-numeric values are treated as 0.
 * @param {number} options.durationMs - Slice length in ms; values below
 *   50 ms (including 0 / non-numeric) are raised to 50 ms.
 * @param {number} [options.fps] - Output frame rate; `-r` is only passed when
 *   this is a finite number > 0.
 * @param {string} [options.ffmpegBin] - ffmpeg executable name or path.
 * @returns {Promise<void>} Resolves when ffmpeg exits with code 0.
 * @throws {Error} `code = 'FFMPEG_SPAWN_FAILED'` when the process cannot be
 *   started; `code = 'FFMPEG_CUT_FAILED'` on any non-zero exit (the message
 *   carries the last 2000 characters of ffmpeg's stderr).
 */
async function cutMp4Slice({
  inputPath,
  outputPath,
  startMs,
  durationMs,
  fps = DEFAULT_FPS,
  ffmpegBin = 'ffmpeg',
} = {}) {
  // Shortest slice we will ever request from ffmpeg, in milliseconds.
  const minSliceMs = 50;
  const startSec = Math.max(0, Number(startMs) || 0) / 1000;
  // Clamp while still in milliseconds. Clamping against 0.05 *before* the
  // /1000 conversion made the floor 0.05 ms, so a zero/invalid duration was
  // rendered as `-t 0.000` and produced an empty slice.
  const durationSec = Math.max(minSliceMs, Number(durationMs) || 0) / 1000;
  const fpsValue = Number(fps);
  const rateArgs = Number.isFinite(fpsValue) && fpsValue > 0 ? ['-r', String(fps)] : [];
  const args = [
    '-y',
    '-i', inputPath,
    // `-ss` / `-t` after `-i` seek on the decoded output, which is what makes
    // the cut frame-accurate rather than keyframe-aligned.
    '-ss', startSec.toFixed(3),
    '-t', durationSec.toFixed(3),
    '-an',
    '-c:v', 'libx264',
    '-preset', 'veryfast',
    '-pix_fmt', 'yuv420p',
    ...rateArgs,
    '-movflags', '+faststart',
    outputPath,
  ];
  await new Promise((resolve, reject) => {
    // stdout is never consumed, so do not open a pipe for it; only stderr is
    // captured for the failure message.
    const proc = spawn(ffmpegBin, args, { stdio: ['ignore', 'ignore', 'pipe'] });
    const errChunks = [];
    proc.stderr?.on('data', (chunk) => errChunks.push(chunk));
    proc.once('error', (err) => {
      const wrapped = new Error(`ffmpeg_spawn_failed:${err.message}`);
      wrapped.code = 'FFMPEG_SPAWN_FAILED';
      reject(wrapped);
    });
    proc.once('close', (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      const wrapped = new Error(
        `ffmpeg_cut_failed:code=${code}: ${Buffer.concat(errChunks).toString().slice(-2000)}`
      );
      wrapped.code = 'FFMPEG_CUT_FAILED';
      reject(wrapped);
    });
  });
}
|
|
266
|
+
|
|
267
|
+
// Derive per-section cut points from eventsLog. phase_start.t_ms / phase_end.t_ms
// are recorded against the trimmed mp4 timeline (head trim already happened),
// so we can use them as-is.
/**
 * Turn phase_start / phase_end events into one cut window per phase.
 *
 * For each phase the FIRST `phase_start` and the LAST `phase_end` win. Phases
 * are returned in the order their first `phase_start` appears in the log.
 * Entries that are not objects or carry no `phase_id` are ignored everywhere,
 * so a malformed event can no longer be counted as an extra phase.
 *
 * @param {Array<object>} eventsLog - Recorder events (`action`, `phase_id`, `t_ms`).
 * @param {number} phaseCount - Number of phases the caller expects.
 * @returns {Array<{phase_id: *, start_ms: number, end_ms: number, duration_ms: number}>}
 * @throws {Error} `events_log_empty` (code `EVENTS_LOG_EMPTY`),
 *   `events_phase_count_mismatch:…` (code `EVENTS_PHASE_COUNT_MISMATCH`),
 *   `phase_timing_missing:<id>` (code `PHASE_TIMING_MISSING`) or
 *   `phase_timing_invalid:<id>:…` (code `PHASE_TIMING_INVALID`).
 */
function deriveSectionCutPoints(eventsLog, phaseCount) {
  // Attach a machine-readable code, matching how the other errors in this
  // file are tagged; messages are unchanged.
  const fail = (message, code) => {
    const error = new Error(message);
    error.code = code;
    return error;
  };

  if (!Array.isArray(eventsLog) || eventsLog.length === 0) {
    throw fail('events_log_empty', 'EVENTS_LOG_EMPTY');
  }

  const starts = new Map();
  const ends = new Map();
  const orderedIds = [];
  const seenIds = new Set();

  for (const ev of eventsLog) {
    if (!ev || typeof ev !== 'object') continue;
    const id = ev.phase_id;
    if (!id) continue;

    // Ordering follows the first phase_start seen per id. The id is recorded
    // even when its timestamp is unusable so that the failure below names the
    // offending phase instead of surfacing as a count mismatch.
    if (ev.action === 'phase_start' && !seenIds.has(id)) {
      seenIds.add(id);
      orderedIds.push(id);
    }

    const t = Number(ev.t_ms);
    if (!Number.isFinite(t)) continue;
    if (ev.action === 'phase_start') {
      if (!starts.has(id)) starts.set(id, t);
    } else if (ev.action === 'phase_end') {
      ends.set(id, t);
    }
  }

  if (orderedIds.length !== phaseCount) {
    throw fail(
      `events_phase_count_mismatch:expected=${phaseCount}:got=${orderedIds.length}`,
      'EVENTS_PHASE_COUNT_MISMATCH',
    );
  }

  return orderedIds.map((id) => {
    const startMs = starts.get(id);
    const endMs = ends.get(id);
    if (!Number.isFinite(startMs) || !Number.isFinite(endMs)) {
      throw fail(`phase_timing_missing:${id}`, 'PHASE_TIMING_MISSING');
    }
    if (endMs <= startMs) {
      throw fail(
        `phase_timing_invalid:${id}:start=${startMs}:end=${endMs}`,
        'PHASE_TIMING_INVALID',
      );
    }
    return { phase_id: id, start_ms: startMs, end_ms: endMs, duration_ms: endMs - startMs };
  });
}
|
|
307
|
+
|
|
308
|
+
/**
 * Validate the optional multi-section `output_paths` list and resolve every
 * entry to an absolute path (via `path.resolve`, i.e. relative to the current
 * working directory when the entry is not already absolute).
 *
 * @param {unknown} rawList - Caller-supplied list; `null`/`undefined` or an
 *   empty array means "multi-section output not requested".
 * @returns {string[] | null} Absolute paths in input order, or `null` when
 *   multi-section output was not requested.
 * @throws {Error} code `OUTPUT_PATHS_MUST_BE_ARRAY` when a non-array is given,
 *   `OUTPUT_PATHS_ENTRY_EMPTY` when an entry normalizes to an empty value, and
 *   `OUTPUT_PATHS_ENTRY_DUPLICATE` when two entries resolve to the same file.
 */
function normalizeOutputPaths(rawList) {
  if (rawList == null) return null;
  if (!Array.isArray(rawList)) {
    const error = new Error('output_paths_must_be_array');
    error.code = 'OUTPUT_PATHS_MUST_BE_ARRAY';
    throw error;
  }
  if (rawList.length === 0) return null;

  const firstIndexByPath = new Map();
  return rawList.map((entry, idx) => {
    const normalized = normalizeText(entry);
    if (!normalized) {
      const error = new Error(`output_paths[${idx}]_empty`);
      error.code = 'OUTPUT_PATHS_ENTRY_EMPTY';
      throw error;
    }
    const resolved = path.resolve(normalized);
    // Each section slice is written with `ffmpeg -y`; two entries pointing at
    // the same file would silently overwrite the earlier section's mp4, so
    // reject the request up front instead of returning misleading outputs.
    if (firstIndexByPath.has(resolved)) {
      const error = new Error(
        `output_paths[${idx}]_duplicate_of[${firstIndexByPath.get(resolved)}]`,
      );
      error.code = 'OUTPUT_PATHS_ENTRY_DUPLICATE';
      throw error;
    }
    firstIndexByPath.set(resolved, idx);
    return resolved;
  });
}
|
|
326
|
+
|
|
218
327
|
export async function recordUrlNarration({
|
|
219
328
|
plan,
|
|
220
329
|
output_path,
|
|
221
330
|
outputPath = output_path,
|
|
222
331
|
events_path,
|
|
223
332
|
eventsPath = events_path,
|
|
333
|
+
// Multi-section output: pass an array of N paths matching plan.sections length
|
|
334
|
+
// to record once continuously and slice the result into N per-section mp4s.
|
|
335
|
+
// The browser stays open for the whole recording, so visuals flow naturally
|
|
336
|
+
// between sections (no scroll-back-to-top between each, no page reload). When
|
|
337
|
+
// omitted, behaves exactly like before — single mp4 at outputPath.
|
|
338
|
+
output_paths,
|
|
339
|
+
outputPaths = output_paths,
|
|
224
340
|
url,
|
|
225
341
|
viewport = DEFAULT_VIEWPORT,
|
|
226
342
|
fps = DEFAULT_FPS,
|
|
@@ -234,6 +350,7 @@ export async function recordUrlNarration({
|
|
|
234
350
|
launchChromiumFn = launchChromiumMobile,
|
|
235
351
|
openPageFn = openPageAndSettle,
|
|
236
352
|
transcodeFn = transcodeWebmToMp4,
|
|
353
|
+
cutFn = cutMp4Slice,
|
|
237
354
|
nowMs = () => Date.now(),
|
|
238
355
|
} = {}) {
|
|
239
356
|
const zoom = Number.isFinite(Number(page_zoom)) && Number(page_zoom) > 0 ? Number(page_zoom) : 1.1;
|
|
@@ -249,6 +366,23 @@ export async function recordUrlNarration({
|
|
|
249
366
|
const resolvedUrl = resolveUrl({ url, plan });
|
|
250
367
|
const normalizedViewport = normalizeViewport(viewport);
|
|
251
368
|
const normalizedFps = normalizeInteger(fps, DEFAULT_FPS);
|
|
369
|
+
const resolvedOutputPaths = normalizeOutputPaths(outputPaths);
|
|
370
|
+
// When multi-section output is requested, the count must match plan.sections
|
|
371
|
+
// 1:1 — otherwise the agent will end up with audio/visual misalignment when
|
|
372
|
+
// it feeds these into plan_video_segments. Fail loud rather than silently
|
|
373
|
+
// truncating or padding.
|
|
374
|
+
if (resolvedOutputPaths && resolvedOutputPaths.length !== phases.length) {
|
|
375
|
+
const error = new Error(
|
|
376
|
+
`output_paths_count_mismatch:expected=${phases.length}:got=${resolvedOutputPaths.length}`,
|
|
377
|
+
);
|
|
378
|
+
error.code = 'OUTPUT_PATHS_COUNT_MISMATCH';
|
|
379
|
+
throw error;
|
|
380
|
+
}
|
|
381
|
+
if (resolvedOutputPaths) {
|
|
382
|
+
for (const p of resolvedOutputPaths) {
|
|
383
|
+
mkdirSync(path.dirname(p), { recursive: true });
|
|
384
|
+
}
|
|
385
|
+
}
|
|
252
386
|
|
|
253
387
|
mkdirSync(path.dirname(resolvedOutputPath), { recursive: true });
|
|
254
388
|
mkdirSync(path.dirname(resolvedEventsPath), { recursive: true });
|
|
@@ -367,12 +501,48 @@ export async function recordUrlNarration({
|
|
|
367
501
|
? eventsLog.reduce((max, ev) => Math.max(max, Number(ev?.t_ms) || 0), 0)
|
|
368
502
|
: 0;
|
|
369
503
|
|
|
504
|
+
// Multi-section output: slice the consolidated mp4 at section boundaries
|
|
505
|
+
// (derived from phase_start / phase_end events). All slices come from the
|
|
506
|
+
// SAME continuous recording, so the visual flow between sections stays
|
|
507
|
+
// natural — no browser reload, no scroll-back-to-top per segment.
|
|
508
|
+
let sectionOutputs = null;
|
|
509
|
+
if (resolvedOutputPaths) {
|
|
510
|
+
const cutPoints = deriveSectionCutPoints(eventsLog, phases.length);
|
|
511
|
+
sectionOutputs = [];
|
|
512
|
+
for (let i = 0; i < cutPoints.length; i += 1) {
|
|
513
|
+
const cut = cutPoints[i];
|
|
514
|
+
const outPath = resolvedOutputPaths[i];
|
|
515
|
+
await cutFn({
|
|
516
|
+
inputPath: resolvedOutputPath,
|
|
517
|
+
outputPath: outPath,
|
|
518
|
+
startMs: cut.start_ms,
|
|
519
|
+
durationMs: cut.duration_ms,
|
|
520
|
+
fps: normalizedFps,
|
|
521
|
+
});
|
|
522
|
+
const sliceStat = await stat(outPath);
|
|
523
|
+
if (!sliceStat.isFile() || sliceStat.size <= 0) {
|
|
524
|
+
const error = new Error(`section_slice_empty:${outPath}`);
|
|
525
|
+
error.code = 'SECTION_SLICE_EMPTY';
|
|
526
|
+
throw error;
|
|
527
|
+
}
|
|
528
|
+
sectionOutputs.push({
|
|
529
|
+
phase_id: cut.phase_id,
|
|
530
|
+
video_path: outPath,
|
|
531
|
+
start_ms: cut.start_ms,
|
|
532
|
+
end_ms: cut.end_ms,
|
|
533
|
+
duration_ms: cut.duration_ms,
|
|
534
|
+
size_bytes: Number(sliceStat.size ?? 0),
|
|
535
|
+
});
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
|
|
370
539
|
return {
|
|
371
540
|
video_path: resolvedOutputPath,
|
|
372
541
|
events_path: resolvedEventsPath,
|
|
373
542
|
events_log: eventsLog,
|
|
374
543
|
duration_ms: lastTms > 0 ? lastTms : null,
|
|
375
544
|
display,
|
|
545
|
+
sections: sectionOutputs,
|
|
376
546
|
};
|
|
377
547
|
} catch (error) {
|
|
378
548
|
primaryError = error;
|
|
@@ -181,6 +181,21 @@ export function resolveRecordUrlNarrationPaths({
|
|
|
181
181
|
};
|
|
182
182
|
}
|
|
183
183
|
|
|
184
|
+
/**
 * Resolve the tool's optional `output_paths` argument against the workspace.
 *
 * @param {unknown} rawList - Value supplied by the caller; `null`/`undefined`
 *   or an empty array yields `null` (single-output mode).
 * @param {{ workspaceDir: string }} options - Base directory each entry is
 *   resolved against with `path.resolve`.
 * @returns {string[] | null} One resolved path per entry, in input order.
 * @throws {Error} When `rawList` is not an array or an entry normalizes to an
 *   empty value.
 */
function resolveOutputPaths(rawList, { workspaceDir }) {
  const notProvided = rawList === null || rawList === undefined;
  if (notProvided) {
    return null;
  }

  if (Array.isArray(rawList) === false) {
    throw new Error('output_paths must be an array of file paths (one per section).');
  }

  if (rawList.length < 1) {
    return null;
  }

  // Resolve a single entry; the index is only used for the error message.
  const toAbsolute = (candidate, position) => {
    const cleaned = normalizeText(candidate);
    if (cleaned) {
      return path.resolve(workspaceDir, cleaned);
    }
    throw new Error(`output_paths[${position}] is empty — every entry must be a non-empty path.`);
  };

  return rawList.map((candidate, position) => toAbsolute(candidate, position));
}
|
|
198
|
+
|
|
184
199
|
export async function runRecordUrlNarrationTool({
|
|
185
200
|
args = {},
|
|
186
201
|
currentWorkspaceId = '',
|
|
@@ -252,23 +267,66 @@ export async function runRecordUrlNarrationTool({
|
|
|
252
267
|
mkdirSync(path.dirname(resolvedOutputPath), { recursive: true });
|
|
253
268
|
mkdirSync(path.dirname(resolvedEventsPath), { recursive: true });
|
|
254
269
|
|
|
270
|
+
// Multi-section mode: caller passed output_paths. Validate it 1:1 with
|
|
271
|
+
// plan.sections so the recorder can slice the continuous recording into
|
|
272
|
+
// per-section mp4s without ambiguity.
|
|
273
|
+
let resolvedOutputPaths = null;
|
|
274
|
+
try {
|
|
275
|
+
resolvedOutputPaths = resolveOutputPaths(validatedInput.output_paths, { workspaceDir });
|
|
276
|
+
} catch (error) {
|
|
277
|
+
return toolError(`Error: ${error.message}`);
|
|
278
|
+
}
|
|
279
|
+
if (resolvedOutputPaths) {
|
|
280
|
+
const planSectionCount = (planSegments(validatedInput.plan) ?? []).length;
|
|
281
|
+
if (resolvedOutputPaths.length !== planSectionCount) {
|
|
282
|
+
return toolError(
|
|
283
|
+
`Error: output_paths length (${resolvedOutputPaths.length}) must match `
|
|
284
|
+
+ `plan.sections length (${planSectionCount}). Each section produces exactly one mp4 — `
|
|
285
|
+
+ `don't pad or truncate.`,
|
|
286
|
+
);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
255
290
|
const recorderOutput = await recordUrlNarrationFn({
|
|
256
291
|
url: validatedInput.url,
|
|
257
292
|
plan: validatedInput.plan,
|
|
258
293
|
output_path: resolvedOutputPath,
|
|
259
294
|
events_path: resolvedEventsPath,
|
|
295
|
+
output_paths: resolvedOutputPaths,
|
|
260
296
|
viewport: validatedInput.viewport,
|
|
261
297
|
fps: validatedInput.fps,
|
|
262
298
|
settle_ms: validatedInput.settle_ms,
|
|
263
299
|
});
|
|
264
300
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
301
|
+
// Single-output mode (legacy): same one-line summary as before.
|
|
302
|
+
if (!resolvedOutputPaths) {
|
|
303
|
+
return toolText(
|
|
304
|
+
`Recorded URL narration.\n`
|
|
305
|
+
+ `video_path=${resolvedOutputPath}\n`
|
|
306
|
+
+ `events_path=${resolvedEventsPath}\n`
|
|
307
|
+
+ `duration_ms=${deriveDurationMs(recorderOutput) ?? 'unknown'}\n`
|
|
308
|
+
+ `phases=${derivePhaseCount({ plan: validatedInput.plan, recorderOutput }) ?? 'n/a'}`,
|
|
309
|
+
);
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// Multi-section mode: one section block per output mp4, plus the
|
|
313
|
+
// consolidated master mp4 path for debugging / verification.
|
|
314
|
+
const sections = Array.isArray(recorderOutput?.sections) ? recorderOutput.sections : [];
|
|
315
|
+
const lines = [
|
|
316
|
+
'Recorded URL narration (multi-section).',
|
|
317
|
+
`master_video_path=${resolvedOutputPath}`,
|
|
318
|
+
`events_path=${resolvedEventsPath}`,
|
|
319
|
+
`total_duration_ms=${deriveDurationMs(recorderOutput) ?? 'unknown'}`,
|
|
320
|
+
`sections=${sections.length}`,
|
|
321
|
+
];
|
|
322
|
+
sections.forEach((s, idx) => {
|
|
323
|
+
lines.push(`--- section ${idx} (${s.phase_id}) ---`);
|
|
324
|
+
lines.push(`video_path=${s.video_path}`);
|
|
325
|
+
lines.push(`start_ms=${s.start_ms}`);
|
|
326
|
+
lines.push(`duration_ms=${s.duration_ms}`);
|
|
327
|
+
lines.push(`size_bytes=${s.size_bytes ?? 'unknown'}`);
|
|
328
|
+
});
|
|
329
|
+
return toolText(lines.join('\n'));
|
|
272
330
|
} catch (error) {
|
|
273
331
|
return toolError(`Error: ${error.message}`);
|
|
274
332
|
}
|