@lightcone-ai/daemon 0.23.6 → 0.23.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp-servers/official/media-tools/index.js +45 -33
- package/mcp-servers/official/media-tools/lib/render.js +5 -4
- package/package.json +3 -2
- package/src/_vendor/video/recorder/atoms.js +12 -17
- package/src/_vendor/video/recorder/chromium-driver.js +1 -5
- package/src/_vendor/video/recorder/index.js +15 -130
- package/src/_vendor/video/recorder/plan-executor.js +121 -10
- package/src/cli.js +293 -0
- package/src/doctor.js +52 -0
- package/src/index.js +36 -2
- package/src/local-api.js +106 -0
- package/src/local-config.js +116 -0
- package/src/tools/plan-video-segments.js +35 -2
- package/src/tools/record-url-narration.js +1 -1
- package/src/_vendor/video/recorder/display-pool.js +0 -126
- package/src/_vendor/video/recorder/ffmpeg-runner.js +0 -291
|
@@ -92,7 +92,19 @@ server.tool(
|
|
|
92
92
|
+ 'Takes any video produced by compose_video_v2 / record_url_narration / etc. and adds '
|
|
93
93
|
+ 'one or more on-screen title cards with animation presets (fade+zoom pop, per-character karaoke fill). '
|
|
94
94
|
+ 'The default narration subtitle burned by compose_video_v2 stays at the bottom; titles default to the top band so they do not collide. '
|
|
95
|
-
+ 'Output is a new mp4; original is not modified
|
|
95
|
+
+ 'Output is a new mp4; original is not modified.\n\n'
|
|
96
|
+
+ 'STANDARD opening 引导语 title (URL recruitment short videos — do this by default): the video '
|
|
97
|
+
+ 'opens with a 引导语 lead-in section, and that line is rendered HERE as an eye-catching centered '
|
|
98
|
+
+ 'title card — NOT as a plain bottom subtitle. Recipe:\n'
|
|
99
|
+
+ ' - preset: "karaoke_punch" (per-character fill reads as a real effect; "fade_zoom" is too plain for an opener)\n'
|
|
100
|
+
+ ' - position: "center"\n'
|
|
101
|
+
+ ' - style: { font_size 110-120 (large), color a vivid "#FFE000"-style, outline_color "#000000" }\n'
|
|
102
|
+
+ ' - start_ms: 0, end_ms: the opening section duration — the card clears exactly when the first content section begins.\n'
|
|
103
|
+
+ 'Long titles auto-wrap; you may also place an explicit "\\n" for a clean 2-line break.\n'
|
|
104
|
+
+ 'The opening section itself must be a FRAMELESS lead-in — its operations use raw `y` (no `block`, '
|
|
105
|
+
+ 'so the recorder draws no spotlight) and it carries NO bottom subtitle (subtitle_text empty); the '
|
|
106
|
+
+ '引导语 appears only as this centered card.\n\n'
|
|
107
|
+
+ 'Skip this tool when a plain video with no title cards is desired.',
|
|
96
108
|
{
|
|
97
109
|
input_path: z.string().min(1).describe('Absolute path to the source mp4 (e.g. the output of compose_video_v2).'),
|
|
98
110
|
output_path: z.string().optional().describe('Optional absolute output path. If omitted, writes to a tmp path and returns it.'),
|
|
@@ -258,7 +270,8 @@ server.tool(
|
|
|
258
270
|
operations: z.array(z.object({
|
|
259
271
|
atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
|
|
260
272
|
duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration in ms. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
|
|
261
|
-
|
|
273
|
+
block: z.string().optional().describe('scroll_to: id of a page_understanding block to frame. The recorder centers it in the viewport. Use this for content sections — do NOT write pixel y.'),
|
|
274
|
+
y: z.number().optional().describe('scroll_to: raw scrollTop. Only for a content-agnostic opening drift — for content blocks use `block` instead.'),
|
|
262
275
|
x: z.number().optional(),
|
|
263
276
|
curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
|
|
264
277
|
mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
|
|
@@ -266,32 +279,25 @@ server.tool(
|
|
|
266
279
|
})).optional().describe(
|
|
267
280
|
'For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms '
|
|
268
281
|
+ 'must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.\n\n'
|
|
269
|
-
+ '
|
|
270
|
-
+ '
|
|
271
|
-
+ '
|
|
272
|
-
+ '
|
|
273
|
-
+ '
|
|
274
|
-
+ '
|
|
275
|
-
+ '
|
|
276
|
-
+
|
|
277
|
-
+ '
|
|
278
|
-
+ '
|
|
279
|
-
+ '
|
|
282
|
+
+ 'Every content section is exactly: scroll_to{block} → hold. scroll_to FRAMES A CONTENT '
|
|
283
|
+
+ 'BLOCK — pass `block: "<id>"` (a block id from page_understanding.blocks) and the recorder '
|
|
284
|
+
+ 'CENTERS that block in the viewport. Do NOT write pixel `y` for content blocks; raw `y` is '
|
|
285
|
+
+ 'only for a content-agnostic opening drift.\n'
|
|
286
|
+
+ ' • One section narrates ONE block — every scroll_to in a section references the SAME '
|
|
287
|
+
+ 'block id (2+ distinct block ids → REJECTED: section_spans_multiple_blocks).\n'
|
|
288
|
+
+ ' • scroll_to is a short TRANSITION between blocks (~500-800ms). hold is where the '
|
|
289
|
+
+ 'NARRATION happens and the picture is STILL — long holds (2-5s) are the norm.\n'
|
|
290
|
+
+ ' • A block taller than the viewport just shows its centered slice, HELD STILL. Do NOT '
|
|
291
|
+
+ 'pan / slow-scroll through it — the picture must not move while you narrate; partial '
|
|
292
|
+
+ 'visibility of a tall block is accepted.\n'
|
|
293
|
+
+ ' • Every non-opening segment MUST start with a scroll_to (REJECTED otherwise: transition_required).\n\n'
|
|
294
|
+
+ 'GOOD — a 5s segment narrating block b2:\n'
|
|
280
295
|
+ ' [\n'
|
|
281
|
-
+ ' { atom: "scroll_to",
|
|
282
|
-
+ ' { atom: "hold",
|
|
296
|
+
+ ' { atom: "scroll_to", block: "b2", duration_ms: 700 }, // 0.7s transition, recorder centers b2\n'
|
|
297
|
+
+ ' { atom: "hold", duration_ms: "fill" }, // ~4.3s: narrate b2, picture still\n'
|
|
283
298
|
+ ' ]\n\n'
|
|
284
|
-
+ '
|
|
285
|
-
+ '
|
|
286
|
-
+ ' { atom: "scroll_to", y: 980, duration_ms: 700 }, // transition to first block\n'
|
|
287
|
-
+ ' { atom: "hold", duration_ms: 4000 }, // narrate this block (~"金融产品应用开发岗 …")\n'
|
|
288
|
-
+ ' { atom: "scroll_to", y: 1450, duration_ms: 600 }, // short transition to next block\n'
|
|
289
|
-
+ ' { atom: "hold", duration_ms: "fill" }, // narrate next block (~3.7s)\n'
|
|
290
|
-
+ ' ]\n\n'
|
|
291
|
-
+ 'BAD example (REJECTED by transition_required):\n'
|
|
292
|
-
+ ' [\n'
|
|
293
|
-
+ ' { atom: "hold", duration_ms: 5000 }, // segment starts with hold ← rejected\n'
|
|
294
|
-
+ ' ]',
|
|
299
|
+
+ 'BAD (REJECTED): a segment starting with hold (transition_required); a segment whose '
|
|
300
|
+
+ 'scroll_to ops reference two different blocks (section_spans_multiple_blocks).',
|
|
295
301
|
),
|
|
296
302
|
})).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
|
|
297
303
|
},
|
|
@@ -373,22 +379,27 @@ server.tool(
|
|
|
373
379
|
);
|
|
374
380
|
|
|
375
381
|
// ── record_url_narration (migrated from chat-bridge) ──────────────────────
|
|
376
|
-
// Records a silent mp4 of a URL via Chromium+
|
|
382
|
+
// Records a silent mp4 of a URL via headless Chromium + Playwright recordVideo,
|
|
377
383
|
// driven by a beat-by-beat plan. Hard-block: requires plan_video_segments to
|
|
378
384
|
// have run in this session — hand-written dwell_ms has drifted from TTS
|
|
379
385
|
// audio in production runs (Tasks #20/#25/#26), forcing re-records.
|
|
380
386
|
server.tool(
|
|
381
387
|
'record_url_narration',
|
|
382
|
-
'V6 record_url_narration. Drives Chromium
|
|
388
|
+
'V6 record_url_narration. Drives headless Chromium + Playwright recordVideo to capture a silent mp4 per section, then ffmpeg-slices into output_paths. Each mp4 passes to compose_video_v2 as a video-kind segment.\n\n'
|
|
383
389
|
+ 'REQUIRES page_understanding (from analyze_page) — used for safe-region check (scroll_to.y / cursor_focus.y rejected if in unsafe_regions) and preheat alignment (same full-scroll-then-top pre-roll as analyze_page).\n\n'
|
|
384
|
-
+ 'plan.sections[*].operations[] is the visual beat —
|
|
385
|
-
+ ' - scroll_to: { y, duration_ms, curve?, mode
|
|
390
|
+
+ 'plan.sections[*].operations[] is the visual beat — a content section is scroll_to{block} → hold:\n'
|
|
391
|
+
+ ' - scroll_to: { block | y, duration_ms, curve?, mode? } — pass `block` (a page_understanding '
|
|
392
|
+
+ 'block id) and the recorder CENTERS that block in the viewport, then the section holds STILL '
|
|
393
|
+
+ 'on it. A block taller than the viewport shows its centered slice held still (no pan). Raw '
|
|
394
|
+
+ '`y` is only for a content-agnostic opening drift.\n'
|
|
386
395
|
+ ' - hold: { duration_ms } — duration_ms="fill" allowed on the LAST hold to auto-balance with audio_duration_ms\n'
|
|
387
396
|
+ ' - cursor_focus: { x, y, duration_ms }\n\n'
|
|
397
|
+
+ 'The recorder automatically draws a spotlight highlight (bordered frame + dimmed surround) around '
|
|
398
|
+
+ "each section's block once its scroll lands — automatic, no plan field controls it.\n\n"
|
|
388
399
|
+ 'V5 fields are rejected: action / target_y / target_y_content_label / focus_region / transition_ms / dwell_ms (set by plan_video_segments only) / phase.beats[].\n\n'
|
|
389
400
|
+ 'Standard chain: analyze_page → synthesize_tts × N → plan_video_segments → record_url_narration + compose_video_v2.\n\n'
|
|
390
401
|
+ 'ALWAYS pass output_paths as an array with one mp4 path per plan.sections entry (single-section is a 1-element array). The tool records the URL ONCE continuously (one browser session, natural scroll flow across all sections), then ffmpeg-slices at section boundaries. One URL = one call.\n\n'
|
|
391
|
-
+ 'Runtime:
|
|
402
|
+
+ 'Runtime: daemon with Chromium + ffmpeg.',
|
|
392
403
|
{
|
|
393
404
|
url: z.string().describe('Page URL to record (must match the URL passed to analyze_page that produced page_understanding).'),
|
|
394
405
|
page_understanding: z.record(z.any()).describe('Output of analyze_page for this URL. Required. Provides full_height_px / viewport / preheat_strategy / unsafe_regions[] for safety validation, and blocks[] / narrative_arc as informational metadata (the recorder itself only needs the safety bits).'),
|
|
@@ -401,12 +412,13 @@ server.tool(
|
|
|
401
412
|
operations: z.array(z.object({
|
|
402
413
|
atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
|
|
403
414
|
duration_ms: z.number().describe('Atom duration in ms. (plan_video_segments may have expanded a "fill" value already.)'),
|
|
404
|
-
|
|
415
|
+
block: z.string().optional().describe('scroll_to: page_understanding block id to frame. Recorder centers it and the section holds still. Use for content sections instead of pixel y.'),
|
|
416
|
+
y: z.number().optional().describe('scroll_to: raw scrollTop — only for a content-agnostic opening drift.'),
|
|
405
417
|
x: z.number().optional(),
|
|
406
418
|
curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
|
|
407
419
|
mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
|
|
408
420
|
jitter_px: z.number().optional(),
|
|
409
|
-
})).min(1).describe('Ordered atom sequence executed during this section.'),
|
|
421
|
+
})).min(1).describe('Ordered atom sequence executed during this section. Pass the plan_video_segments output verbatim.'),
|
|
410
422
|
})).min(1),
|
|
411
423
|
}).describe('plan.sections[] — each section has text/audio_path/dwell_ms (filled by plan_video_segments) and operations[].'),
|
|
412
424
|
output_paths: z.array(z.string()).min(1).describe('REQUIRED. Workspace-relative mp4 paths, one per plan.sections entry. The tool records ONCE continuously and slices at section boundaries (phase_start / phase_end events).'),
|
|
@@ -83,14 +83,15 @@ function buildAssContent({ playResX, playResY, overlays }) {
|
|
|
83
83
|
'ScriptType: v4.00+',
|
|
84
84
|
`PlayResX: ${playResX}`,
|
|
85
85
|
`PlayResY: ${playResY}`,
|
|
86
|
-
'WrapStyle:
|
|
86
|
+
'WrapStyle: 0',
|
|
87
87
|
'',
|
|
88
88
|
'[V4+ Styles]',
|
|
89
89
|
'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
|
|
90
90
|
// PrimaryColour white, SecondaryColour orange (for karaoke fill), OutlineColour black,
|
|
91
|
-
// Bold on, Outline
|
|
92
|
-
// override per-line via \an.
|
|
93
|
-
|
|
91
|
+
// Bold on, Outline 6px (thick — punchy contrast over busy page backgrounds),
|
|
92
|
+
// Shadow 2px, default Alignment middle-center (5) — events override per-line via \an.
|
|
93
|
+
// WrapStyle 0 (above) auto-wraps long titles instead of clipping them.
|
|
94
|
+
`Style: Title,${DEFAULT_FONT},96,&H00FFFFFF,&H000066FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,6,2,5,30,30,0,1`,
|
|
94
95
|
'',
|
|
95
96
|
'[Events]',
|
|
96
97
|
'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lightcone-ai/daemon",
|
|
3
|
-
"version": "0.23.
|
|
3
|
+
"version": "0.23.8",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
7
|
-
"lightcone-daemon": "src/index.js"
|
|
7
|
+
"lightcone-daemon": "src/index.js",
|
|
8
|
+
"lightcone": "src/cli.js"
|
|
8
9
|
},
|
|
9
10
|
"files": [
|
|
10
11
|
"src",
|
|
@@ -24,23 +24,18 @@ async function readScrollY(page) {
|
|
|
24
24
|
// ── atomScrollTo ─────────────────────────────────────────────────────────────
|
|
25
25
|
// Animated scroll from current position to target_y over duration_ms.
|
|
26
26
|
//
|
|
27
|
-
// Mode selection
|
|
28
|
-
// - 'programmatic'
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
// - 'touch'
|
|
32
|
-
//
|
|
33
|
-
//
|
|
34
|
-
//
|
|
35
|
-
//
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
// Discovered the hard way: scroll_to_dwell macro with ~18% transition
|
|
41
|
-
// for 1100+ px distances dispatched 5 CDP swipes in ~1s; each touchEnd
|
|
42
|
-
// kicked off a fling that the next touchStart immediately cancelled, so
|
|
43
|
-
// the cumulative scroll never reached target.
|
|
27
|
+
// Mode selection:
|
|
28
|
+
// - 'programmatic': RAF-driven easing loop inside page.evaluate via
|
|
29
|
+
// root.scrollTo. Every frame moves, vertical only, lands EXACTLY at
|
|
30
|
+
// target_y. This is what a smooth between-blocks transition needs.
|
|
31
|
+
// - 'touch': humanizedScroll → CDP touch. Real gesture physics (rubber-
|
|
32
|
+
// band, inertia) but splits scroll > 260px into multiple swipes with
|
|
33
|
+
// fling-cancel-fling boundaries and ±18-26px horizontal nudge — looks
|
|
34
|
+
// like a shaky multi-tap drag, not a clean slide.
|
|
35
|
+
// - 'auto' (default): resolves to 'programmatic'. Touch's gesture physics
|
|
36
|
+
// lost out to a clean slide for narration video; it stays reachable
|
|
37
|
+
// only via explicit `mode: 'touch'`. See the resolvedMode block below
|
|
38
|
+
// for the full rationale.
|
|
44
39
|
//
|
|
45
40
|
// Params:
|
|
46
41
|
// target_y — absolute Y in page coordinates (required)
|
|
@@ -30,7 +30,6 @@ function normalizeUrl(value) {
|
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
export async function launchChromiumMobile({
|
|
33
|
-
display,
|
|
34
33
|
viewport = DEFAULT_VIEWPORT,
|
|
35
34
|
userAgent = IOS_UA,
|
|
36
35
|
deviceScaleFactor = 1,
|
|
@@ -70,10 +69,7 @@ export async function launchChromiumMobile({
|
|
|
70
69
|
headless,
|
|
71
70
|
channel,
|
|
72
71
|
args: launchArgs,
|
|
73
|
-
env:
|
|
74
|
-
...process.env,
|
|
75
|
-
DISPLAY: normalizeText(display) || process.env.DISPLAY,
|
|
76
|
-
},
|
|
72
|
+
env: process.env,
|
|
77
73
|
...launchOptions,
|
|
78
74
|
});
|
|
79
75
|
|
|
@@ -5,8 +5,6 @@ import os from 'node:os';
|
|
|
5
5
|
import path from 'node:path';
|
|
6
6
|
|
|
7
7
|
import { launchChromiumMobile, openPageAndSettle } from './chromium-driver.js';
|
|
8
|
-
import { defaultDisplayPool } from './display-pool.js';
|
|
9
|
-
import { createUnexpectedExitWatcher, waitForProcessExit } from './ffmpeg-runner.js';
|
|
10
8
|
import { executePlanPhases, normalizePlanSections } from './plan-executor.js';
|
|
11
9
|
|
|
12
10
|
const DEFAULT_VIEWPORT = Object.freeze({ width: 1080, height: 1920 });
|
|
@@ -60,91 +58,6 @@ function resolveUrl({ url, plan }) {
|
|
|
60
58
|
throw error;
|
|
61
59
|
}
|
|
62
60
|
|
|
63
|
-
function createXvfbExitError({ code, signal, stderr }) {
|
|
64
|
-
const error = new Error(`xvfb_exited_unexpectedly:code=${code ?? 'null'}:signal=${signal ?? 'none'}`);
|
|
65
|
-
error.code = 'XVFB_EXITED_UNEXPECTEDLY';
|
|
66
|
-
error.exitCode = code;
|
|
67
|
-
error.signal = signal;
|
|
68
|
-
error.stderr = stderr;
|
|
69
|
-
return error;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
async function stopXvfb(runner, {
|
|
73
|
-
signal = 'SIGTERM',
|
|
74
|
-
timeoutMs = 5000,
|
|
75
|
-
killTimeoutMs = 2000,
|
|
76
|
-
} = {}) {
|
|
77
|
-
const child = runner?.child;
|
|
78
|
-
if (!child || child.exitCode !== null) return child?.exitCode ?? 0;
|
|
79
|
-
|
|
80
|
-
child.kill(signal);
|
|
81
|
-
const firstExit = await waitForProcessExit(child, timeoutMs);
|
|
82
|
-
if (!firstExit.timedOut) return firstExit.code;
|
|
83
|
-
|
|
84
|
-
child.kill('SIGKILL');
|
|
85
|
-
const forceExit = await waitForProcessExit(child, killTimeoutMs);
|
|
86
|
-
return forceExit.code;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
async function startXvfb({
|
|
90
|
-
display,
|
|
91
|
-
width,
|
|
92
|
-
height,
|
|
93
|
-
colorDepth = 24,
|
|
94
|
-
startupProbeMs = 1200,
|
|
95
|
-
xvfbBin = 'Xvfb',
|
|
96
|
-
} = {}) {
|
|
97
|
-
const args = [
|
|
98
|
-
display,
|
|
99
|
-
'-screen',
|
|
100
|
-
'0',
|
|
101
|
-
`${width}x${height}x${colorDepth}`,
|
|
102
|
-
'-ac',
|
|
103
|
-
'+extension',
|
|
104
|
-
'RANDR',
|
|
105
|
-
];
|
|
106
|
-
|
|
107
|
-
let stderr = '';
|
|
108
|
-
let spawnError = null;
|
|
109
|
-
const child = spawn(xvfbBin, args, {
|
|
110
|
-
stdio: ['ignore', 'pipe', 'pipe'],
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
child.stderr?.on('data', (chunk) => {
|
|
114
|
-
const next = `${stderr}${String(chunk)}`;
|
|
115
|
-
stderr = next.length > 8000 ? next.slice(next.length - 8000) : next;
|
|
116
|
-
});
|
|
117
|
-
child.once('error', (error) => {
|
|
118
|
-
spawnError = error;
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
await new Promise(resolve => setTimeout(resolve, Math.max(0, Number(startupProbeMs) || 0)));
|
|
122
|
-
|
|
123
|
-
if (spawnError) {
|
|
124
|
-
const error = new Error(`xvfb_spawn_failed:${spawnError.message}`);
|
|
125
|
-
error.code = 'XVFB_SPAWN_FAILED';
|
|
126
|
-
throw error;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
if (child.exitCode !== null) {
|
|
130
|
-
throw createXvfbExitError({
|
|
131
|
-
code: child.exitCode,
|
|
132
|
-
signal: child.signalCode,
|
|
133
|
-
stderr,
|
|
134
|
-
});
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
const runner = {
|
|
138
|
-
child,
|
|
139
|
-
display,
|
|
140
|
-
args,
|
|
141
|
-
getStderr: () => stderr,
|
|
142
|
-
stop: (options) => stopXvfb(runner, options),
|
|
143
|
-
};
|
|
144
|
-
|
|
145
|
-
return runner;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
61
|
async function scrollToTop(page) {
|
|
149
62
|
await page.evaluate(() => {
|
|
150
63
|
const root = document.scrollingElement || document.documentElement;
|
|
@@ -363,9 +276,6 @@ export async function recordUrlNarration({
|
|
|
363
276
|
viewport = DEFAULT_VIEWPORT,
|
|
364
277
|
fps = DEFAULT_FPS,
|
|
365
278
|
settle_ms = 4000,
|
|
366
|
-
displayPool = defaultDisplayPool,
|
|
367
|
-
startupProbeMs = 1200,
|
|
368
|
-
xvfbStopTimeoutMs = 5000,
|
|
369
279
|
postPlanTailMs = 600,
|
|
370
280
|
recordingDir = null,
|
|
371
281
|
launchChromiumFn = launchChromiumMobile,
|
|
@@ -423,33 +333,16 @@ export async function recordUrlNarration({
|
|
|
423
333
|
const ownTempDir = !recordingDir;
|
|
424
334
|
const recVideoDir = recordingDir || await mkdtemp(path.join(os.tmpdir(), 'lc-recvid-'));
|
|
425
335
|
|
|
426
|
-
let displayLease;
|
|
427
|
-
let xvfb;
|
|
428
|
-
let xvfbWatcher;
|
|
429
336
|
let browserSession = null;
|
|
430
337
|
let primaryError = null;
|
|
431
338
|
const cleanupErrors = [];
|
|
432
339
|
|
|
433
340
|
try {
|
|
434
|
-
displayLease = await displayPool.acquireDisplay();
|
|
435
|
-
const display = displayLease.display;
|
|
436
|
-
|
|
437
|
-
xvfb = await startXvfb({
|
|
438
|
-
display,
|
|
439
|
-
width: normalizedViewport.width,
|
|
440
|
-
height: normalizedViewport.height,
|
|
441
|
-
startupProbeMs,
|
|
442
|
-
});
|
|
443
|
-
xvfbWatcher = createUnexpectedExitWatcher(xvfb.child, 'xvfb');
|
|
444
|
-
|
|
445
341
|
// The page recording captures the page viewport only (no browser chrome),
|
|
446
|
-
// regardless of the on-screen window.
|
|
447
|
-
// created, so the webm includes goto + settle; we measure that head and trim
|
|
448
|
-
// it off in transcodeFn.
|
|
449
|
-
const recordStartedAt = nowMs();
|
|
342
|
+
// regardless of the on-screen window.
|
|
450
343
|
browserSession = await launchChromiumFn({
|
|
451
|
-
display,
|
|
452
344
|
viewport: normalizedViewport,
|
|
345
|
+
headless: true,
|
|
453
346
|
contextOptions: {
|
|
454
347
|
recordVideo: {
|
|
455
348
|
dir: recVideoDir,
|
|
@@ -457,6 +350,14 @@ export async function recordUrlNarration({
|
|
|
457
350
|
},
|
|
458
351
|
},
|
|
459
352
|
});
|
|
353
|
+
// recordVideo's webm timeline starts at t=0 when the page is created —
|
|
354
|
+
// which happens INSIDE launchChromiumFn. Capture the head-trim reference
|
|
355
|
+
// here, right after it returns, NOT before the launch: the webm has no
|
|
356
|
+
// frames for the browser-launch interval, so measuring from before launch
|
|
357
|
+
// would make headTrimMs overshoot by the launch duration and ffmpeg's
|
|
358
|
+
// `-ss headTrimMs` would clip the opening of the first plan section,
|
|
359
|
+
// shifting every section's visuals late against its narration audio.
|
|
360
|
+
const recordStartedAt = nowMs();
|
|
460
361
|
const videoHandle = typeof browserSession.page.video === 'function'
|
|
461
362
|
? browserSession.page.video()
|
|
462
363
|
: null;
|
|
@@ -480,15 +381,14 @@ export async function recordUrlNarration({
|
|
|
480
381
|
|
|
481
382
|
const headTrimMs = Math.max(0, nowMs() - recordStartedAt);
|
|
482
383
|
|
|
483
|
-
const eventsLog = await
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
384
|
+
const eventsLog = await executePlanPhases(browserSession.page, executablePlan, {
|
|
385
|
+
pageUnderstanding,
|
|
386
|
+
viewportHeight: normalizedViewport.height,
|
|
387
|
+
viewportWidth: normalizedViewport.width,
|
|
388
|
+
});
|
|
487
389
|
|
|
488
390
|
await browserSession.page.waitForTimeout(Math.max(0, Number(postPlanTailMs) || 0));
|
|
489
391
|
|
|
490
|
-
xvfbWatcher.deactivate();
|
|
491
|
-
|
|
492
392
|
// Flush the recording: video is written when the context closes.
|
|
493
393
|
let webmPath = null;
|
|
494
394
|
try {
|
|
@@ -573,15 +473,12 @@ export async function recordUrlNarration({
|
|
|
573
473
|
events_path: resolvedEventsPath,
|
|
574
474
|
events_log: eventsLog,
|
|
575
475
|
duration_ms: lastTms > 0 ? lastTms : null,
|
|
576
|
-
display,
|
|
577
476
|
sections: sectionOutputs,
|
|
578
477
|
};
|
|
579
478
|
} catch (error) {
|
|
580
479
|
primaryError = error;
|
|
581
480
|
throw error;
|
|
582
481
|
} finally {
|
|
583
|
-
xvfbWatcher?.deactivate();
|
|
584
|
-
|
|
585
482
|
if (browserSession) {
|
|
586
483
|
try {
|
|
587
484
|
await browserSession.browser.close();
|
|
@@ -590,18 +487,6 @@ export async function recordUrlNarration({
|
|
|
590
487
|
}
|
|
591
488
|
}
|
|
592
489
|
|
|
593
|
-
if (xvfb) {
|
|
594
|
-
try {
|
|
595
|
-
await stopXvfb(xvfb, { timeoutMs: xvfbStopTimeoutMs });
|
|
596
|
-
} catch (stopError) {
|
|
597
|
-
cleanupErrors.push(`xvfb_stop_failed:${stopError.message}`);
|
|
598
|
-
}
|
|
599
|
-
}
|
|
600
|
-
|
|
601
|
-
if (displayLease) {
|
|
602
|
-
displayLease.release();
|
|
603
|
-
}
|
|
604
|
-
|
|
605
490
|
if (ownTempDir) {
|
|
606
491
|
await rm(recVideoDir, { recursive: true, force: true }).catch(() => {});
|
|
607
492
|
}
|
|
@@ -14,8 +14,9 @@
|
|
|
14
14
|
// or unsafe coordinates throw — no silent skipping.
|
|
15
15
|
|
|
16
16
|
import { ATOMS, ATOM_NAMES } from './atoms.js';
|
|
17
|
-
import { findOverlappingUnsafeRegion } from '../understanding/schema.js';
|
|
17
|
+
import { findBlockById, findOverlappingUnsafeRegion } from '../understanding/schema.js';
|
|
18
18
|
import { getCdpSession } from '../cdp-touch.js';
|
|
19
|
+
import { setSpotlight, clearSpotlight } from './spotlight.js';
|
|
19
20
|
|
|
20
21
|
const V5_FIELDS_ON_SECTION = Object.freeze([
|
|
21
22
|
'action',
|
|
@@ -93,7 +94,70 @@ function assertYNotInUnsafeRegion(y, { unsafeRegions, atomName, sectionId, opera
|
|
|
93
94
|
}
|
|
94
95
|
}
|
|
95
96
|
|
|
96
|
-
|
|
97
|
+
// Resolve a scroll_to operation's target scrollTop. Two mutually-exclusive forms:
|
|
98
|
+
//
|
|
99
|
+
// { block: 'b3' } — frame a content block: the block's middle is placed at
|
|
100
|
+
// the viewport center, then the section holds STILL on it. The recorder
|
|
101
|
+
// owns this geometry so the agent never writes pixels. A block taller than
|
|
102
|
+
// the viewport just shows its centered slice held still — there is no pan,
|
|
103
|
+
// partial visibility is accepted (decided 2026-05-16: 定住,接受看不全).
|
|
104
|
+
//
|
|
105
|
+
// { y: <number> } — raw scrollTop, for content-agnostic moves such as the
|
|
106
|
+
// opening lead-in drift (画面跟内容无关的漫滑).
|
|
107
|
+
//
|
|
108
|
+
// A block-framed result is clamped to [0, full_height_px - viewport_height]; a raw `y` is only rounded, not clamped.
|
|
109
|
+
/**
 * Resolve the target scrollTop for a `scroll_to` operation.
 *
 * Exactly one of two forms must be present on `op`:
 *   - `block`: id of a block in `blocks`. The block's vertical midpoint is
 *     placed at the viewport center and the result is clamped to
 *     [0, fullHeightPx - viewportHeight] (no upper clamp when fullHeightPx is
 *     not a positive finite number).
 *   - `y`: raw scrollTop. Rounded to an integer and returned without clamping.
 *
 * @param {object} op - The scroll_to operation (`{ block }` or `{ y }`).
 * @param {object} [options]
 * @param {Array<object>} [options.blocks=[]] - Blocks carrying `id`, `y_top`, `y_bottom`.
 * @param {number} [options.viewportHeight=1920] - Viewport height used for centering.
 * @param {?number} [options.fullHeightPx=null] - Page height used for the upper clamp.
 * @param {string} [options.sectionId='?'] - Section id, used in error messages only.
 * @param {number} [options.operationIndex=0] - Operation index, used in error messages only.
 * @returns {number} Integer scrollTop.
 * @throws {Error} code `OPERATIONS_INVALID` when both or neither of `block` / `y` are set.
 * @throws {Error} code `BLOCK_NOT_FOUND` when `block` is not an id in `blocks`.
 * @throws {Error} code `BLOCK_GEOMETRY_INVALID` when the block lacks finite `y_top` / `y_bottom`.
 */
export function resolveScrollTargetY(op, {
  blocks = [],
  viewportHeight = 1920,
  fullHeightPx = null,
  sectionId = '?',
  operationIndex = 0,
} = {}) {
  const makeError = (code, message) => {
    const error = new Error(message);
    error.code = code;
    return error;
  };

  // Only real numbers and non-empty numeric strings count as a coordinate.
  // Plain Number(x) coerces null, '', false and [] to 0, which would make an
  // absent `y` look like an explicit scrollTop of 0.
  const toFiniteNumber = (value) => {
    if (typeof value === 'number') return Number.isFinite(value) ? value : null;
    if (typeof value === 'string' && value.trim() !== '') {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };

  // The `blocks = []` default only covers undefined; tolerate null / non-arrays too.
  const blockList = Array.isArray(blocks) ? blocks : [];
  const blockId = typeof op?.block === 'string' ? op.block.trim() : '';
  const rawY = toFiniteNumber(op?.y);
  const hasBlock = blockId !== '';
  const hasY = rawY !== null;

  if (hasBlock && hasY) {
    throw makeError(
      'OPERATIONS_INVALID',
      `operations_invalid: section "${sectionId}" operations[${operationIndex}] sets both `
        + '`block` and `y` on a scroll_to — specify exactly one (block to frame a content block, '
        + 'y for a raw content-agnostic move).',
    );
  }

  if (hasBlock) {
    const block = findBlockById(blockList, blockId);
    if (!block) {
      const known = blockList.map(b => b?.id).filter(Boolean).join(', ') || '(none)';
      throw makeError(
        'BLOCK_NOT_FOUND',
        `block_not_found: section "${sectionId}" operations[${operationIndex}].block="${blockId}" `
          + `is not a block id in page_understanding.blocks. Known ids: ${known}.`,
      );
    }

    // Without finite edges the centering math yields NaN, which would flow on
    // as a scrollTop. Fail loudly instead.
    const yTop = toFiniteNumber(block.y_top);
    const yBottom = toFiniteNumber(block.y_bottom);
    if (yTop === null || yBottom === null) {
      throw makeError(
        'BLOCK_GEOMETRY_INVALID',
        `block_geometry_invalid: section "${sectionId}" operations[${operationIndex}].block="${blockId}" `
          + 'has no finite y_top / y_bottom in page_understanding.blocks.',
      );
    }

    // Center the block. A block taller than the viewport shows its centered
    // slice; the result is still clamped to the scrollable range.
    const centered = (yTop + yBottom) / 2 - viewportHeight / 2;
    const maxScroll = Number.isFinite(fullHeightPx) && fullHeightPx > 0
      ? Math.max(0, fullHeightPx - viewportHeight)
      : Number.POSITIVE_INFINITY;
    return Math.round(Math.min(Math.max(centered, 0), maxScroll));
  }

  if (hasY) return Math.round(rawY);

  throw makeError(
    'OPERATIONS_INVALID',
    `operations_invalid: section "${sectionId}" operations[${operationIndex}] is a scroll_to with `
      + 'neither `block` nor `y`. Set `block` (centers/frames a content block) or `y` (raw scrollTop).',
  );
}
|
|
159
|
+
|
|
160
|
+
function validateOperation(op, { sectionId, operationIndex, fullHeightPx, unsafeRegions, blocks, viewportHeight }) {
|
|
97
161
|
if (!op || typeof op !== 'object' || Array.isArray(op)) {
|
|
98
162
|
const error = new Error(
|
|
99
163
|
`operations_invalid: section "${sectionId}" operations[${operationIndex}] is not an object.`,
|
|
@@ -120,9 +184,15 @@ function validateOperation(op, { sectionId, operationIndex, fullHeightPx, unsafe
|
|
|
120
184
|
throw error;
|
|
121
185
|
}
|
|
122
186
|
|
|
187
|
+
let scrollTargetY = null;
|
|
123
188
|
if (atomName === 'scroll_to') {
|
|
124
|
-
|
|
125
|
-
|
|
189
|
+
// scroll_to.y is no longer agent-authored pixels — resolveScrollTargetY
|
|
190
|
+
// turns { block } into a centered, clamped scrollTop. Raw { y } is still
|
|
191
|
+
// accepted for content-agnostic moves. The resolved value is what gets
|
|
192
|
+
// bounds/unsafe-checked and handed to the atom.
|
|
193
|
+
scrollTargetY = resolveScrollTargetY(op, { blocks, viewportHeight, fullHeightPx, sectionId, operationIndex });
|
|
194
|
+
assertYWithinBounds(scrollTargetY, { fullHeightPx, atomName, sectionId, operationIndex });
|
|
195
|
+
assertYNotInUnsafeRegion(scrollTargetY, { unsafeRegions, atomName, sectionId, operationIndex });
|
|
126
196
|
} else if (atomName === 'cursor_focus') {
|
|
127
197
|
assertYWithinBounds(Number(op.y), { fullHeightPx, atomName, sectionId, operationIndex });
|
|
128
198
|
assertYNotInUnsafeRegion(Number(op.y), { unsafeRegions, atomName, sectionId, operationIndex });
|
|
@@ -134,16 +204,19 @@ function validateOperation(op, { sectionId, operationIndex, fullHeightPx, unsafe
|
|
|
134
204
|
throw error;
|
|
135
205
|
}
|
|
136
206
|
}
|
|
137
|
-
return atomName;
|
|
207
|
+
return { atomName, scrollTargetY };
|
|
138
208
|
}
|
|
139
209
|
|
|
140
|
-
function operationToAtomParams(op, atomName, anchorY) {
|
|
210
|
+
function operationToAtomParams(op, atomName, anchorY, scrollTargetY) {
|
|
141
211
|
if (atomName === 'scroll_to') {
|
|
142
212
|
return {
|
|
143
|
-
|
|
213
|
+
// scrollTargetY is resolved by resolveScrollTargetY (block-framed or raw y).
|
|
214
|
+
target_y: scrollTargetY,
|
|
144
215
|
duration_ms: Number(op.duration_ms),
|
|
145
216
|
curve: op.curve || 'easeInOutQuad',
|
|
146
|
-
|
|
217
|
+
// Default 0 — no pixel jitter (user requirement, repeatedly stated).
|
|
218
|
+
// Only consulted by touch mode; auto/programmatic ignore it.
|
|
219
|
+
jitter_px: Number.isFinite(Number(op.jitter_px)) ? Number(op.jitter_px) : 0,
|
|
147
220
|
from_y: anchorY,
|
|
148
221
|
mode: op.mode || 'auto',
|
|
149
222
|
};
|
|
@@ -198,22 +271,45 @@ export function normalizePlanSections(plan = {}) {
|
|
|
198
271
|
async function runOperations(page, ctx, operations, {
|
|
199
272
|
fullHeightPx,
|
|
200
273
|
unsafeRegions,
|
|
274
|
+
blocks,
|
|
275
|
+
viewportHeight,
|
|
276
|
+
viewportWidth,
|
|
201
277
|
sectionId,
|
|
202
278
|
fallbackAnchorY,
|
|
203
279
|
}) {
|
|
204
280
|
let anchorY = fallbackAnchorY;
|
|
281
|
+
// Spotlight off during the transition scroll — it appears once the section's
|
|
282
|
+
// block has landed centered (set right after the scroll_to below).
|
|
283
|
+
await clearSpotlight(page);
|
|
205
284
|
for (let i = 0; i < operations.length; i += 1) {
|
|
206
285
|
const op = operations[i];
|
|
207
|
-
const atomName = validateOperation(op, {
|
|
286
|
+
const { atomName, scrollTargetY } = validateOperation(op, {
|
|
208
287
|
sectionId,
|
|
209
288
|
operationIndex: i,
|
|
210
289
|
fullHeightPx,
|
|
211
290
|
unsafeRegions,
|
|
291
|
+
blocks,
|
|
292
|
+
viewportHeight,
|
|
212
293
|
});
|
|
213
|
-
const params = operationToAtomParams(op, atomName, anchorY);
|
|
294
|
+
const params = operationToAtomParams(op, atomName, anchorY, scrollTargetY);
|
|
214
295
|
const atomFn = ATOMS[atomName];
|
|
215
296
|
const result = await atomFn(page, ctx, params);
|
|
216
297
|
if (result?.anchorY != null) anchorY = result.anchorY;
|
|
298
|
+
// Once a scroll_to has landed on a content block, frame it: bordered box
|
|
299
|
+
// around the block + the rest of the page dimmed. The box then stays for
|
|
300
|
+
// the section's hold.
|
|
301
|
+
if (atomName === 'scroll_to' && typeof op.block === 'string' && op.block.trim()) {
|
|
302
|
+
const blk = findBlockById(blocks, op.block.trim());
|
|
303
|
+
if (blk) {
|
|
304
|
+
await setSpotlight(page, {
|
|
305
|
+
yTop: blk.y_top,
|
|
306
|
+
yBottom: blk.y_bottom,
|
|
307
|
+
viewportTop: scrollTargetY,
|
|
308
|
+
viewportHeight,
|
|
309
|
+
viewportWidth,
|
|
310
|
+
});
|
|
311
|
+
}
|
|
312
|
+
}
|
|
217
313
|
}
|
|
218
314
|
return { anchorY };
|
|
219
315
|
}
|
|
@@ -230,6 +326,11 @@ function createEvent({ tMs, action, sectionId, detail = {} }) {
|
|
|
230
326
|
|
|
231
327
|
export async function executePlanPhases(page, plan, {
|
|
232
328
|
pageUnderstanding = null,
|
|
329
|
+
// The actual record-browser viewport height — used to frame block-referenced
|
|
330
|
+
// scroll_to operations. Falls back to page_understanding.viewport.height,
|
|
331
|
+
// then 1920. Pass the real recorder viewport so centering is pixel-correct.
|
|
332
|
+
viewportHeight = null,
|
|
333
|
+
viewportWidth = null,
|
|
233
334
|
getNowMs = () => Date.now(),
|
|
234
335
|
onEvent = null,
|
|
235
336
|
} = {}) {
|
|
@@ -238,6 +339,13 @@ export async function executePlanPhases(page, plan, {
|
|
|
238
339
|
const unsafeRegions = Array.isArray(pageUnderstanding?.unsafe_regions)
|
|
239
340
|
? pageUnderstanding.unsafe_regions
|
|
240
341
|
: [];
|
|
342
|
+
const blocks = Array.isArray(pageUnderstanding?.blocks) ? pageUnderstanding.blocks : [];
|
|
343
|
+
const resolvedViewportHeight = Number(viewportHeight)
|
|
344
|
+
|| Number(pageUnderstanding?.viewport?.height)
|
|
345
|
+
|| 1920;
|
|
346
|
+
const resolvedViewportWidth = Number(viewportWidth)
|
|
347
|
+
|| Number(pageUnderstanding?.viewport?.width)
|
|
348
|
+
|| 1080;
|
|
241
349
|
|
|
242
350
|
const startedAt = nowMs(getNowMs);
|
|
243
351
|
const eventsLog = [];
|
|
@@ -263,6 +371,9 @@ export async function executePlanPhases(page, plan, {
|
|
|
263
371
|
const result = await runOperations(page, ctx, section.operations, {
|
|
264
372
|
fullHeightPx,
|
|
265
373
|
unsafeRegions,
|
|
374
|
+
blocks,
|
|
375
|
+
viewportHeight: resolvedViewportHeight,
|
|
376
|
+
viewportWidth: resolvedViewportWidth,
|
|
266
377
|
sectionId,
|
|
267
378
|
fallbackAnchorY: lastAnchorY,
|
|
268
379
|
});
|