@lightcone-ai/daemon 0.23.5 → 0.23.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,7 +92,19 @@ server.tool(
92
92
  + 'Takes any video produced by compose_video_v2 / record_url_narration / etc. and adds '
93
93
  + 'one or more on-screen title cards with animation presets (fade+zoom pop, per-character karaoke fill). '
94
94
  + 'The default narration subtitle burned by compose_video_v2 stays at the bottom; titles default to the top band so they do not collide. '
95
- + 'Output is a new mp4; original is not modified. Skip this tool entirely when a plain video is desired — not every video needs title effects.',
95
+ + 'Output is a new mp4; original is not modified.\n\n'
96
+ + 'STANDARD opening 引导语 title (URL recruitment short videos — do this by default): the video '
97
+ + 'opens with a 引导语 lead-in section, and that line is rendered HERE as an eye-catching centered '
98
+ + 'title card — NOT as a plain bottom subtitle. Recipe:\n'
99
+ + ' - preset: "karaoke_punch" (per-character fill reads as a real effect; "fade_zoom" is too plain for an opener)\n'
100
+ + ' - position: "center"\n'
101
+ + ' - style: { font_size 110-120 (large), color a vivid "#FFE000"-style, outline_color "#000000" }\n'
102
+ + ' - start_ms: 0, end_ms: the opening section duration — the card clears exactly when the first content section begins.\n'
103
+ + 'Long titles auto-wrap; you may also place an explicit "\\n" for a clean 2-line break.\n'
104
+ + 'The opening section itself must be a FRAMELESS lead-in — its operations use raw `y` (no `block`, '
105
+ + 'so the recorder draws no spotlight) and it carries NO bottom subtitle (subtitle_text empty); the '
106
+ + '引导语 appears only as this centered card.\n\n'
107
+ + 'Skip this tool when a plain video with no title cards is desired.',
96
108
  {
97
109
  input_path: z.string().min(1).describe('Absolute path to the source mp4 (e.g. the output of compose_video_v2).'),
98
110
  output_path: z.string().optional().describe('Optional absolute output path. If omitted, writes to a tmp path and returns it.'),
@@ -258,7 +270,8 @@ server.tool(
258
270
  operations: z.array(z.object({
259
271
  atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
260
272
  duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration in ms. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
261
- y: z.number().optional(),
273
+ block: z.string().optional().describe('scroll_to: id of a page_understanding block to frame. The recorder centers it in the viewport. Use this for content sections — do NOT write pixel y.'),
274
+ y: z.number().optional().describe('scroll_to: raw scrollTop. Only for a content-agnostic opening drift — for content blocks use `block` instead.'),
262
275
  x: z.number().optional(),
263
276
  curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
264
277
  mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
@@ -266,29 +279,25 @@ server.tool(
266
279
  })).optional().describe(
267
280
  'For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms '
268
281
  + 'must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.\n\n'
269
- + 'READING-FLOW MODE (REQUIRED enforced by lint): operations must simulate a person '
270
- + 'sliding a finger through the page while narrating, pausing at key spots to explain. '
271
- + 'Concretely:\n'
272
- + ' Each non-opening segment MUST contain at least one scroll_to with duration_ms >= 1500.\n'
273
- + ' • Any hold with duration_ms > 2000 MUST be immediately preceded by a scroll_to with duration_ms >= 1500.\n'
274
- + ' Avoid the "jump + freeze" anti-pattern: scroll_to(duration_ms < 1000) followed by hold(duration_ms > 2000). '
275
- + 'It makes the recording feel like a screenshot slideshow, not a page being read.\n\n'
276
- + 'GOOD example for a 9.5s segment narrating "宁波银行金融科技部主推 FinTech 暑期专项":\n'
282
+ + 'Every content section is exactly: scroll_to{block} hold. scroll_to FRAMES A CONTENT '
283
+ + 'BLOCK pass `block: "<id>"` (a block id from page_understanding.blocks) and the recorder '
284
+ + 'CENTERS that block in the viewport. Do NOT write pixel `y` for content blocks; raw `y` is '
285
+ + 'only for a content-agnostic opening drift.\n'
286
+ + ' • One section narrates ONE block every scroll_to in a section references the SAME '
287
+ + 'block id (2+ distinct block ids REJECTED: section_spans_multiple_blocks).\n'
288
+ + ' scroll_to is a short TRANSITION between blocks (~500-800ms). hold is where the '
289
+ + 'NARRATION happens and the picture is STILL long holds (2-5s) are the norm.\n'
290
+ + ' • A block taller than the viewport just shows its centered slice, HELD STILL. Do NOT '
291
+ + 'pan / slow-scroll through it — the picture must not move while you narrate; partial '
292
+ + 'visibility of a tall block is accepted.\n'
293
+ + ' • Every non-opening segment MUST start with a scroll_to (REJECTED otherwise: transition_required).\n\n'
294
+ + 'GOOD — a 5s segment narrating block b2:\n'
277
295
  + ' [\n'
278
- + ' { atom: "scroll_to", y: 280, duration_ms: 2500 }, // slow slide while saying "宁波银行金融科技部,正式开放 FinTech 暑期专项"\n'
279
- + ' { atom: "hold", duration_ms: 1200 }, // brief pause on title to let viewer read\n'
280
- + ' { atom: "scroll_to", y: 980, duration_ms: 3200 }, // continue sliding while narrating job content\n'
281
- + ' { atom: "hold", duration_ms: 1400 }, // pause on key bullet list\n'
282
- + ' { atom: "scroll_to", y: 1450, duration_ms: 1500 }, // final slide to closing block\n'
283
- + ' { atom: "hold", duration_ms: "fill" }, // remaining audio time (~700ms expected)\n'
296
+ + ' { atom: "scroll_to", block: "b2", duration_ms: 700 }, // 0.7s transition, recorder centers b2\n'
297
+ + ' { atom: "hold", duration_ms: "fill" }, // ~4.3s: narrate b2, picture still\n'
284
298
  + ' ]\n\n'
285
- + 'BAD example (will be REJECTED by reading_flow_violation):\n'
286
- + ' [\n'
287
- + ' { atom: "scroll_to", y: 1000, duration_ms: 600 }, // jump cut\n'
288
- + ' { atom: "hold", duration_ms: 5000 }, // 5s freeze ← rejected\n'
289
- + ' { atom: "scroll_to", y: 2500, duration_ms: 800 }, // jump cut\n'
290
- + ' { atom: "hold", duration_ms: "fill" }, // ← rejected\n'
291
- + ' ]',
299
+ + 'BAD (REJECTED): a segment starting with hold (transition_required); a segment whose '
300
+ + 'scroll_to ops reference two different blocks (section_spans_multiple_blocks).',
292
301
  ),
293
302
  })).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
294
303
  },
@@ -370,22 +379,27 @@ server.tool(
370
379
  );
371
380
 
372
381
  // ── record_url_narration (migrated from chat-bridge) ──────────────────────
373
- // Records a silent mp4 of a URL via Chromium+Xvfb+Playwright recordVideo,
382
+ // Records a silent mp4 of a URL via headless Chromium + Playwright recordVideo,
374
383
  // driven by a beat-by-beat plan. Hard-block: requires plan_video_segments to
375
384
  // have run in this session — hand-written dwell_ms has drifted from TTS
376
385
  // audio in production runs (Tasks #20/#25/#26), forcing re-records.
377
386
  server.tool(
378
387
  'record_url_narration',
379
- 'V6 record_url_narration. Drives Chromium on Xvfb + Playwright recordVideo to capture a silent mp4 per section, then ffmpeg-slices into output_paths. Each mp4 passes to compose_video_v2 as a video-kind segment.\n\n'
388
+ 'V6 record_url_narration. Drives headless Chromium + Playwright recordVideo to capture a silent mp4 per section, then ffmpeg-slices into output_paths. Each mp4 passes to compose_video_v2 as a video-kind segment.\n\n'
380
389
  + 'REQUIRES page_understanding (from analyze_page) — used for safe-region check (scroll_to.y / cursor_focus.y rejected if in unsafe_regions) and preheat alignment (same full-scroll-then-top pre-roll as analyze_page).\n\n'
381
- + 'plan.sections[*].operations[] is the visual beat — each operation is one of three atom calls:\n'
382
- + ' - scroll_to: { y, duration_ms, curve?, mode?, jitter_px? }\n'
390
+ + 'plan.sections[*].operations[] is the visual beat — a content section is scroll_to{block} hold:\n'
391
+ + ' - scroll_to: { block | y, duration_ms, curve?, mode? } — pass `block` (a page_understanding '
392
+ + 'block id) and the recorder CENTERS that block in the viewport, then the section holds STILL '
393
+ + 'on it. A block taller than the viewport shows its centered slice held still (no pan). Raw '
394
+ + '`y` is only for a content-agnostic opening drift.\n'
383
395
  + ' - hold: { duration_ms } — duration_ms="fill" allowed on the LAST hold to auto-balance with audio_duration_ms\n'
384
396
  + ' - cursor_focus: { x, y, duration_ms }\n\n'
397
+ + 'The recorder automatically draws a spotlight highlight (bordered frame + dimmed surround) around '
398
+ + "each section's block once its scroll lands — automatic, no plan field controls it.\n\n"
385
399
  + 'V5 fields are rejected: action / target_y / target_y_content_label / focus_region / transition_ms / dwell_ms (set by plan_video_segments only) / phase.beats[].\n\n'
386
400
  + 'Standard chain: analyze_page → synthesize_tts × N → plan_video_segments → record_url_narration + compose_video_v2.\n\n'
387
401
  + 'ALWAYS pass output_paths as an array with one mp4 path per plan.sections entry (single-section is a 1-element array). The tool records the URL ONCE continuously (one browser session, natural scroll flow across all sections), then ffmpeg-slices at section boundaries. One URL = one call.\n\n'
388
- + 'Runtime: Linux daemon with Xvfb + Chromium + ffmpeg. macOS / Windows fail at startup.',
402
+ + 'Runtime: daemon with Chromium + ffmpeg.',
389
403
  {
390
404
  url: z.string().describe('Page URL to record (must match the URL passed to analyze_page that produced page_understanding).'),
391
405
  page_understanding: z.record(z.any()).describe('Output of analyze_page for this URL. Required. Provides full_height_px / viewport / preheat_strategy / unsafe_regions[] for safety validation, and blocks[] / narrative_arc as informational metadata (the recorder itself only needs the safety bits).'),
@@ -398,12 +412,13 @@ server.tool(
398
412
  operations: z.array(z.object({
399
413
  atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
400
414
  duration_ms: z.number().describe('Atom duration in ms. (plan_video_segments may have expanded a "fill" value already.)'),
401
- y: z.number().optional(),
415
+ block: z.string().optional().describe('scroll_to: page_understanding block id to frame. Recorder centers it and the section holds still. Use for content sections instead of pixel y.'),
416
+ y: z.number().optional().describe('scroll_to: raw scrollTop — only for a content-agnostic opening drift.'),
402
417
  x: z.number().optional(),
403
418
  curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
404
419
  mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
405
420
  jitter_px: z.number().optional(),
406
- })).min(1).describe('Ordered atom sequence executed during this section.'),
421
+ })).min(1).describe('Ordered atom sequence executed during this section. Pass the plan_video_segments output verbatim.'),
407
422
  })).min(1),
408
423
  }).describe('plan.sections[] — each section has text/audio_path/dwell_ms (filled by plan_video_segments) and operations[].'),
409
424
  output_paths: z.array(z.string()).min(1).describe('REQUIRED. Workspace-relative mp4 paths, one per plan.sections entry. The tool records ONCE continuously and slices at section boundaries (phase_start / phase_end events).'),
@@ -83,14 +83,15 @@ function buildAssContent({ playResX, playResY, overlays }) {
83
83
  'ScriptType: v4.00+',
84
84
  `PlayResX: ${playResX}`,
85
85
  `PlayResY: ${playResY}`,
86
- 'WrapStyle: 2',
86
+ 'WrapStyle: 0',
87
87
  '',
88
88
  '[V4+ Styles]',
89
89
  'Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding',
90
90
  // PrimaryColour white, SecondaryColour orange (for karaoke fill), OutlineColour black,
91
- // Bold on, Outline 4px, Shadow 2px, default Alignment middle-center (5) — events
92
- // override per-line via \an.
93
- `Style: Title,${DEFAULT_FONT},96,&H00FFFFFF,&H000066FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,4,2,5,30,30,0,1`,
91
+ // Bold on, Outline 6px (thick punchy contrast over busy page backgrounds),
92
+ // Shadow 2px, default Alignment middle-center (5) — events override per-line via \an.
93
+ // WrapStyle 0 (above) auto-wraps long titles instead of clipping them.
94
+ `Style: Title,${DEFAULT_FONT},96,&H00FFFFFF,&H000066FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,6,2,5,30,30,0,1`,
94
95
  '',
95
96
  '[Events]',
96
97
  'Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text',
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.23.5",
3
+ "version": "0.23.7",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
7
- "lightcone-daemon": "src/index.js"
7
+ "lightcone-daemon": "src/index.js",
8
+ "lightcone": "src/cli.js"
8
9
  },
9
10
  "files": [
10
11
  "src",
@@ -24,23 +24,18 @@ async function readScrollY(page) {
24
24
  // ── atomScrollTo ─────────────────────────────────────────────────────────────
25
25
  // Animated scroll from current position to target_y over duration_ms.
26
26
  //
27
- // Mode selection (the bit that took an end-to-end failure to learn):
28
- // - 'programmatic' (default for distance >= 240px): runs the easing loop
29
- // inside page.evaluate via root.scrollTo. Lands EXACTLY at target_y.
30
- // Lacks the rubber-band/fling physics of real touch, but reliable.
31
- // - 'touch' (default for short distances): humanizedScroll → CDP touch.
32
- // Produces natural gesture physics (rubber-band, inertia) but for
33
- // larger distances the multi-segment swipe gets broken up into rapid
34
- // micro-flings that interfere with each other, and the page often
35
- // ends up nowhere near the intended target_y. Safe for distances
36
- // that fit in a single ~260px finger swipe.
37
- // - 'auto' (default): picks 'touch' for distance < 240px (single
38
- // segment, no fling interference), 'programmatic' otherwise.
39
- //
40
- // Discovered the hard way: scroll_to_dwell macro with ~18% transition
41
- // for 1100+ px distances dispatched 5 CDP swipes in ~1s; each touchEnd
42
- // kicked off a fling that the next touchStart immediately cancelled, so
43
- // the cumulative scroll never reached target.
27
+ // Mode selection:
28
+ // - 'programmatic': RAF-driven easing loop inside page.evaluate via
29
+ // root.scrollTo. Every frame moves, vertical only, lands EXACTLY at
30
+ // target_y. This is what a smooth between-blocks transition needs.
31
+ // - 'touch': humanizedScroll → CDP touch. Real gesture physics (rubber-
32
+ // band, inertia) but splits scroll > 260px into multiple swipes with
33
+ // fling-cancel-fling boundaries and ±18-26px horizontal nudge — looks
34
+ // like a shaky multi-tap drag, not a clean slide.
35
+ // - 'auto' (default): resolves to 'programmatic'. Touch's gesture physics
36
+ // lost out to a clean slide for narration video; it stays reachable
37
+ // only via explicit `mode: 'touch'`. See the resolvedMode block below
38
+ // for the full rationale.
44
39
  //
45
40
  // Params:
46
41
  // target_y — absolute Y in page coordinates (required)
@@ -53,7 +48,7 @@ export async function atomScrollTo(page, _ctx, {
53
48
  target_y,
54
49
  duration_ms,
55
50
  curve = 'easeInOutQuad',
56
- jitter_px = 2,
51
+ jitter_px = 0, // 不要微动 — 用户反复明确要求
57
52
  from_y = null,
58
53
  mode = 'auto',
59
54
  } = {}) {
@@ -70,17 +65,21 @@ export async function atomScrollTo(page, _ctx, {
70
65
  const durationMs = Number(duration_ms);
71
66
  const distance = Math.abs(targetY - fromY);
72
67
 
73
- // Auto-mode heuristic: touch works for short distances (single segment, no
74
- // inter-segment fling interference) OR slow velocities (each segment has
75
- // time to settle before the next starts). Fast long-distance scrolls fall
76
- // back to programmatic, where the easing loop drives root.scrollTo
77
- // deterministically.
78
- // Thresholds chosen empirically against the v4 failure mode (~1100px in
79
- // ~1000ms = ~1100 px/s, fling-interrupt-fling, page never reached target).
80
- const velocity = durationMs > 0 ? (distance / durationMs) * 1000 : 0; // px/s
68
+ // Auto-mode: default to programmatic (RAF-driven smooth scroll). The touch
69
+ // path uses humanizedScroll which splits any scroll > 260px into multiple
70
+ // CDP swipes, each with ±18-26px random horizontal nudge and fling-cancel-
71
+ // fling boundaries — that looks like "颤抖着分多次拨" (a trembling, multi-flick drag), not a clean slide.
72
+ // User feedback is unambiguous: scroll must be a smooth transition between
73
+ // content blocks, not a teleport (instant snap) and not a wobble (multi-
74
+ // segment touch with horizontal drift). Programmatic with RAF achieves
75
+ // both — every frame moves, vertical only, no inter-segment pauses.
76
+ // Touch mode remains available via explicit `mode: 'touch'` for callers
77
+ // that specifically want gesture physics.
78
+ const velocity = durationMs > 0 ? (distance / durationMs) * 1000 : 0; // px/s (kept for diagnostics)
79
+ void velocity;
81
80
  const resolvedMode = mode === 'programmatic' || mode === 'touch'
82
81
  ? mode
83
- : (distance < 240 || velocity < 500 ? 'touch' : 'programmatic');
82
+ : 'programmatic';
84
83
 
85
84
  if (resolvedMode === 'touch') {
86
85
  await humanizedScroll(page, {
@@ -96,68 +95,75 @@ export async function atomScrollTo(page, _ctx, {
96
95
  targetY,
97
96
  durationMs,
98
97
  curve,
99
- jitterPx: Math.max(0, Number(jitter_px) || 0),
100
98
  });
101
99
  }
102
100
  return { anchorY: Math.round(targetY) };
103
101
  }
104
102
 
105
- // Programmatic scroll: hands the animation off to the browser's native
106
- // scroll engine via `scroll-behavior: smooth`. That way the easing runs on
107
- // the compositor thread at the display refresh rate, independent of how
108
- // busy the page's JS is. The JS-driven setTimeout approach we tried first
109
- // gets badly throttled on JS-heavy article pages (60Hz timers can stretch
110
- // to 150-200ms), turning a 1s transition into 5-8s.
103
+ // Programmatic scroll: JS-driven RAF loop that incrementally updates the
104
+ // scroll position frame-by-frame over `durationMs`. This produces an actual
105
+ // smooth scroll the viewer sees in the recording — the previous version
106
+ // did a hard instant snap and then a static wait, which looked like a
107
+ // teleport ("跳一下然后定格"), not like a person sliding a page.
108
+ //
109
+ // Why not native `scroll-behavior: smooth` or `scrollTo({behavior:'smooth'})`?
110
+ // In Playwright + a headless mobile context, native smooth-scroll often
111
+ // gets capped to a fixed short duration (~300-500ms) regardless of distance,
112
+ // or is throttled by the page's own scroll logic. We need a duration we
113
+ // control end-to-end.
111
114
  //
112
- // The wait is Node-side, so even if the in-page scrollend never fires we
113
- // still cap the section at durationMs and move on. We deliberately do NOT
114
- // wait for scrollend — empirically faster than dispatching the event on
115
- // page-heavy mobile sites.
115
+ // Frame loop runs inside page.evaluate so it stays in lockstep with the
116
+ // page's render thread — important when recordVideo is capturing 30fps.
116
117
  async function programmaticScroll(page, {
117
118
  fromY,
118
119
  targetY,
119
120
  durationMs,
121
+ curve = 'easeInOutQuad',
120
122
  } = {}) {
121
- // Try every plausible scroll target — mobile article pages sometimes have
122
- // a fixed-position outer body and scroll happens on an inner container.
123
- // We dispatch to all candidates and let whichever one is actually the
124
- // scroller win. Returns the diagnostics so we can debug when the page
125
- // refuses to scroll.
126
- const diag = await page.evaluate((input) => {
127
- const candidates = [];
128
- if (document.scrollingElement) candidates.push(document.scrollingElement);
129
- if (document.documentElement) candidates.push(document.documentElement);
130
- if (document.body) candidates.push(document.body);
131
- candidates.push(window);
132
-
133
- const before = candidates.map((c) => {
134
- if (c === window) return { tag: 'window', y: window.scrollY };
135
- return { tag: c.tagName, y: c.scrollTop };
136
- });
137
-
138
- // Hard snap to target on every candidate (instant, no animation).
139
- for (const c of candidates) {
140
- try {
141
- if (c === window) window.scrollTo(0, input.targetY);
142
- else { c.scrollTop = input.targetY; }
143
- } catch { /* ignore */ }
123
+ await page.evaluate(async (input) => {
124
+ function pickScroller() {
125
+ if (document.scrollingElement) return document.scrollingElement;
126
+ if (document.documentElement) return document.documentElement;
127
+ return document.body;
144
128
  }
129
+ function easeInOutQuad(t) { return t < 0.5 ? 2 * t * t : 1 - Math.pow(-2 * t + 2, 2) / 2; }
130
+ function easeOutQuad(t) { return 1 - (1 - t) * (1 - t); }
131
+ function linear(t) { return t; }
132
+ const ease = input.curve === 'linear' ? linear
133
+ : input.curve === 'easeOutQuad' ? easeOutQuad
134
+ : easeInOutQuad;
145
135
 
146
- const after = candidates.map((c) => {
147
- if (c === window) return { tag: 'window', y: window.scrollY };
148
- return { tag: c.tagName, y: c.scrollTop };
149
- });
136
+ const scroller = pickScroller();
137
+ const startY = (scroller === document.scrollingElement || scroller === document.documentElement)
138
+ ? scroller.scrollTop : window.scrollY;
139
+ const delta = input.targetY - startY;
140
+ const start = performance.now();
150
141
 
151
- return {
152
- requested_target: input.targetY,
153
- before, after,
154
- maxScroll: document.documentElement?.scrollHeight,
155
- innerHeight: window.innerHeight,
156
- };
157
- }, { fromY, targetY });
158
- void diag;
159
- // Brief dwell to let the page settle the snap before next atom starts.
160
- await page.waitForTimeout(Math.max(80, Math.round(durationMs * 0.3)));
142
+ return new Promise((resolve) => {
143
+ function tick(now) {
144
+ const elapsed = now - start;
145
+ const t = Math.min(1, elapsed / input.durationMs);
146
+ const y = startY + delta * ease(t);
147
+ try {
148
+ if (scroller === window) window.scrollTo(0, y);
149
+ else { scroller.scrollTop = y; }
150
+ } catch { /* ignore */ }
151
+ if (t < 1) {
152
+ requestAnimationFrame(tick);
153
+ } else {
154
+ // Final snap to exact target (in case of sub-pixel drift).
155
+ try {
156
+ if (scroller === window) window.scrollTo(0, input.targetY);
157
+ else { scroller.scrollTop = input.targetY; }
158
+ } catch { /* ignore */ }
159
+ resolve();
160
+ }
161
+ }
162
+ requestAnimationFrame(tick);
163
+ });
164
+ }, { fromY, targetY, durationMs, curve });
165
+ // Tiny settle so the next atom sees the scroll committed.
166
+ await page.waitForTimeout(50);
161
167
  }
162
168
 
163
169
  // ── atomHold ─────────────────────────────────────────────────────────────────
@@ -30,7 +30,6 @@ function normalizeUrl(value) {
30
30
  }
31
31
 
32
32
  export async function launchChromiumMobile({
33
- display,
34
33
  viewport = DEFAULT_VIEWPORT,
35
34
  userAgent = IOS_UA,
36
35
  deviceScaleFactor = 1,
@@ -70,10 +69,7 @@ export async function launchChromiumMobile({
70
69
  headless,
71
70
  channel,
72
71
  args: launchArgs,
73
- env: {
74
- ...process.env,
75
- DISPLAY: normalizeText(display) || process.env.DISPLAY,
76
- },
72
+ env: process.env,
77
73
  ...launchOptions,
78
74
  });
79
75
 
@@ -5,8 +5,6 @@ import os from 'node:os';
5
5
  import path from 'node:path';
6
6
 
7
7
  import { launchChromiumMobile, openPageAndSettle } from './chromium-driver.js';
8
- import { defaultDisplayPool } from './display-pool.js';
9
- import { createUnexpectedExitWatcher, waitForProcessExit } from './ffmpeg-runner.js';
10
8
  import { executePlanPhases, normalizePlanSections } from './plan-executor.js';
11
9
 
12
10
  const DEFAULT_VIEWPORT = Object.freeze({ width: 1080, height: 1920 });
@@ -60,91 +58,6 @@ function resolveUrl({ url, plan }) {
60
58
  throw error;
61
59
  }
62
60
 
63
- function createXvfbExitError({ code, signal, stderr }) {
64
- const error = new Error(`xvfb_exited_unexpectedly:code=${code ?? 'null'}:signal=${signal ?? 'none'}`);
65
- error.code = 'XVFB_EXITED_UNEXPECTEDLY';
66
- error.exitCode = code;
67
- error.signal = signal;
68
- error.stderr = stderr;
69
- return error;
70
- }
71
-
72
- async function stopXvfb(runner, {
73
- signal = 'SIGTERM',
74
- timeoutMs = 5000,
75
- killTimeoutMs = 2000,
76
- } = {}) {
77
- const child = runner?.child;
78
- if (!child || child.exitCode !== null) return child?.exitCode ?? 0;
79
-
80
- child.kill(signal);
81
- const firstExit = await waitForProcessExit(child, timeoutMs);
82
- if (!firstExit.timedOut) return firstExit.code;
83
-
84
- child.kill('SIGKILL');
85
- const forceExit = await waitForProcessExit(child, killTimeoutMs);
86
- return forceExit.code;
87
- }
88
-
89
- async function startXvfb({
90
- display,
91
- width,
92
- height,
93
- colorDepth = 24,
94
- startupProbeMs = 1200,
95
- xvfbBin = 'Xvfb',
96
- } = {}) {
97
- const args = [
98
- display,
99
- '-screen',
100
- '0',
101
- `${width}x${height}x${colorDepth}`,
102
- '-ac',
103
- '+extension',
104
- 'RANDR',
105
- ];
106
-
107
- let stderr = '';
108
- let spawnError = null;
109
- const child = spawn(xvfbBin, args, {
110
- stdio: ['ignore', 'pipe', 'pipe'],
111
- });
112
-
113
- child.stderr?.on('data', (chunk) => {
114
- const next = `${stderr}${String(chunk)}`;
115
- stderr = next.length > 8000 ? next.slice(next.length - 8000) : next;
116
- });
117
- child.once('error', (error) => {
118
- spawnError = error;
119
- });
120
-
121
- await new Promise(resolve => setTimeout(resolve, Math.max(0, Number(startupProbeMs) || 0)));
122
-
123
- if (spawnError) {
124
- const error = new Error(`xvfb_spawn_failed:${spawnError.message}`);
125
- error.code = 'XVFB_SPAWN_FAILED';
126
- throw error;
127
- }
128
-
129
- if (child.exitCode !== null) {
130
- throw createXvfbExitError({
131
- code: child.exitCode,
132
- signal: child.signalCode,
133
- stderr,
134
- });
135
- }
136
-
137
- const runner = {
138
- child,
139
- display,
140
- args,
141
- getStderr: () => stderr,
142
- stop: (options) => stopXvfb(runner, options),
143
- };
144
-
145
- return runner;
146
- }
147
-
148
61
  async function scrollToTop(page) {
149
62
  await page.evaluate(() => {
150
63
  const root = document.scrollingElement || document.documentElement;
@@ -363,9 +276,6 @@ export async function recordUrlNarration({
363
276
  viewport = DEFAULT_VIEWPORT,
364
277
  fps = DEFAULT_FPS,
365
278
  settle_ms = 4000,
366
- displayPool = defaultDisplayPool,
367
- startupProbeMs = 1200,
368
- xvfbStopTimeoutMs = 5000,
369
279
  postPlanTailMs = 600,
370
280
  recordingDir = null,
371
281
  launchChromiumFn = launchChromiumMobile,
@@ -423,33 +333,16 @@ export async function recordUrlNarration({
423
333
  const ownTempDir = !recordingDir;
424
334
  const recVideoDir = recordingDir || await mkdtemp(path.join(os.tmpdir(), 'lc-recvid-'));
425
335
 
426
- let displayLease;
427
- let xvfb;
428
- let xvfbWatcher;
429
336
  let browserSession = null;
430
337
  let primaryError = null;
431
338
  const cleanupErrors = [];
432
339
 
433
340
  try {
434
- displayLease = await displayPool.acquireDisplay();
435
- const display = displayLease.display;
436
-
437
- xvfb = await startXvfb({
438
- display,
439
- width: normalizedViewport.width,
440
- height: normalizedViewport.height,
441
- startupProbeMs,
442
- });
443
- xvfbWatcher = createUnexpectedExitWatcher(xvfb.child, 'xvfb');
444
-
445
341
  // The page recording captures the page viewport only (no browser chrome),
446
- // regardless of the on-screen window. recordVideo starts when the page is
447
- // created, so the webm includes goto + settle; we measure that head and trim
448
- // it off in transcodeFn.
449
- const recordStartedAt = nowMs();
342
+ // regardless of the on-screen window.
450
343
  browserSession = await launchChromiumFn({
451
- display,
452
344
  viewport: normalizedViewport,
345
+ headless: true,
453
346
  contextOptions: {
454
347
  recordVideo: {
455
348
  dir: recVideoDir,
@@ -457,6 +350,14 @@ export async function recordUrlNarration({
457
350
  },
458
351
  },
459
352
  });
353
+ // recordVideo's webm timeline starts at t=0 when the page is created —
354
+ // which happens INSIDE launchChromiumFn. Capture the head-trim reference
355
+ // here, right after it returns, NOT before the launch: the webm has no
356
+ // frames for the browser-launch interval, so measuring from before launch
357
+ // would make headTrimMs overshoot by the launch duration and ffmpeg's
358
+ // `-ss headTrimMs` would clip the opening of the first plan section,
359
+ // shifting every section's visuals late against its narration audio.
360
+ const recordStartedAt = nowMs();
460
361
  const videoHandle = typeof browserSession.page.video === 'function'
461
362
  ? browserSession.page.video()
462
363
  : null;
@@ -480,15 +381,14 @@ export async function recordUrlNarration({
480
381
 
481
382
  const headTrimMs = Math.max(0, nowMs() - recordStartedAt);
482
383
 
483
- const eventsLog = await Promise.race([
484
- executePlanPhases(browserSession.page, executablePlan, { pageUnderstanding }),
485
- xvfbWatcher.promise,
486
- ]);
384
+ const eventsLog = await executePlanPhases(browserSession.page, executablePlan, {
385
+ pageUnderstanding,
386
+ viewportHeight: normalizedViewport.height,
387
+ viewportWidth: normalizedViewport.width,
388
+ });
487
389
 
488
390
  await browserSession.page.waitForTimeout(Math.max(0, Number(postPlanTailMs) || 0));
489
391
 
490
- xvfbWatcher.deactivate();
491
-
492
392
  // Flush the recording: video is written when the context closes.
493
393
  let webmPath = null;
494
394
  try {
@@ -573,15 +473,12 @@ export async function recordUrlNarration({
573
473
  events_path: resolvedEventsPath,
574
474
  events_log: eventsLog,
575
475
  duration_ms: lastTms > 0 ? lastTms : null,
576
- display,
577
476
  sections: sectionOutputs,
578
477
  };
579
478
  } catch (error) {
580
479
  primaryError = error;
581
480
  throw error;
582
481
  } finally {
583
- xvfbWatcher?.deactivate();
584
-
585
482
  if (browserSession) {
586
483
  try {
587
484
  await browserSession.browser.close();
@@ -590,18 +487,6 @@ export async function recordUrlNarration({
590
487
  }
591
488
  }
592
489
 
593
- if (xvfb) {
594
- try {
595
- await stopXvfb(xvfb, { timeoutMs: xvfbStopTimeoutMs });
596
- } catch (stopError) {
597
- cleanupErrors.push(`xvfb_stop_failed:${stopError.message}`);
598
- }
599
- }
600
-
601
- if (displayLease) {
602
- displayLease.release();
603
- }
604
-
605
490
  if (ownTempDir) {
606
491
  await rm(recVideoDir, { recursive: true, force: true }).catch(() => {});
607
492
  }