@lightcone-ai/daemon 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -255,8 +255,16 @@ server.tool(
255
255
  presentation: z.object({
256
256
  style: z.enum(['static', 'scroll']).optional(),
257
257
  }).optional().describe('Optional presentation hints (style only). duration/per_card_duration are computed.'),
258
- dwell_ms: z.number().optional().describe('Optional override for record_url_narration phase duration. Default = audio_duration_ms.'),
259
- })).describe('Segments to plan. audio_path is required for each.'),
258
+ operations: z.array(z.object({
259
+ atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
260
+ duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
261
+ y: z.number().optional(),
262
+ x: z.number().optional(),
263
+ curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
264
+ mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
265
+ jitter_px: z.number().optional(),
266
+ })).optional().describe('For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.'),
267
+ })).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
260
268
  },
261
269
  async ({ segments }) => {
262
270
  const result = await runPlanVideoSegmentsTool({ segments });
@@ -267,9 +275,9 @@ server.tool(
267
275
 
268
276
  // ── compose_video_v2 (migrated from chat-bridge) ──────────────────────────
269
277
  // Tool-level enforcement of the standard chain: TTS-bearing segments require
270
- // plan_video_segments to have run earlier in this session. Without it manual
271
- // dwell/duration math has repeatedly produced misaligned subtitles, silent
272
- // tails, and re-records (Task #25/#26 trial).
278
+ // plan_video_segments to have run earlier in this session. Without it,
279
+ // hand-written dwell/duration math has repeatedly produced misaligned
280
+ // subtitles, silent tails, and full re-records in production runs.
273
281
  server.tool(
274
282
  'compose_video_v2',
275
283
  'Compose video(s) from a list of segments using ffmpeg. Each segment has a visual source (image / scroll / '
@@ -342,28 +350,43 @@ server.tool(
342
350
  // audio in production runs (Tasks #20/#25/#26), forcing re-records.
343
351
  server.tool(
344
352
  'record_url_narration',
345
- 'Record silent mp4s of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, then ffmpeg-transcoding. Each output mp4 can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nMUST be preceded by plan_video_segments in the same session — feed plan_video_segments\'s `segments` array as `plan.sections` so dwell_ms aligns mechanically with TTS audio_duration_ms (hand-written dwell_ms has drifted and forced re-records in production).\n\nALWAYS pass output_paths as an array with one mp4 path per plan.sections entry (single-section recording is a 1-element array). The tool records the URL ONCE continuously (one browser session, one scrollTop, natural scroll flow through all sections), then slices the recording at section boundaries via ffmpeg. There is NO mode that records N sections in N separate calls — that pattern reopened the browser and re-scrolled-from-top for each segment, which looked visually disjointed. One URL = one call.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
353
+ 'V6 record_url_narration. Drives Chromium on Xvfb + Playwright recordVideo to capture a silent mp4 per section, then ffmpeg-slices into output_paths. Each mp4 passes to compose_video_v2 as a video-kind segment.\n\n'
354
+ + 'REQUIRES page_understanding (from analyze_page) — used for safe-region check (scroll_to.y / cursor_focus.y rejected if in unsafe_regions) and preheat alignment (same full-scroll-then-top pre-roll as analyze_page).\n\n'
355
+ + 'plan.sections[*].operations[] is the visual beat — each operation is one of three atom calls:\n'
356
+ + ' - scroll_to: { y, duration_ms, curve?, mode?, jitter_px? }\n'
357
+ + ' - hold: { duration_ms } — duration_ms="fill" allowed on the LAST hold to auto-balance with audio_duration_ms\n'
358
+ + ' - cursor_focus: { x, y, duration_ms }\n\n'
359
+ + 'V5 fields are rejected: action / target_y / target_y_content_label / focus_region / transition_ms / dwell_ms (set by plan_video_segments only) / phase.beats[].\n\n'
360
+ + 'Standard chain: analyze_page → synthesize_tts × N → plan_video_segments → record_url_narration + compose_video_v2.\n\n'
361
+ + 'ALWAYS pass output_paths as an array with one mp4 path per plan.sections entry (single-section is a 1-element array). The tool records the URL ONCE continuously (one browser session, natural scroll flow across all sections), then ffmpeg-slices at section boundaries. One URL = one call.\n\n'
362
+ + 'Runtime: Linux daemon with Xvfb + Chromium + ffmpeg. macOS / Windows fail at startup.',
346
363
  {
347
- url: z.string().describe('Page URL to record'),
348
- plan: z.record(z.any()).describe(
349
- 'A video plan: an object with `phases` (or `sections`), each a "visual beat".\n\n'
350
- + 'ACTION VOCABULARY = atoms + macros. Pick by content type:\n'
351
- + ' - scroll_to_dwell (default for most sections): fast transition + dwell with subtle micro-motion at target_y. Use for titles, content cards, single focal areas.\n'
352
- + ' - narrated_pan: continuous linear scroll over the full section duration. Use ONLY when the speech actually narrates a long visible list (e.g. reading every job title in order). Was called linear_scroll_during; that name still works as an alias.\n'
353
- + ' - focal_arc: NO scroll; cursor moves between N visual focal points. Use for SHORT pages where consecutive sections share basically the same target_y (within ~150px) — scrolling would be invisible, the cursor carries the rhythm. Requires `points: [{x,y}, ...]` instead of target_y.\n'
354
- + ' - hold: pure pause, no motion. Rare.\n\n'
355
- + 'ATOMS (for power use via phase.beats[]): scroll_to / hold / micro_oscillate / cursor_focus. Any custom sequence the macros do not cover can be written as a beats array.\n\n'
356
- + 'Each section needs: action (or beats[]), target (`target_y` / `focus_region:[y1,y2]` / `points`), and `dwell_ms` (= section total duration; for narrated content this should match the segment\'s TTS audio_duration_ms).\n\n'
357
- + 'Standard chain: pass plan_video_segments\'s `segments` array directly as `plan.sections` each segment\'s `dwell_ms` is already its `audio_duration_ms`.\n\n'
358
- + 'For RECRUITMENT URLs (mp.weixin.qq.com / 校招 / 实习 / 岗位 content), each section MUST also declare `target_y_content_label` — a short Chinese label describing what content sits at that pixel y position on the page (e.g. "标题区" / "岗位信息卡片" / "公司介绍" / "届别说明"). Labels matching forbidden regions ("二维码" / "扫码" / "投递入口" / "投递方式" / "联系方式" / "微信号" / "QR" / "阅读原文" / "外链") will cause the tool to refuse the recording — recruitment content must NOT dwell on these areas (see fragments.md frag.short.recruitment_url_mode_policy). Pick a different target_y in the 标题/岗位 information area and rewrite that section.'
359
- ),
360
- output_paths: z.array(z.string()).min(1).describe('REQUIRED. Workspace-relative mp4 paths, one per plan.sections entry (single-section is a 1-element array). The tool records ONCE continuously and slices the result at section boundaries (derived from phase_start / phase_end events) — each section produces exactly one of these mp4s.'),
361
- output_path: z.string().optional().describe('Optional debug-only path for the CONSOLIDATED master recording (the full continuous webm transcoded). Auto-generated under tmp/ if omitted. Agents normally do not need to set this — they consume output_paths.'),
364
+ url: z.string().describe('Page URL to record (must match the URL passed to analyze_page that produced page_understanding).'),
365
+ page_understanding: z.record(z.any()).describe('Output of analyze_page for this URL. Required. Provides full_height_px / viewport / preheat_strategy / unsafe_regions[] for safety validation, and blocks[] / narrative_arc as informational metadata (the recorder itself only needs the safety bits).'),
366
+ plan: z.object({
367
+ sections: z.array(z.object({
368
+ id: z.string().optional(),
369
+ text: z.string().optional(),
370
+ audio_path: z.string().optional(),
371
+ dwell_ms: z.number().optional(),
372
+ operations: z.array(z.object({
373
+ atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
374
+ duration_ms: z.number().describe('Atom duration in ms. (plan_video_segments may have expanded a "fill" value already.)'),
375
+ y: z.number().optional(),
376
+ x: z.number().optional(),
377
+ curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
378
+ mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
379
+ jitter_px: z.number().optional(),
380
+ })).min(1).describe('Ordered atom sequence executed during this section.'),
381
+ })).min(1),
382
+ }).describe('plan.sections[] — each section has text/audio_path/dwell_ms (filled by plan_video_segments) and operations[].'),
383
+ output_paths: z.array(z.string()).min(1).describe('REQUIRED. Workspace-relative mp4 paths, one per plan.sections entry. The tool records ONCE continuously and slices at section boundaries (phase_start / phase_end events).'),
384
+ output_path: z.string().optional().describe('Debug-only path for the consolidated master recording. Auto-generated under tmp/ if omitted.'),
362
385
  events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${master}.events.json'),
363
386
  viewport: z.object({
364
387
  width: z.number().optional(),
365
388
  height: z.number().optional(),
366
- }).optional().describe('Default 1080x1920 (mobile portrait). Override only if the plan requires a different shape.'),
389
+ }).optional().describe('Default 1080x1920 (mobile portrait). Must match page_understanding.viewport.'),
367
390
  fps: z.number().optional().describe('Default 30. Do not change unless needed.'),
368
391
  settle_ms: z.number().optional().describe('Default 4000. Settle wait after navigation before recording starts.'),
369
392
  },
@@ -5,14 +5,13 @@ import { z } from 'zod';
5
5
  import { startThinProxy } from '../../_thin-proxy/forward.js';
6
6
 
7
7
  const AnalyzePageOptionsSchema = z.object({
8
- settleMs: z.number().int().min(500).max(30000).optional(),
9
- timeoutMs: z.number().int().min(5000).max(240000).optional(),
10
8
  viewportWidth: z.number().int().min(360).max(2160).optional(),
11
9
  viewportHeight: z.number().int().min(480).max(3840).optional(),
12
- minTextBins: z.number().int().min(3).max(40).optional(),
13
- minBinChars: z.number().int().min(12).max(200).optional(),
14
- allowVisionFallback: z.boolean().optional(),
15
- useLlm: z.boolean().optional(),
10
+ settleMs: z.number().int().min(500).max(30000).optional(),
11
+ timeoutMs: z.number().int().min(5000).max(240000).optional(),
12
+ preheatStrategy: z.enum(['none', 'full_scroll_then_top']).optional(),
13
+ chunkHeight: z.number().int().min(1200).max(6000).optional(),
14
+ maxChunks: z.number().int().min(1).max(8).optional(),
16
15
  fixture_mode: z.boolean().optional(),
17
16
  }).passthrough();
18
17
 
@@ -22,7 +21,7 @@ await startThinProxy({
22
21
  tools: [
23
22
  {
24
23
  name: 'analyze_page',
25
- description: 'Analyze webpage structure for short-video narration planning. Returns page_understanding schema.',
24
+ description: 'Analyze a webpage for short-video planning. Returns V6 page_understanding: blocks[] (id/y_top/y_bottom/visual_kind/text/summary/keywords/density/visual_weight/contains_image/reading_priority/pacing_hint/narration_hint) + unsafe_regions[] (y_top/y_bottom/reason) + narrative_arc (structure/suggested_flow) + url/page_type/primary_topic/viewport/preheat_strategy/full_height_px. Required for any URL-narration video.',
26
25
  inputSchema: {
27
26
  url: z.string().url(),
28
27
  persona: z.string().optional(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.22.0",
3
+ "version": "0.23.0",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -0,0 +1,184 @@
1
+ // CDP-level touch dispatch. Sends real Input.dispatchTouchEvent through a
2
+ // Chrome DevTools Protocol session, so the browser's gesture engine treats
3
+ // the input as a real finger swipe — getting rubber-band overscroll, fling
4
+ // inertia, and paint-synced scroll updates for free. The previous approach
5
+ // (page.evaluate synthesizing TouchEvents + calling scrollBy) bypassed the
6
+ // gesture pipeline entirely, so scrolls looked smooth in isolation but felt
7
+ // "robotic" because none of the physics-driven feedback ran.
8
+ //
9
+ // We pace touchMove dispatches at ~16ms from the Node side. The actual scroll
10
+ // rendering is then driven by Chrome's compositor thread at the display's
11
+ // native refresh rate — the Node-side cadence only determines the velocity
12
+ // vector that Chrome sees, not the visible frame rate. That means
13
+ // requestAnimationFrame-style smoothness comes free as long as touchMoves
14
+ // land at roughly 60Hz; we don't need a JS-side rAF loop anymore.
15
+
16
// Defaults for synthesized CDP touch points and touchMove pacing.
const DEFAULT_TOUCH_ID = 0;           // single-finger gestures reuse one touch id
const DEFAULT_FORCE = 0.5;            // mid-range contact pressure reported to the page
const DEFAULT_RADIUS = 2;             // contact radius (px) for each touch point
const DEFAULT_FRAME_INTERVAL_MS = 16; // ~60Hz target cadence for touchMove dispatch
20
+
21
// Promise-based delay. Negative or fractional inputs are normalized to a
// non-negative whole millisecond count before scheduling.
function sleep(ms) {
  const delayMs = Math.max(0, Math.round(ms));
  return new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });
}
24
+
25
// Constrain value to [min, max]. Applies the lower bound first, then the
// upper, so the upper bound wins when min > max — exactly matching
// Math.min(max, Math.max(min, value)).
function clamp(value, min, max) {
  const atLeastMin = Math.max(min, value);
  return Math.min(max, atLeastMin);
}
28
+
29
// Quadratic ease-in-out: accelerate through the first half of the motion,
// decelerate symmetrically through the second. f(0)=0, f(0.5)=0.5, f(1)=1.
function easeInOutQuad(t) {
  const inFirstHalf = t < 0.5;
  if (inFirstHalf) {
    return 2 * t * t;
  }
  return 1 - (Math.pow(-2 * t + 2, 2) / 2);
}
33
+
34
// Quadratic ease-out: fast start, decelerating to zero velocity at t=1.
function easeOutQuad(t) {
  const remaining = 1 - t;
  return 1 - (remaining * remaining);
}
37
+
38
// Identity easing: output progress equals elapsed fraction.
function linear(progress) {
  return progress;
}
41
+
42
// Map a curve name to its easing function. Unknown or missing names fall
// back to easeInOutQuad, the gentlest default for swipe gestures.
function resolveCurveFn(name) {
  switch (name) {
    case 'linear':
      return linear;
    case 'easeOutQuad':
      return easeOutQuad;
    default:
      return easeInOutQuad;
  }
}
47
+
48
// Uniform integer in [min, max] — both ends inclusive — drawn from the
// supplied random source so tests can inject a deterministic randomFn.
function randomIntInclusive(min, max, randomFn = Math.random) {
  const span = max - min + 1;
  return min + Math.floor(randomFn() * span);
}
51
+
52
// Build one touch point for CDP Input.dispatchTouchEvent. Coordinates are
// rounded to integers; radius and rotation are fixed module-wide, while id
// and force default to module constants but can be overridden per call.
function buildTouchPoint(x, y, { id = DEFAULT_TOUCH_ID, force = DEFAULT_FORCE } = {}) {
  const point = {
    x: Math.round(x),
    y: Math.round(y),
    radiusX: DEFAULT_RADIUS,
    radiusY: DEFAULT_RADIUS,
    rotationAngle: 0,
    force,
    id,
  };
  return point;
}
63
+
64
// Dispatches a single touch-press / drag / release through CDP. The browser
// sees this as a real finger gesture, so it produces native fling/momentum
// behaviour when the release velocity is non-zero (e.g. ease-out releases
// at speed) and rubber-band when the gesture overruns scroll bounds.
//
// Coordinates are viewport pixels (not page pixels). The caller is responsible
// for translating "I want to scroll the page down by N px" into finger
// trajectory (finger drags UP to scroll DOWN).
//
// Contract:
//   cdp             — object with an async .send(method, params) (CDP session).
//   startX/startY   — press point in viewport px; must be finite.
//   endX/endY       — release point in viewport px; must be finite.
//   durationMs      — gesture length; floored to >= 50ms.
//   curve           — 'easeInOutQuad' (default) | 'linear' | 'easeOutQuad'.
//   pixel*/timing*  — per-move jitter ranges; both-zero disables that jitter.
//   frameIntervalMs — target inter-move pacing; floored to >= 8ms.
//   randomFn        — injectable random source for deterministic tests.
// Throws if cdp lacks .send() or any coordinate/duration is non-finite.
//
// NOTE(review): pixel jitter is applied to the y coordinate only (x stays on
// the eased path) — presumably intentional for vertical swipes; confirm if
// horizontal gestures are ever driven through this. Also jitterDir/timingDir
// consume randomFn even when the jitter magnitude is zero, so seeded randomFn
// sequences depend on jitter settings.
export async function dispatchSwipe(cdp, {
  startX,
  startY,
  endX,
  endY,
  durationMs,
  curve = 'easeInOutQuad',
  pixelJitterMin = 0,
  pixelJitterMax = 0,
  timingJitterMin = 0,
  timingJitterMax = 0,
  frameIntervalMs = DEFAULT_FRAME_INTERVAL_MS,
  touchId = DEFAULT_TOUCH_ID,
  randomFn = Math.random,
} = {}) {
  if (!cdp || typeof cdp.send !== 'function') {
    throw new Error('dispatchSwipe requires a CDP session with .send()');
  }
  if (![startX, startY, endX, endY, durationMs].every(Number.isFinite)) {
    throw new Error('dispatchSwipe requires finite startX, startY, endX, endY, durationMs');
  }
  // Floors keep the move count sane: >= 50ms total, >= 8ms per move, >= 2 moves.
  const totalMs = Math.max(50, Math.round(durationMs));
  const interval = Math.max(8, Math.round(frameIntervalMs));
  const steps = Math.max(2, Math.round(totalMs / interval));
  const curveFn = resolveCurveFn(curve);

  // touchStart at (startX, startY)
  await cdp.send('Input.dispatchTouchEvent', {
    type: 'touchStart',
    touchPoints: [buildTouchPoint(startX, startY, { id: touchId })],
  });

  // touchMove sequence: eased interpolation between start and end with optional
  // small per-step jitter. Chrome computes release velocity from the last few
  // moves, so the curve shape directly determines whether the gesture has
  // post-release inertia (ease-in / linear release at speed → fling) or stops
  // dead (ease-in-out → release at v≈0).
  let lastDispatchAt = Date.now();
  for (let i = 1; i < steps; i += 1) {
    const t = i / steps;
    const eased = curveFn(t);
    const baseX = startX + (endX - startX) * eased;
    const baseY = startY + (endY - startY) * eased;
    const jitterMag = pixelJitterMin === 0 && pixelJitterMax === 0
      ? 0
      : randomIntInclusive(pixelJitterMin, pixelJitterMax, randomFn);
    const jitterDir = randomFn() < 0.5 ? -1 : 1;
    const x = baseX;
    const y = baseY + (jitterMag * jitterDir);

    await cdp.send('Input.dispatchTouchEvent', {
      type: 'touchMove',
      touchPoints: [buildTouchPoint(x, y, { id: touchId })],
    });

    // Per-move delay = even share of the total, optionally jittered, never
    // below 4ms so the event stream stays plausible.
    const baseDelay = totalMs / steps;
    const timingJitter = timingJitterMin === 0 && timingJitterMax === 0
      ? 0
      : randomIntInclusive(timingJitterMin, timingJitterMax, randomFn);
    const timingDir = randomFn() < 0.5 ? -1 : 1;
    const targetDelay = Math.max(4, Math.round(baseDelay + (timingJitter * timingDir)));

    // Compensate for the time already spent in the CDP send. CDP round-trips
    // are typically 1-3ms over the local pipe, but if a frame stalls we don't
    // want to oversleep.
    const elapsed = Date.now() - lastDispatchAt;
    const wait = Math.max(0, targetDelay - elapsed);
    if (wait > 0) await sleep(wait);
    lastDispatchAt = Date.now();
  }

  // Final touchMove at exact end coordinates (no jitter) — Chrome reads this
  // as the release point.
  await cdp.send('Input.dispatchTouchEvent', {
    type: 'touchMove',
    touchPoints: [buildTouchPoint(endX, endY, { id: touchId })],
  });

  // touchEnd with empty touchPoints array (the touch is released).
  await cdp.send('Input.dispatchTouchEvent', {
    type: 'touchEnd',
    touchPoints: [],
  });
}
156
+
157
// Lazy CDP session cache keyed by page. Creating a CDP session per call would
// add ~1 round-trip of overhead per scroll segment. We attach the session to
// the page via WeakMap so callers don't have to thread it through.
const SESSION_CACHE = new WeakMap();

// Return the cached CDP session for a Playwright page, creating and caching
// one on first use. Throws if the argument does not look like a Playwright
// page (needs a .context() method).
export async function getCdpSession(page) {
  const looksLikePage = Boolean(page) && typeof page.context === 'function';
  if (!looksLikePage) {
    throw new Error('getCdpSession requires a Playwright page');
  }
  const existing = SESSION_CACHE.get(page);
  if (existing) {
    return existing;
  }
  const created = await page.context().newCDPSession(page);
  SESSION_CACHE.set(page, created);
  return created;
}
172
+
173
// Test seam: pre-seed the session cache so unit tests can supply a fake CDP
// session for a fake page object. A falsy page is silently ignored.
export function __setCdpSessionForTest(page, session) {
  if (!page) return;
  SESSION_CACHE.set(page, session);
}
177
+
178
// Internal helpers exposed for unit tests only — not part of the public API.
export const __internals = {
  easeInOutQuad,
  easeOutQuad,
  linear,
  resolveCurveFn,
  clamp,
};
@@ -0,0 +1,251 @@
1
+ import { dispatchSwipe, getCdpSession } from './cdp-touch.js';
2
+
3
// Segmentation tuning for humanizedScroll: a scroll is split into sub-swipes
// sized so each covers at most ~MIN_SEGMENT_DISTANCE_PX and lasts roughly
// TARGET_SEGMENT_DURATION_MS, capped at MAX_SEGMENTS swipes per call.
const MIN_SEGMENT_DISTANCE_PX = 260;
const TARGET_SEGMENT_DURATION_MS = 650;
const MAX_SEGMENTS = 36;
6
+
7
// Named jitter envelopes. pixelMin/pixelMax bound the per-touchMove positional
// jitter (px); timingMin/timingMax bound the per-move delay jitter (ms).
// Selected via the jitter_level option ('low' | 'medium' | 'high'); unknown
// levels fall back to 'medium' (see resolveJitterPreset).
const JITTER_PRESETS = {
  low: {
    pixelMin: 2,
    pixelMax: 3,
    timingMin: 8,
    timingMax: 11,
  },
  medium: {
    pixelMin: 2,
    pixelMax: 4,
    timingMin: 8,
    timingMax: 15,
  },
  high: {
    pixelMin: 3,
    pixelMax: 4,
    timingMin: 11,
    timingMax: 15,
  },
};
27
+
28
// Constrain value to [min, max]; lower bound applied first, upper bound last
// (so the upper bound wins if min > max), matching
// Math.min(max, Math.max(min, value)).
function clampNumber(value, min, max) {
  const raised = Math.max(min, value);
  return Math.min(max, raised);
}
31
+
32
// Quadratic ease-in-out: accelerates over the first half, decelerates
// symmetrically over the second. f(0)=0, f(0.5)=0.5, f(1)=1.
function easeInOutQuad(t) {
  const inFirstHalf = t < 0.5;
  if (inFirstHalf) {
    return 2 * t * t;
  }
  return 1 - (Math.pow(-2 * t + 2, 2) / 2);
}
36
+
37
// Quadratic ease-out: fast start, decelerating to zero velocity at t=1.
function easeOutQuad(t) {
  const remaining = 1 - t;
  return 1 - (remaining * remaining);
}
40
+
41
// Identity easing: output progress equals elapsed fraction.
function linear(progress) {
  return progress;
}
44
+
45
// Evaluate the named easing curve at progress t. Unknown names fall back to
// easeInOutQuad.
function curveValue(name, t) {
  switch (name) {
    case 'linear':
      return linear(t);
    case 'easeOutQuad':
      return easeOutQuad(t);
    default:
      return easeInOutQuad(t);
  }
}
50
+
51
// Uniform integer in [min, max] inclusive, drawn from the injected randomFn
// (callers always pass one; no Math.random fallback at this layer).
function randomInt(min, max, randomFn) {
  const span = max - min + 1;
  return min + Math.floor(randomFn() * span);
}
54
+
55
// Look up a named jitter preset; non-string or unknown levels fall back to
// the 'medium' envelope.
function resolveJitterPreset(jitterLevel) {
  const isKnown = typeof jitterLevel === 'string' && Boolean(JITTER_PRESETS[jitterLevel]);
  return isKnown ? JITTER_PRESETS[jitterLevel] : JITTER_PRESETS.medium;
}
61
+
62
// Resolve the effective jitter envelope for one scroll call.
// A finite pixel_jitter_px overrides any named preset — plan-executor has
// historical per-call jitter budgets (e.g. 1px for cursor_focus, 3px for
// fast_scroll). An explicit 0 disables jitter entirely (clean swipe); a
// positive value derives a pixel band of [max(1, floor(px/2)), px] with a
// fixed 6-12ms timing band. Otherwise the named jitter_level preset applies.
function resolveJitterConfig({ pixelJitterPx, jitterLevel }) {
  if (!Number.isFinite(pixelJitterPx)) {
    return resolveJitterPreset(jitterLevel);
  }
  const px = Math.max(0, Math.floor(pixelJitterPx));
  if (px === 0) {
    return { pixelMin: 0, pixelMax: 0, timingMin: 0, timingMax: 0 };
  }
  return {
    pixelMin: Math.max(1, Math.floor(px / 2)),
    pixelMax: px,
    timingMin: 6,
    timingMax: 12,
  };
}
80
+
81
// Normalize a motion-curve name; anything outside the supported set maps to
// 'easeInOutQuad'.
function resolveCurveName(name) {
  const supported = ['linear', 'easeOutQuad'];
  return supported.includes(name) ? name : 'easeInOutQuad';
}
86
+
87
// Throw a descriptive error unless value is a finite number; name is echoed
// in the message so callers can identify the offending option.
function assertFinite(name, value) {
  if (Number.isFinite(value)) return;
  throw new Error(`${name} must be a finite number`);
}
92
+
93
// Show or hide the page cursor by setting CSS `cursor` on both <html> and
// <body>. A non-boolean mouseVisible means "leave the page alone" — no
// evaluate call is made at all.
async function applyMouseVisibility(page, mouseVisible) {
  if (typeof mouseVisible !== 'boolean') return;
  await page.evaluate((visible) => {
    const cursorValue = visible ? 'auto' : 'none';
    for (const el of [document.documentElement, document.body]) {
      if (el) el.style.cursor = cursorValue;
    }
  }, mouseVisible);
}
103
+
104
// Split one logical scroll into evenly-spaced sub-swipes. The segment count
// is the larger of the distance rule (≤ MIN_SEGMENT_DISTANCE_PX per segment)
// and the duration rule (≈ TARGET_SEGMENT_DURATION_MS per segment), capped at
// MAX_SEGMENTS. Positions are linearly spaced; easing is applied later via
// per-segment durations. Returns [] for sub-pixel distance or sub-ms duration.
function buildSegments({ fromY, toY, durationMs }) {
  const travel = toY - fromY;
  const distance = Math.abs(travel);
  if (distance < 1 || durationMs < 1) return [];

  const byDistance = Math.ceil(distance / MIN_SEGMENT_DISTANCE_PX);
  const byDuration = Math.ceil(durationMs / TARGET_SEGMENT_DURATION_MS);
  const segmentCount = clampNumber(Math.max(byDistance, byDuration), 1, MAX_SEGMENTS);

  return Array.from({ length: segmentCount }, (_, index) => {
    const startProgress = index / segmentCount;
    const endProgress = (index + 1) / segmentCount;
    return {
      index,
      startProgress,
      endProgress,
      fromY: Math.round(fromY + (travel * startProgress)),
      toY: Math.round(fromY + (travel * endProgress)),
    };
  });
}
125
+
126
// Read the page's viewport size once per call. Pages (or minimal test mocks)
// without a usable viewportSize() fall back to 1080x1920 mobile portrait.
function getViewportSize(page) {
  const FALLBACK = { width: 1080, height: 1920 };
  if (typeof page.viewportSize !== 'function') return FALLBACK;
  const size = page.viewportSize();
  const usable = size && Number.isFinite(size.width) && Number.isFinite(size.height);
  return usable ? size : FALLBACK;
}
135
+
136
// Translate a scroll-by-delta request into a finger trajectory and dispatch
// it through CDP. 1:1 mapping: the finger traverses the same number of pixels
// the page should scroll — drag UP to scroll DOWN, and vice versa. The
// browser's gesture engine handles the actual scrollTop update, inertia, and
// rubber-band; we only supply realistic input events.
//
// Fix: the travel cap is now direction-aware. The finger starts ~78% down
// the viewport, so an upward page-scroll (finger dragging DOWN) has far less
// screen headroom than a downward one. The previous single cap
// (fingerStartY - 80) let upward swipes request an end point past the bottom
// clamp at viewport.height - 60, which silently shortened the gesture and
// distorted its eased velocity profile. Capped undershoot remains fine — the
// next segment picks up the remainder.
async function runSwipeSegment(page, cdp, {
  fromY,
  toY,
  durationMs,
  curveName,
  jitterPreset,
  randomFn,
}) {
  const viewport = getViewportSize(page);
  const distance = toY - fromY;
  if (Math.abs(distance) < 1) return;
  const direction = distance >= 0 ? 1 : -1;

  // Thumb-friendly start point: ~78% down the viewport, clamped away from
  // the screen edges.
  const fingerStartY = clampNumber(Math.round(viewport.height * 0.78), Math.round(viewport.height * 0.5), viewport.height - 100);
  const requestedTravel = Math.abs(distance);
  // Direction-aware hard cap — a real finger cannot leave the screen:
  //  - scroll DOWN (direction 1): finger moves UP, headroom above the start;
  //  - scroll UP (direction -1): finger moves DOWN, headroom below the start.
  const maxTravel = direction >= 0
    ? fingerStartY - 80
    : (viewport.height - 60) - fingerStartY;
  const fingerTravel = Math.min(requestedTravel, Math.max(0, maxTravel));
  const fingerEndY = clampNumber(fingerStartY - (direction * fingerTravel), 60, viewport.height - 60);
  // Small horizontal offset + drift keep the swipe from being perfectly
  // vertical (perfect verticals read as synthetic input).
  const horizontalNudge = randomInt(-18, 18, randomFn);
  const horizontalDrift = randomInt(-26, 26, randomFn);
  const fingerStartX = clampNumber(Math.round((viewport.width * 0.52) + horizontalNudge), 48, viewport.width - 48);
  const fingerEndX = clampNumber(fingerStartX + horizontalDrift, 48, viewport.width - 48);

  await dispatchSwipe(cdp, {
    startX: fingerStartX,
    startY: fingerStartY,
    endX: fingerEndX,
    endY: fingerEndY,
    durationMs,
    curve: curveName,
    pixelJitterMin: jitterPreset.pixelMin,
    pixelJitterMax: jitterPreset.pixelMax,
    timingJitterMin: jitterPreset.timingMin,
    timingJitterMax: jitterPreset.timingMax,
    randomFn,
  });
}
185
+
186
// Normalize the caller's snake_case options into the internal config shape.
// from_y / to_y / duration_ms must coerce to finite numbers (assertFinite
// throws otherwise); positions are floored at 0 and duration at 80ms.
function buildConfig(options = {}) {
  const {
    random_fn: randomOverride,
    pixel_jitter_px: pixelJitterPx,
    jitter_level: jitterLevel,
    motion_curve: motionCurve,
    mouseVisible,
  } = options;
  const randomFn = typeof randomOverride === 'function' ? randomOverride : Math.random;

  const fromY = Number(options.from_y);
  const toY = Number(options.to_y);
  const durationMs = Number(options.duration_ms);
  assertFinite('from_y', fromY);
  assertFinite('to_y', toY);
  assertFinite('duration_ms', durationMs);

  return {
    fromY: Math.max(0, fromY),
    toY: Math.max(0, toY),
    durationMs: Math.max(80, Math.round(durationMs)),
    jitterPreset: resolveJitterConfig({ pixelJitterPx, jitterLevel }),
    mouseVisible,
    curveName: resolveCurveName(motionCurve),
    randomFn,
  };
}
207
+
208
// Scroll a Playwright page from options.from_y to options.to_y over
// options.duration_ms using CDP touch swipes that read as human input.
//
// Options (snake_case, validated by buildConfig):
//   from_y / to_y / duration_ms — required; must coerce to finite numbers.
//     Positions are floored at 0, duration at 80ms.
//   motion_curve   — 'easeInOutQuad' (default) | 'linear' | 'easeOutQuad'.
//   pixel_jitter_px / jitter_level — jitter envelope (see resolveJitterConfig).
//   mouseVisible   — boolean toggles the CSS cursor; anything else is ignored.
//   random_fn      — injectable random source for deterministic tests.
//
// No-op when the effective scroll distance is under 1px. Throws if `page`
// lacks evaluate()/waitForTimeout().
//
// NOTE(review): the easing curve is applied twice — here to apportion segment
// DURATIONS, and again inside dispatchSwipe to shape each segment's finger
// path — so the overall velocity profile compounds the curve. The 90ms
// per-segment floor also means the total time can exceed duration_ms for
// short, highly segmented scrolls. Both look deliberate; confirm before
// changing.
export async function humanizedScroll(page, options = {}) {
  if (!page || typeof page.evaluate !== 'function' || typeof page.waitForTimeout !== 'function') {
    throw new Error('humanizedScroll requires a Playwright page-like object');
  }
  const config = buildConfig(options);
  if (Math.abs(config.toY - config.fromY) < 1) return;

  await applyMouseVisibility(page, config.mouseVisible);
  const segments = buildSegments(config);
  if (segments.length === 0) return;

  // Resolve CDP session once per scroll call (cached on page via WeakMap).
  // Tests can pre-seed the cache with a fake session via __setCdpSessionForTest.
  const cdp = await getCdpSession(page);

  // Occasional 100-300ms "reading" pauses every 3-5 segments (never after the
  // final segment) break up the rhythm of back-to-back swipes.
  let segmentsUntilPause = randomInt(3, 5, config.randomFn);
  for (const segment of segments) {
    // Apportion total duration by the eased progress delta: segment positions
    // are linear, so easing the durations yields the curved velocity profile.
    const easedStart = curveValue(config.curveName, segment.startProgress);
    const easedEnd = curveValue(config.curveName, segment.endProgress);
    const segmentDuration = Math.max(90, Math.round(config.durationMs * (easedEnd - easedStart)));

    await runSwipeSegment(page, cdp, {
      fromY: segment.fromY,
      toY: segment.toY,
      durationMs: segmentDuration,
      curveName: config.curveName,
      jitterPreset: config.jitterPreset,
      randomFn: config.randomFn,
    });

    if (segment.index >= segments.length - 1) continue;
    segmentsUntilPause -= 1;
    if (segmentsUntilPause <= 0) {
      const pauseMs = randomInt(100, 300, config.randomFn);
      await page.waitForTimeout(pauseMs);
      segmentsUntilPause = randomInt(3, 5, config.randomFn);
    }
  }
}
247
+
248
+ // V5 convenience wrappers (scrollToDwell / linearScrollDuring / fastScroll)
249
+ // and the snake_case re-exports are removed in V6 — atoms.js calls
250
+ // humanizedScroll directly with explicit params, and there's no longer a
251
+ // separate "fast/slow/dwell" vocabulary at this layer.