@lightcone-ai/daemon 0.22.1 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp-servers/official/media-tools/index.js +42 -19
- package/mcp-servers/official/page-understanding/index.js +6 -7
- package/package.json +1 -1
- package/src/_vendor/video/cdp-touch.js +184 -0
- package/src/_vendor/video/humanized-scroll.js +251 -0
- package/src/_vendor/video/recorder/atoms.js +212 -0
- package/src/_vendor/video/recorder/index.js +68 -38
- package/src/_vendor/video/recorder/plan-executor.js +191 -394
- package/src/_vendor/video/understanding/schema.js +316 -0
- package/src/drivers/codex.js +11 -2
- package/src/tools/plan-video-segments.js +152 -22
- package/src/tools/record-url-narration.js +44 -137
- package/src/_vendor/video/recorder/phase-duration.js +0 -18
- package/src/_vendor/video/recorder/plan-estimator.js +0 -43
|
@@ -255,8 +255,16 @@ server.tool(
|
|
|
255
255
|
presentation: z.object({
|
|
256
256
|
style: z.enum(['static', 'scroll']).optional(),
|
|
257
257
|
}).optional().describe('Optional presentation hints (style only). duration/per_card_duration are computed.'),
|
|
258
|
-
|
|
259
|
-
|
|
258
|
+
operations: z.array(z.object({
|
|
259
|
+
atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
|
|
260
|
+
duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
|
|
261
|
+
y: z.number().optional(),
|
|
262
|
+
x: z.number().optional(),
|
|
263
|
+
curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
|
|
264
|
+
mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
|
|
265
|
+
jitter_px: z.number().optional(),
|
|
266
|
+
})).optional().describe('For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.'),
|
|
267
|
+
})).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
|
|
260
268
|
},
|
|
261
269
|
async ({ segments }) => {
|
|
262
270
|
const result = await runPlanVideoSegmentsTool({ segments });
|
|
@@ -342,28 +350,43 @@ server.tool(
|
|
|
342
350
|
// audio in production runs (Tasks #20/#25/#26), forcing re-records.
|
|
343
351
|
server.tool(
|
|
344
352
|
'record_url_narration',
|
|
345
|
-
'
|
|
353
|
+
'V6 record_url_narration. Drives Chromium on Xvfb + Playwright recordVideo to capture a silent mp4 per section, then ffmpeg-slices into output_paths. Each mp4 passes to compose_video_v2 as a video-kind segment.\n\n'
|
|
354
|
+
+ 'REQUIRES page_understanding (from analyze_page) — used for safe-region check (scroll_to.y / cursor_focus.y rejected if in unsafe_regions) and preheat alignment (same full-scroll-then-top pre-roll as analyze_page).\n\n'
|
|
355
|
+
+ 'plan.sections[*].operations[] is the visual beat — each operation is one of three atom calls:\n'
|
|
356
|
+
+ ' - scroll_to: { y, duration_ms, curve?, mode?, jitter_px? }\n'
|
|
357
|
+
+ ' - hold: { duration_ms } — duration_ms="fill" allowed on the LAST hold to auto-balance with audio_duration_ms\n'
|
|
358
|
+
+ ' - cursor_focus: { x, y, duration_ms }\n\n'
|
|
359
|
+
+ 'V5 fields are rejected: action / target_y / target_y_content_label / focus_region / transition_ms / dwell_ms (set by plan_video_segments only) / phase.beats[].\n\n'
|
|
360
|
+
+ 'Standard chain: analyze_page → synthesize_tts × N → plan_video_segments → record_url_narration + compose_video_v2.\n\n'
|
|
361
|
+
+ 'ALWAYS pass output_paths as an array with one mp4 path per plan.sections entry (single-section is a 1-element array). The tool records the URL ONCE continuously (one browser session, natural scroll flow across all sections), then ffmpeg-slices at section boundaries. One URL = one call.\n\n'
|
|
362
|
+
+ 'Runtime: Linux daemon with Xvfb + Chromium + ffmpeg. macOS / Windows fail at startup.',
|
|
346
363
|
{
|
|
347
|
-
url: z.string().describe('Page URL to record'),
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
364
|
+
url: z.string().describe('Page URL to record (must match the URL passed to analyze_page that produced page_understanding).'),
|
|
365
|
+
page_understanding: z.record(z.any()).describe('Output of analyze_page for this URL. Required. Provides full_height_px / viewport / preheat_strategy / unsafe_regions[] for safety validation, and blocks[] / narrative_arc as informational metadata (the recorder itself only needs the safety bits).'),
|
|
366
|
+
plan: z.object({
|
|
367
|
+
sections: z.array(z.object({
|
|
368
|
+
id: z.string().optional(),
|
|
369
|
+
text: z.string().optional(),
|
|
370
|
+
audio_path: z.string().optional(),
|
|
371
|
+
dwell_ms: z.number().optional(),
|
|
372
|
+
operations: z.array(z.object({
|
|
373
|
+
atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
|
|
374
|
+
duration_ms: z.number().describe('Atom duration in ms. (plan_video_segments may have expanded a "fill" value already.)'),
|
|
375
|
+
y: z.number().optional(),
|
|
376
|
+
x: z.number().optional(),
|
|
377
|
+
curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
|
|
378
|
+
mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
|
|
379
|
+
jitter_px: z.number().optional(),
|
|
380
|
+
})).min(1).describe('Ordered atom sequence executed during this section.'),
|
|
381
|
+
})).min(1),
|
|
382
|
+
}).describe('plan.sections[] — each section has text/audio_path/dwell_ms (filled by plan_video_segments) and operations[].'),
|
|
383
|
+
output_paths: z.array(z.string()).min(1).describe('REQUIRED. Workspace-relative mp4 paths, one per plan.sections entry. The tool records ONCE continuously and slices at section boundaries (phase_start / phase_end events).'),
|
|
384
|
+
output_path: z.string().optional().describe('Debug-only path for the consolidated master recording. Auto-generated under tmp/ if omitted.'),
|
|
362
385
|
events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${master}.events.json'),
|
|
363
386
|
viewport: z.object({
|
|
364
387
|
width: z.number().optional(),
|
|
365
388
|
height: z.number().optional(),
|
|
366
|
-
}).optional().describe('Default 1080x1920 (mobile portrait).
|
|
389
|
+
}).optional().describe('Default 1080x1920 (mobile portrait). Must match page_understanding.viewport.'),
|
|
367
390
|
fps: z.number().optional().describe('Default 30. Do not change unless needed.'),
|
|
368
391
|
settle_ms: z.number().optional().describe('Default 4000. Settle wait after navigation before recording starts.'),
|
|
369
392
|
},
|
|
@@ -5,14 +5,13 @@ import { z } from 'zod';
|
|
|
5
5
|
import { startThinProxy } from '../../_thin-proxy/forward.js';
|
|
6
6
|
|
|
7
7
|
const AnalyzePageOptionsSchema = z.object({
|
|
8
|
-
settleMs: z.number().int().min(500).max(30000).optional(),
|
|
9
|
-
timeoutMs: z.number().int().min(5000).max(240000).optional(),
|
|
10
8
|
viewportWidth: z.number().int().min(360).max(2160).optional(),
|
|
11
9
|
viewportHeight: z.number().int().min(480).max(3840).optional(),
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
10
|
+
settleMs: z.number().int().min(500).max(30000).optional(),
|
|
11
|
+
timeoutMs: z.number().int().min(5000).max(240000).optional(),
|
|
12
|
+
preheatStrategy: z.enum(['none', 'full_scroll_then_top']).optional(),
|
|
13
|
+
chunkHeight: z.number().int().min(1200).max(6000).optional(),
|
|
14
|
+
maxChunks: z.number().int().min(1).max(8).optional(),
|
|
16
15
|
fixture_mode: z.boolean().optional(),
|
|
17
16
|
}).passthrough();
|
|
18
17
|
|
|
@@ -22,7 +21,7 @@ await startThinProxy({
|
|
|
22
21
|
tools: [
|
|
23
22
|
{
|
|
24
23
|
name: 'analyze_page',
|
|
25
|
-
description: 'Analyze webpage
|
|
24
|
+
description: 'Analyze a webpage for short-video planning. Returns V6 page_understanding: blocks[] (id/y_top/y_bottom/visual_kind/text/summary/keywords/density/visual_weight/contains_image/reading_priority/pacing_hint/narration_hint) + unsafe_regions[] (y_top/y_bottom/reason) + narrative_arc (structure/suggested_flow) + url/page_type/primary_topic/viewport/preheat_strategy/full_height_px. Required for any URL-narration video.',
|
|
26
25
|
inputSchema: {
|
|
27
26
|
url: z.string().url(),
|
|
28
27
|
persona: z.string().optional(),
|
package/package.json
CHANGED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
// CDP-level touch dispatch. Sends real Input.dispatchTouchEvent through a
|
|
2
|
+
// Chrome DevTools Protocol session, so the browser's gesture engine treats
|
|
3
|
+
// the input as a real finger swipe — getting rubber-band overscroll, fling
|
|
4
|
+
// inertia, and paint-synced scroll updates for free. The previous approach
|
|
5
|
+
// (page.evaluate synthesizing TouchEvents + calling scrollBy) bypassed the
|
|
6
|
+
// gesture pipeline entirely, so scrolls looked smooth in isolation but felt
|
|
7
|
+
// "robotic" because none of the physics-driven feedback ran.
|
|
8
|
+
//
|
|
9
|
+
// We pace touchMove dispatches at ~16ms from the Node side. The actual scroll
|
|
10
|
+
// rendering is then driven by Chrome's compositor thread at the display's
|
|
11
|
+
// native refresh rate — the Node-side cadence only determines the velocity
|
|
12
|
+
// vector that Chrome sees, not the visible frame rate. That means
|
|
13
|
+
// requestAnimationFrame-style smoothness comes free as long as touchMoves
|
|
14
|
+
// land at roughly 60Hz; we don't need a JS-side rAF loop anymore.
|
|
15
|
+
|
|
16
|
+
const DEFAULT_TOUCH_ID = 0;
|
|
17
|
+
const DEFAULT_FORCE = 0.5;
|
|
18
|
+
const DEFAULT_RADIUS = 2;
|
|
19
|
+
const DEFAULT_FRAME_INTERVAL_MS = 16;
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * The delay is rounded to a whole number of milliseconds and negative values
 * are treated as zero.
 *
 * @param {number} ms - Requested delay in milliseconds.
 * @returns {Promise<void>} Settles once the timer fires.
 */
function sleep(ms) {
  const delayMs = Math.max(0, Math.round(ms));
  return new Promise((done) => setTimeout(done, delayMs));
}
|
|
24
|
+
|
|
25
|
+
/**
 * Constrain `value` to the closed interval [min, max].
 *
 * @param {number} value - Number to constrain.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound (wins if the bounds are inverted).
 * @returns {number} The constrained number.
 */
function clamp(value, min, max) {
  const atLeastMin = Math.max(min, value);
  return Math.min(max, atLeastMin);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Quadratic ease-in-out: accelerates over the first half of the progress
 * range and decelerates over the second half.
 *
 * @param {number} t - Progress, expected in [0, 1].
 * @returns {number} Eased progress.
 */
function easeInOutQuad(t) {
  if (!(t < 0.5)) {
    // Second half: mirrored parabola that approaches 1.
    const mirrored = Math.pow(-2 * t + 2, 2);
    return 1 - (mirrored / 2);
  }
  // First half: plain parabola.
  return 2 * t * t;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Quadratic ease-out: starts fast and decelerates towards the end.
 *
 * @param {number} t - Progress, expected in [0, 1].
 * @returns {number} Eased progress.
 */
function easeOutQuad(t) {
  const remaining = 1 - t;
  return 1 - (remaining * remaining);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Identity easing: progress maps straight through with no acceleration.
 *
 * @param {number} t - Progress value.
 * @returns {number} The same value, unchanged.
 */
function linear(t) {
  return t;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Map a curve name to its easing function.
 *
 * @param {string} name - 'linear', 'easeOutQuad', or anything else.
 * @returns {(t: number) => number} The matching easing function; any
 *   unrecognised name falls back to easeInOutQuad.
 */
function resolveCurveFn(name) {
  switch (name) {
    case 'linear':
      return linear;
    case 'easeOutQuad':
      return easeOutQuad;
    default:
      return easeInOutQuad;
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Draw an integer from [min, max], both ends included.
 *
 * @param {number} min - Smallest value that can be returned.
 * @param {number} max - Largest value that can be returned.
 * @param {() => number} [randomFn=Math.random] - Source of randomness; the
 *   arithmetic assumes it yields values in [0, 1).
 * @returns {number} The drawn integer.
 */
function randomIntInclusive(min, max, randomFn = Math.random) {
  const bucketCount = max - min + 1;
  const bucket = Math.floor(randomFn() * bucketCount);
  return bucket + min;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build one entry for the `touchPoints` array of an
 * `Input.dispatchTouchEvent` call.
 *
 * @param {number} x - Horizontal coordinate; rounded to an integer.
 * @param {number} y - Vertical coordinate; rounded to an integer.
 * @param {object} [options]
 * @param {number} [options.id=DEFAULT_TOUCH_ID] - Touch identifier.
 * @param {number} [options.force=DEFAULT_FORCE] - Contact force.
 * @returns {object} Touch point with a fixed DEFAULT_RADIUS contact and no
 *   rotation.
 */
function buildTouchPoint(x, y, { id = DEFAULT_TOUCH_ID, force = DEFAULT_FORCE } = {}) {
  const [roundedX, roundedY] = [x, y].map((coordinate) => Math.round(coordinate));
  const contactShape = {
    radiusX: DEFAULT_RADIUS,
    radiusY: DEFAULT_RADIUS,
    rotationAngle: 0,
  };
  return { x: roundedX, y: roundedY, ...contactShape, force, id };
}
|
|
63
|
+
|
|
64
|
+
/**
 * Dispatch a single touch-press / drag / release gesture through CDP
 * (`Input.dispatchTouchEvent`).
 *
 * Coordinates are viewport pixels (not page pixels). The caller is
 * responsible for translating "scroll the page down by N px" into a finger
 * trajectory (finger drags UP to scroll DOWN).
 *
 * Event sequence: one touchStart at the start point, `steps - 1` eased
 * touchMove events (optionally jittered on the Y axis and in their pacing),
 * one un-jittered touchMove at the exact end point, then touchEnd.
 *
 * If anything fails after touchStart has been delivered, a best-effort
 * touchCancel is sent before the original error is rethrown, so a failed
 * swipe does not leave a finger "pressed" for later gestures on the page.
 *
 * @param {{ send: Function }} cdp - CDP session exposing `.send(method, params)`.
 * @param {object} options
 * @param {number} options.startX - Finger start X (viewport px).
 * @param {number} options.startY - Finger start Y (viewport px).
 * @param {number} options.endX - Finger end X (viewport px).
 * @param {number} options.endY - Finger end Y (viewport px).
 * @param {number} options.durationMs - Gesture duration; floored at 50 ms.
 * @param {string} [options.curve='easeInOutQuad'] - Easing curve name.
 * @param {number} [options.pixelJitterMin=0] - Min per-step Y jitter (px).
 * @param {number} [options.pixelJitterMax=0] - Max per-step Y jitter (px).
 * @param {number} [options.timingJitterMin=0] - Min per-step delay jitter (ms).
 * @param {number} [options.timingJitterMax=0] - Max per-step delay jitter (ms).
 * @param {number} [options.frameIntervalMs=DEFAULT_FRAME_INTERVAL_MS] -
 *   Target spacing between touchMove events; floored at 8 ms.
 * @param {number} [options.touchId=DEFAULT_TOUCH_ID] - Touch identifier.
 * @param {() => number} [options.randomFn=Math.random] - Randomness source.
 * @returns {Promise<void>}
 * @throws {Error} If `cdp` has no `.send()` or a coordinate/duration is not
 *   finite; errors raised by `cdp.send` are propagated unchanged.
 */
export async function dispatchSwipe(cdp, {
  startX,
  startY,
  endX,
  endY,
  durationMs,
  curve = 'easeInOutQuad',
  pixelJitterMin = 0,
  pixelJitterMax = 0,
  timingJitterMin = 0,
  timingJitterMax = 0,
  frameIntervalMs = DEFAULT_FRAME_INTERVAL_MS,
  touchId = DEFAULT_TOUCH_ID,
  randomFn = Math.random,
} = {}) {
  if (!cdp || typeof cdp.send !== 'function') {
    throw new Error('dispatchSwipe requires a CDP session with .send()');
  }
  if (![startX, startY, endX, endY, durationMs].every(Number.isFinite)) {
    throw new Error('dispatchSwipe requires finite startX, startY, endX, endY, durationMs');
  }
  const totalMs = Math.max(50, Math.round(durationMs));
  const interval = Math.max(8, Math.round(frameIntervalMs));
  const steps = Math.max(2, Math.round(totalMs / interval));
  const curveFn = resolveCurveFn(curve);
  const pixelJitterEnabled = !(pixelJitterMin === 0 && pixelJitterMax === 0);
  const timingJitterEnabled = !(timingJitterMin === 0 && timingJitterMax === 0);
  // Loop-invariant: every intermediate move targets the same base spacing.
  const baseDelay = totalMs / steps;

  // touchStart at (startX, startY). A failure here needs no cleanup because
  // no touch has been registered yet.
  await cdp.send('Input.dispatchTouchEvent', {
    type: 'touchStart',
    touchPoints: [buildTouchPoint(startX, startY, { id: touchId })],
  });

  try {
    // touchMove sequence: eased interpolation between start and end with
    // optional small per-step jitter. Per the original design notes, the
    // curve shape decides whether the release carries velocity (fling) or
    // stops dead (ease-in-out releases at v≈0).
    let lastDispatchAt = Date.now();
    for (let i = 1; i < steps; i += 1) {
      const eased = curveFn(i / steps);
      const baseX = startX + (endX - startX) * eased;
      const baseY = startY + (endY - startY) * eased;
      const jitterMag = pixelJitterEnabled
        ? randomIntInclusive(pixelJitterMin, pixelJitterMax, randomFn)
        : 0;
      // The direction draw happens even when jitter is disabled so that a
      // seeded randomFn is consumed in the same order as before.
      const jitterDir = randomFn() < 0.5 ? -1 : 1;

      await cdp.send('Input.dispatchTouchEvent', {
        type: 'touchMove',
        touchPoints: [buildTouchPoint(baseX, baseY + (jitterMag * jitterDir), { id: touchId })],
      });

      const timingJitter = timingJitterEnabled
        ? randomIntInclusive(timingJitterMin, timingJitterMax, randomFn)
        : 0;
      const timingDir = randomFn() < 0.5 ? -1 : 1;
      const targetDelay = Math.max(4, Math.round(baseDelay + (timingJitter * timingDir)));

      // Compensate for the time already spent in the CDP send so that a
      // stalled round-trip does not make us oversleep.
      const elapsed = Date.now() - lastDispatchAt;
      const wait = Math.max(0, targetDelay - elapsed);
      if (wait > 0) await sleep(wait);
      lastDispatchAt = Date.now();
    }

    // Final touchMove at the exact end coordinates (no jitter): this is the
    // release point.
    await cdp.send('Input.dispatchTouchEvent', {
      type: 'touchMove',
      touchPoints: [buildTouchPoint(endX, endY, { id: touchId })],
    });

    // touchEnd with an empty touchPoints array (the touch is released).
    await cdp.send('Input.dispatchTouchEvent', {
      type: 'touchEnd',
      touchPoints: [],
    });
  } catch (err) {
    // Release the in-flight touch so the page is not left with a stuck
    // finger. This is best effort: if the session itself is gone the cancel
    // fails too, and the original error is the one worth reporting.
    try {
      await cdp.send('Input.dispatchTouchEvent', {
        type: 'touchCancel',
        touchPoints: [],
      });
    } catch (cancelErr) {
      // Intentionally ignored; `err` below carries the root cause.
    }
    throw err;
  }
}
|
|
156
|
+
|
|
157
|
+
// Lazy CDP session cache keyed by page. Creating a CDP session per call would
// add ~1 round-trip of overhead per scroll segment. We attach the session to
// the page via WeakMap so callers don't have to thread it through. While a
// session is still being created the cache holds the pending promise, so
// overlapping callers share one creation instead of each opening their own.
const SESSION_CACHE = new WeakMap();

/**
 * Return the CDP session for `page`, creating and caching it on first use.
 *
 * Overlapping calls for the same page share a single `newCDPSession` request.
 * A failed creation is evicted from the cache, so the next call retries
 * rather than replaying the old rejection.
 *
 * @param {object} page - Playwright page (must expose `context()`).
 * @returns {Promise<object>} The cached or newly created CDP session.
 * @throws {Error} If `page` is missing or has no `context()` method; errors
 *   from session creation are propagated unchanged.
 */
export async function getCdpSession(page) {
  if (!page || typeof page.context !== 'function') {
    throw new Error('getCdpSession requires a Playwright page');
  }
  // The cached value is either a session (possibly seeded by a test) or the
  // promise of one; returning it from an async function resolves both.
  const cached = SESSION_CACHE.get(page);
  if (cached) return cached;

  // Start inside a promise chain so a synchronous throw from page.context()
  // surfaces as a rejection and goes through the same eviction path.
  const pending = Promise.resolve().then(() => page.context().newCDPSession(page));
  SESSION_CACHE.set(page, pending);
  try {
    const session = await pending;
    // Swap the promise for the session unless someone replaced the entry
    // (e.g. the test seam) while creation was in flight.
    if (SESSION_CACHE.get(page) === pending) SESSION_CACHE.set(page, session);
    return session;
  } catch (err) {
    if (SESSION_CACHE.get(page) === pending) SESSION_CACHE.delete(page);
    throw err;
  }
}
|
|
172
|
+
|
|
173
|
+
// Test seam: lets unit tests inject a fake CDP session for a fake page object.
|
|
174
|
+
/**
 * Test seam: seed the session cache so that later lookups for `page` return
 * `session` without creating a real CDP session.
 *
 * @param {object} page - Cache key; a falsy value makes this a no-op.
 * @param {object} session - Session object to store for `page`.
 * @returns {void}
 */
export function __setCdpSessionForTest(page, session) {
  if (!page) return;
  SESSION_CACHE.set(page, session);
}
|
|
177
|
+
|
|
178
|
+
export const __internals = {
|
|
179
|
+
easeInOutQuad,
|
|
180
|
+
easeOutQuad,
|
|
181
|
+
linear,
|
|
182
|
+
resolveCurveFn,
|
|
183
|
+
clamp,
|
|
184
|
+
};
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
import { dispatchSwipe, getCdpSession } from './cdp-touch.js';
|
|
2
|
+
|
|
3
|
+
const MIN_SEGMENT_DISTANCE_PX = 260;
|
|
4
|
+
const TARGET_SEGMENT_DURATION_MS = 650;
|
|
5
|
+
const MAX_SEGMENTS = 36;
|
|
6
|
+
|
|
7
|
+
const JITTER_PRESETS = {
|
|
8
|
+
low: {
|
|
9
|
+
pixelMin: 2,
|
|
10
|
+
pixelMax: 3,
|
|
11
|
+
timingMin: 8,
|
|
12
|
+
timingMax: 11,
|
|
13
|
+
},
|
|
14
|
+
medium: {
|
|
15
|
+
pixelMin: 2,
|
|
16
|
+
pixelMax: 4,
|
|
17
|
+
timingMin: 8,
|
|
18
|
+
timingMax: 15,
|
|
19
|
+
},
|
|
20
|
+
high: {
|
|
21
|
+
pixelMin: 3,
|
|
22
|
+
pixelMax: 4,
|
|
23
|
+
timingMin: 11,
|
|
24
|
+
timingMax: 15,
|
|
25
|
+
},
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
/**
 * Limit `value` to the range [min, max].
 *
 * @param {number} value - Number to limit.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound (takes precedence if bounds are inverted).
 * @returns {number} The limited number.
 */
function clampNumber(value, min, max) {
  const raisedToMin = Math.max(min, value);
  const loweredToMax = Math.min(max, raisedToMin);
  return loweredToMax;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Quadratic ease-in-out curve: slow start, fast middle, slow finish.
 *
 * @param {number} t - Progress, expected in [0, 1].
 * @returns {number} Eased progress.
 */
function easeInOutQuad(t) {
  return t < 0.5
    ? 2 * t * t
    : 1 - (Math.pow(-2 * t + 2, 2) / 2);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Quadratic ease-out curve: fast start that decelerates into the end.
 *
 * @param {number} t - Progress, expected in [0, 1].
 * @returns {number} Eased progress.
 */
function easeOutQuad(t) {
  const left = 1 - t;
  const leftSquared = left * left;
  return 1 - leftSquared;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Linear curve: returns the progress value untouched.
 *
 * @param {number} t - Progress value.
 * @returns {number} `t` itself.
 */
function linear(t) {
  return t;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Evaluate the named easing curve at progress `t`.
 *
 * @param {string} name - 'linear', 'easeOutQuad', or anything else.
 * @param {number} t - Progress, expected in [0, 1].
 * @returns {number} Eased progress; unrecognised names use easeInOutQuad.
 */
function curveValue(name, t) {
  switch (name) {
    case 'linear':
      return linear(t);
    case 'easeOutQuad':
      return easeOutQuad(t);
    default:
      return easeInOutQuad(t);
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Draw an integer from [min, max], both ends included.
 *
 * @param {number} min - Smallest value that can be returned.
 * @param {number} max - Largest value that can be returned.
 * @param {() => number} randomFn - Randomness source (required); the
 *   arithmetic assumes it yields values in [0, 1).
 * @returns {number} The drawn integer.
 */
function randomInt(min, max, randomFn) {
  const choices = max - min + 1;
  const offset = Math.floor(randomFn() * choices);
  return offset + min;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Look up a named jitter preset.
 *
 * Only presets defined directly on JITTER_PRESETS are honoured. A plain
 * bracket lookup would also match inherited keys such as 'constructor' or
 * 'toString' and hand back a function instead of a preset, so the name is
 * checked with an own-property test first.
 *
 * @param {string} [jitterLevel] - Preset name (e.g. 'low', 'medium', 'high').
 * @returns {{ pixelMin: number, pixelMax: number, timingMin: number, timingMax: number }}
 *   The matching preset, or the 'medium' preset for any unknown or
 *   non-string value.
 */
function resolveJitterPreset(jitterLevel) {
  const isKnownPreset = typeof jitterLevel === 'string'
    && Object.prototype.hasOwnProperty.call(JITTER_PRESETS, jitterLevel);
  if (isKnownPreset && JITTER_PRESETS[jitterLevel]) {
    return JITTER_PRESETS[jitterLevel];
  }
  return JITTER_PRESETS.medium;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Decide which jitter ranges a scroll should use.
 *
 * A finite `pixelJitterPx` is a direct numeric override and wins over the
 * named level:
 *   - 0 (or anything that floors to <= 0) disables pixel and timing jitter;
 *   - a positive budget yields pixel jitter in [max(1, floor(px / 2)), px]
 *     and a fixed 6-12 ms timing jitter.
 * Otherwise the named preset for `jitterLevel` is used.
 *
 * @param {object} params
 * @param {number} [params.pixelJitterPx] - Explicit pixel jitter budget.
 * @param {string} [params.jitterLevel] - Preset name used when no budget is given.
 * @returns {{ pixelMin: number, pixelMax: number, timingMin: number, timingMax: number }}
 */
function resolveJitterConfig({ pixelJitterPx, jitterLevel }) {
  if (!Number.isFinite(pixelJitterPx)) {
    return resolveJitterPreset(jitterLevel);
  }

  const budgetPx = Math.max(0, Math.floor(pixelJitterPx));
  const jitterEnabled = budgetPx !== 0;
  return jitterEnabled
    ? {
      pixelMin: Math.max(1, Math.floor(budgetPx / 2)),
      pixelMax: budgetPx,
      timingMin: 6,
      timingMax: 12,
    }
    : { pixelMin: 0, pixelMax: 0, timingMin: 0, timingMax: 0 };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Normalise a requested curve name to one of the supported curves.
 *
 * @param {string} [name] - Requested curve name.
 * @returns {'linear' | 'easeOutQuad' | 'easeInOutQuad'} `name` when it is
 *   'linear' or 'easeOutQuad'; 'easeInOutQuad' for everything else.
 */
function resolveCurveName(name) {
  const passThrough = ['linear', 'easeOutQuad'];
  return passThrough.includes(name) ? name : 'easeInOutQuad';
}
|
|
86
|
+
|
|
87
|
+
/**
 * Guard that `value` is a finite number.
 *
 * @param {string} name - Label used in the error message.
 * @param {*} value - Value to check; no coercion is applied.
 * @returns {void}
 * @throws {Error} `<name> must be a finite number` when the check fails.
 */
function assertFinite(name, value) {
  if (Number.isFinite(value)) return;
  throw new Error(`${name} must be a finite number`);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Show or hide the mouse cursor inside the page by setting the CSS `cursor`
 * style on the root element and on the body.
 *
 * @param {object} page - Page-like object exposing `evaluate(fn, arg)`.
 * @param {boolean} [mouseVisible] - `true` sets 'auto', `false` sets 'none';
 *   any non-boolean value leaves the page untouched.
 * @returns {Promise<void>}
 */
async function applyMouseVisibility(page, mouseVisible) {
  const requested = typeof mouseVisible === 'boolean';
  if (!requested) return;

  // This callback is executed in the page, so it only uses its own argument.
  const setCursor = (visible) => {
    const root = document.documentElement;
    const body = document.body;
    const cursorValue = visible ? 'auto' : 'none';
    for (const node of [root, body]) {
      if (node) node.style.cursor = cursorValue;
    }
  };
  await page.evaluate(setCursor, mouseVisible);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Split a scroll from `fromY` to `toY` into equal-distance segments.
 *
 * The segment count is the larger of "one per MIN_SEGMENT_DISTANCE_PX of
 * travel" and "one per TARGET_SEGMENT_DURATION_MS of time", limited to
 * [1, MAX_SEGMENTS]. Each segment records its progress bounds (0..1) and
 * rounded page Y bounds.
 *
 * @param {object} params
 * @param {number} params.fromY - Scroll start position (page px).
 * @param {number} params.toY - Scroll end position (page px).
 * @param {number} params.durationMs - Total scroll duration in ms.
 * @returns {Array<{ index: number, startProgress: number, endProgress: number, fromY: number, toY: number }>}
 *   Ordered segments; empty when the travel is under 1 px or the duration is
 *   under 1 ms.
 */
function buildSegments({ fromY, toY, durationMs }) {
  const travel = toY - fromY;
  if (Math.abs(travel) < 1 || durationMs < 1) return [];

  const neededForDistance = Math.ceil(Math.abs(travel) / MIN_SEGMENT_DISTANCE_PX);
  const neededForDuration = Math.ceil(durationMs / TARGET_SEGMENT_DURATION_MS);
  const segmentCount = clampNumber(Math.max(neededForDistance, neededForDuration), 1, MAX_SEGMENTS);

  return Array.from({ length: segmentCount }, (_, index) => {
    const startProgress = index / segmentCount;
    const endProgress = (index + 1) / segmentCount;
    return {
      index,
      startProgress,
      endProgress,
      fromY: Math.round(fromY + (travel * startProgress)),
      toY: Math.round(fromY + (travel * endProgress)),
    };
  });
}
|
|
125
|
+
|
|
126
|
+
/**
 * Resolve the viewport size for a page.
 *
 * Uses `page.viewportSize()` when it exists and reports finite width and
 * height; otherwise falls back to a 1080x1920 portrait viewport so unit
 * tests can get by with a minimal page mock.
 *
 * @param {object} page - Page-like object; `viewportSize()` is optional.
 * @returns {{ width: number, height: number }} The reported size object
 *   itself, or the fallback size.
 */
function getViewportSize(page) {
  const fallback = { width: 1080, height: 1920 };
  if (typeof page.viewportSize !== 'function') return fallback;

  const reported = page.viewportSize();
  const usable = reported
    && Number.isFinite(reported.width)
    && Number.isFinite(reported.height);
  return usable ? reported : fallback;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Translate one scroll segment (page `fromY` -> `toY`) into a finger
 * trajectory and dispatch it through CDP.
 *
 * Mapping is 1:1: the finger travels the same number of pixels the page
 * should scroll, dragging UP to scroll DOWN and vice versa. The browser's
 * gesture handling performs the actual scroll; this function only supplies
 * the input events.
 *
 * Finger placement:
 *   - The finger normally starts about 78% down the viewport (thumb zone).
 *   - Travel is capped at `thumbZoneY - 80`. Segments are precomputed by the
 *     caller, so a capped segment simply undershoots; nothing later makes up
 *     the difference.
 *   - For upward page scrolls the finger moves DOWN, and the thumb zone
 *     leaves little room below it. When the requested travel does not fit
 *     above the bottom margin, the start point is moved up just far enough,
 *     so both directions get the same usable travel. Segments that already
 *     fit keep the original thumb-zone start.
 *
 * @param {object} page - Page-like object used to resolve the viewport size.
 * @param {{ send: Function }} cdp - CDP session passed to dispatchSwipe.
 * @param {object} segment
 * @param {number} segment.fromY - Page scroll position at segment start.
 * @param {number} segment.toY - Page scroll position at segment end.
 * @param {number} segment.durationMs - Duration of this swipe in ms.
 * @param {string} segment.curveName - Easing curve name for the swipe.
 * @param {{ pixelMin: number, pixelMax: number, timingMin: number, timingMax: number }} segment.jitterPreset
 *   Jitter ranges forwarded to dispatchSwipe.
 * @param {() => number} segment.randomFn - Randomness source.
 * @returns {Promise<void>} Resolves once the swipe has been dispatched;
 *   returns immediately for segments shorter than 1 px.
 */
async function runSwipeSegment(page, cdp, {
  fromY,
  toY,
  durationMs,
  curveName,
  jitterPreset,
  randomFn,
}) {
  const viewport = getViewportSize(page);
  const distance = toY - fromY;
  if (Math.abs(distance) < 1) return;
  const direction = distance >= 0 ? 1 : -1;

  const topMargin = 60;
  const bottomLimit = viewport.height - 60;

  // Thumb-friendly default start, kept between mid-screen and 100 px above
  // the bottom edge.
  const thumbZoneY = clampNumber(
    Math.round(viewport.height * 0.78),
    Math.round(viewport.height * 0.5),
    viewport.height - 100,
  );
  const requestedTravel = Math.abs(distance);
  // Hard cap by viewport: a real finger can't go past the screen.
  const maxTravel = thumbZoneY - 80;
  const fingerTravel = Math.min(requestedTravel, maxTravel);

  // Upward scroll means the finger moves down. If the thumb zone does not
  // leave enough room below it, start higher so the full travel fits instead
  // of being silently clamped at the bottom limit.
  let fingerStartY = thumbZoneY;
  if (direction < 0 && fingerStartY + fingerTravel > bottomLimit) {
    fingerStartY = Math.max(topMargin, bottomLimit - fingerTravel);
  }
  const fingerEndY = clampNumber(fingerStartY - (direction * fingerTravel), topMargin, bottomLimit);

  // Random draws stay in this order (nudge, then drift) so that a seeded
  // randomFn produces the same horizontal path as before.
  const horizontalNudge = randomInt(-18, 18, randomFn);
  const horizontalDrift = randomInt(-26, 26, randomFn);
  const fingerStartX = clampNumber(Math.round((viewport.width * 0.52) + horizontalNudge), 48, viewport.width - 48);
  const fingerEndX = clampNumber(fingerStartX + horizontalDrift, 48, viewport.width - 48);

  await dispatchSwipe(cdp, {
    startX: fingerStartX,
    startY: fingerStartY,
    endX: fingerEndX,
    endY: fingerEndY,
    durationMs,
    curve: curveName,
    pixelJitterMin: jitterPreset.pixelMin,
    pixelJitterMax: jitterPreset.pixelMax,
    timingJitterMin: jitterPreset.timingMin,
    timingJitterMax: jitterPreset.timingMax,
    randomFn,
  });
}
|
|
185
|
+
|
|
186
|
+
/**
 * Normalise the raw options passed to humanizedScroll.
 *
 * @param {object} [options]
 * @param {number} options.from_y - Scroll start position; clamped to >= 0.
 * @param {number} options.to_y - Scroll end position; clamped to >= 0.
 * @param {number} options.duration_ms - Total duration; rounded, floored at 80 ms.
 * @param {number} [options.pixel_jitter_px] - Explicit pixel jitter budget.
 * @param {string} [options.jitter_level] - Named jitter preset.
 * @param {string} [options.motion_curve] - Easing curve name.
 * @param {boolean} [options.mouseVisible] - Cursor visibility toggle.
 * @param {boolean} [options.mouse_visible] - snake_case spelling of the same
 *   toggle, matching the other option names; used only when `mouseVisible`
 *   is not provided.
 * @param {() => number} [options.random_fn] - Randomness source; defaults to
 *   Math.random when not a function.
 * @returns {{ fromY: number, toY: number, durationMs: number, jitterPreset: object, mouseVisible: (boolean|undefined), curveName: string, randomFn: Function }}
 * @throws {Error} If from_y, to_y or duration_ms does not convert to a
 *   finite number.
 */
function buildConfig(options = {}) {
  const randomFn = typeof options.random_fn === 'function' ? options.random_fn : Math.random;
  const fromY = Number(options.from_y);
  const toY = Number(options.to_y);
  const durationMs = Number(options.duration_ms);
  assertFinite('from_y', fromY);
  assertFinite('to_y', toY);
  assertFinite('duration_ms', durationMs);

  // Every other option is snake_case, so accept that spelling as well. The
  // original camelCase key keeps priority so existing callers see no change.
  const mouseVisible = options.mouseVisible !== undefined
    ? options.mouseVisible
    : options.mouse_visible;

  return {
    fromY: Math.max(0, fromY),
    toY: Math.max(0, toY),
    durationMs: Math.max(80, Math.round(durationMs)),
    jitterPreset: resolveJitterConfig({
      pixelJitterPx: options.pixel_jitter_px,
      jitterLevel: options.jitter_level,
    }),
    mouseVisible,
    curveName: resolveCurveName(options.motion_curve),
    randomFn,
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Scroll a page from `from_y` to `to_y` as a series of touch swipes
 * dispatched through CDP.
 *
 * The scroll is split into equal-distance segments. Each segment's duration
 * is the share of the total duration given by the easing curve between the
 * segment's progress bounds, floored at 90 ms. After every 3-5 segments
 * (randomly chosen) a 100-300 ms pause is inserted, except after the final
 * segment.
 *
 * @param {object} page - Playwright page-like object; must expose
 *   `evaluate()` and `waitForTimeout()`.
 * @param {object} [options] - Raw options; see buildConfig for the accepted
 *   keys (from_y, to_y, duration_ms, motion_curve, ...).
 * @returns {Promise<void>} Resolves when all swipes have been dispatched;
 *   returns early when the scroll distance is under 1 px.
 * @throws {Error} If `page` is not page-like, or an option fails validation.
 */
export async function humanizedScroll(page, options = {}) {
  const looksLikePage = Boolean(page)
    && typeof page.evaluate === 'function'
    && typeof page.waitForTimeout === 'function';
  if (!looksLikePage) {
    throw new Error('humanizedScroll requires a Playwright page-like object');
  }

  const config = buildConfig(options);
  const { curveName, jitterPreset, randomFn } = config;
  if (Math.abs(config.toY - config.fromY) < 1) return;

  await applyMouseVisibility(page, config.mouseVisible);
  const segments = buildSegments(config);
  if (segments.length === 0) return;

  // One CDP session per scroll call (cached per page). Tests can pre-seed
  // the cache with a fake session via __setCdpSessionForTest.
  const cdp = await getCdpSession(page);

  const drawPauseCountdown = () => randomInt(3, 5, randomFn);
  const lastIndex = segments.length - 1;
  let segmentsUntilPause = drawPauseCountdown();

  for (const { index, startProgress, endProgress, fromY, toY } of segments) {
    const easedStart = curveValue(curveName, startProgress);
    const easedEnd = curveValue(curveName, endProgress);
    const durationMs = Math.max(90, Math.round(config.durationMs * (easedEnd - easedStart)));

    await runSwipeSegment(page, cdp, {
      fromY,
      toY,
      durationMs,
      curveName,
      jitterPreset,
      randomFn,
    });

    // No pause bookkeeping after the final segment.
    if (index < lastIndex) {
      segmentsUntilPause -= 1;
      if (segmentsUntilPause <= 0) {
        await page.waitForTimeout(randomInt(100, 300, randomFn));
        segmentsUntilPause = drawPauseCountdown();
      }
    }
  }
}
|
|
247
|
+
|
|
248
|
+
// V5 convenience wrappers (scrollToDwell / linearScrollDuring / fastScroll)
|
|
249
|
+
// and the snake_case re-exports are removed in V6 — atoms.js calls
|
|
250
|
+
// humanizedScroll directly with explicit params, and there's no longer a
|
|
251
|
+
// separate "fast/slow/dwell" vocabulary at this layer.
|