@lightcone-ai/daemon 0.23.4 → 0.23.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -257,13 +257,42 @@ server.tool(
|
|
|
257
257
|
}).optional().describe('Optional presentation hints (style only). duration/per_card_duration are computed.'),
|
|
258
258
|
operations: z.array(z.object({
|
|
259
259
|
atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
|
|
260
|
-
duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
|
|
260
|
+
duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration in ms. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
|
|
261
261
|
y: z.number().optional(),
|
|
262
262
|
x: z.number().optional(),
|
|
263
263
|
curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
|
|
264
264
|
mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
|
|
265
265
|
jitter_px: z.number().optional(),
|
|
266
|
-
})).optional().describe(
|
|
266
|
+
})).optional().describe(
|
|
267
|
+
'For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms '
|
|
268
|
+
+ 'must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.\n\n'
|
|
269
|
+
+ 'TRANSITION + EXPLAIN MODE (REQUIRED — enforced by lint): the recording should feel '
|
|
270
|
+
+ 'like a person opening a page and walking the viewer through it block by block. '
|
|
271
|
+
+ 'Concretely:\n'
|
|
272
|
+
+ ' • scroll_to is a TRANSITION between content blocks — short (~500-800ms is fine), '
|
|
273
|
+
+ 'smooth (atomScrollTo programmatic mode handles smoothness automatically; speed does NOT need to be slow).\n'
|
|
274
|
+
+ ' • hold is where the NARRATION happens — long holds (2-5s) are the norm, not the exception. '
|
|
275
|
+
+ 'This is when the agent says the actual sentences about this block.\n'
|
|
276
|
+
+ " • Every non-opening segment MUST start with a scroll_to (the transition into this segment's "
|
|
277
|
+
+ 'content block). Segments starting with hold are REJECTED — they cause jump cuts.\n'
|
|
278
|
+
+ ' • The shape is: "scroll to new block → pause and explain → scroll to next block → pause and explain".\n\n'
|
|
279
|
+
+ 'GOOD example for a 5s segment narrating "宁波银行金融科技部 FinTech 暑期专项":\n'
|
|
280
|
+
+ ' [\n'
|
|
281
|
+
+ ' { atom: "scroll_to", y: 280, duration_ms: 700 }, // 0.7s smooth transition to title\n'
|
|
282
|
+
+ ' { atom: "hold", duration_ms: "fill" }, // ~4.3s: agent narrates this block\n'
|
|
283
|
+
+ ' ]\n\n'
|
|
284
|
+
+ 'GOOD example for a 9s segment with two content blocks inside:\n'
|
|
285
|
+
+ ' [\n'
|
|
286
|
+
+ ' { atom: "scroll_to", y: 980, duration_ms: 700 }, // transition to first block\n'
|
|
287
|
+
+ ' { atom: "hold", duration_ms: 4000 }, // narrate this block (~"金融产品应用开发岗 …")\n'
|
|
288
|
+
+ ' { atom: "scroll_to", y: 1450, duration_ms: 600 }, // short transition to next block\n'
|
|
289
|
+
+ ' { atom: "hold", duration_ms: "fill" }, // narrate next block (~3.7s)\n'
|
|
290
|
+
+ ' ]\n\n'
|
|
291
|
+
+ 'BAD example (REJECTED by transition_required):\n'
|
|
292
|
+
+ ' [\n'
|
|
293
|
+
+ ' { atom: "hold", duration_ms: 5000 }, // segment starts with hold ← rejected\n'
|
|
294
|
+
+ ' ]',
|
|
295
|
+
),
|
|
267
296
|
})).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
|
|
268
297
|
},
|
|
269
298
|
async ({ segments }) => {
|
package/package.json
CHANGED
|
@@ -53,7 +53,7 @@ export async function atomScrollTo(page, _ctx, {
|
|
|
53
53
|
target_y,
|
|
54
54
|
duration_ms,
|
|
55
55
|
curve = 'easeInOutQuad',
|
|
56
|
-
jitter_px =
|
|
56
|
+
jitter_px = 0, // 不要微动 — 用户反复明确要求
|
|
57
57
|
from_y = null,
|
|
58
58
|
mode = 'auto',
|
|
59
59
|
} = {}) {
|
|
@@ -70,17 +70,21 @@ export async function atomScrollTo(page, _ctx, {
|
|
|
70
70
|
const durationMs = Number(duration_ms);
|
|
71
71
|
const distance = Math.abs(targetY - fromY);
|
|
72
72
|
|
|
73
|
-
// Auto-mode
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
//
|
|
77
|
-
//
|
|
78
|
-
//
|
|
79
|
-
//
|
|
80
|
-
|
|
73
|
+
// Auto-mode: default to programmatic (RAF-driven smooth scroll). The touch
|
|
74
|
+
// path uses humanizedScroll which splits any scroll > 260px into multiple
|
|
75
|
+
// CDP swipes, each with ±18-26px random horizontal nudge and fling-cancel-
|
|
76
|
+
// fling boundaries — that looks like "颤抖着分多次拨", not a clean slide.
|
|
77
|
+
// User feedback is unambiguous: scroll must be a smooth transition between
|
|
78
|
+
// content blocks, not a teleport (instant snap) and not a wobble (multi-
|
|
79
|
+
// segment touch with horizontal drift). Programmatic with RAF achieves
|
|
80
|
+
// both — every frame moves, vertical only, no inter-segment pauses.
|
|
81
|
+
// Touch mode remains available via explicit `mode: 'touch'` for callers
|
|
82
|
+
// that specifically want gesture physics.
|
|
83
|
+
const velocity = durationMs > 0 ? (distance / durationMs) * 1000 : 0; // px/s (kept for diagnostics)
|
|
84
|
+
void velocity;
|
|
81
85
|
const resolvedMode = mode === 'programmatic' || mode === 'touch'
|
|
82
86
|
? mode
|
|
83
|
-
:
|
|
87
|
+
: 'programmatic';
|
|
84
88
|
|
|
85
89
|
if (resolvedMode === 'touch') {
|
|
86
90
|
await humanizedScroll(page, {
|
|
@@ -96,68 +100,75 @@ export async function atomScrollTo(page, _ctx, {
|
|
|
96
100
|
targetY,
|
|
97
101
|
durationMs,
|
|
98
102
|
curve,
|
|
99
|
-
jitterPx: Math.max(0, Number(jitter_px) || 0),
|
|
100
103
|
});
|
|
101
104
|
}
|
|
102
105
|
return { anchorY: Math.round(targetY) };
|
|
103
106
|
}
|
|
104
107
|
|
|
105
|
-
// Programmatic scroll:
|
|
106
|
-
// scroll
|
|
107
|
-
// the
|
|
108
|
-
//
|
|
109
|
-
//
|
|
110
|
-
// to 150-200ms), turning a 1s transition into 5-8s.
|
|
108
|
+
// Programmatic scroll: JS-driven RAF loop that incrementally updates the
|
|
109
|
+
// scroll position frame-by-frame over `durationMs`. This produces an actual
|
|
110
|
+
// smooth scroll the viewer sees in the recording — the previous version
|
|
111
|
+
// did a hard instant snap and then a static wait, which looked like a
|
|
112
|
+
// teleport ("跳一下然后定格"), not like a person sliding a page.
|
|
111
113
|
//
|
|
112
|
-
//
|
|
113
|
-
//
|
|
114
|
-
//
|
|
115
|
-
// page
|
|
114
|
+
// Why not native `scroll-behavior: smooth` or `scrollTo({behavior:'smooth'})`?
|
|
115
|
+
// In Playwright + a headless mobile context, native smooth-scroll often
|
|
116
|
+
// gets capped to a fixed short duration (~300-500ms) regardless of distance,
|
|
117
|
+
// or is throttled by the page's own scroll logic. We need a duration we
|
|
118
|
+
// control end-to-end.
|
|
119
|
+
//
|
|
120
|
+
// Frame loop runs inside page.evaluate so it stays in lockstep with the
|
|
121
|
+
// page's render thread — important when recordVideo is capturing 30fps.
|
|
116
122
|
/**
 * Smoothly scroll the page to `targetY` with a requestAnimationFrame loop that
 * runs inside the page, then wait briefly so the next atom sees the final
 * scroll position.
 *
 * @param {object} page - Page handle exposing `evaluate(fn, arg)` and
 *   `waitForTimeout(ms)`.
 * @param {object} [options]
 * @param {number} [options.fromY] - Forwarded to the page for parity with the
 *   caller's bookkeeping; the in-page loop does not read it and starts from
 *   the scroller's live position instead.
 * @param {number} options.targetY - Final vertical scroll offset in px.
 * @param {number} options.durationMs - Animation length in ms. Values that are
 *   zero, negative, or not finite snap straight to `targetY`.
 * @param {'easeInOutQuad'|'linear'|'easeOutQuad'} [options.curve] - Easing
 *   curve; any other value falls back to easeInOutQuad.
 * @returns {Promise<void>} Resolves after the final snap and the settle wait.
 */
async function programmaticScroll(page, {
  fromY,
  targetY,
  durationMs,
  curve = 'easeInOutQuad',
} = {}) {
  // The loop's progress is elapsed / duration. A negative duration keeps that
  // ratio below 1 forever (the loop would never resolve) and 0/NaN would write
  // NaN into scrollTop, so normalise to 0 here, which means "snap immediately".
  const requestedMs = Number(durationMs);
  const safeDurationMs = Number.isFinite(requestedMs) && requestedMs > 0 ? requestedMs : 0;

  // NOTE: the callback must stay self-contained (no references to outer
  // variables); everything it needs travels through `input`.
  await page.evaluate(async (input) => {
    const easings = {
      linear: (t) => t,
      easeOutQuad: (t) => 1 - (1 - t) * (1 - t),
      easeInOutQuad: (t) => (t < 0.5 ? 2 * t * t : 1 - Math.pow(-2 * t + 2, 2) / 2),
    };
    const ease = input.curve === 'linear' ? easings.linear
      : input.curve === 'easeOutQuad' ? easings.easeOutQuad
        : easings.easeInOutQuad;

    // Same preference order as before: scrollingElement, then <html>, then <body>.
    const scroller = document.scrollingElement || document.documentElement || document.body;
    const isRootScroller = scroller === document.scrollingElement
      || scroller === document.documentElement;
    const startY = isRootScroller ? scroller.scrollTop : window.scrollY;
    const delta = input.targetY - startY;
    const startedAt = performance.now();

    // Best-effort write: a throw inside a RAF callback would leave the promise
    // pending forever, so a failed write is skipped rather than propagated.
    const writeY = (y) => {
      try {
        scroller.scrollTop = y;
      } catch { /* ignore */ }
    };

    return new Promise((resolve) => {
      const finish = () => {
        // Final snap to the exact target (in case of sub-pixel drift).
        writeY(input.targetY);
        resolve();
      };

      if (!(input.durationMs > 0)) {
        finish();
        return;
      }

      const tick = (now) => {
        // Clamp to [0, 1]: the first frame timestamp may precede `startedAt`.
        const progress = Math.min(1, Math.max(0, (now - startedAt) / input.durationMs));
        writeY(startY + delta * ease(progress));
        if (progress < 1) {
          requestAnimationFrame(tick);
        } else {
          finish();
        }
      };
      requestAnimationFrame(tick);
    });
  }, { fromY, targetY, durationMs: safeDurationMs, curve });

  // Tiny settle so the next atom sees the scroll committed.
  await page.waitForTimeout(50);
}
|
|
162
173
|
|
|
163
174
|
// ── atomHold ─────────────────────────────────────────────────────────────────
|
|
@@ -94,6 +94,54 @@ function assertNoV5Fields(seg, index) {
|
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
+
// Transition-mode lint — enforce the "explain block → smooth transition → explain block"
|
|
98
|
+
// pattern the user described:
|
|
99
|
+
// "先说一句话, 然后再往下滑, 介绍内容 1, 再往下滑, 停住介绍内容 2"
|
|
100
|
+
//
|
|
101
|
+
// Key insight: scroll_to is a TRANSITION between content blocks, not a
|
|
102
|
+
// narration vehicle. It can be short (~500-800ms) — speed doesn't matter,
|
|
103
|
+
// only smoothness. Long narration happens during hold, not during scroll.
|
|
104
|
+
//
|
|
105
|
+
// Rule (single rule): every non-opening segment MUST start with a scroll_to.
|
|
106
|
+
// This guarantees a visible transition from the previous segment's anchor
|
|
107
|
+
// to the new content block. Without this, an agent can string back-to-back
|
|
108
|
+
// hold-only segments and the viewer just sees jump cuts in audio with no
|
|
109
|
+
// page movement.
|
|
110
|
+
//
|
|
111
|
+
// What's NOT enforced anymore:
|
|
112
|
+
// - scroll_to duration_ms is not bounded — short transitions (500ms) and
|
|
113
|
+
// longer ones (2s+) are both fine. Smoothness comes from atomScrollTo's
|
|
114
|
+
// RAF-based programmatic implementation, not from duration.
|
|
115
|
+
// - hold duration_ms is not bounded — long holds (3-5s) are the normal
|
|
116
|
+
// case (this is where the agent narrates the current block).
|
|
117
|
+
|
|
118
|
+
/**
 * Transition lint for a single segment: every non-opening segment that has
 * operations must begin with a `scroll_to` atom.
 *
 * @param {Array<{atom?: string}>|*} operations - The segment's operation
 *   list; a non-array value is treated as an empty list.
 * @param {number} segmentIndex - Zero-based position of the segment; index 0
 *   (the opening segment) is never checked.
 * @returns {void}
 * @throws {Error} With `code === 'TRANSITION_REQUIRED'` when the first atom of
 *   a non-opening, non-empty segment is anything other than `scroll_to`.
 */
function validateReadingFlow(operations, segmentIndex) {
  // Opening hook segment is exempt — the first segment may legitimately be a
  // fully static hero shot (e.g. a recruiting headline narrated over a poster).
  if (segmentIndex === 0) return;

  const ops = Array.isArray(operations) ? operations : [];
  if (ops.length === 0) return;

  // The first op of a non-opening segment must be the scroll_to that
  // transitions into this segment's content block. All-hold segments produce
  // back-to-back jump cuts with no visible page movement.
  const first = ops[0];
  if (first?.atom === 'scroll_to') return;

  // The fix hint names the `y` field: that is what the operations schema
  // accepts, whereas `target_y` is one of the rejected V5 fields.
  const err = new Error(
    `transition_required: segments[${segmentIndex}] must start with a scroll_to atom — `
    + 'this is the smooth transition from the previous block to this one. '
    + `Got first atom "${first?.atom ?? 'none'}". All-hold segments produce jump cuts. `
    + 'Fix: prepend a scroll_to(y=<new block top>, duration_ms=500~1000) before '
    + 'the hold. The scroll can be short (~600ms is fine); what matters is that the '
    + "page visibly slides — atomScrollTo's programmatic mode handles smoothness.",
  );
  err.code = 'TRANSITION_REQUIRED';
  throw err;
}
|
|
144
|
+
|
|
97
145
|
// Process operations[]: expand "fill" on the last hold, validate atom shape.
|
|
98
146
|
function processOperations(operations, audioDurationMs, segmentIndex) {
|
|
99
147
|
if (!Array.isArray(operations) || operations.length === 0) {
|
|
@@ -165,6 +213,7 @@ function processOperations(operations, audioDurationMs, segmentIndex) {
|
|
|
165
213
|
}
|
|
166
214
|
sum += n;
|
|
167
215
|
}
|
|
216
|
+
validateReadingFlow(expanded, segmentIndex);
|
|
168
217
|
return { operations: expanded, durationSumMs: Math.round(sum) };
|
|
169
218
|
}
|
|
170
219
|
|