@lightcone-ai/daemon 0.23.5 → 0.23.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -266,28 +266,31 @@ server.tool(
266
266
  })).optional().describe(
267
267
  'For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms '
268
268
  + 'must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.\n\n'
269
- + 'READING-FLOW MODE (REQUIRED — enforced by lint): operations must simulate a person '
270
- + 'sliding a finger through the page while narrating, pausing at key spots to explain. '
269
+ + 'TRANSITION + EXPLAIN MODE (REQUIRED — enforced by lint): the recording should feel '
270
+ + 'like a person opening a page and walking the viewer through it block by block. '
271
271
  + 'Concretely:\n'
272
- + ' • Each non-opening segment MUST contain at least one scroll_to with duration_ms >= 1500.\n'
273
- + ' Any hold with duration_ms > 2000 MUST be immediately preceded by a scroll_to with duration_ms >= 1500.\n'
274
- + ' • Avoid the "jump + freeze" anti-pattern: scroll_to(duration_ms < 1000) followed by hold(duration_ms > 2000). '
275
- + 'It makes the recording feel like a screenshot slideshow, not a page being read.\n\n'
276
- + 'GOOD example for a 9.5s segment narrating "宁波银行金融科技部主推 FinTech 暑期专项":\n'
272
+ + ' • scroll_to is a TRANSITION between content blocks — short (~500-800ms is fine), '
273
+ + 'smooth (atomScrollTo programmatic mode handles smoothness automatically; speed does NOT need to be slow).\n'
274
+ + ' • hold is where the NARRATION happens — long holds (2-5s) are the norm, not the exception. '
275
+ + 'This is when the agent says the actual sentences about this block.\n'
276
+ + " • Every non-opening segment MUST start with a scroll_to (the transition into this segment's "
277
+ + 'content block). Segments starting with hold are REJECTED — they cause jump cuts.\n'
278
+ + ' • The shape is: "scroll to new block → pause and explain → scroll to next block → pause and explain".\n\n'
279
+ + 'GOOD example for a 5s segment narrating "宁波银行金融科技部 FinTech 暑期专项":\n'
277
280
  + ' [\n'
278
- + ' { atom: "scroll_to", y: 280, duration_ms: 2500 }, // slow slide while saying "宁波银行金融科技部,正式开放 FinTech 暑期专项"\n'
279
- + ' { atom: "hold", duration_ms: 1200 }, // brief pause on title to let viewer read\n'
280
- + ' { atom: "scroll_to", y: 980, duration_ms: 3200 }, // continue sliding while narrating job content\n'
281
- + ' { atom: "hold", duration_ms: 1400 }, // pause on key bullet list\n'
282
- + ' { atom: "scroll_to", y: 1450, duration_ms: 1500 }, // final slide to closing block\n'
283
- + ' { atom: "hold", duration_ms: "fill" }, // remaining audio time (~700ms expected)\n'
281
+ + ' { atom: "scroll_to", y: 280, duration_ms: 700 }, // 0.7s smooth transition to title\n'
282
+ + ' { atom: "hold", duration_ms: "fill" }, // ~4.3s: agent narrates this block\n'
284
283
  + ' ]\n\n'
285
- + 'BAD example (will be REJECTED by reading_flow_violation):\n'
284
+ + 'GOOD example for a 9s segment with two content blocks inside:\n'
286
285
  + ' [\n'
287
- + ' { atom: "scroll_to", y: 1000, duration_ms: 600 }, // jump cut\n'
288
- + ' { atom: "hold", duration_ms: 5000 }, // 5s freeze rejected\n'
289
- + ' { atom: "scroll_to", y: 2500, duration_ms: 800 }, // jump cut\n'
290
- + ' { atom: "hold", duration_ms: "fill" }, // rejected\n'
286
+ + ' { atom: "scroll_to", y: 980, duration_ms: 700 }, // transition to first block\n'
287
+ + ' { atom: "hold", duration_ms: 4000 }, // narrate this block (~"金融产品应用开发岗 …")\n'
288
+ + ' { atom: "scroll_to", y: 1450, duration_ms: 600 }, // short transition to next block\n'
289
+ + ' { atom: "hold", duration_ms: "fill" }, // narrate next block (~3.7s)\n'
290
+ + ' ]\n\n'
291
+ + 'BAD example (REJECTED by transition_required):\n'
292
+ + ' [\n'
293
+ + ' { atom: "hold", duration_ms: 5000 }, // segment starts with hold ← rejected\n'
291
294
  + ' ]',
292
295
  ),
293
296
  })).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.23.5",
3
+ "version": "0.23.6",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -53,7 +53,7 @@ export async function atomScrollTo(page, _ctx, {
53
53
  target_y,
54
54
  duration_ms,
55
55
  curve = 'easeInOutQuad',
56
- jitter_px = 2,
56
+ jitter_px = 0, // 不要微动 — 用户反复明确要求
57
57
  from_y = null,
58
58
  mode = 'auto',
59
59
  } = {}) {
@@ -70,17 +70,21 @@ export async function atomScrollTo(page, _ctx, {
70
70
  const durationMs = Number(duration_ms);
71
71
  const distance = Math.abs(targetY - fromY);
72
72
 
73
- // Auto-mode heuristic: touch works for short distances (single segment, no
74
- // inter-segment fling interference) OR slow velocities (each segment has
75
- // time to settle before the next starts). Fast long-distance scrolls fall
76
- // back to programmatic, where the easing loop drives root.scrollTo
77
- // deterministically.
78
- // Thresholds chosen empirically against the v4 failure mode (~1100px in
79
- // ~1000ms ≈ ~1100 px/s, fling-interrupt-fling, page never reached target).
80
- const velocity = durationMs > 0 ? (distance / durationMs) * 1000 : 0; // px/s
73
+ // Auto-mode: default to programmatic (RAF-driven smooth scroll). The touch
74
+ // path uses humanizedScroll which splits any scroll > 260px into multiple
75
+ // CDP swipes, each with ±18-26px random horizontal nudge and fling-cancel-
76
+ // fling boundaries — that looks like "颤抖着分多次拨", not a clean slide.
77
+ // User feedback is unambiguous: scroll must be a smooth transition between
78
+ // content blocks, not a teleport (instant snap) and not a wobble (multi-
79
+ // segment touch with horizontal drift). Programmatic with RAF achieves
80
+ // both — every frame moves, vertical only, no inter-segment pauses.
81
+ // Touch mode remains available via explicit `mode: 'touch'` for callers
82
+ // that specifically want gesture physics.
83
+ const velocity = durationMs > 0 ? (distance / durationMs) * 1000 : 0; // px/s (kept for diagnostics)
84
+ void velocity;
81
85
  const resolvedMode = mode === 'programmatic' || mode === 'touch'
82
86
  ? mode
83
- : (distance < 240 || velocity < 500 ? 'touch' : 'programmatic');
87
+ : 'programmatic';
84
88
 
85
89
  if (resolvedMode === 'touch') {
86
90
  await humanizedScroll(page, {
@@ -96,68 +100,75 @@ export async function atomScrollTo(page, _ctx, {
96
100
  targetY,
97
101
  durationMs,
98
102
  curve,
99
- jitterPx: Math.max(0, Number(jitter_px) || 0),
100
103
  });
101
104
  }
102
105
  return { anchorY: Math.round(targetY) };
103
106
  }
104
107
 
105
- // Programmatic scroll: hands the animation off to the browser's native
106
- // scroll engine via `scroll-behavior: smooth`. That way the easing runs on
107
- // the compositor thread at the display refresh rate, independent of how
108
- // busy the page's JS is. The JS-driven setTimeout approach we tried first
109
- // gets badly throttled on JS-heavy article pages (60Hz timers can stretch
110
- // to 150-200ms), turning a 1s transition into 5-8s.
108
+ // Programmatic scroll: JS-driven RAF loop that incrementally updates the
109
+ // scroll position frame-by-frame over `durationMs`. This produces an actual
110
+ // smooth scroll the viewer sees in the recording — the previous version
111
+ // did a hard instant snap and then a static wait, which looked like a
112
+ // teleport ("跳一下然后定格"), not like a person sliding a page.
111
113
  //
112
- // The wait is Node-side, so even if the in-page scrollend never fires we
113
- // still cap the section at durationMs and move on. We deliberately do NOT
114
- // wait for scrollend empirically faster than dispatching the event on
115
- // page-heavy mobile sites.
114
+ // Why not native `scroll-behavior: smooth` or `scrollTo({behavior:'smooth'})`?
115
+ // In Playwright + a headless mobile context, native smooth-scroll often
116
+ // gets capped to a fixed short duration (~300-500ms) regardless of distance,
117
+ // or is throttled by the page's own scroll logic. We need a duration we
118
+ // control end-to-end.
119
+ //
120
+ // Frame loop runs inside page.evaluate so it stays in lockstep with the
121
+ // page's render thread — important when recordVideo is capturing 30fps.
116
122
  async function programmaticScroll(page, {
117
123
  fromY,
118
124
  targetY,
119
125
  durationMs,
126
+ curve = 'easeInOutQuad',
120
127
  } = {}) {
121
- // Try every plausible scroll target — mobile article pages sometimes have
122
- // a fixed-position outer body and scroll happens on an inner container.
123
- // We dispatch to all candidates and let whichever one is actually the
124
- // scroller win. Returns the diagnostics so we can debug when the page
125
- // refuses to scroll.
126
- const diag = await page.evaluate((input) => {
127
- const candidates = [];
128
- if (document.scrollingElement) candidates.push(document.scrollingElement);
129
- if (document.documentElement) candidates.push(document.documentElement);
130
- if (document.body) candidates.push(document.body);
131
- candidates.push(window);
132
-
133
- const before = candidates.map((c) => {
134
- if (c === window) return { tag: 'window', y: window.scrollY };
135
- return { tag: c.tagName, y: c.scrollTop };
136
- });
137
-
138
- // Hard snap to target on every candidate (instant, no animation).
139
- for (const c of candidates) {
140
- try {
141
- if (c === window) window.scrollTo(0, input.targetY);
142
- else { c.scrollTop = input.targetY; }
143
- } catch { /* ignore */ }
128
+ await page.evaluate(async (input) => {
129
+ function pickScroller() {
130
+ if (document.scrollingElement) return document.scrollingElement;
131
+ if (document.documentElement) return document.documentElement;
132
+ return document.body;
144
133
  }
134
+ function easeInOutQuad(t) { return t < 0.5 ? 2 * t * t : 1 - Math.pow(-2 * t + 2, 2) / 2; }
135
+ function easeOutQuad(t) { return 1 - (1 - t) * (1 - t); }
136
+ function linear(t) { return t; }
137
+ const ease = input.curve === 'linear' ? linear
138
+ : input.curve === 'easeOutQuad' ? easeOutQuad
139
+ : easeInOutQuad;
145
140
 
146
- const after = candidates.map((c) => {
147
- if (c === window) return { tag: 'window', y: window.scrollY };
148
- return { tag: c.tagName, y: c.scrollTop };
149
- });
141
+ const scroller = pickScroller();
142
+ const startY = (scroller === document.scrollingElement || scroller === document.documentElement)
143
+ ? scroller.scrollTop : window.scrollY;
144
+ const delta = input.targetY - startY;
145
+ const start = performance.now();
150
146
 
151
- return {
152
- requested_target: input.targetY,
153
- before, after,
154
- maxScroll: document.documentElement?.scrollHeight,
155
- innerHeight: window.innerHeight,
156
- };
157
- }, { fromY, targetY });
158
- void diag;
159
- // Brief dwell to let the page settle the snap before next atom starts.
160
- await page.waitForTimeout(Math.max(80, Math.round(durationMs * 0.3)));
147
+ return new Promise((resolve) => {
148
+ function tick(now) {
149
+ const elapsed = now - start;
150
+ const t = Math.min(1, elapsed / input.durationMs);
151
+ const y = startY + delta * ease(t);
152
+ try {
153
+ if (scroller === window) window.scrollTo(0, y);
154
+ else { scroller.scrollTop = y; }
155
+ } catch { /* ignore */ }
156
+ if (t < 1) {
157
+ requestAnimationFrame(tick);
158
+ } else {
159
+ // Final snap to exact target (in case of sub-pixel drift).
160
+ try {
161
+ if (scroller === window) window.scrollTo(0, input.targetY);
162
+ else { scroller.scrollTop = input.targetY; }
163
+ } catch { /* ignore */ }
164
+ resolve();
165
+ }
166
+ }
167
+ requestAnimationFrame(tick);
168
+ });
169
+ }, { fromY, targetY, durationMs, curve });
170
+ // Tiny settle so the next atom sees the scroll committed.
171
+ await page.waitForTimeout(50);
161
172
  }
162
173
 
163
174
  // ── atomHold ─────────────────────────────────────────────────────────────────
@@ -94,79 +94,52 @@ function assertNoV5Fields(seg, index) {
94
94
  }
95
95
  }
96
96
 
97
- // Reading-flow lint — reject the "jump + long hold" anti-pattern that makes
98
- // recordings feel like a slideshow of screenshots instead of a person
99
- // scrolling through a page and pausing at key spots to explain. This is what
100
- // the user repeatedly asked for ("从上往下滑动着介绍,到重点处停一下"). The
101
- // V6 atom toolkit is fully capable of producing reading-flow output; the
102
- // problem is that agents default to short-scroll + long-hold without an
103
- // explicit constraint, so we enforce it here.
97
+ // Transition-mode lint — enforce the "explain block → smooth transition → explain block"
98
+ // pattern the user described:
99
+ // "先说一句话, 然后再往下滑, 介绍内容 1, 再往下滑, 停住介绍内容 2"
104
100
  //
105
- // Rules:
106
- // - Each segment (except the opening hook, segment 0) MUST contain at least
107
- // one scroll_to with duration_ms >= 1500ms — the "slow scroll while
108
- // narrating" beat.
109
- // - Any hold with duration_ms > 2000ms MUST be immediately preceded by a
110
- // scroll_to with duration_ms >= 1500ms — long holds are only legal as
111
- // "I just slowly scrolled to a key spot, now I'm pausing on it".
112
- const READING_FLOW_SLOW_SCROLL_MIN_MS = 1500;
113
- const READING_FLOW_LONG_HOLD_THRESHOLD_MS = 2000;
101
+ // Key insight: scroll_to is a TRANSITION between content blocks, not a
102
+ // narration vehicle. It can be short (~500-800ms) — speed doesn't matter,
103
+ // only smoothness. Long narration happens during hold, not during scroll.
104
+ //
105
+ // Rule (single rule): every non-opening segment MUST start with a scroll_to.
106
+ // This guarantees a visible transition from the previous segment's anchor
107
+ // to the new content block. Without this, an agent can string back-to-back
108
+ // hold-only segments and the viewer just sees jump cuts in audio with no
109
+ // page movement.
110
+ //
111
+ // What's NOT enforced anymore:
112
+ // - scroll_to duration_ms is not bounded — short transitions (500ms) and
113
+ // longer ones (2s+) are both fine. Smoothness comes from atomScrollTo's
114
+ // RAF-based programmatic implementation, not from duration.
115
+ // - hold duration_ms is not bounded — long holds (3-5s) are the normal
116
+ // case (this is where the agent narrates the current block).
114
117
 
115
118
  function validateReadingFlow(operations, segmentIndex) {
116
- // Opening hook segment may legitimately be a fully static hero shot
117
- // with no scroll (e.g. "校招,实习岗位更新,速投" over a poster).
119
+ // Opening hook segment is exempt — first segment may legitimately be
120
+ // a fully static hero shot (e.g. "校招,实习岗位更新,速投" over a poster).
118
121
  if (segmentIndex === 0) return;
119
122
 
120
123
  const ops = Array.isArray(operations) ? operations : [];
121
124
  if (ops.length === 0) return;
122
125
 
123
- const hasSlowScroll = ops.some(
124
- op => op?.atom === 'scroll_to' && Number(op.duration_ms) >= READING_FLOW_SLOW_SCROLL_MIN_MS,
125
- );
126
- if (!hasSlowScroll) {
126
+ // The first op of a non-opening segment must be a scroll_to (the
127
+ // transition into this block's content). All-hold segments produce
128
+ // back-to-back jump cuts with no visible page movement, which the user
129
+ // has explicitly rejected.
130
+ const first = ops[0];
131
+ if (first?.atom !== 'scroll_to') {
127
132
  const err = new Error(
128
- `reading_flow_violation: segments[${segmentIndex}] has no slow scroll. `
129
- + `Reading-flow mode requires at least one scroll_to with duration_ms >= ${READING_FLOW_SLOW_SCROLL_MIN_MS}ms `
130
- + 'per non-opening segment this simulates a finger sliding through the page '
131
- + 'while narration plays, instead of jumping cut-style to a position. '
132
- + 'Fix: replace any "short scroll_to(duration_ms<1000) + long hold(>2000)" pair '
133
- + `with one "slow scroll_to(duration_ms=2000~3500)" + "short hold(duration_ms=800~1500)".`,
133
+ `transition_required: segments[${segmentIndex}] must start with a scroll_to atom — `
134
+ + 'this is the smooth transition from the previous block to this one. '
135
+ + `Got first atom "${first?.atom ?? 'none'}". All-hold segments produce jump cuts. `
136
+ + 'Fix: prepend a scroll_to(target_y=<new block top>, duration_ms=500~1000) before '
137
+ + 'the hold. The scroll can be short (~600ms is fine); what matters is that the '
138
+ + "page visibly slides — atomScrollTo's programmatic mode handles smoothness.",
134
139
  );
135
- err.code = 'READING_FLOW_VIOLATION';
140
+ err.code = 'TRANSITION_REQUIRED';
136
141
  throw err;
137
142
  }
138
-
139
- for (let i = 1; i < ops.length; i += 1) {
140
- const op = ops[i];
141
- if (op?.atom !== 'hold') continue;
142
- const holdMs = Number(op.duration_ms);
143
- if (!Number.isFinite(holdMs) || holdMs <= READING_FLOW_LONG_HOLD_THRESHOLD_MS) continue;
144
-
145
- const prev = ops[i - 1];
146
- if (prev?.atom !== 'scroll_to') {
147
- const err = new Error(
148
- `reading_flow_violation: segments[${segmentIndex}].operations[${i}] is a long hold `
149
- + `(${holdMs}ms) but its preceding atom is "${prev?.atom ?? 'none'}", not scroll_to. `
150
- + 'Long holds (>2000ms) must immediately follow a scroll_to — '
151
- + 'the natural reading pattern is "slow scroll to a key spot → pause to explain".',
152
- );
153
- err.code = 'READING_FLOW_VIOLATION';
154
- throw err;
155
- }
156
- const prevScrollMs = Number(prev.duration_ms);
157
- if (!Number.isFinite(prevScrollMs) || prevScrollMs < READING_FLOW_SLOW_SCROLL_MIN_MS) {
158
- const err = new Error(
159
- `reading_flow_violation: segments[${segmentIndex}].operations[${i}] is a long hold `
160
- + `(${holdMs}ms) following a fast scroll_to (${prevScrollMs}ms). This is the "跳页+长停" `
161
- + 'anti-pattern — viewers see a hard cut to a new position then a frozen frame. '
162
- + `Fix: extend the preceding scroll_to to duration_ms >= ${READING_FLOW_SLOW_SCROLL_MIN_MS}ms `
163
- + '(narrate WHILE you scroll), and shorten this hold to duration_ms <= 1500ms '
164
- + '(brief pause to stress the key point, then move on).',
165
- );
166
- err.code = 'READING_FLOW_VIOLATION';
167
- throw err;
168
- }
169
- }
170
143
  }
171
144
 
172
145
  // Process operations[]: expand "fill" on the last hold, validate atom shape.