@lightcone-ai/daemon 0.23.4 → 0.23.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -257,13 +257,39 @@ server.tool(
257
257
  }).optional().describe('Optional presentation hints (style only). duration/per_card_duration are computed.'),
258
258
  operations: z.array(z.object({
259
259
  atom: z.enum(['scroll_to', 'hold', 'cursor_focus']),
260
- duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
260
+ duration_ms: z.union([z.number(), z.literal('fill')]).describe('Atom duration in ms. "fill" allowed only on the LAST hold to auto-fill remaining audio time.'),
261
261
  y: z.number().optional(),
262
262
  x: z.number().optional(),
263
263
  curve: z.enum(['easeInOutQuad', 'linear', 'easeOutQuad']).optional(),
264
264
  mode: z.enum(['auto', 'touch', 'programmatic']).optional(),
265
265
  jitter_px: z.number().optional(),
266
- })).optional().describe('For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.'),
266
+ })).optional().describe(
267
+ 'For visual_kind=video URL recording sections: ordered atom sequence. Sum of duration_ms '
268
+ + 'must equal audio_duration_ms (±200ms); use "fill" on the last hold to auto-balance.\n\n'
269
+ + 'READING-FLOW MODE (REQUIRED — enforced by lint): operations must simulate a person '
270
+ + 'sliding a finger through the page while narrating, pausing at key spots to explain. '
271
+ + 'Concretely:\n'
272
+ + ' • Each non-opening segment MUST contain at least one scroll_to with duration_ms >= 1500.\n'
273
+ + ' • Any hold with duration_ms > 2000 MUST be immediately preceded by a scroll_to with duration_ms >= 1500.\n'
274
+ + ' • Avoid the "jump + freeze" anti-pattern: scroll_to(duration_ms < 1000) followed by hold(duration_ms > 2000). '
275
+ + 'It makes the recording feel like a screenshot slideshow, not a page being read.\n\n'
276
+ + 'GOOD example for a 9.5s segment narrating "宁波银行金融科技部主推 FinTech 暑期专项":\n'
277
+ + ' [\n'
278
+ + ' { atom: "scroll_to", y: 280, duration_ms: 2500 }, // slow slide while saying "宁波银行金融科技部,正式开放 FinTech 暑期专项"\n'
279
+ + ' { atom: "hold", duration_ms: 1200 }, // brief pause on title to let viewer read\n'
280
+ + ' { atom: "scroll_to", y: 980, duration_ms: 3200 }, // continue sliding while narrating job content\n'
281
+ + ' { atom: "hold", duration_ms: 1400 }, // pause on key bullet list\n'
282
+ + ' { atom: "scroll_to", y: 1450, duration_ms: 1500 }, // final slide to closing block\n'
283
+ + ' { atom: "hold", duration_ms: "fill" }, // remaining audio time (~700ms expected)\n'
284
+ + ' ]\n\n'
285
+ + 'BAD example (will be REJECTED by reading_flow_violation):\n'
286
+ + ' [\n'
287
+ + ' { atom: "scroll_to", y: 1000, duration_ms: 600 }, // jump cut\n'
288
+ + ' { atom: "hold", duration_ms: 5000 }, // 5s freeze ← rejected\n'
289
+ + ' { atom: "scroll_to", y: 2500, duration_ms: 800 }, // jump cut\n'
290
+ + ' { atom: "hold", duration_ms: "fill" }, // ← rejected\n'
291
+ + ' ]',
292
+ ),
267
293
  })).describe('Segments to plan. audio_path is required for each. V5 fields (action, target_y, target_y_content_label, focus_region, transition_ms, dwell_ms, phase.beats[]) are rejected.'),
268
294
  },
269
295
  async ({ segments }) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.23.4",
3
+ "version": "0.23.5",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -94,6 +94,81 @@ function assertNoV5Fields(seg, index) {
94
94
  }
95
95
  }
96
96
 
97
// Reading-flow lint — reject the "jump + long hold" anti-pattern that makes
// recordings feel like a slideshow of screenshots instead of a person
// scrolling through a page and pausing at key spots to explain. This is what
// the user repeatedly asked for ("scroll from top to bottom while presenting,
// and pause briefly at the key points"). The V6 atom toolkit is fully capable
// of producing reading-flow output; the problem is that agents default to
// short-scroll + long-hold without an explicit constraint, so we enforce it
// here.
//
// Rules:
//   - Each segment (except the opening hook, segment 0) MUST contain at least
//     one scroll_to with duration_ms >= 1500ms — the "slow scroll while
//     narrating" beat.
//   - Any hold with duration_ms > 2000ms MUST be immediately preceded by a
//     scroll_to with duration_ms >= 1500ms — long holds are only legal as
//     "I just slowly scrolled to a key spot, now I'm pausing on it".
const READING_FLOW_SLOW_SCROLL_MIN_MS = 1500;
const READING_FLOW_LONG_HOLD_THRESHOLD_MS = 2000;

// Build the Error thrown for every reading-flow rule breach, tagged with a
// stable machine-readable `code` so callers can branch on it.
function createReadingFlowViolation(message) {
  const err = new Error(message);
  err.code = 'READING_FLOW_VIOLATION';
  return err;
}

/**
 * Enforce the reading-flow rules on one segment's atom sequence.
 *
 * @param {Array<{atom?: string, duration_ms?: number|string}>} operations
 *   Ordered atoms of the segment. A non-array value or an empty array is
 *   accepted without any check.
 * @param {number} segmentIndex
 *   Position of the segment in the plan. Segment 0 (the opening hook) is
 *   exempt from every rule.
 * @returns {void}
 * @throws {Error} With `code === 'READING_FLOW_VIOLATION'` when the segment
 *   has no slow scroll_to, or when a long hold is not immediately preceded by
 *   a slow scroll_to.
 */
function validateReadingFlow(operations, segmentIndex) {
  // Opening hook segment may legitimately be a fully static hero shot with no
  // scroll (e.g. a "campus recruiting — internship openings updated, apply
  // now" line narrated over a poster).
  if (segmentIndex === 0) return;

  const ops = Array.isArray(operations) ? operations : [];
  if (ops.length === 0) return;

  const isSlowScroll = (op) =>
    op?.atom === 'scroll_to' && Number(op.duration_ms) >= READING_FLOW_SLOW_SCROLL_MIN_MS;

  if (!ops.some(isSlowScroll)) {
    throw createReadingFlowViolation(
      `reading_flow_violation: segments[${segmentIndex}] has no slow scroll. `
        + `Reading-flow mode requires at least one scroll_to with duration_ms >= ${READING_FLOW_SLOW_SCROLL_MIN_MS}ms `
        + 'per non-opening segment — this simulates a finger sliding through the page '
        + 'while narration plays, instead of jumping cut-style to a position. '
        + 'Fix: replace any "short scroll_to(duration_ms<1000) + long hold(>2000)" pair '
        + 'with one "slow scroll_to(duration_ms=2000~3500)" + "short hold(duration_ms=800~1500)".',
    );
  }

  // Start at index 0: a long hold that opens the segment has no preceding
  // scroll_to at all, which breaks the "long hold must follow a slow scroll"
  // rule just as much as a long hold after a fast scroll does.
  for (const [i, op] of ops.entries()) {
    if (op?.atom !== 'hold') continue;

    const holdMs = Number(op.duration_ms);
    // Non-numeric durations (e.g. an unexpanded "fill") and short holds are
    // not subject to the long-hold rule.
    if (!Number.isFinite(holdMs) || holdMs <= READING_FLOW_LONG_HOLD_THRESHOLD_MS) continue;

    const prev = i > 0 ? ops[i - 1] : undefined;
    if (prev?.atom !== 'scroll_to') {
      throw createReadingFlowViolation(
        `reading_flow_violation: segments[${segmentIndex}].operations[${i}] is a long hold `
          + `(${holdMs}ms) but its preceding atom is "${prev?.atom ?? 'none'}", not scroll_to. `
          + 'Long holds (>2000ms) must immediately follow a scroll_to — '
          + 'the natural reading pattern is "slow scroll to a key spot → pause to explain".',
      );
    }

    const prevScrollMs = Number(prev.duration_ms);
    if (!Number.isFinite(prevScrollMs) || prevScrollMs < READING_FLOW_SLOW_SCROLL_MIN_MS) {
      throw createReadingFlowViolation(
        `reading_flow_violation: segments[${segmentIndex}].operations[${i}] is a long hold `
          + `(${holdMs}ms) following a fast scroll_to (${prevScrollMs}ms). This is the "跳页+长停" `
          + 'anti-pattern — viewers see a hard cut to a new position then a frozen frame. '
          + `Fix: extend the preceding scroll_to to duration_ms >= ${READING_FLOW_SLOW_SCROLL_MIN_MS}ms `
          + '(narrate WHILE you scroll), and shorten this hold to duration_ms <= 1500ms '
          + '(brief pause to stress the key point, then move on).',
      );
    }
  }
}
171
+
97
172
  // Process operations[]: expand "fill" on the last hold, validate atom shape.
98
173
  function processOperations(operations, audioDurationMs, segmentIndex) {
99
174
  if (!Array.isArray(operations) || operations.length === 0) {
@@ -165,6 +240,7 @@ function processOperations(operations, audioDurationMs, segmentIndex) {
165
240
  }
166
241
  sum += n;
167
242
  }
243
+ validateReadingFlow(expanded, segmentIndex);
168
244
  return { operations: expanded, durationSumMs: Math.round(sum) };
169
245
  }
170
246