@lightcone-ai/daemon 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp-servers/official/media-tools/index.js +45 -22
- package/mcp-servers/official/page-understanding/index.js +6 -7
- package/package.json +1 -1
- package/src/_vendor/video/cdp-touch.js +184 -0
- package/src/_vendor/video/humanized-scroll.js +251 -0
- package/src/_vendor/video/recorder/atoms.js +212 -0
- package/src/_vendor/video/recorder/index.js +68 -38
- package/src/_vendor/video/recorder/plan-executor.js +192 -386
- package/src/_vendor/video/understanding/schema.js +316 -0
- package/src/tools/plan-video-segments.js +152 -22
- package/src/tools/record-url-narration.js +44 -136
- package/src/upload-job-manager.js +4 -4
- package/src/_vendor/video/recorder/phase-duration.js +0 -18
- package/src/_vendor/video/recorder/plan-estimator.js +0 -43
|
@@ -1,17 +1,16 @@
|
|
|
1
|
-
// record_url_narration
|
|
1
|
+
// V6 record_url_narration daemon tool wrapper.
|
|
2
2
|
//
|
|
3
|
-
// Drives Chromium on Xvfb + Playwright recordVideo to capture a silent mp4
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
// alongside narration audio.
|
|
3
|
+
// Drives Chromium on Xvfb + Playwright recordVideo to capture a silent mp4
|
|
4
|
+
// per section, then ffmpeg-transcodes + slices. The resulting silent mp4s
|
|
5
|
+
// feed compose_video_v2 as video-kind segments alongside narration audio.
|
|
7
6
|
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
7
|
+
// V6 contract (see docs/scenario-content-creation/video-synthesis-design.md):
|
|
8
|
+
// - page_understanding (from analyze_page) is required — drives safe-region
|
|
9
|
+
// check and preheat consistency
|
|
10
|
+
// - plan.sections each carry operations[] of atom calls; V5 fields are rejected
|
|
11
|
+
// - mp.weixin.qq.com-only keyword blacklist is gone — unsafe_regions from
|
|
12
|
+
// page_understanding is the universal safety mechanism
|
|
13
|
+
// - plan_video_segments must run earlier in this session (standard chain)
|
|
15
14
|
|
|
16
15
|
import { mkdirSync } from 'fs';
|
|
17
16
|
import path from 'path';
|
|
@@ -56,88 +55,9 @@ function deriveDurationMs(recorderOutput) {
|
|
|
56
55
|
return lastTms > 0 ? lastTms : null;
|
|
57
56
|
}
|
|
58
57
|
|
|
59
|
-
function
|
|
58
|
+
function planSections(plan) {
|
|
60
59
|
if (!isPlainObject(plan)) return null;
|
|
61
|
-
|
|
62
|
-
if (Array.isArray(plan[key]) && plan[key].length > 0) return plan[key];
|
|
63
|
-
}
|
|
64
|
-
return null;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
function derivePhaseCount({ plan, recorderOutput }) {
|
|
68
|
-
const explicit = normalizeNumberOrNull(recorderOutput?.phases);
|
|
69
|
-
if (explicit != null) return explicit;
|
|
70
|
-
|
|
71
|
-
const segments = planSegments(plan);
|
|
72
|
-
return segments ? segments.length : null;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
function assertPipelineCompliance(plan) {
|
|
76
|
-
if (!isPlainObject(plan)) return;
|
|
77
|
-
if (!planSegments(plan)) {
|
|
78
|
-
throw new Error(
|
|
79
|
-
'record_url_narration: `plan` must contain a non-empty `phases` (or `sections` / `segments`) array — '
|
|
80
|
-
+ 'either hand-written or from plan_video_segments. Each entry should carry a visual action and a duration.'
|
|
81
|
-
);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
// Forbidden region keywords for recruitment content. If a section's
|
|
86
|
-
// target_y_content_label matches, we refuse to record — the resulting video
|
|
87
|
-
// would show 投递入口 / 二维码 / contact info, which violates the recruitment
|
|
88
|
-
// content policy (see fragments.md frag.short.recruitment_url_mode_policy).
|
|
89
|
-
//
|
|
90
|
-
// Discovered after Task #25 v1 ended up dwelling on FunPlus's QR/投递 area:
|
|
91
|
-
// the agent's plan declared target_y=2180 with dwell_ms=8500 without checking
|
|
92
|
-
// what content lived at that pixel position. This is a prompt-level rule
|
|
93
|
-
// that's been ignored often enough that we enforce it at the tool layer.
|
|
94
|
-
const FORBIDDEN_REGION_PATTERNS = [
|
|
95
|
-
/二维码/, /扫码/, /扫一扫/,
|
|
96
|
-
/投递入口/, /投递方式/, /投递通道/, /投递渠道/, /报名入口/, /报名方式/,
|
|
97
|
-
/联系方式/, /联系人/, /微信号/, /\bWeChat\b/i, /\bQQ群\b/,
|
|
98
|
-
/阅读原文/, /外链/, /\bQR\b/i,
|
|
99
|
-
];
|
|
100
|
-
|
|
101
|
-
function isRecruitmentLikeUrl(url) {
|
|
102
|
-
if (typeof url !== 'string') return false;
|
|
103
|
-
return /mp\.weixin\.qq\.com/.test(url);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
function describeForbiddenMatch(label) {
|
|
107
|
-
for (const pattern of FORBIDDEN_REGION_PATTERNS) {
|
|
108
|
-
if (pattern.test(label)) return pattern.source;
|
|
109
|
-
}
|
|
110
|
-
return null;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
function checkSafeRegionLabels({ url, plan }) {
|
|
114
|
-
if (!isRecruitmentLikeUrl(url)) return null;
|
|
115
|
-
const segments = planSegments(plan);
|
|
116
|
-
if (!segments) return null;
|
|
117
|
-
for (let i = 0; i < segments.length; i += 1) {
|
|
118
|
-
const seg = segments[i] ?? {};
|
|
119
|
-
const label = normalizeText(seg.target_y_content_label ?? seg.targetYContentLabel ?? '');
|
|
120
|
-
if (!label) {
|
|
121
|
-
return (
|
|
122
|
-
`record_url_narration: section[${i}] is missing required field `
|
|
123
|
-
+ `\`target_y_content_label\`. For recruitment URLs (mp.weixin.qq.com / `
|
|
124
|
-
+ `校招 / 实习等) you MUST label what content lives at target_y so the `
|
|
125
|
-
+ `tool can verify it is not 二维码/投递入口/联系方式. Look at the page `
|
|
126
|
-
+ `screenshot, find what is at target_y=${seg.target_y ?? '<unset>'}, `
|
|
127
|
-
+ `and add a short label like "标题区" / "岗位信息卡片" / "公司介绍".`
|
|
128
|
-
);
|
|
129
|
-
}
|
|
130
|
-
const match = describeForbiddenMatch(label);
|
|
131
|
-
if (match) {
|
|
132
|
-
return (
|
|
133
|
-
`record_url_narration: section[${i}] target_y=${seg.target_y ?? '?'} `
|
|
134
|
-
+ `is labeled "${label}", which matches a forbidden region pattern `
|
|
135
|
-
+ `/${match}/. Recruitment content must NOT dwell on 投递入口 / 二维码 / `
|
|
136
|
-
+ `联系方式 areas. Pick a different target_y inside the 标题区 / 岗位 `
|
|
137
|
-
+ `信息卡片 / 公司介绍 area and rewrite this section.`
|
|
138
|
-
);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
60
|
+
if (Array.isArray(plan.sections) && plan.sections.length > 0) return plan.sections;
|
|
141
61
|
return null;
|
|
142
62
|
}
|
|
143
63
|
|
|
@@ -155,6 +75,24 @@ export function validateRecordUrlNarrationArgs(args = {}) {
|
|
|
155
75
|
throw error;
|
|
156
76
|
}
|
|
157
77
|
|
|
78
|
+
if (!planSections(args.plan)) {
|
|
79
|
+
const error = new Error(
|
|
80
|
+
'plan.sections is required (non-empty array). Each section: { id?, text?, audio_path?, dwell_ms?, operations: [{atom, duration_ms, ...}] }.',
|
|
81
|
+
);
|
|
82
|
+
error.code = 'PLAN_SECTIONS_REQUIRED';
|
|
83
|
+
throw error;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (!isPlainObject(args.page_understanding)) {
|
|
87
|
+
const error = new Error(
|
|
88
|
+
'page_understanding is required — call analyze_page(url) first and pass its output here. '
|
|
89
|
+
+ 'V6 uses page_understanding.unsafe_regions[] to validate scroll_to.y / cursor_focus.y, and '
|
|
90
|
+
+ 'page_understanding.preheat_strategy to align the record browser with the analyze browser.',
|
|
91
|
+
);
|
|
92
|
+
error.code = 'PAGE_UNDERSTANDING_REQUIRED';
|
|
93
|
+
throw error;
|
|
94
|
+
}
|
|
95
|
+
|
|
158
96
|
return {
|
|
159
97
|
...(args ?? {}),
|
|
160
98
|
url: normalizedUrl,
|
|
@@ -218,49 +156,22 @@ export async function runRecordUrlNarrationTool({
|
|
|
218
156
|
return toolError(`Error: ${error.message}`);
|
|
219
157
|
}
|
|
220
158
|
|
|
221
|
-
try {
|
|
222
|
-
assertPipelineCompliance(validatedInput.plan);
|
|
223
|
-
} catch (error) {
|
|
224
|
-
return toolError(`Error: ${error.message}`);
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
// Safe-region check for recruitment URLs — refuse plans that dwell on
|
|
228
|
-
// forbidden regions (二维码 / 投递入口 / 联系方式) before we even start
|
|
229
|
-
// Chromium. The agent must label each target_y with the content that lives
|
|
230
|
-
// there, and the labels are pattern-matched against a forbidden list.
|
|
231
|
-
const safeRegionError = checkSafeRegionLabels({
|
|
232
|
-
url: validatedInput.url,
|
|
233
|
-
plan: validatedInput.plan,
|
|
234
|
-
});
|
|
235
|
-
if (safeRegionError) {
|
|
236
|
-
return toolError(`Error: ${safeRegionError}`);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
159
|
// Standard-chain hard block: refuse recordings unless plan_video_segments
|
|
240
|
-
// ran in this session.
|
|
241
|
-
//
|
|
242
|
-
//
|
|
243
|
-
// forcing a full re-record. plan_video_segments fills dwell_ms mechanically
|
|
244
|
-
// from ffprobe audio duration, eliminating the drift.
|
|
160
|
+
// ran in this session. plan_video_segments is what aligns operations[]
|
|
161
|
+
// duration sums to the per-section TTS audio duration; skipping it lets
|
|
162
|
+
// audio/visual drift accumulate across sections.
|
|
245
163
|
if (!planVideoSegmentsCalled) {
|
|
246
164
|
return toolError(
|
|
247
165
|
'Error: record_url_narration refused: plan_video_segments must run earlier in this '
|
|
248
|
-
+ 'session so
|
|
249
|
-
+ '
|
|
250
|
-
+ '
|
|
251
|
-
+ '
|
|
252
|
-
+ '
|
|
253
|
-
+ 'plan_video_segments output as plan.sections — each section\'s dwell_ms is already '
|
|
254
|
-
+ 'set to audio_duration_ms) + compose_video_v2 (same plan output). Call plan_video_segments '
|
|
255
|
-
+ 'now, then pass its `segments` array as `plan.sections` here.'
|
|
166
|
+
+ 'session so per-section operations.duration_ms is reconciled with TTS audio_duration_ms.\n\n'
|
|
167
|
+
+ 'V6 standard chain: analyze_page(url) → synthesize_tts × N (per section) → '
|
|
168
|
+
+ 'plan_video_segments(segments with text + audio_path + visual_kind + operations) → '
|
|
169
|
+
+ 'record_url_narration(url, page_understanding, plan=…, output_paths=[…]) + '
|
|
170
|
+
+ 'compose_video_v2(segments=…, variants=[…]). Call plan_video_segments now, then retry.',
|
|
256
171
|
);
|
|
257
172
|
}
|
|
258
173
|
|
|
259
174
|
try {
|
|
260
|
-
// output_paths is REQUIRED. The legacy "default output_path master file"
|
|
261
|
-
// mode is gone — agents kept defaulting to one-call-per-section because
|
|
262
|
-
// that was the lowest-friction path. Now every recording is sliced, even
|
|
263
|
-
// single-section ones (which are just a 1-element output_paths array).
|
|
264
175
|
let resolvedOutputPaths;
|
|
265
176
|
try {
|
|
266
177
|
resolvedOutputPaths = resolveOutputPaths(validatedInput.output_paths, { workspaceDir });
|
|
@@ -271,22 +182,18 @@ export async function runRecordUrlNarrationTool({
|
|
|
271
182
|
return toolError(
|
|
272
183
|
'Error: output_paths is required — one workspace-relative mp4 path per plan.sections entry. '
|
|
273
184
|
+ 'Single-section recording is a 1-element array. Multi-section recording records once '
|
|
274
|
-
+ 'continuously (one browser session
|
|
275
|
-
+ 'boundaries. See frag.short.video_synthesis_tools.',
|
|
185
|
+
+ 'continuously (one browser session) and slices the result at section boundaries.',
|
|
276
186
|
);
|
|
277
187
|
}
|
|
278
|
-
const
|
|
279
|
-
if (resolvedOutputPaths.length !==
|
|
188
|
+
const sectionCount = (planSections(validatedInput.plan) ?? []).length;
|
|
189
|
+
if (resolvedOutputPaths.length !== sectionCount) {
|
|
280
190
|
return toolError(
|
|
281
191
|
`Error: output_paths length (${resolvedOutputPaths.length}) must match `
|
|
282
|
-
+ `plan.sections length (${
|
|
192
|
+
+ `plan.sections length (${sectionCount}). Each section produces exactly one mp4 — `
|
|
283
193
|
+ `don't pad or truncate.`,
|
|
284
194
|
);
|
|
285
195
|
}
|
|
286
196
|
|
|
287
|
-
// The master / events JSON paths are agent-optional debug artifacts.
|
|
288
|
-
// Default master to a tmp path next to the first output; events default
|
|
289
|
-
// to <master>.events.json. Agent can override either if they care.
|
|
290
197
|
const { resolvedOutputPath: masterPath, resolvedEventsPath } = resolveRecordUrlNarrationPaths({
|
|
291
198
|
workspaceDir,
|
|
292
199
|
outputPath: validatedInput.output_path,
|
|
@@ -299,6 +206,7 @@ export async function runRecordUrlNarrationTool({
|
|
|
299
206
|
const recorderOutput = await recordUrlNarrationFn({
|
|
300
207
|
url: validatedInput.url,
|
|
301
208
|
plan: validatedInput.plan,
|
|
209
|
+
page_understanding: validatedInput.page_understanding,
|
|
302
210
|
output_path: masterPath,
|
|
303
211
|
events_path: resolvedEventsPath,
|
|
304
212
|
output_paths: resolvedOutputPaths,
|
|
@@ -36,10 +36,10 @@ export const PART_RETRY_BASE_MS = 1_000; // 1s, 3s, 9s
|
|
|
36
36
|
export const TERMINAL_JOB_TTL_MS = 7 * 24 * 3600 * 1000; // sweep done/dead_letter after 7 days
|
|
37
37
|
export const HOUSEKEEPING_INTERVAL_MS = 6 * 3600 * 1000; // run housekeeping every 6h
|
|
38
38
|
// Per-PUT timeout — Node's fetch has no overall request timeout. Without this
|
|
39
|
-
// a stalled COS connection wedges the chunk loop forever (observed
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
//
|
|
39
|
+
// a stalled COS connection wedges the chunk loop forever (observed in
|
|
40
|
+
// production: a chunk PUT hung 7+ minutes with no progress and no error).
|
|
41
|
+
// 5 minutes covers slow networks for an 8MB chunk (~25kB/s floor) while
|
|
42
|
+
// still letting failures surface to the chunk-level retry loop.
|
|
43
43
|
export const PUT_REQUEST_TIMEOUT_MS = 5 * 60 * 1000;
|
|
44
44
|
|
|
45
45
|
function nowIso() { return new Date().toISOString(); }
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
function normalizeInteger(value, fallback = null) {
|
|
2
|
-
const parsed = Number.parseInt(String(value ?? ''), 10);
|
|
3
|
-
if (!Number.isFinite(parsed)) return fallback;
|
|
4
|
-
return parsed;
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
export function resolveDurationMs(phase, fallback = 0) {
|
|
8
|
-
const parsed = normalizeInteger(phase?.duration_ms, null);
|
|
9
|
-
if (parsed !== null && parsed >= 0) return parsed;
|
|
10
|
-
|
|
11
|
-
const dwellMs = normalizeInteger(phase?.dwell_ms, null);
|
|
12
|
-
if (dwellMs !== null && dwellMs >= 0) return dwellMs;
|
|
13
|
-
|
|
14
|
-
const secs = Number(phase?.duration_s);
|
|
15
|
-
if (Number.isFinite(secs) && secs >= 0) return Math.round(secs * 1000);
|
|
16
|
-
|
|
17
|
-
return fallback;
|
|
18
|
-
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import { resolveDurationMs } from './phase-duration.js';
|
|
2
|
-
import { normalizePlanPhases } from './plan-executor.js';
|
|
3
|
-
|
|
4
|
-
export function estimatePlanDurationMs(plan = {}) {
|
|
5
|
-
let phases = [];
|
|
6
|
-
try {
|
|
7
|
-
phases = normalizePlanPhases(plan);
|
|
8
|
-
} catch {
|
|
9
|
-
phases = [];
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
return phases.reduce((total, phase) => {
|
|
13
|
-
const action = String(phase?.action ?? phase?.visual_action?.type ?? '').trim().toLowerCase();
|
|
14
|
-
const durationMs = resolveDurationMs(phase, Number.NaN);
|
|
15
|
-
const dwellMs = Number(phase?.dwell_ms);
|
|
16
|
-
const transitionMs = Number(phase?.transition_ms ?? phase?.visual_action?.transition_ms);
|
|
17
|
-
const effectiveHoldMs = Number.isFinite(dwellMs) && dwellMs > 0
|
|
18
|
-
? dwellMs
|
|
19
|
-
: durationMs;
|
|
20
|
-
|
|
21
|
-
if (action === 'hold' && Number.isFinite(effectiveHoldMs) && effectiveHoldMs > 0) {
|
|
22
|
-
return total + effectiveHoldMs;
|
|
23
|
-
}
|
|
24
|
-
if (action === 'linear_scroll_during') {
|
|
25
|
-
if (Number.isFinite(effectiveHoldMs) && effectiveHoldMs > 0) return total + effectiveHoldMs;
|
|
26
|
-
return total + 1200;
|
|
27
|
-
}
|
|
28
|
-
if (action === 'scroll_to_dwell' || action === 'cursor_focus' || action === 'scroll_back') {
|
|
29
|
-
let next = total;
|
|
30
|
-
if (Number.isFinite(transitionMs) && transitionMs > 0) next += transitionMs;
|
|
31
|
-
if (Number.isFinite(effectiveHoldMs) && effectiveHoldMs > 0) next += effectiveHoldMs;
|
|
32
|
-
if (next === total) next += 1200;
|
|
33
|
-
return next;
|
|
34
|
-
}
|
|
35
|
-
if (Number.isFinite(transitionMs) && transitionMs > 0) {
|
|
36
|
-
return total + transitionMs;
|
|
37
|
-
}
|
|
38
|
-
if (Number.isFinite(durationMs) && durationMs > 0) {
|
|
39
|
-
return total + durationMs;
|
|
40
|
-
}
|
|
41
|
-
return total + 800;
|
|
42
|
-
}, 0);
|
|
43
|
-
}
|