@lightcone-ai/daemon 0.15.72 → 0.15.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -56,7 +56,7 @@ export class KuaishouAdapter {
|
|
|
56
56
|
await this._clickByText('放弃');
|
|
57
57
|
await sleep(500);
|
|
58
58
|
try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
|
|
59
|
-
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]',
|
|
59
|
+
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
|
|
60
60
|
|
|
61
61
|
const { loggedIn } = await this.checkLoginStatus();
|
|
62
62
|
if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
|
|
@@ -97,7 +97,7 @@ export class KuaishouAdapter {
|
|
|
97
97
|
|
|
98
98
|
// Scroll once to trigger any lazy-rendered upload widgets, then wait
|
|
99
99
|
try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
|
|
100
|
-
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]',
|
|
100
|
+
await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
|
|
101
101
|
|
|
102
102
|
const { loggedIn } = await this.checkLoginStatus();
|
|
103
103
|
if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
|
package/package.json
CHANGED
package/src/chat-bridge.js
CHANGED
|
@@ -1430,10 +1430,10 @@ server.tool('get_library_file',
|
|
|
1430
1430
|
|
|
1431
1431
|
// ── record_url_narration ────────────────────────────────────────────────────────
|
|
1432
1432
|
server.tool('record_url_narration',
|
|
1433
|
-
'Record a silent video of a URL by
|
|
1433
|
+
'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
|
|
1434
1434
|
{
|
|
1435
1435
|
url: z.string().describe('Page URL to record'),
|
|
1436
|
-
plan: z.record(z.any()).describe('
|
|
1436
|
+
plan: z.record(z.any()).describe('A video plan: an object with `phases` (or `sections`), each a "visual beat" with `action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and `dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration). It can be hand-written or the output of plan_video_segments (whose returned segments array doubles as a valid plan).'),
|
|
1437
1437
|
output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
|
|
1438
1438
|
events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
|
|
1439
1439
|
viewport: z.object({
|
|
@@ -1468,7 +1468,7 @@ server.tool('submit_to_library',
|
|
|
1468
1468
|
target_platform: z.string().optional().describe('目标发布平台,如 xhs / douyin'),
|
|
1469
1469
|
metadata: z.record(z.any()).optional().describe('其它 metadata(brand_voice / persona / account / goal_state 等)'),
|
|
1470
1470
|
understanding: z.record(z.any()).optional().describe('analyze_page 输出'),
|
|
1471
|
-
plan: z.record(z.any()).optional().describe('
|
|
1471
|
+
plan: z.record(z.any()).optional().describe('plan_video_segments 输出(或手写的录屏 plan)'),
|
|
1472
1472
|
},
|
|
1473
1473
|
async (args) => {
|
|
1474
1474
|
if (isBlockedCvmaxEditorVideoTool('submit_to_library')) {
|
|
@@ -1529,7 +1529,7 @@ server.tool('request_approval',
|
|
|
1529
1529
|
platform: z.string().describe('Target platform, e.g. "x", "xhs", "email"'),
|
|
1530
1530
|
description: z.string().describe('Human-readable summary of what will happen if approved'),
|
|
1531
1531
|
payload: z.record(z.any()).describe('Full action parameters (content, media_urls, etc.)'),
|
|
1532
|
-
credential_id: z.string().optional().describe('Which account/credential to use.
|
|
1532
|
+
credential_id: z.string().optional().describe('Which account/credential to use. Accepts a workspace account_id, a real credential UUID, the account display name, or a role alias (主号/main/primary, 矩阵号/matrix/secondary, 测试号/test/incubator) — any value works as long as it uniquely matches one workspace account on the target platform. If publishing fails with publish_account_selection_required/ambiguous, pick a value from the returned candidates\' "selectors" list yourself instead of asking the user to re-type an account name.'),
|
|
1533
1533
|
},
|
|
1534
1534
|
async ({ action_type, platform, description, payload, credential_id }) => {
|
|
1535
1535
|
try {
|
|
@@ -45,6 +45,20 @@ function planDurationSec(audioDurationMs, bufferSec = 0.5) {
|
|
|
45
45
|
return Math.ceil(raw * 2) / 2; // round up to nearest 0.5s
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
/**
 * Run `fn` over `items` with a bounded number of concurrent workers.
 *
 * Pending items are kept in a FIFO queue; each worker repeatedly takes the
 * next item and awaits `fn(item, index)` before taking another, so at most
 * `limit` calls are in flight at any time.
 *
 * If any `fn` call rejects, the returned promise rejects with that error.
 * Workers that are still running are not cancelled and keep draining the queue.
 *
 * @param {Array<*>} items - Values to process.
 * @param {number} limit - Maximum number of concurrent `fn` calls. Values
 *   below 1 are treated as 1; a value that is not a number also falls back
 *   to 1 (sequential) instead of silently processing nothing.
 * @param {function(*, number): (Promise<*>|*)} fn - Called with `(item, index)`.
 * @returns {Promise<Array<*>>} The values returned by `fn`, in input order.
 */
async function mapWithConcurrency(items, limit, fn) {
  const queue = items.map((item, index) => ({ item, index }));
  const results = new Array(queue.length);

  // Math.min/Math.max propagate NaN, and Array.from({ length: NaN }) yields an
  // empty array — i.e. zero workers and no work done. Normalize the limit first.
  const numericLimit = Number(limit);
  const requested = Number.isNaN(numericLimit) ? 1 : Math.floor(numericLimit);
  const workerCount = Math.max(1, Math.min(requested, queue.length));

  const runWorker = async () => {
    while (queue.length > 0) {
      const { item, index } = queue.shift();
      // Store by original index so the output order matches the input order
      // regardless of which worker finishes first.
      results[index] = await fn(item, index);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
|
59
|
+
|
|
60
|
+
// Upper bound on simultaneous TTS synthesis calls; passed as the `limit`
// argument to mapWithConcurrency when segment audio is synthesized.
const TTS_CONCURRENCY = 5;
|
|
61
|
+
|
|
48
62
|
export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_id, currentWorkspaceId, api }) {
|
|
49
63
|
if (!Array.isArray(segments) || segments.length === 0) {
|
|
50
64
|
return toolError('segments must be a non-empty array.');
|
|
@@ -58,20 +72,31 @@ export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_i
|
|
|
58
72
|
const planned = [];
|
|
59
73
|
const errors = [];
|
|
60
74
|
|
|
75
|
+
// Synthesize TTS for every text-bearing segment up front, in parallel (bounded),
|
|
76
|
+
// so an N-segment plan no longer pays N sequential round-trips to the TTS API.
|
|
77
|
+
const audioResults = new Array(segments.length).fill(null);
|
|
78
|
+
const ttsJobs = segments
|
|
79
|
+
.map((seg, i) => ({ i, text: String(seg.text ?? '').trim() }))
|
|
80
|
+
.filter(job => job.text);
|
|
81
|
+
await mapWithConcurrency(ttsJobs, TTS_CONCURRENCY, async ({ i, text }) => {
|
|
82
|
+
try {
|
|
83
|
+
audioResults[i] = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
|
|
84
|
+
} catch (err) {
|
|
85
|
+
errors.push(`segments[${i}]: TTS failed — ${err.message}`);
|
|
86
|
+
audioResults[i] = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
errors.sort((a, b) => {
|
|
90
|
+
const na = Number((a.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
|
|
91
|
+
const nb = Number((b.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
|
|
92
|
+
return na - nb;
|
|
93
|
+
});
|
|
94
|
+
|
|
61
95
|
for (let i = 0; i < segments.length; i++) {
|
|
62
96
|
const seg = segments[i];
|
|
63
97
|
const text = String(seg.text ?? '').trim();
|
|
64
98
|
const kind = String(seg.visual_kind ?? 'image');
|
|
65
|
-
|
|
66
|
-
let audioResult = null;
|
|
67
|
-
if (text) {
|
|
68
|
-
try {
|
|
69
|
-
audioResult = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
|
|
70
|
-
} catch (err) {
|
|
71
|
-
errors.push(`segments[${i}]: TTS failed — ${err.message}`);
|
|
72
|
-
audioResult = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
|
|
73
|
-
}
|
|
74
|
-
}
|
|
99
|
+
const audioResult = audioResults[i];
|
|
75
100
|
|
|
76
101
|
const audioDurationMs = audioResult?.audio_duration_ms ?? 0;
|
|
77
102
|
let presentation;
|