@lightcone-ai/daemon 0.15.72 → 0.15.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,7 +56,7 @@ export class KuaishouAdapter {
56
56
  await this._clickByText('放弃');
57
57
  await sleep(500);
58
58
  try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
59
- await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 45000);
59
+ await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
60
60
 
61
61
  const { loggedIn } = await this.checkLoginStatus();
62
62
  if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
@@ -97,7 +97,7 @@ export class KuaishouAdapter {
97
97
 
98
98
  // Scroll once to trigger any lazy-rendered upload widgets, then wait
99
99
  try { await this._cdp.send('Runtime.evaluate', { expression: 'window.scrollTo(0, 300)', returnByValue: false }); } catch {}
100
- await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 45000);
100
+ await this._waitForSelector('input[type="file"], [class*="upload"], [class*="Upload"]', 120000);
101
101
 
102
102
  const { loggedIn } = await this.checkLoginStatus();
103
103
  if (!loggedIn) throw new Error('LOGIN_EXPIRED: 快手登录已过期,请重新扫码连接');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.15.72",
3
+ "version": "0.15.73",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1430,10 +1430,10 @@ server.tool('get_library_file',
1430
1430
 
1431
1431
  // ── record_url_narration ────────────────────────────────────────────────────────
1432
1432
  server.tool('record_url_narration',
1433
- 'Record a silent video of a URL by orchestrating Xvfb + Chromium + ffmpeg, driven by a video plan. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + ffmpeg (x11grab) + Chromium installed. macOS / Windows daemons will fail at startup.',
1433
+ 'Record a silent video of a URL by driving Chromium on an Xvfb display and capturing it with Playwright recordVideo, driven by a video plan; ffmpeg then transcodes the recording to mp4. Outputs a silent mp4 that can be passed to compose_video_v2 as a video-kind segment with an audio_path for narration.\n\nUse this as the canonical recording step for URL-narration videos. Falls back: if the page needs interactions outside the visual_action vocabulary (clicks, waits, OCR loops), use Monitor (Bash) with custom Playwright instead.\n\nRuntime requirements: this tool only works on a Linux daemon machine with Xvfb + Chromium + ffmpeg installed (ffmpeg is used to transcode the recording to mp4; no x11grab device support needed). macOS / Windows daemons will fail at startup.',
1434
1434
  {
1435
1435
  url: z.string().describe('Page URL to record'),
1436
- plan: z.record(z.any()).describe('Must be the full output from detail_sections (not plan_video). detail_sections output includes detail_sections_version, sections[], audio metadata, and dwell_ms per phase.'),
1436
+ plan: z.record(z.any()).describe('A video plan: an object with `phases` (or `sections`), each a "visual beat" with `action` (scroll_to_dwell / linear_scroll_during / scroll_back / hold / ...), a target (`target_y` or `focus_region:[y1,y2]`) for scroll-type actions, and `dwell_ms` (how long to hold that beat — should match the segment\'s TTS duration). It can be hand-written or the output of plan_video_segments (whose returned segments array doubles as a valid plan).'),
1437
1437
  output_path: z.string().optional().describe('Workspace-relative output mp4 path. Default tmp/wx3_video/recorded-{ts}.mp4'),
1438
1438
  events_path: z.string().optional().describe('Workspace-relative events.json path. Default ${output_path}.events.json'),
1439
1439
  viewport: z.object({
@@ -1468,7 +1468,7 @@ server.tool('submit_to_library',
1468
1468
  target_platform: z.string().optional().describe('目标发布平台,如 xhs / douyin'),
1469
1469
  metadata: z.record(z.any()).optional().describe('其它 metadata(brand_voice / persona / account / goal_state 等)'),
1470
1470
  understanding: z.record(z.any()).optional().describe('analyze_page 输出'),
1471
- plan: z.record(z.any()).optional().describe('plan_video / detail_sections 输出'),
1471
+ plan: z.record(z.any()).optional().describe('plan_video_segments 输出(或手写的录屏 plan)'),
1472
1472
  },
1473
1473
  async (args) => {
1474
1474
  if (isBlockedCvmaxEditorVideoTool('submit_to_library')) {
@@ -1529,7 +1529,7 @@ server.tool('request_approval',
1529
1529
  platform: z.string().describe('Target platform, e.g. "x", "xhs", "email"'),
1530
1530
  description: z.string().describe('Human-readable summary of what will happen if approved'),
1531
1531
  payload: z.record(z.any()).describe('Full action parameters (content, media_urls, etc.)'),
1532
- credential_id: z.string().optional().describe('Which account/credential to use. For publishing, prefer a workspace account_id or real credential UUID. Role aliases like primary/test are accepted only if they uniquely match a workspace account.'),
1532
+ credential_id: z.string().optional().describe('Which account/credential to use. Accepts a workspace account_id, a real credential UUID, the account display name, or a role alias (主号/main/primary, 矩阵号/matrix/secondary, 测试号/test/incubator) any value works as long as it uniquely matches one workspace account on the target platform. If publishing fails with publish_account_selection_required/ambiguous, pick a value from the returned candidates\' "selectors" list yourself instead of asking the user to re-type an account name.'),
1533
1533
  },
1534
1534
  async ({ action_type, platform, description, payload, credential_id }) => {
1535
1535
  try {
@@ -45,6 +45,20 @@ function planDurationSec(audioDurationMs, bufferSec = 0.5) {
45
45
  return Math.ceil(raw * 2) / 2; // round up to nearest 0.5s
46
46
  }
47
47
 
/**
 * Run `fn` over `items` with a bounded number of concurrent workers.
 *
 * Items are dispatched in FIFO order: each worker takes the next pending
 * item as soon as its previous call settles, so at most `limit` calls to
 * `fn` are in flight at any time.
 *
 * @param {Array} items - Inputs to process. A snapshot is taken up front, so
 *   later mutation of the caller's array does not affect dispatch.
 * @param {number} limit - Maximum number of concurrent `fn` calls. Values
 *   below 1 are clamped to 1; a value that is not a number also falls back
 *   to 1 so that every item is still processed.
 * @param {(item: any, index: number) => any} fn - Callback invoked with each
 *   item and its index in `items`; may return a promise.
 * @returns {Promise<Array>} Resolves with the values returned by `fn`, in
 *   input order. Rejects with the first error raised by `fn`.
 */
async function mapWithConcurrency(items, limit, fn) {
  const snapshot = Array.from(items);
  const results = new Array(snapshot.length);

  // Math.min/Math.max propagate NaN, and Array.from({ length: NaN }) creates
  // zero workers — which would silently skip every item. Normalize first.
  const requested = Math.floor(Number(limit));
  const bounded = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(bounded, snapshot.length));

  let cursor = 0;
  let failed = false;

  const runWorker = async () => {
    // Once any call has failed the returned promise is already rejected, so
    // stop handing out new items instead of working on in the background.
    while (!failed && cursor < snapshot.length) {
      const index = cursor;
      cursor += 1;
      try {
        results[index] = await fn(snapshot[index], index);
      } catch (err) {
        failed = true;
        throw err;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, runWorker));
  return results;
}
59
+
60
+ const TTS_CONCURRENCY = 5;
61
+
48
62
  export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_id, currentWorkspaceId, api }) {
49
63
  if (!Array.isArray(segments) || segments.length === 0) {
50
64
  return toolError('segments must be a non-empty array.');
@@ -58,20 +72,31 @@ export async function runPlanVideoSegmentsTool({ segments, workspace_id, voice_i
58
72
  const planned = [];
59
73
  const errors = [];
60
74
 
75
+ // Synthesize TTS for every text-bearing segment up front, in parallel (bounded),
76
+ // so an N-segment plan no longer pays N sequential round-trips to the TTS API.
77
+ const audioResults = new Array(segments.length).fill(null);
78
+ const ttsJobs = segments
79
+ .map((seg, i) => ({ i, text: String(seg.text ?? '').trim() }))
80
+ .filter(job => job.text);
81
+ await mapWithConcurrency(ttsJobs, TTS_CONCURRENCY, async ({ i, text }) => {
82
+ try {
83
+ audioResults[i] = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
84
+ } catch (err) {
85
+ errors.push(`segments[${i}]: TTS failed — ${err.message}`);
86
+ audioResults[i] = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
87
+ }
88
+ });
89
+ errors.sort((a, b) => {
90
+ const na = Number((a.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
91
+ const nb = Number((b.match(/segments\[(\d+)\]/) ?? [])[1] ?? 0);
92
+ return na - nb;
93
+ });
94
+
61
95
  for (let i = 0; i < segments.length; i++) {
62
96
  const seg = segments[i];
63
97
  const text = String(seg.text ?? '').trim();
64
98
  const kind = String(seg.visual_kind ?? 'image');
65
-
66
- let audioResult = null;
67
- if (text) {
68
- try {
69
- audioResult = await synthesizeSegmentTts(text, { workspace_id: targetWorkspaceId, voice_id, api });
70
- } catch (err) {
71
- errors.push(`segments[${i}]: TTS failed — ${err.message}`);
72
- audioResult = { audio_path: null, audio_duration_ms: 3000 }; // fallback estimate
73
- }
74
- }
99
+ const audioResult = audioResults[i];
75
100
 
76
101
  const audioDurationMs = audioResult?.audio_duration_ms ?? 0;
77
102
  let presentation;