@lightcone-ai/daemon 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -12,6 +12,7 @@ import { runRecordUrlNarrationTool } from '../../../src/tools/record-url-narrati
|
|
|
12
12
|
import { runRenderTextToImageTool } from '../../../src/tools/render-text-to-image.js';
|
|
13
13
|
import { runRenderHtmlToImageTool } from '../../../src/tools/render-html-to-image.js';
|
|
14
14
|
import { runTakePageScreenshotTool } from '../../../src/tools/take-page-screenshot.js';
|
|
15
|
+
import { hasFreshVideoBrief } from '../../../src/video-brief-flag.js';
|
|
15
16
|
import { lightconeApi, CURRENT_WORKSPACE_ID, CURRENT_AGENT_ID } from './lib/lightcone-api.js';
|
|
16
17
|
|
|
17
18
|
const WORKSPACE_DIR = String(process.env.WORKSPACE_DIR ?? '');
|
|
@@ -222,6 +223,18 @@ server.tool(
|
|
|
222
223
|
+ 'visual_path/visual_kind for the real media). Call plan_video_segments now and pass its output here.'
|
|
223
224
|
);
|
|
224
225
|
}
|
|
226
|
+
if (hasNarration && !hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
|
|
227
|
+
return toolError(
|
|
228
|
+
'compose_video_v2 refused: must send a 确认稿 (production-brief) to the user via send_message before '
|
|
229
|
+
+ 'compositing a narration video. The system scans send_message content for a brief — a message that '
|
|
230
|
+
+ 'BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / 通过 / 行不行) AND describes '
|
|
231
|
+
+ 'at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / 分镜 / 配音 — no such message '
|
|
232
|
+
+ 'was sent in the last 6 hours for this workspace+agent.\n\n'
|
|
233
|
+
+ '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
|
|
234
|
+
+ 'first (e.g. "我准备这么做:画面是真录屏,时长约 1 分钟,文案如下…,字幕开启,公司顺序 A→B→C,'
|
|
235
|
+
+ '口吻是…—— 你 OK 吗?") and wait for the user to reply OK before calling compose_video_v2 again.'
|
|
236
|
+
);
|
|
237
|
+
}
|
|
225
238
|
return runComposeVideoV2Tool({ ...args, workspaceDir: WORKSPACE_DIR });
|
|
226
239
|
}
|
|
227
240
|
);
|
|
@@ -265,6 +278,25 @@ server.tool(
|
|
|
265
278
|
if (isBlockedCvmaxEditorVideoTool('record_url_narration')) {
|
|
266
279
|
return cvmaxEditorVideoToolError('record_url_narration');
|
|
267
280
|
}
|
|
281
|
+
// record_url_narration is part of the narration-video pipeline (paired
|
|
282
|
+
// with synthesize_tts + plan_video_segments + compose_video_v2), so it
|
|
283
|
+
// requires the same 确认稿 gate as compose_video_v2 — catching the skip
|
|
284
|
+
// earlier saves TTS + recording time.
|
|
285
|
+
if (!hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
|
|
286
|
+
return {
|
|
287
|
+
isError: true,
|
|
288
|
+
content: [{ type: 'text', text:
|
|
289
|
+
'Error: record_url_narration refused: must send a 确认稿 (production-brief) to the user via '
|
|
290
|
+
+ 'send_message before starting a narration recording. The system scans send_message content for '
|
|
291
|
+
+ 'a brief — a message that BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / '
|
|
292
|
+
+ '通过 / 行不行) AND describes at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / '
|
|
293
|
+
+ '分镜 / 配音 — no such message was sent in the last 6 hours for this workspace+agent.\n\n'
|
|
294
|
+
+ '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
|
|
295
|
+
+ 'first (e.g. "我准备这么做:画面是真录屏,时长约 1 分钟,文案如下…,字幕开启,公司顺序 A→B→C,'
|
|
296
|
+
+ '口吻是…—— 你 OK 吗?") and wait for the user to reply OK before calling record_url_narration.'
|
|
297
|
+
}],
|
|
298
|
+
};
|
|
299
|
+
}
|
|
268
300
|
return runRecordUrlNarrationTool({
|
|
269
301
|
args,
|
|
270
302
|
currentWorkspaceId: CURRENT_WORKSPACE_ID,
|
package/package.json
CHANGED
package/src/chat-bridge.js
CHANGED
|
@@ -16,6 +16,7 @@ import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
|
|
|
16
16
|
// media-tools MCP server (V5 migration). Handlers still live in
|
|
17
17
|
// daemon/src/tools/ as shared modules and are imported there.
|
|
18
18
|
import { runGetLibraryFileTool } from './tools/get-library-file.js';
|
|
19
|
+
import { markVideoBriefSent, looksLikeVideoBrief } from './video-brief-flag.js';
|
|
19
20
|
import { isLeaseInvalidated, clearInvalidatedLease } from './governance-state.js';
|
|
20
21
|
import { classifyLeaseWindow } from './lease-window.js';
|
|
21
22
|
import {
|
|
@@ -795,6 +796,15 @@ server.tool('send_message', 'Send a message to a workspace, DM, or thread', {
|
|
|
795
796
|
content: z.string().describe('Message content'),
|
|
796
797
|
}, async ({ target, content }) => {
|
|
797
798
|
const data = await api('POST', '/send', { target, content });
|
|
799
|
+
// Heuristic: if this looks like a video-production 确认稿 (asks permission +
|
|
800
|
+
// describes plan), mark a cross-process flag so media-tools' compose_video_v2
|
|
801
|
+
// / record_url_narration can verify a brief was actually sent before running.
|
|
802
|
+
// See daemon/src/video-brief-flag.js for the detection rules.
|
|
803
|
+
if (looksLikeVideoBrief(content) && AGENT_ID && currentWorkspaceId) {
|
|
804
|
+
try {
|
|
805
|
+
markVideoBriefSent({ workspaceId: currentWorkspaceId, agentId: AGENT_ID, content });
|
|
806
|
+
} catch { /* best-effort; failure to mark is non-fatal */ }
|
|
807
|
+
}
|
|
798
808
|
return { content: [{ type: 'text', text: `Sent. messageId=${data.messageId} threadTarget=${data.threadTarget}` }] };
|
|
799
809
|
});
|
|
800
810
|
|
package/src/mcp-config.js
CHANGED
|
@@ -89,6 +89,11 @@ const SERVER_BACKED_MCP_SERVERS = new Set([
|
|
|
89
89
|
'audience-research',
|
|
90
90
|
'hook-pattern-library',
|
|
91
91
|
'weixin-tools',
|
|
92
|
+
// media-tools (V1–V5 chat-bridge → media-tools migration): synthesize_tts
|
|
93
|
+
// hits /tts/voiceover and the CvMax editor_in_chief gate + video-brief
|
|
94
|
+
// checks read CURRENT_AGENT_ID / CURRENT_WORKSPACE_ID. lib/lightcone-api.js
|
|
95
|
+
// throws at module load without the SERVER_URL/MACHINE_API_KEY/AGENT_ID triple.
|
|
96
|
+
'media-tools',
|
|
92
97
|
]);
|
|
93
98
|
|
|
94
99
|
function baseEnvForServer(serverKey, { serverUrl, authToken, agentId, workspaceId, workspaceDir }) {
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// Cross-process flag for "the agent has sent a video-production 确认稿 (brief)
|
|
2
|
+
// to the user in this workspace+agent context recently". chat-bridge's
|
|
3
|
+
// send_message tool writes the flag when the outgoing message heuristically
|
|
4
|
+
// looks like a confirmation brief; media-tools' compose_video_v2 and
|
|
5
|
+
// record_url_narration read the flag and refuse to proceed without it.
|
|
6
|
+
//
|
|
7
|
+
// Why a file flag instead of in-process state: send_message lives in
|
|
8
|
+
// chat-bridge (one stdio MCP server), compose_video_v2/record_url_narration
|
|
9
|
+
// live in media-tools (a different stdio MCP server, same machine). Both are
|
|
10
|
+
// spawned by the same codex CLI session per agent, so they share env (notably
|
|
11
|
+
// AGENT_ID / WORKSPACE_ID) but not memory. A flag file under ~/.lightcone is
|
|
12
|
+
// the simplest cross-process medium and survives short-lived MCP restarts.
|
|
13
|
+
//
|
|
14
|
+
// The heuristic is intentionally specific (asks-permission marker AND
|
|
15
|
+
// 2+ plan-describing markers) so casual progress reports like "已生成 TTS"
|
|
16
|
+
// or "画面已就绪" do NOT satisfy it. A motivated agent could game the
|
|
17
|
+
// detection by stuffing keywords into any send_message, but the default
|
|
18
|
+
// codex behavior (which silently skipped the soft prompt rule) is what we
|
|
19
|
+
// need to interrupt — and gaming is observable in chat history.
|
|
20
|
+
|
|
21
|
+
import { mkdirSync, statSync, utimesSync, writeFileSync, existsSync } from 'node:fs';
|
|
22
|
+
import path from 'node:path';
|
|
23
|
+
import os from 'node:os';
|
|
24
|
+
|
|
25
|
+
// A brief flag older than this (six hours, in milliseconds) no longer counts.
const TTL_MS = 6 * 60 * 60 * 1000; // 6 hours
// Name of the marker file written inside the per-workspace/per-agent directory.
const FILE_NAME = 'video-brief-sent.flag';

/**
 * Directory that holds the flag for one workspace+agent pair:
 * <home>/.lightcone/sessions/<workspaceId>/<agentId>.
 */
function flagDir(workspaceId, agentId) {
  const sessionsRoot = path.join(os.homedir(), '.lightcone', 'sessions');
  return path.join(sessionsRoot, workspaceId, agentId);
}

/** Full path of the flag file for one workspace+agent pair. */
function flagPath(workspaceId, agentId) {
  return path.join(flagDir(workspaceId, agentId), FILE_NAME);
}

/**
 * Record that a brief was just sent for this workspace+agent.
 *
 * Creates the flag directory if needed, stores up to the first 4096
 * characters of `content` in the flag file, and stamps the file's
 * access/modification times with the current time. Does nothing when either
 * id is missing. Filesystem errors propagate to the caller.
 *
 * @param {{ workspaceId: string, agentId: string, content?: unknown }} params
 * @returns {void}
 */
export function markVideoBriefSent({ workspaceId, agentId, content }) {
  if (!(workspaceId && agentId)) {
    return;
  }
  const target = flagPath(workspaceId, agentId);
  mkdirSync(flagDir(workspaceId, agentId), { recursive: true });
  const snippet = String(content ?? '').slice(0, 4096);
  writeFileSync(target, snippet);
  // Set the timestamps explicitly; freshness is judged from the file's mtime.
  const stamp = new Date();
  utimesSync(target, stamp, stamp);
}

/**
 * Report whether a flag file exists for this workspace+agent and was modified
 * no more than `ttlMs` milliseconds ago.
 *
 * Returns false when either id is missing, when the file does not exist, or
 * when reading its metadata fails.
 *
 * @param {{ workspaceId?: string, agentId?: string, ttlMs?: number }} [params]
 * @returns {boolean}
 */
export function hasFreshVideoBrief({ workspaceId, agentId, ttlMs = TTL_MS } = {}) {
  if (!(workspaceId && agentId)) {
    return false;
  }
  const target = flagPath(workspaceId, agentId);
  if (!existsSync(target)) {
    return false;
  }
  try {
    const { mtimeMs } = statSync(target);
    const ageMs = Date.now() - mtimeMs;
    return ageMs <= ttlMs;
  } catch {
    // The file may vanish between the existence check and the stat call.
    return false;
  }
}
|
|
55
|
+
|
|
56
|
+
// Permission-asking markers — the message must ask the user to decide / OK.
// A bare 确认 is too broad (it appears in "无需确认" / "已确认硬约束" / "确认收到"),
// so only specific permission-ask shapes are accepted: 请…确认, 你…确认,
// 确认 followed by 吗 or a question mark, 等…确认, and 确认稿.
//
// Question marks are matched through an explicit character class covering the
// ASCII "?" and the full-width "?" (U+FF1F). A bare "?" after a literal in a
// regex is a quantifier, not a question mark: /如何?/ would match any text
// containing "如" (e.g. "文案如下"), letting ordinary progress reports pass.
const PERMISSION_MARKERS = [
  /请.*确认/,
  /你.*确认/,
  /确认\s*[吗?\uFF1F]/,
  /等.*确认/,
  /确认稿/,
  /你看/,
  /OK\s*吗/i,
  /可以吗/,
  /同意吗/,
  /通过吗/,
  /行不行/,
  /如何[?\uFF1F]/,
];

// Plan-describing markers — a brief must touch at least two distinct aspects.
const PLAN_MARKERS = [
  /画面/, /时长/, /文案/, /口播/, /字幕/, /顺序/, /口吻/, /分镜/, /配音/,
];

/**
 * Heuristically decide whether an outgoing message is a video-production
 * confirmation brief.
 *
 * A message qualifies when it is a string of at least 20 UTF-16 code units,
 * matches at least one permission-asking marker, and matches at least two
 * distinct plan-describing markers.
 *
 * @param {unknown} content - Message text; non-strings never qualify.
 * @returns {boolean} True when the message looks like a confirmation brief.
 */
export function looksLikeVideoBrief(content) {
  if (typeof content !== 'string') return false;
  // Very short messages cannot plausibly both describe a plan and ask for
  // approval. Chinese is character-dense, so the floor is kept low.
  if (content.length < 20) return false;
  const asksPermission = PERMISSION_MARKERS.some((rx) => rx.test(content));
  if (!asksPermission) return false;
  const distinctPlanHits = PLAN_MARKERS.filter((rx) => rx.test(content)).length;
  return distinctPlanHits >= 2;
}
|