@lightcone-ai/daemon 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ import { runRecordUrlNarrationTool } from '../../../src/tools/record-url-narrati
12
12
  import { runRenderTextToImageTool } from '../../../src/tools/render-text-to-image.js';
13
13
  import { runRenderHtmlToImageTool } from '../../../src/tools/render-html-to-image.js';
14
14
  import { runTakePageScreenshotTool } from '../../../src/tools/take-page-screenshot.js';
15
+ import { hasFreshVideoBrief } from '../../../src/video-brief-flag.js';
15
16
  import { lightconeApi, CURRENT_WORKSPACE_ID, CURRENT_AGENT_ID } from './lib/lightcone-api.js';
16
17
 
17
18
  const WORKSPACE_DIR = String(process.env.WORKSPACE_DIR ?? '');
@@ -222,6 +223,18 @@ server.tool(
222
223
  + 'visual_path/visual_kind for the real media). Call plan_video_segments now and pass its output here.'
223
224
  );
224
225
  }
226
+ if (hasNarration && !hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
227
+ return toolError(
228
+ 'compose_video_v2 refused: must send a 确认稿 (production-brief) to the user via send_message before '
229
+ + 'compositing a narration video. The system scans send_message content for a brief — a message that '
230
+ + 'BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / 通过 / 行不行) AND describes '
231
+ + 'at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / 分镜 / 配音 — no such message '
232
+ + 'was sent in the last 6 hours for this workspace+agent.\n\n'
233
+ + '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
234
+ + 'first (e.g. "我准备这么做:画面是真录屏,时长约 1 分钟,文案如下…,字幕开启,公司顺序 A→B→C,'
235
+ + '口吻是…—— 你 OK 吗?") and wait for the user to reply OK before calling compose_video_v2 again.'
236
+ );
237
+ }
225
238
  return runComposeVideoV2Tool({ ...args, workspaceDir: WORKSPACE_DIR });
226
239
  }
227
240
  );
@@ -265,6 +278,25 @@ server.tool(
265
278
  if (isBlockedCvmaxEditorVideoTool('record_url_narration')) {
266
279
  return cvmaxEditorVideoToolError('record_url_narration');
267
280
  }
281
+ // record_url_narration is part of the narration-video pipeline (paired
282
+ // with synthesize_tts + plan_video_segments + compose_video_v2), so it
283
+ // requires the same 确认稿 gate as compose_video_v2 — catching the skip
284
+ // earlier saves TTS + recording time.
285
+ if (!hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
286
+ return {
287
+ isError: true,
288
+ content: [{ type: 'text', text:
289
+ 'Error: record_url_narration refused: must send a 确认稿 (production-brief) to the user via '
290
+ + 'send_message before starting a narration recording. The system scans send_message content for '
291
+ + 'a brief — a message that BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / '
292
+ + '通过 / 行不行) AND describes at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / '
293
+ + '分镜 / 配音 — no such message was sent in the last 6 hours for this workspace+agent.\n\n'
294
+ + '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
295
+ + 'first (e.g. "我准备这么做:画面是真录屏,时长约 1 分钟,文案如下…,字幕开启,公司顺序 A→B→C,'
296
+ + '口吻是…—— 你 OK 吗?") and wait for the user to reply OK before calling record_url_narration.'
297
+ }],
298
+ };
299
+ }
268
300
  return runRecordUrlNarrationTool({
269
301
  args,
270
302
  currentWorkspaceId: CURRENT_WORKSPACE_ID,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lightcone-ai/daemon",
3
- "version": "0.17.0",
3
+ "version": "0.17.1",
4
4
  "type": "module",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -16,6 +16,7 @@ import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
16
16
  // media-tools MCP server (V5 migration). Handlers still live in
17
17
  // daemon/src/tools/ as shared modules and are imported there.
18
18
  import { runGetLibraryFileTool } from './tools/get-library-file.js';
19
+ import { markVideoBriefSent, looksLikeVideoBrief } from './video-brief-flag.js';
19
20
  import { isLeaseInvalidated, clearInvalidatedLease } from './governance-state.js';
20
21
  import { classifyLeaseWindow } from './lease-window.js';
21
22
  import {
@@ -795,6 +796,15 @@ server.tool('send_message', 'Send a message to a workspace, DM, or thread', {
795
796
  content: z.string().describe('Message content'),
796
797
  }, async ({ target, content }) => {
797
798
  const data = await api('POST', '/send', { target, content });
799
+ // Heuristic: if this looks like a video-production 确认稿 (asks permission +
800
+ // describes plan), mark a cross-process flag so media-tools' compose_video_v2
801
+ // / record_url_narration can verify a brief was actually sent before running.
802
+ // See daemon/src/video-brief-flag.js for the detection rules.
803
+ if (looksLikeVideoBrief(content) && AGENT_ID && currentWorkspaceId) {
804
+ try {
805
+ markVideoBriefSent({ workspaceId: currentWorkspaceId, agentId: AGENT_ID, content });
806
+ } catch { /* best-effort; failure to mark is non-fatal */ }
807
+ }
798
808
  return { content: [{ type: 'text', text: `Sent. messageId=${data.messageId} threadTarget=${data.threadTarget}` }] };
799
809
  });
800
810
 
package/src/mcp-config.js CHANGED
@@ -89,6 +89,11 @@ const SERVER_BACKED_MCP_SERVERS = new Set([
89
89
  'audience-research',
90
90
  'hook-pattern-library',
91
91
  'weixin-tools',
92
+ // media-tools (V1–V5 chat-bridge → media-tools migration): synthesize_tts
93
+ // hits /tts/voiceover and the CvMax editor_in_chief gate + video-brief
94
+ // checks read CURRENT_AGENT_ID / CURRENT_WORKSPACE_ID. lib/lightcone-api.js
95
+ // throws at module load without the SERVER_URL/MACHINE_API_KEY/AGENT_ID triple.
96
+ 'media-tools',
92
97
  ]);
93
98
 
94
99
  function baseEnvForServer(serverKey, { serverUrl, authToken, agentId, workspaceId, workspaceDir }) {
@@ -0,0 +1,78 @@
1
+ // Cross-process flag for "the agent has sent a video-production 确认稿 (brief)
2
+ // to the user in this workspace+agent context recently". chat-bridge's
3
+ // send_message tool writes the flag when the outgoing message heuristically
4
+ // looks like a confirmation brief; media-tools' compose_video_v2 and
5
+ // record_url_narration read the flag and refuse to proceed without it.
6
+ //
7
+ // Why a file flag instead of in-process state: send_message lives in
8
+ // chat-bridge (one stdio MCP server), compose_video_v2/record_url_narration
9
+ // live in media-tools (a different stdio MCP server, same machine). Both are
10
+ // spawned by the same codex CLI session per agent, so they share env (notably
11
+ // AGENT_ID / WORKSPACE_ID) but not memory. A flag file under ~/.lightcone is
12
+ // the simplest cross-process medium and survives short-lived MCP restarts.
13
+ //
14
+ // The heuristic is intentionally specific (asks-permission marker AND
15
+ // 2+ plan-describing markers) so casual progress reports like "已生成 TTS"
16
+ // or "画面已就绪" do NOT satisfy it. A motivated agent could game the
17
+ // detection by stuffing keywords into any send_message, but the default
18
+ // codex behavior (which silently skipped the soft prompt rule) is what we
19
+ // need to interrupt — and gaming is observable in chat history.
20
+
21
+ import { mkdirSync, statSync, utimesSync, writeFileSync, existsSync } from 'node:fs';
22
+ import path from 'node:path';
23
+ import os from 'node:os';
24
+
25
// How long a written flag keeps counting as "recent": 6 hours, in milliseconds.
const TTL_MS = 6 * 60 * 60 * 1000;
// Basename of the marker file stored inside each session directory.
const FILE_NAME = 'video-brief-sent.flag';

/**
 * Directory that holds the flag for one workspace+agent pair:
 * <home>/.lightcone/sessions/<workspaceId>/<agentId>.
 */
function flagDir(workspaceId, agentId) {
  const segments = [os.homedir(), '.lightcone', 'sessions', workspaceId, agentId];
  return path.join(...segments);
}

/** Full path of the flag file for one workspace+agent pair. */
function flagPath(workspaceId, agentId) {
  const sessionDir = flagDir(workspaceId, agentId);
  return path.join(sessionDir, FILE_NAME);
}
35
+
36
/**
 * Write (or refresh) the flag file for a workspace+agent pair.
 *
 * Does nothing when either id is falsy. Otherwise creates the session
 * directory if needed, stores the first 4096 UTF-16 code units of the
 * stringified `content` in the flag file, and sets the file's access and
 * modification times to the current time.
 *
 * @param {{ workspaceId: string, agentId: string, content: unknown }} params
 * @returns {void}
 * @throws Whatever the underlying fs calls throw; callers decide whether
 *   a failure to mark is fatal.
 */
export function markVideoBriefSent({ workspaceId, agentId, content }) {
  const hasIdentity = Boolean(workspaceId) && Boolean(agentId);
  if (!hasIdentity) {
    return;
  }

  const sessionDir = flagDir(workspaceId, agentId);
  const flagFile = flagPath(workspaceId, agentId);
  mkdirSync(sessionDir, { recursive: true });

  // Only a bounded excerpt of the message is persisted (nullish -> empty).
  const excerpt = String(content ?? '').slice(0, 4096);
  writeFileSync(flagFile, excerpt);

  // Stamp atime/mtime explicitly with this process's notion of "now".
  const stamp = new Date();
  utimesSync(flagFile, stamp, stamp);
}
45
+
46
/**
 * Report whether a flag file exists for the workspace+agent pair and was
 * modified no more than `ttlMs` milliseconds ago.
 *
 * @param {{ workspaceId?: string, agentId?: string, ttlMs?: number }} [params]
 *   `ttlMs` defaults to the module-level TTL_MS.
 * @returns {boolean} false when either id is falsy, when the file cannot be
 *   stat'ed (including when it does not exist), or when it is older than `ttlMs`.
 */
export function hasFreshVideoBrief({ workspaceId, agentId, ttlMs = TTL_MS } = {}) {
  if (!(workspaceId && agentId)) return false;

  const flagFile = flagPath(workspaceId, agentId);
  let modifiedAtMs;
  try {
    // statSync throws for a missing or unreadable file; both mean "no brief".
    modifiedAtMs = statSync(flagFile).mtimeMs;
  } catch {
    return false;
  }

  const ageMs = Date.now() - modifiedAtMs;
  return ageMs <= ttlMs;
}
55
+
56
// Permission-asking markers — the message must ask the user to decide / OK.
// A bare 确认 is too broad (it also appears in "无需确认" / "已确认硬约束" /
// "确认收到"), so the 确认-based patterns require a cue (请 / 你 / 等) in the
// SAME clause as 确认 — no clause punctuation or newline in between — and
// reject a 确认 that is directly preceded by a negating / already-done word
// (无需 / 无须 / 不需 / 不用 / 不必 / 已 / 未). An unbounded `.*` between the
// cue and 确认 would let "你好,…已确认收到" or "请注意…无需确认" pass as a
// permission ask, which is exactly what this list is meant to exclude.
const CLAUSE_GAP = '[^\\n,,。;;::!!??]*';
const NOT_NEGATED = '(?<!无需|无须|不需|不用|不必|已|未)';
const cueThenConfirm = (cue) => new RegExp(`${cue}${CLAUSE_GAP}${NOT_NEGATED}确认`);

const PERMISSION_MARKERS = [
  cueThenConfirm('请'), cueThenConfirm('你'), /确认\s*[吗??]/, cueThenConfirm('等'), /确认稿/,
  /你看/, /OK\s*吗/i, /可以吗/, /同意吗/, /通过吗/, /行不行/, /如何\?|如何?/,
];

// Plan-describing markers — must cover at least 2 different aspects of the brief.
const PLAN_MARKERS = [
  /画面/, /时长/, /文案/, /口播/, /字幕/, /顺序/, /口吻/, /分镜/, /配音/,
];

/**
 * Heuristically decide whether an outgoing message is a video-production
 * confirmation brief.
 *
 * A message qualifies only when ALL of the following hold:
 *   - it is a string of at least 20 UTF-16 code units;
 *   - at least one PERMISSION_MARKERS pattern matches (it asks for an OK);
 *   - at least two distinct PLAN_MARKERS patterns match (it describes a plan).
 *
 * @param {unknown} content Candidate message text.
 * @returns {boolean} true when the message looks like a brief.
 */
export function looksLikeVideoBrief(content) {
  if (typeof content !== 'string') return false;
  // Very short messages cannot carry both a question and a plan description.
  if (content.length < 20) return false;
  const asksPermission = PERMISSION_MARKERS.some((rx) => rx.test(content));
  if (!asksPermission) return false;
  // Each pattern is counted at most once, so this is the number of distinct
  // plan aspects mentioned, not the number of occurrences.
  const distinctPlanHits = PLAN_MARKERS.filter((rx) => rx.test(content)).length;
  return distinctPlanHits >= 2;
}