npm - @lightcone-ai/daemon - Versions diffs - 0.17.0 → 0.17.1 - Mend

@lightcone-ai/daemon 0.17.0 → 0.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/mcp-servers/official/media-tools/index.js +32 -0
package/package.json +1 -1
package/src/chat-bridge.js +10 -0
package/src/mcp-config.js +5 -0
package/src/video-brief-flag.js +78 -0

package/mcp-servers/official/media-tools/index.js CHANGED Viewed

@@ -12,6 +12,7 @@ import { runRecordUrlNarrationTool } from '../../../src/tools/record-url-narrati
 import { runRenderTextToImageTool } from '../../../src/tools/render-text-to-image.js';
 import { runRenderHtmlToImageTool } from '../../../src/tools/render-html-to-image.js';
 import { runTakePageScreenshotTool } from '../../../src/tools/take-page-screenshot.js';
+import { hasFreshVideoBrief } from '../../../src/video-brief-flag.js';
 import { lightconeApi, CURRENT_WORKSPACE_ID, CURRENT_AGENT_ID } from './lib/lightcone-api.js';
 const WORKSPACE_DIR = String(process.env.WORKSPACE_DIR ?? '');
@@ -222,6 +223,18 @@ server.tool(
         + 'visual_path/visual_kind for the real media). Call plan_video_segments now and pass its output here.'
       );
     }
+    if (hasNarration && !hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
+      return toolError(
+        'compose_video_v2 refused: must send a 确认稿 (production-brief) to the user via send_message before '
+        + 'compositing a narration video. The system scans send_message content for a brief — a message that '
+        + 'BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / 通过 / 行不行) AND describes '
+        + 'at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / 分镜 / 配音 — no such message '
+        + 'was sent in the last 6 hours for this workspace+agent.\n\n'
+        + '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
+        + 'first (e.g. "我准备这么做：画面是真录屏，时长约 1 分钟，文案如下…，字幕开启，公司顺序 A→B→C，'
+        + '口吻是…—— 你 OK 吗？") and wait for the user to reply OK before calling compose_video_v2 again.'
+      );
+    }
     return runComposeVideoV2Tool({ ...args, workspaceDir: WORKSPACE_DIR });
   }
 );
@@ -265,6 +278,25 @@ server.tool(
     if (isBlockedCvmaxEditorVideoTool('record_url_narration')) {
       return cvmaxEditorVideoToolError('record_url_narration');
     }
+    // record_url_narration is part of the narration-video pipeline (paired
+    // with synthesize_tts + plan_video_segments + compose_video_v2), so it
+    // requires the same 确认稿 gate as compose_video_v2 — catching the skip
+    // earlier saves TTS + recording time.
+    if (!hasFreshVideoBrief({ workspaceId: CURRENT_WORKSPACE_ID, agentId: CURRENT_AGENT_ID })) {
+      return {
+        isError: true,
+        content: [{ type: 'text', text:
+          'Error: record_url_narration refused: must send a 确认稿 (production-brief) to the user via '
+          + 'send_message before starting a narration recording. The system scans send_message content for '
+          + 'a brief — a message that BOTH asks the user to confirm (确认 / 你看 / OK 吗 / 可以吗 / 同意 / '
+          + '通过 / 行不行) AND describes at least two of: 画面 / 时长 / 文案 / 口播 / 字幕 / 顺序 / 口吻 / '
+          + '分镜 / 配音 — no such message was sent in the last 6 hours for this workspace+agent.\n\n'
+          + '"已生成 TTS" / "开始合成" / progress reports do NOT count. Send a concrete confirmation draft '
+          + 'first (e.g. "我准备这么做：画面是真录屏，时长约 1 分钟，文案如下…，字幕开启，公司顺序 A→B→C，'
+          + '口吻是…—— 你 OK 吗？") and wait for the user to reply OK before calling record_url_narration.'
+        }],
+      };
+    }
     return runRecordUrlNarrationTool({
       args,
       currentWorkspaceId: CURRENT_WORKSPACE_ID,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lightcone-ai/daemon",
-  "version": "0.17.0",
+  "version": "0.17.1",
   "type": "module",
   "main": "src/index.js",
   "bin": {

package/src/chat-bridge.js CHANGED Viewed

@@ -16,6 +16,7 @@ import { runSubmitToLibraryTool } from './submit-to-library-tool.js';
 // media-tools MCP server (V5 migration). Handlers still live in
 // daemon/src/tools/ as shared modules and are imported there.
 import { runGetLibraryFileTool } from './tools/get-library-file.js';
+import { markVideoBriefSent, looksLikeVideoBrief } from './video-brief-flag.js';
 import { isLeaseInvalidated, clearInvalidatedLease } from './governance-state.js';
 import { classifyLeaseWindow } from './lease-window.js';
 import {
@@ -795,6 +796,15 @@ server.tool('send_message', 'Send a message to a workspace, DM, or thread', {
   content: z.string().describe('Message content'),
 }, async ({ target, content }) => {
   const data = await api('POST', '/send', { target, content });
+  // Heuristic: if this looks like a video-production 确认稿 (asks permission +
+  // describes plan), mark a cross-process flag so media-tools' compose_video_v2
+  // / record_url_narration can verify a brief was actually sent before running.
+  // See daemon/src/video-brief-flag.js for the detection rules.
+  if (looksLikeVideoBrief(content) && AGENT_ID && currentWorkspaceId) {
+    try {
+      markVideoBriefSent({ workspaceId: currentWorkspaceId, agentId: AGENT_ID, content });
+    } catch { /* best-effort; failure to mark is non-fatal */ }
+  }
   return { content: [{ type: 'text', text: `Sent. messageId=${data.messageId} threadTarget=${data.threadTarget}` }] };
 });

package/src/mcp-config.js CHANGED Viewed

@@ -89,6 +89,11 @@ const SERVER_BACKED_MCP_SERVERS = new Set([
   'audience-research',
   'hook-pattern-library',
   'weixin-tools',
+  // media-tools (V1–V5 chat-bridge → media-tools migration): synthesize_tts
+  // hits /tts/voiceover, and both the CvMax editor_in_chief gate and the
+  // video-brief checks read CURRENT_AGENT_ID / CURRENT_WORKSPACE_ID. lib/lightcone-api.js
+  // throws at module load without the SERVER_URL/MACHINE_API_KEY/AGENT_ID triple.
+  'media-tools',
 ]);
 function baseEnvForServer(serverKey, { serverUrl, authToken, agentId, workspaceId, workspaceDir }) {

package/src/video-brief-flag.js ADDED Viewed

@@ -0,0 +1,78 @@
+// Cross-process flag for "the agent has sent a video-production 确认稿 (brief)
+// to the user in this workspace+agent context recently". chat-bridge's
+// send_message tool writes the flag when the outgoing message heuristically
+// looks like a confirmation brief; media-tools' compose_video_v2 and
+// record_url_narration read the flag and refuse to proceed without it.
+//
+// Why a file flag instead of in-process state: send_message lives in
+// chat-bridge (one stdio MCP server), compose_video_v2/record_url_narration
+// live in media-tools (a different stdio MCP server, same machine). Both are
+// spawned by the same codex CLI session per agent, so they share env (notably
+// AGENT_ID / WORKSPACE_ID) but not memory. A flag file under ~/.lightcone is
+// the simplest cross-process medium and survives short-lived MCP restarts.
+//
+// The heuristic is intentionally specific (asks-permission marker AND
+// 2+ plan-describing markers) so casual progress reports like "已生成 TTS"
+// or "画面已就绪" do NOT satisfy it. A motivated agent could game the
+// detection by stuffing keywords into any send_message, but the default
+// codex behavior (which silently skipped the soft prompt rule) is what we
+// need to interrupt — and gaming is observable in chat history.
+import { mkdirSync, statSync, utimesSync, writeFileSync, existsSync } from 'node:fs';
+import path from 'node:path';
+import os from 'node:os';
+const TTL_MS = 6 * 60 * 60 * 1000; // 6 hours
+const FILE_NAME = 'video-brief-sent.flag';
+function flagDir(workspaceId, agentId) {
+  return path.join(os.homedir(), '.lightcone', 'sessions', workspaceId, agentId);
+}
+function flagPath(workspaceId, agentId) {
+  return path.join(flagDir(workspaceId, agentId), FILE_NAME);
+}
+export function markVideoBriefSent({ workspaceId, agentId, content }) {
+  if (!workspaceId || !agentId) return;
+  const dir = flagDir(workspaceId, agentId);
+  const p = flagPath(workspaceId, agentId);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync(p, String(content ?? '').slice(0, 4096));
+  const now = new Date();
+  utimesSync(p, now, now);
+}
+export function hasFreshVideoBrief({ workspaceId, agentId, ttlMs = TTL_MS } = {}) {
+  if (!workspaceId || !agentId) return false;
+  const p = flagPath(workspaceId, agentId);
+  if (!existsSync(p)) return false;
+  try {
+    const st = statSync(p);
+    return (Date.now() - st.mtimeMs) <= ttlMs;
+  } catch { return false; }
+}
+// Permission-asking markers — the message must ask the user to decide / OK.
+// Bare 确认 is too broad (matches "无需确认" / "已确认硬约束" / "确认收到"), so 确认 only counts as
+// 请…确认 / 你…确认 / 等…确认 (any same-line text in between), 确认 + 吗/?/？, or 确认稿.
+const PERMISSION_MARKERS = [
+  /请.*确认/, /你.*确认/, /确认\s*[吗?？]/, /等.*确认/, /确认稿/,
+  /你看/, /OK\s*吗/i, /可以吗/, /同意吗/, /通过吗/, /行不行/, /如何\?|如何？/,
+];
+// Plan-describing markers — must cover at least 2 different aspects of the brief.
+const PLAN_MARKERS = [
+  /画面/, /时长/, /文案/, /口播/, /字幕/, /顺序/, /口吻/, /分镜/, /配音/,
+];
+export function looksLikeVideoBrief(content) {
+  if (typeof content !== 'string') return false;
+  // Min length 20 (UTF-16 units) — Chinese is character-dense, so a short brief can be valid.
+  // NOTE(review): the intended example "请确认：画面/时长/字幕已定。同意吗？" is 19 chars, so `< 20` rejects it.
+  if (content.length < 20) return false;
+  const hasPermissionAsk = PERMISSION_MARKERS.some(rx => rx.test(content));
+  if (!hasPermissionAsk) return false;
+  const distinctPlanHits = PLAN_MARKERS.filter(rx => rx.test(content)).length;
+  return distinctPlanHits >= 2;
+}