npm - @aiyiran/myclaw - Versions diffs - 1.0.243 → 1.0.245 - Mend

@aiyiran/myclaw 1.0.243 → 1.0.245

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/assets/myclaw-artifacts.js +15 -5
package/assets/myclaw-inject.js +254 -21
package/package.json +1 -1
package/skills/yiran-skill-media/SKILL.md +55 -20
package/skills/yiran-skill-media/config.json +36 -9
package/skills/yiran-skill-media/scripts/generate.py +43 -8
package/skills/yiran-skill-media/scripts/i2v.sh +47 -0
package/skills/yiran-skill-media/scripts/providers/__init__.py +8 -0
package/skills/yiran-skill-media/scripts/providers/jimeng_image.py +158 -0
package/skills/yiran-skill-media/scripts/providers/jimeng_video.py +115 -0
package/skills/yiran-skill-media/scripts/providers/minimax_video.py +115 -0
package/skills/yiran-skill-media/scripts/video.sh +47 -0

package/assets/myclaw-artifacts.js CHANGED Viewed

@@ -290,17 +290,27 @@
     });
   }
+  function fetchArtifactsFromServerAPI(wsPrefix) {
+    var url = window.location.origin + '/cmd/api/artifacts?workspace=' + encodeURIComponent(wsPrefix) + '&t=' + Date.now();
+    return fetch(url).then(function (res) {
+      if (!res.ok) throw new Error('HTTP ' + res.status);
+      return res.json();
+    });
+  }
   function fetchArtifacts(contentEl) {
+    // cachedConfig 未就绪时跳过，等 initConfig 完成后的 startPolling 重试
+    if (!cachedConfig) return;
     var wsPrefix = getWorkspaceId();
     var fetcher;
     if (envInfo && envInfo.remote) {
-      // 远程环境 → 走 CDN
-      fetcher = fetchArtifactsFromCDN(wsPrefix);
+      // 远程服务器 → 走 /cmd/api（服务器直接提供 JSON）
+      fetcher = fetchArtifactsFromServerAPI(wsPrefix);
     } else {
-      // 本地环境 → 优先本地 API，失败降级 CDN
-      fetcher = fetchArtifactsFromLocalAPI(wsPrefix)
-        .catch(function () { return fetchArtifactsFromCDN(wsPrefix); });
+      // 本地环境 → 走 CDN
+      fetcher = fetchArtifactsFromCDN(wsPrefix);
     }
     fetcher

package/assets/myclaw-inject.js CHANGED Viewed

@@ -27,6 +27,7 @@
   var committedText = "";     // 已经提交到 textarea 的文字（上一轮累积）
   var cursorOffset = 0;       // 录音开始时光标在 textarea 中的位置
   var injected = false;
+  var stopping = false;      // 正在等待最终识别结果（stopVoice 的 2 秒窗口）
   // ═══ 1. 右下角版本标签（点击测试麦克风） ═══
   function createVersionBar() {
@@ -323,7 +324,8 @@
       '</svg>',
     ].join("");
-    btn.addEventListener("click", function () {
+btn.addEventListener("click", function () {
+      console.log("[myclaw-voice] 按钮点击, recording=", recording);
       if (recording) {
         stopVoice();
       } else {
@@ -367,8 +369,8 @@
     voice = new window.VoiceInput({
       onResult: function (text) {
-        // 如果用户已点停止，忽略异步返回的残留结果（防止文字重复）
-        if (!recording) return;
+        // 完全停止后才忽略；stopping 期间（2秒等待窗口）仍允许写入
+        if (!recording && !stopping) return;
         // 讯飞实时返回识别文字，替换到光标位置
         pendingText = text;
         updateTextAtCursor(pendingText);
@@ -377,7 +379,7 @@
       onStatusChange: function (oldStatus, newStatus) {
         console.log("[myclaw-voice] \u72b6\u6001:", oldStatus, "->", newStatus);
-        if (newStatus === "idle" && recording) {
+        if (newStatus === "idle" && recording && !stopping) {
           // 讯飞 60 秒断开，但用户没有点停止 → 自动重连
           // 把当前识别的文字提交，并更新光标位置
           committedText = getTextareaValue();
@@ -435,23 +437,54 @@
     console.log("[myclaw-voice] \u5f00\u59cb\u5f55\u97f3\uff0c\u5149\u6807\u4f4d\u7f6e:", cursorOffset);
   }
-  function stopVoice() {
-    // 先关标志位，阻止 onResult 异步回调继续写入（核心防重复）
+  /**
+   * End voice input: update the UI immediately, then wait 2 seconds before
+   * releasing the recording resources so late recognition results can still
+   * be written into the textarea.
+   * A call made while `recording` is false returns immediately without
+   * invoking onDone; this includes calls during the 2-second window, because
+   * `recording` is already false by then.
+   * @param {Function} [onDone] - callback run once the wait has finished (e.g. send)
+   */
+  var voiceStopTimer = null;
+  function stopVoice(onDone) {
+    console.log("[myclaw-voice] stopVoice called, recording=", recording, "onDone=", onDone ? "yes" : "no");
+    if (!recording) {
+      console.log("[myclaw-voice] stopVoice early return — not recording");
+      return;
+    }
+    // Enter the "stopping" state: the UI updates right away, but onResult may still write text for 2 seconds
+    stopping = true;
     recording = false;
     updateButtonUI();
+    console.log("[myclaw-voice] stopping=true, UI updated, starting 2s timer...");
-    // 立即快照当前 textarea 值作为最终文字
-    var finalText = getTextareaValue();
-    if (voice) {
-      voice.stop();
+    // Release the recording resources after 2 seconds (gives the recognizer time to push its remaining results)
+    if (voiceStopTimer) {
+      console.log("[myclaw-voice] clearing previous timer");
+      clearTimeout(voiceStopTimer);
     }
+    voiceStopTimer = setTimeout(function () {
+      voiceStopTimer = null;
+      stopping = false;
+      console.log("[myclaw-voice] 2s timer fired, closing resources...");
+      // Snapshot the textarea value now (onResult may have updated it during the 2-second window)
+      var finalText = getTextareaValue();
+      console.log("[myclaw-voice] finalText:", JSON.stringify(finalText.substring(0, 50)));
+      if (voice) {
+        console.log("[myclaw-voice] calling voice.stop()");
+        voice.stop();
+      }
+      committedText = finalText;
+      pendingText = "";
-    // 用快照覆盖，确保后续异步返回不影响
-    committedText = finalText;
-    pendingText = "";
+      // Run the completion callback (e.g. send) now that the wait is over
+      if (onDone) {
+        console.log("[myclaw-voice] executing onDone callback...");
+        onDone();
+      }
-    console.log("[myclaw-voice] \u505c\u6b62\u5f55\u97f3");
+      console.log("[myclaw-voice] 停止录音完成");
+    }, 2000);
   }
   // ═══ 5. DOM 注入 ═══
@@ -522,6 +555,28 @@
   }
   // ═══ 6. 拦截发送按钮 ═══
+  /**
+   * 拦截 Enter 键：语音态下按回车 → 等待 2 秒后发送
+   */
+  function hookVoiceEnter() {
+    document.addEventListener("keydown", function (e) {
+      if (e.key !== "Enter") return;
+      if (!recording) return;
+      // 语音录入中，无论焦点在哪里（textarea 或语音按钮），Enter 统一触发"停止并发送"
+      e.preventDefault();
+      e.stopPropagation();
+      console.log("[myclaw-voice] Enter按下, recording=", recording);
+      stopVoice(function () {
+        console.log("[myclaw-voice] Enter stopVoice callback firing...");
+        var sendBtn = document.querySelector("button.chat-send-btn, button[title=\"Send\"]");
+        if (sendBtn) sendBtn.click();
+      });
+    }, true);
+  }
   var sendHooked = false;
   function hookSendButton() {
@@ -536,9 +591,17 @@
       var text = getTextareaValue();
       if (!text || !text.trim()) return;  // 空文字不处理
-      // 1) 停止语音输入
+      // 1) 停止语音输入（等待 2 秒后关闭，关闭后触发发送）
       if (recording) {
-        stopVoice();
+        e.preventDefault();
+        e.stopPropagation();
+        console.log("[myclaw-voice] 发送按钮点击(语音态), recording=", recording);
+        stopVoice(function () {
+          console.log("[myclaw-voice] 发送按钮 stopVoice callback firing...");
+          var sendBtn = document.querySelector("button.chat-send-btn, button[title=\"Send\"]");
+          if (sendBtn) sendBtn.click();
+        });
+        return;
       }
       // 2) 复制到剪贴板
@@ -546,7 +609,6 @@
         navigator.clipboard.writeText(text).then(function () {
           console.log("[myclaw-send] 📋 已复制到剪贴板:", text.substring(0, 50) + (text.length > 50 ? "..." : ""));
         }).catch(function () {
-          // fallback: 老方法
           fallbackCopy(text);
         });
       } catch (ex) {
@@ -554,9 +616,8 @@
       }
       // 3) 让原生 click 继续走（发送消息）
-      //    不 preventDefault，不 stopPropagation
-      // 4) 延迟清空 textarea（等原生 handler 读完值后再清）
+      // 4) 延迟清空 textarea
       setTimeout(function () {
         setTextareaValue("");
         committedText = "";
@@ -853,7 +914,7 @@
       { label: "\uD83D\uDCAC \u6DFB\u52A0\u5BF9\u8BDD", desc: "\u6253\u5F00\u5DF2\u6709\u4F19\u4F34\u7684\u5BF9\u8BDD\u7A97\u53E3", hasInput: true, inputTitle: "\u6DFB\u52A0\u5BF9\u8BDD", placeholder: "\u8F93\u5165\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 kakaxi", hint: "\u8F93\u5165\u4F60\u7684\u4F19\u4F34\u7684\u540D\u79F0\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u6253\u5F00\u5BF9\u8BDD\u7A97\u53E3", cmd: "mc tui {name}", color: "#10b981" },
       { label: "\uD83D\uDE80 \u5347\u7EA7", desc: "\u5347\u7EA7 myclaw \u5230\u6700\u65B0\u7248\u672C", hasInput: false, cmd: "mc up", color: "#8b5cf6" },
       { label: "\uD83D\uDD04 \u91CD\u542F", desc: "\u91CD\u542F\u670D\u52A1\uFF0C\u4FEE\u590D\u5927\u591A\u6570\u95EE\u9898", hasInput: false, cmd: "mc restart", color: "#ef4444" },
-      { label: "\uD83E\uDD1D \u65B0\u4F19\u4F34", desc: "\u521B\u5EFA\u4E00\u4E2A\u65B0\u7684 AI \u4F19\u4F34", hasInput: true, inputTitle: "\u65B0\u5EFA\u4F19\u4F34", placeholder: "\u8F93\u5165\u65B0\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 my-cat", hint: "\u7ED9\u4F60\u7684\u65B0 AI \u4F19\u4F34\u8D77\u4E2A\u540D\u5B57\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u81EA\u52A8\u521B\u5EFA", cmd: "mc tui {name}", color: "#3b82f6" },
+      { label: "\uD83E\uDD1D \u65B0\u4F19\u4F34", desc: "\u521B\u5EFA\u4E00\u4E2A\u65B0\u7684 AI \u4F19\u4F34", hasInput: true, inputTitle: "\u65B0\u5EFA\u4F19\u4F34", placeholder: "\u8F93\u5165\u65B0\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 my-cat", hint: "\u7ED9\u4F60\u7684\u65B0 AI \u4F19\u4F34\u8D77\u4E2A\u540D\u5B57\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u81EA\u52A8\u521B\u5EFA", cmd: "mc new {name}", color: "#3b82f6" },
     ];
     btns.forEach(function (item) {
@@ -908,6 +969,175 @@
       form.appendChild(row);
     });
+    // ── 删除伙伴按钮 ──
+    var delRow = document.createElement("div");
+    delRow.style.cssText = [
+      "padding:10px 14px",
+      "background:#252536",
+      "border-radius:6px",
+      "cursor:pointer",
+      "transition:background 0.15s",
+      "display:flex",
+      "align-items:center",
+      "gap:10px",
+    ].join(";");
+    delRow.onmouseenter = function () { delRow.style.background = "#2f2f4a"; };
+    delRow.onmouseleave = function () { delRow.style.background = "#252536"; };
+    var delBar = document.createElement("div");
+    delBar.style.cssText = "width:3px;height:28px;border-radius:2px;background:#ef4444;flex-shrink:0;";
+    delRow.appendChild(delBar);
+    var delInfo = document.createElement("div");
+    delInfo.style.cssText = "flex:1;display:flex;flex-direction:column;gap:2px;";
+    var delName = document.createElement("div");
+    delName.textContent = "\uD83D\uDDD1 \u5220\u9664\u4F19\u4F34";
+    delName.style.cssText = "font-size:13px;font-weight:bold;color:#ef4444;";
+    delInfo.appendChild(delName);
+    var delDesc = document.createElement("div");
+    delDesc.textContent = "\u5220\u9664\u4E00\u4E2A AI \u4F19\u4F34\uFF0C\u6B64\u64CD\u4F5C\u65E0\u6CD5\u6062\u590D";
+    delDesc.style.cssText = "font-size:11px;color:#888;";
+    delInfo.appendChild(delDesc);
+    delRow.appendChild(delInfo);
+    var delArrow = document.createElement("div");
+    delArrow.textContent = "\u25B6";
+    delArrow.style.cssText = "color:#555;font-size:10px;";
+    delRow.appendChild(delArrow);
+    delRow.onclick = function () {
+      showDeleteConfirm();
+    };
+    form.appendChild(delRow);
+    // 删除伙伴 - 双重确认弹框
+    function showDeleteConfirm() {
+      var mask = document.createElement("div");
+      mask.style.cssText = [
+        "position:fixed",
+        "top:0;left:0;width:100vw;height:100vh",
+        "background:rgba(0,0,0,0.3)",
+        "z-index:999999",
+        "display:flex",
+        "align-items:center",
+        "justify-content:center",
+        "animation:myclaw-fade-in 0.15s ease",
+      ].join(";");
+      var box = document.createElement("div");
+      box.style.cssText = [
+        "width:360px",
+        "background:#1e1e2e",
+        "border-radius:8px",
+        "overflow:hidden",
+        "box-shadow:0 8px 32px rgba(0,0,0,0.5)",
+      ].join(";");
+      // 标题
+      var h = document.createElement("div");
+      h.style.cssText = "padding:10px 14px;background:#ef4444;color:#fff;font-size:13px;display:flex;justify-content:space-between;align-items:center;";
+      h.innerHTML = '<span>\uD83D\uDDD1 \u5220\u9664\u4F19\u4F34</span>';
+      var x = document.createElement("span");
+      x.textContent = "\u2715";
+      x.style.cssText = "cursor:pointer;padding:2px 6px;border-radius:3px;";
+      x.onclick = function () { mask.remove(); };
+      h.appendChild(x);
+      box.appendChild(h);
+      // body
+      var body = document.createElement("div");
+      body.style.cssText = "padding:16px;display:flex;flex-direction:column;gap:12px;";
+      var hint1 = document.createElement("div");
+      hint1.textContent = "\u8BF7\u8F93\u5165\u8981\u5220\u9664\u7684\u4F19\u4F34 ID\uFF1A";
+      hint1.style.cssText = "font-size:12px;color:#888;";
+      body.appendChild(hint1);
+      var input1 = document.createElement("input");
+      input1.type = "text";
+      input1.placeholder = "\u4F19\u4F34 ID";
+      input1.style.cssText = "padding:8px 10px;background:#252536;border:1px solid #3d3d5c;border-radius:4px;color:#cdd6f4;font-size:13px;font-family:monospace;outline:none;";
+      body.appendChild(input1);
+      var warn = document.createElement("div");
+      warn.textContent = "\u26A0 \u6B64\u64CD\u4F5C\u65E0\u6CD5\u6062\u590D\uFF0C\u786E\u8BA4\u540E\u5C06\u6C38\u4E45\u5220\u9664\uFF01";
+      warn.style.cssText = "font-size:11px;color:#ef4444;padding:8px;background:rgba(239,68,68,0.1);border-radius:4px;";
+      body.appendChild(warn);
+      var confirmHint = document.createElement("div");
+      confirmHint.textContent = '\u8BF7\u8F93\u5165 "YES" \u786E\u8BA4\u5220\u9664\uFF1A';
+      confirmHint.style.cssText = "font-size:12px;color:#888;";
+      confirmHint.style.display = "none";
+      body.appendChild(confirmHint);
+      var input2 = document.createElement("input");
+      input2.type = "text";
+      input2.placeholder = "YES";
+      input2.style.cssText = "padding:8px 10px;background:#252536;border:1px solid #3d3d5c;border-radius:4px;color:#cdd6f4;font-size:13px;font-family:monospace;outline:none;";
+      input2.style.display = "none";
+      body.appendChild(input2);
+      var submitBtn = document.createElement("button");
+      submitBtn.textContent = "\u7EE7\u7EED";
+      submitBtn.style.cssText = "padding:8px 16px;background:#ef4444;border:none;border-radius:4px;color:#fff;font-size:12px;font-family:monospace;cursor:pointer;";
+      body.appendChild(submitBtn);
+      var cancelBtn = document.createElement("button");
+      cancelBtn.textContent = "\u53D6\u6D88";
+      cancelBtn.style.cssText = "padding:8px 16px;background:#3d3d5c;border:none;border-radius:4px;color:#cdd6f4;font-size:12px;font-family:monospace;cursor:pointer;";
+      body.appendChild(cancelBtn);
+      box.appendChild(body);
+      mask.appendChild(box);
+      document.body.appendChild(mask);
+      input1.focus();
+      var agentId = "";
+      submitBtn.onclick = function () {
+        if (!agentId) {
+          // 第一步：输入 agent ID
+          agentId = input1.value.trim();
+          if (!agentId) {
+            input1.style.borderColor = "#ef4444";
+            input1.focus();
+            return;
+          }
+          // 显示第二步确认
+          input1.style.display = "none";
+          hint1.style.display = "none";
+          confirmHint.style.display = "block";
+          input2.style.display = "block";
+          submitBtn.textContent = "\u786E\u8BA4\u5220\u9664";
+          input2.value = "";
+          input2.focus();
+        } else {
+          // 第二步：确认删除
+          var confirmVal = input2.value.trim();
+          if (confirmVal.toUpperCase() !== "YES") {
+            input2.style.borderColor = "#ef4444";
+            input2.focus();
+            return;
+          }
+          submitBtn.disabled = true;
+          submitBtn.textContent = "\u6267\u884C\u4E2D...";
+          runCommand("openclaw agents delete " + agentId + " --force");
+          setTimeout(function () {
+            mask.remove();
+          }, 1000);
+        }
+      };
+      cancelBtn.onclick = function () {
+        mask.remove();
+      };
+      input2.onkeydown = function (e) {
+        if (e.key === "Enter") submitBtn.click();
+      };
+    }
     box.appendChild(header);
     box.appendChild(form);
     overlay.appendChild(box);
@@ -939,6 +1169,9 @@
     // 初始化 VoiceInput SDK
     initVoice();
+    // 拦截语音态 Enter 键
+    hookVoiceEnter();
     // 持续监听 DOM 变化，确保按钮始终在
     new MutationObserver(function () {
       if (!document.querySelector("#myclaw-voice-btn")) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aiyiran/myclaw",
-  "version": "1.0.243",
+  "version": "1.0.245",
   "description": "",
   "main": "index.js",
   "bin": {

package/skills/yiran-skill-media/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: yiran-skill-media
-description: 统一多媒体生成技能。支持图片和音乐生成，按资源类型自动路由到最优 provider，支持主备切换。资源生成规范：所有生成的资源必须存放在当前工作目录下，调用时通过 --output-dir 传入当前工作目录的绝对路径，通过 --name 传入资源的中文名称。
+description: 统一多媒体生成技能。支持图片、音乐、文生视频和图生视频，按资源类型自动路由到最优 provider，支持多级降级。资源生成规范：所有生成的资源必须存放在当前工作目录下，调用时通过 --output-dir 传入当前工作目录的绝对路径，通过 --name 传入资源的中文名称。
 ---
 # 统一多媒体生成
@@ -12,10 +12,6 @@ description: 统一多媒体生成技能。支持图片和音乐生成，按资
 **所有生成的资源文件必须存放在当前工作目录下。** 不允许省略，不允许猜测路径。
-例如：
-- 你当前在 `/root/.openclaw/workspace` → 传入 `--output-dir /root/.openclaw/workspace`
-- 生成一张日落图片 → 传入 `--name 日落风景`
 ## 一键脚本
 ```bash
@@ -24,6 +20,12 @@ description: 统一多媒体生成技能。支持图片和音乐生成，按资
 # 音乐生成
 ./music.sh --output-dir "$(pwd)" --name 开场音乐 "epic opening" [--instrumental]
+# 文生视频 (Text-to-Video)
+./video.sh --output-dir "$(pwd)" --name 日落延时 "a sunset timelapse" [--duration 6] [--resolution 768P] [--aspect-ratio 16:9]
+# 图生视频 (Image-to-Video)
+./i2v.sh --output-dir "$(pwd)" --name 猫咪跑步 --first-frame-image "https://example.com/cat.jpg" "cat running toward camera"
 ```
 ## 参数说明
@@ -32,40 +34,73 @@ description: 统一多媒体生成技能。支持图片和音乐生成，按资
 | 参数 | 必填 | 说明 |
 |------|------|------|
-| `--output-dir` | 是 | 输出目录的绝对路径，传入当前工作目录 |
+| `--output-dir` | 是 | 输出目录的绝对路径 |
 | `--name` | 是 | 资源中文名称（如：日落风景、产品封面） |
 | `prompt` | 是 | 图片描述 |
-| `--aspect-ratio` | 否 | 比例，默认 1:1。可选：16:9, 9:16, 4:3 等 |
+| `--aspect-ratio` | 否 | 比例，默认 16:9。可选：1:1, 16:9, 9:16, 4:3, 3:4 等 |
 ### music.sh
 | 参数 | 必填 | 说明 |
 |------|------|------|
-| `--output-dir` | 是 | 输出目录的绝对路径，传入当前工作目录 |
-| `--name` | 是 | 资源中文名称（如：开场音乐、背景配乐） |
+| `--output-dir` | 是 | 输出目录的绝对路径 |
+| `--name` | 是 | 资源中文名称 |
 | `prompt` | 是 | 音乐风格/情绪描述 |
 | `--lyrics` | 否 | 歌词文本 |
 | `--instrumental` | 否 | 纯音乐模式 |
+### video.sh — 文生视频
+| 参数 | 必填 | 说明 |
+|------|------|------|
+| `--output-dir` | 是 | 输出目录的绝对路径 |
+| `--name` | 是 | 资源中文名称 |
+| `prompt` | 是 | 视频内容描述（支持运镜指令如 `[推进]`、`[左摇]`） |
+| `--duration` | 否 | 视频时长（秒），默认 6，可选 6 或 10 |
+| `--resolution` | 否 | MiniMax 分辨率：768P/1080P（默认 768P） |
+| `--aspect-ratio` | 否 | 即梦降级时使用：16:9/9:16/4:3/1:1（默认 16:9） |
+### i2v.sh — 图生视频
+| 参数 | 必填 | 说明 |
+|------|------|------|
+| `--output-dir` | 是 | 输出目录的绝对路径 |
+| `--name` | 是 | 资源中文名称 |
+| `--first-frame-image` | 是 | 首帧图片的公网 URL（JPG/PNG/WebP，<20MB） |
+| `prompt` | 否 | 基于首帧图像的动作/变化描述 |
+| `--duration` | 否 | 视频时长，默认 6 |
+| `--resolution` | 否 | 分辨率，默认 768P |
+**注意**：视频生成为异步任务，耗时较长（通常 1-5 分钟），脚本会自动轮询等待完成。
 ## 架构
 ```
-image.sh / music.sh    →  智能体调用的薄壳入口
-generate.py            →  统一路由调度（主备切换）
-config.json            →  provider 配置中心（key、模型、地址）
-providers/             →  各 provider 适配器
-  minimax_image.py     →  MiniMax 图片（主）
-  vapi_image.py        →  VAPI 图片（备）
-  minimax_music.py     →  MiniMax 音乐（主）
+image.sh              →  图片生成入口
+music.sh              →  音乐生成入口
+video.sh              →  文生视频入口 (Text-to-Video)
+i2v.sh                →  图生视频入口 (Image-to-Video)
+generate.py           →  统一路由调度（优先级数组，依次尝试）
+config.json           →  provider 配置中心（优先级列表）
+providers/
+  vapi_image.py       →  VAPI 图片
+  minimax_image.py    →  MiniMax 图片
+  jimeng_image.py     →  即梦 图片 4.0（异步）
+  minimax_music.py    →  MiniMax 音乐
+  minimax_video.py    →  MiniMax 视频（文生+图生）
+  jimeng_video.py     →  即梦 视频 3.0（异步）
 ```
-## Provider 配置
+## Provider 配置（优先级列表）
-编辑 `config.json` 可切换主备 provider、更换模型或 API Key。
+`config.json` 中每个资源类型是一个数组，按优先级从高到低排列。
+第一个失败自动尝试下一个，直到成功或全部失败。
 当前配置：
-- **图片**：MiniMax (image-01) → fallback VAPI (nano-banana-pro)
-- **音乐**：MiniMax (music-2.6)
+- **图片**：① VAPI (nano-banana-2) → ② MiniMax (image-01) → ③ 即梦 (jimeng_t2i_v40)
+- **音乐**：① MiniMax (music-2.6)
+- **文生视频**：① MiniMax (MiniMax-Hailuo-2.3) → ② 即梦 (jimeng_t2v_v30)
+- **图生视频**：① MiniMax (MiniMax-Hailuo-2.3-Fast)
 ## 详细 API 参考

package/skills/yiran-skill-media/config.json CHANGED Viewed

@@ -1,26 +1,53 @@
 {
   "output_dir": "media",
-  "image": {
-    "primary": {
+  "image": [
+    {
+      "provider": "jimeng_image",
+      "model": "jimeng_t2i_v40",
+      "access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
+      "secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
+    },
+    {
       "provider": "vapi_image",
-      "model": "nano-banana-pro",
+      "model": "nano-banana-2",
       "base_url": "https://api.v3.cm/v1",
       "api_key": "sk-PXPUzqllWKJy2oj011Df510242264219Ba21093e3d2b2335"
     },
-    "fallback": {
+    {
       "provider": "minimax_image",
       "model": "image-01",
       "base_url": "https://api.minimaxi.com/v1",
       "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
     }
-  },
-  "music": {
-    "primary": {
+  ],
+  "music": [
+    {
       "provider": "minimax_music",
       "model": "music-2.6",
       "base_url": "https://api.minimaxi.com/v1",
       "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
+    }
+  ],
+  "video": [
+    {
+      "provider": "minimax_video",
+      "model": "MiniMax-Hailuo-2.3",
+      "base_url": "https://api.minimaxi.com/v1",
+      "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
     },
-    "fallback": null
-  }
+    {
+      "provider": "jimeng_video",
+      "model": "jimeng_t2v_v30",
+      "access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
+      "secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
+    }
+  ],
+  "i2v": [
+    {
+      "provider": "minimax_video",
+      "model": "MiniMax-Hailuo-2.3-Fast",
+      "base_url": "https://api.minimaxi.com/v1",
+      "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
+    }
+  ]
 }

package/skills/yiran-skill-media/scripts/generate.py CHANGED Viewed

@@ -102,9 +102,9 @@ def append_log(log_path, entry):
 def dispatch(resource_type, prompt, **kwargs):
-    """Route to primary provider, fallback on failure.
+    """Route to providers sequentially as a priority array.
-    Always tries primary first, then fallback if configured.
+    Always tries the first provider, then falls back to the next on failure.
     Returns (files, used_provider_cfg).
     """
     cfg = load_config()
@@ -112,9 +112,15 @@ def dispatch(resource_type, prompt, **kwargs):
     if not resource_cfg:
         raise ValueError(f"unknown resource type: {resource_type}")
-    providers = [resource_cfg["primary"]]
-    if resource_cfg.get("fallback"):
-        providers.append(resource_cfg["fallback"])
+    # 支持旧版 dict(primary/fallback) 过渡到新版 list
+    if isinstance(resource_cfg, dict):
+        providers = [resource_cfg["primary"]]
+        if resource_cfg.get("fallback"):
+            providers.append(resource_cfg["fallback"])
+    elif isinstance(resource_cfg, list):
+        providers = resource_cfg
+    else:
+        raise ValueError(f"invalid config structure for {resource_type}")
     sys.path.insert(0, SCRIPT_DIR)
     from providers import get_adapter
@@ -136,16 +142,23 @@ def dispatch(resource_type, prompt, **kwargs):
 def main():
     parser = argparse.ArgumentParser(description="Unified media generation dispatcher")
-    parser.add_argument("type", choices=["image", "music"], help="Resource type")
-    parser.add_argument("prompt", help="Generation prompt")
-    parser.add_argument("--aspect-ratio", default="1:1", help="Image aspect ratio (image only)")
+    parser.add_argument("type", choices=["image", "music", "video", "i2v"], help="Resource type")
+    parser.add_argument("prompt", nargs="?", default="", help="Generation prompt")
+    parser.add_argument("--aspect-ratio", default="16:9", help="Aspect ratio (image: 1:1; video: 16:9/9:16/4:3/1:1)")
     parser.add_argument("--lyrics", default=None, help="Lyrics text (music only)")
     parser.add_argument("--instrumental", action="store_true", help="Instrumental mode (music only)")
+    parser.add_argument("--duration", type=int, default=None, help="Video duration in seconds (video/i2v, default 6)")
+    parser.add_argument("--resolution", default=None, help="Video resolution: 720P/768P/1080P (video/i2v)")
+    parser.add_argument("--first-frame-image", default=None, help="First frame image URL (i2v only, required)")
     parser.add_argument("--output", default=None, help="Output file path")
     parser.add_argument("--output-dir", required=True, help="Absolute path to output directory (required)")
     parser.add_argument("--name", required=True, help="Resource name in Chinese (required, e.g. 日落风景)")
     args = parser.parse_args()
+    # i2v 必须有 --first-frame-image
+    if args.type == "i2v" and not args.first_frame_image:
+        parser.error("i2v (图生视频) 模式必须提供 --first-frame-image 参数")
     out_dir = ensure_output_dir(args.output_dir)
     # Prepare kwargs
@@ -155,6 +168,28 @@ def main():
             "aspect_ratio": args.aspect_ratio,
         }
         ext = "png"
+    elif args.type == "video":
+        # 文生视频 — 纯文本，无图片
+        kwargs = {
+            "out_dir": out_dir,
+            "aspect_ratio": args.aspect_ratio,
+        }
+        if args.duration:
+            kwargs["duration"] = args.duration
+        if args.resolution:
+            kwargs["resolution"] = args.resolution
+        ext = "mp4"
+    elif args.type == "i2v":
+        # 图生视频 — 必须有首帧图片
+        kwargs = {
+            "out_dir": out_dir,
+            "first_frame_image": args.first_frame_image,
+        }
+        if args.duration:
+            kwargs["duration"] = args.duration
+        if args.resolution:
+            kwargs["resolution"] = args.resolution
+        ext = "mp4"
     else:
         kwargs = {
             "out_dir": out_dir,

package/skills/yiran-skill-media/scripts/i2v.sh ADDED Viewed

@@ -0,0 +1,47 @@
+#!/bin/bash
+# Image-to-video (i2v) entry point.
+# Model: MiniMax-Hailuo-2.3-Fast
+# Usage: ./i2v.sh --output-dir /abs/path --name <name> --first-frame-image URL ["description"]
+# Parses the options, then forwards them to generate.py with resource type "i2v".
+set -euo pipefail
+# Print the usage text (same wording and stream as before: stdout).
+usage() {
+    echo "用法: ./i2v.sh --output-dir <绝对路径> --name <中文名> --first-frame-image <图片URL> [\"动作描述\"]"
+    echo "示例: ./i2v.sh --output-dir \$(pwd) --name 猫咪跑步 --first-frame-image https://example.com/cat.jpg \"cat running toward camera\""
+}
+# Print usage and exit 1 when an option is the last argument (no value follows).
+# Without this check, `set -u` aborts on "$2" with an "unbound variable" error.
+require_value() {
+    if [ "$#" -lt 2 ]; then
+        usage
+        exit 1
+    fi
+}
+OUTPUT_DIR=""
+NAME=""
+PROMPT=""
+DURATION=""
+RESOLUTION=""
+FIRST_FRAME=""
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --output-dir) require_value "$@"; OUTPUT_DIR="$2"; shift 2 ;;
+        --name) require_value "$@"; NAME="$2"; shift 2 ;;
+        --duration) require_value "$@"; DURATION="$2"; shift 2 ;;
+        --resolution) require_value "$@"; RESOLUTION="$2"; shift 2 ;;
+        --first-frame-image) require_value "$@"; FIRST_FRAME="$2"; shift 2 ;;
+        *)
+            # The first bare argument is the prompt; later bare arguments are ignored.
+            if [ -z "$PROMPT" ]; then
+                PROMPT="$1"
+            fi
+            shift
+            ;;
+    esac
+done
+# --output-dir, --name and --first-frame-image are mandatory; the prompt is optional.
+if [ -z "$OUTPUT_DIR" ] || [ -z "$NAME" ] || [ -z "$FIRST_FRAME" ]; then
+    usage
+    exit 1
+fi
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# Build the generate.py argument list; optional values are added only when set.
+ARGS=()
+ARGS+=("i2v")
+[ -n "$PROMPT" ] && ARGS+=("$PROMPT")
+ARGS+=(--output-dir "$OUTPUT_DIR")
+ARGS+=(--name "$NAME")
+ARGS+=(--first-frame-image "$FIRST_FRAME")
+[ -n "$DURATION" ] && ARGS+=(--duration "$DURATION")
+[ -n "$RESOLUTION" ] && ARGS+=(--resolution "$RESOLUTION")
+python3 "$SCRIPT_DIR/generate.py" "${ARGS[@]}"

package/skills/yiran-skill-media/scripts/providers/__init__.py CHANGED Viewed

@@ -1,11 +1,17 @@
 from .vapi_image import VAPIImageAdapter
 from .minimax_image import MiniMaxImageAdapter
 from .minimax_music import MiniMaxMusicAdapter
+from .minimax_video import MiniMaxVideoAdapter
+from .jimeng_video import JimengVideoAdapter
+from .jimeng_image import JimengImageAdapter
 ADAPTERS = {
     "vapi_image": VAPIImageAdapter(),
     "minimax_image": MiniMaxImageAdapter(),
     "minimax_music": MiniMaxMusicAdapter(),
+    "minimax_video": MiniMaxVideoAdapter(),
+    "jimeng_video": JimengVideoAdapter(),
+    "jimeng_image": JimengImageAdapter(),
 }
 def get_adapter(name: str):
@@ -13,3 +19,5 @@ def get_adapter(name: str):
     if not adapter:
         raise ValueError(f"unknown provider: {name}")
     return adapter

package/skills/yiran-skill-media/scripts/providers/jimeng_image.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""即梦(Jimeng) image adapter — Volcengine SDK, async workflow.
+Uses CVSync2AsyncSubmitTask → CVSync2AsyncGetResult polling.
+Supports text-to-image via jimeng_t2i_v40."""
+import json
+import os
+import sys
+import time
+import requests
+from volcengine.visual.VisualService import VisualService
+class JimengImageAdapter:
+    POLL_INTERVAL = 5    # 图片生成比视频快，5 秒轮询
+    MAX_WAIT = 120       # 最长等 2 分钟
+    # 比例 → 推荐的 2K 分辨率（官方文档中的推荐值）
+    RATIO_MAP = {
+        "1:1":  (2048, 2048),
+        "4:3":  (2304, 1728),
+        "3:4":  (1728, 2304),
+        "3:2":  (2496, 1664),
+        "2:3":  (1664, 2496),
+        "16:9": (2560, 1440),
+        "9:16": (1440, 2560),
+        "21:9": (3024, 1296),
+        "9:21": (1296, 3024),
+    }
+    def generate(self, prompt, config, **kwargs):
+        access_key = config["access_key"]
+        secret_key = config["secret_key"]
+        req_key = config.get("model", "jimeng_t2i_v40")
+        out_dir = kwargs["out_dir"]
+        aspect_ratio = kwargs.get("aspect_ratio", "1:1")
+        vs = VisualService()
+        vs.set_ak(access_key)
+        vs.set_sk(secret_key)
+        # ═══ Step 1: 提交任务 ═══
+        body = {
+            "req_key": req_key,
+            "prompt": prompt,
+            "force_single": True,  # 强制输出单图，确保智能体调用行为可预测
+        }
+        # 宽高映射
+        if aspect_ratio in self.RATIO_MAP:
+            w, h = self.RATIO_MAP[aspect_ratio]
+            body["width"] = w
+            body["height"] = h
+        else:
+            # 不传 width/height，让模型根据 prompt 自动判断
+            body["size"] = 2048 * 2048  # 2K 默认面积
+        print(f"[jimeng_image] 提交文生图任务 req_key={req_key} ratio={aspect_ratio}...", file=sys.stderr)
+        try:
+            data = vs.cv_sync2async_submit_task(body)
+        except Exception as e:
+            if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
+                try:
+                    err_json = json.loads(e.args[0].decode('utf-8'))
+                    code = err_json.get("code", 0)
+                    msg = err_json.get("message", "")
+                    raise RuntimeError(f"即梦 API 错误: {msg} (code={code})")
+                except RuntimeError:
+                    raise
+                except Exception:
+                    pass
+            raise RuntimeError(f"即梦提交失败: {str(e)}")
+        if data.get("code") != 10000:
+            raise RuntimeError(f"即梦 API 错误: {data.get('message')} (code={data.get('code')})")
+        task_id = data["data"]["task_id"]
+        print(f"[jimeng_image] task_id={task_id}, 开始轮询...", file=sys.stderr)
+        # ═══ Step 2: 轮询状态 ═══
+        image_urls = None
+        binary_data = None
+        elapsed = 0
+        while elapsed < self.MAX_WAIT:
+            time.sleep(self.POLL_INTERVAL)
+            elapsed += self.POLL_INTERVAL
+            query_body = {
+                "req_key": req_key,
+                "task_id": task_id,
+                "req_json": json.dumps({"return_url": True}),
+            }
+            try:
+                qdata = vs.cv_sync2async_get_result(query_body)
+            except Exception as e:
+                if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
+                    try:
+                        err_json = json.loads(e.args[0].decode('utf-8'))
+                        code = err_json.get("code", 0)
+                        if code in (50429, 50430, 50500, 50501):
+                            print(f"[jimeng_image] 轮询 {elapsed}s: 可重试错误 code={code}", file=sys.stderr)
+                            continue
+                        raise RuntimeError(f"即梦查询失败: {err_json.get('message')} (code={code})")
+                    except RuntimeError:
+                        raise
+                    except Exception:
+                        pass
+                raise RuntimeError(f"即梦查询异常: {str(e)}")
+            if qdata.get("code") != 10000:
+                print(f"[jimeng_image] 轮询 {elapsed}s: code={qdata.get('code')} msg={qdata.get('message')}", file=sys.stderr)
+                if qdata.get("code") in (50429, 50430, 50500, 50501):
+                    continue
+                raise RuntimeError(f"即梦状态获取失败: {qdata.get('message')} (code={qdata.get('code')})")
+            status = qdata.get("data", {}).get("status", "")
+            print(f"[jimeng_image] 轮询 {elapsed}s: status={status}", file=sys.stderr)
+            if status == "done":
+                image_urls = qdata["data"].get("image_urls", [])
+                binary_data = qdata["data"].get("binary_data_base64", [])
+                break
+            elif status in ("in_queue", "generating"):
+                continue
+            elif status in ("not_found", "expired"):
+                raise RuntimeError(f"即梦任务异常: status={status}")
+        if not image_urls and not binary_data:
+            raise RuntimeError(f"即梦图片生成超时 ({self.MAX_WAIT}s), task_id={task_id}")
+        print(f"[jimeng_image] 生成完成, 下载中...", file=sys.stderr)
+        # ═══ Step 3: 下载图片 ═══
+        import base64
+        saved = []
+        if image_urls:
+            for i, url in enumerate(image_urls):
+                fname = kwargs.get("output_path") or os.path.join(out_dir, f"image_{i}.png")
+                r = requests.get(url, timeout=60)
+                r.raise_for_status()
+                with open(fname, "wb") as f:
+                    f.write(r.content)
+                saved.append(fname)
+                print(f"[jimeng_image] 已保存: {fname}", file=sys.stderr)
+        elif binary_data:
+            for i, b64 in enumerate(binary_data):
+                if not b64:
+                    continue
+                fname = kwargs.get("output_path") or os.path.join(out_dir, f"image_{i}.png")
+                with open(fname, "wb") as f:
+                    f.write(base64.b64decode(b64))
+                saved.append(fname)
+                print(f"[jimeng_image] 已保存 (base64): {fname}", file=sys.stderr)
+        if not saved:
+            raise RuntimeError("即梦未返回任何图片数据")
+        return saved

package/skills/yiran-skill-media/scripts/providers/jimeng_video.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""即梦(Jimeng) video adapter — Volcengine SDK Implementation.
+Supports text-to-video via CVSync2AsyncSubmitTask."""
+import json
+import os
+import sys
+import time
+import requests
+from volcengine.visual.VisualService import VisualService
+class JimengVideoAdapter:
+    POLL_INTERVAL = 10
+    MAX_WAIT = 600
+    def generate(self, prompt, config, **kwargs):
+        access_key = config["access_key"]
+        secret_key = config["secret_key"]
+        req_key = config.get("model", "jimeng_t2v_v30")
+        out_dir = kwargs["out_dir"]
+        # duration → frames: 5s=121, 10s=241 (assuming default 5s max logic for now, standard expects 121 for 5s)
+        duration = int(kwargs.get("duration", 5))
+        frames = 24 * duration + 1
+        aspect_ratio = kwargs.get("aspect_ratio", "16:9")
+        # ═══ Step 1: 提交任务 ═══
+        vs = VisualService()
+        vs.set_ak(access_key)
+        vs.set_sk(secret_key)
+        body = {
+            "req_key": req_key,
+            "prompt": prompt,
+            "seed": -1,
+            "frames": frames,
+            "aspect_ratio": aspect_ratio,
+        }
+        print(f"[jimeng_video] 提交文生视频任务 req_key={req_key}...", file=sys.stderr)
+        try:
+            data = vs.cv_sync2async_submit_task(body)
+        except Exception as e:
+            # Handle SDK exceptions which wrap the raw response
+            if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
+                try:
+                    err_json = json.loads(e.args[0].decode('utf-8'))
+                    raise RuntimeError(f"即梦 API 错误: {err_json}")
+                except Exception:
+                    pass
+            raise RuntimeError(f"即梦提交失败: {str(e)}")
+        if data.get("code") != 10000:
+            raise RuntimeError(f"即梦 API 错误: {data.get('message')} (code={data.get('code')})")
+        task_id = data["data"]["task_id"]
+        print(f"[jimeng_video] task_id={task_id}, 开始轮询...", file=sys.stderr)
+        # ═══ Step 2: 轮询状态 ═══
+        video_url = None
+        elapsed = 0
+        while elapsed < self.MAX_WAIT:
+            time.sleep(self.POLL_INTERVAL)
+            elapsed += self.POLL_INTERVAL
+            query_body = {
+                "req_key": req_key,
+                "task_id": task_id,
+            }
+            try:
+                qdata = vs.cv_sync2async_get_result(query_body)
+            except Exception as e:
+                # Same exception handling for polling
+                if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
+                    try:
+                        err_json = json.loads(e.args[0].decode('utf-8'))
+                        if err_json.get("code") in (50500, 50501):
+                            continue # Server error, can retry
+                        raise RuntimeError(f"即梦查询失败: {err_json}")
+                    except Exception:
+                        pass
+                raise RuntimeError(f"即梦查询异常: {str(e)}")
+            if qdata.get("code") != 10000:
+                print(f"[jimeng_video] 轮询 {elapsed}s: code={qdata.get('code')} msg={qdata.get('message')}", file=sys.stderr)
+                if qdata.get("code") in (50500, 50501):
+                    continue  # 可重试的内部错误
+                raise RuntimeError(f"即梦状态获取失败: {qdata.get('message')} (code={qdata.get('code')})")
+            status = qdata.get("data", {}).get("status", "")
+            print(f"[jimeng_video] 轮询 {elapsed}s: status={status}", file=sys.stderr)
+            if status == "done":
+                video_url = qdata["data"].get("video_url")
+                break
+            elif status in ("in_queue", "generating"):
+                continue
+            elif status in ("not_found", "expired"):
+                raise RuntimeError(f"即梦任务异常停机: status={status}")
+        if not video_url:
+            raise RuntimeError(f"即梦视频生成超时 ({self.MAX_WAIT}s), task_id={task_id}")
+        print(f"[jimeng_video] 生成完成, 下载中...", file=sys.stderr)
+        # ═══ Step 3: 下载视频 ═══
+        fname = kwargs.get("output_path") or os.path.join(out_dir, "video.mp4")
+        r = requests.get(video_url, timeout=300)
+        r.raise_for_status()
+        with open(fname, "wb") as f:
+            f.write(r.content)
+        print(f"[jimeng_video] 已保存: {fname}", file=sys.stderr)
+        return [fname]

package/skills/yiran-skill-media/scripts/providers/minimax_video.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""MiniMax video adapter — POST /video_generation, poll status, download via file_id.
+Supports both Text-to-Video and Image-to-Video modes."""
+import json
+import os
+import sys
+import time
+import requests
+class MiniMaxVideoAdapter:
+    # 轮询间隔和超时
+    POLL_INTERVAL = 10   # 每 10 秒查一次
+    MAX_WAIT = 600       # 最长等 10 分钟
+    def generate(self, prompt, config, **kwargs):
+        base_url = config["base_url"].rstrip("/")
+        api_key = config["api_key"]
+        model = config["model"]
+        out_dir = kwargs["out_dir"]
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
+        # ═══ Step 1: 提交生成任务 ═══
+        payload = {
+            "model": model,
+        }
+        # prompt（图生视频时可选，文生视频时必填）
+        if prompt:
+            payload["prompt"] = prompt
+        # 图生视频模式：传入首帧图片
+        if kwargs.get("first_frame_image"):
+            payload["first_frame_image"] = kwargs["first_frame_image"]
+        # 可选参数
+        if kwargs.get("duration"):
+            payload["duration"] = int(kwargs["duration"])
+        if kwargs.get("resolution"):
+            payload["resolution"] = kwargs["resolution"]
+        if kwargs.get("prompt_optimizer") is not None:
+            payload["prompt_optimizer"] = kwargs["prompt_optimizer"]
+        mode = "图生视频" if "first_frame_image" in payload else "文生视频"
+        print(f"[minimax_video] 提交{mode}任务 model={model}...", file=sys.stderr)
+        resp = requests.post(
+            f"{base_url}/video_generation",
+            headers=headers,
+            json=payload,
+            timeout=60,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        # 检查业务错误
+        base_resp = data.get("base_resp", {})
+        if base_resp.get("status_code", 0) != 0:
+            raise RuntimeError(f"MiniMax error: {base_resp.get('status_msg')}")
+        task_id = data.get("task_id")
+        if not task_id:
+            raise RuntimeError(f"未返回 task_id: {data}")
+        print(f"[minimax_video] task_id={task_id}, 开始轮询...", file=sys.stderr)
+        # ═══ Step 2: 轮询任务状态 ═══
+        file_id = None
+        elapsed = 0
+        while elapsed < self.MAX_WAIT:
+            time.sleep(self.POLL_INTERVAL)
+            elapsed += self.POLL_INTERVAL
+            query_resp = requests.get(
+                f"{base_url}/query/video_generation",
+                headers={"Authorization": f"Bearer {api_key}"},
+                params={"task_id": task_id},
+                timeout=30,
+            )
+            query_resp.raise_for_status()
+            qdata = query_resp.json()
+            status = qdata.get("status", "")
+            print(f"[minimax_video] 轮询 {elapsed}s: status={status}", file=sys.stderr)
+            if status == "Success":
+                file_id = qdata.get("file_id")
+                break
+            elif status == "Fail":
+                err_msg = qdata.get("error_message", "未知错误")
+                raise RuntimeError(f"视频生成失败: {err_msg}")
+            # Processing / Queueing → 继续等
+        if not file_id:
+            raise RuntimeError(f"视频生成超时 ({self.MAX_WAIT}s), task_id={task_id}")
+        print(f"[minimax_video] 生成完成 file_id={file_id}, 下载中...", file=sys.stderr)
+        # ═══ Step 3: 获取下载 URL 并下载 ═══
+        file_resp = requests.get(
+            f"{base_url}/files/retrieve",
+            headers={"Authorization": f"Bearer {api_key}"},
+            params={"file_id": file_id},
+            timeout=30,
+        )
+        file_resp.raise_for_status()
+        download_url = file_resp.json()["file"]["download_url"]
+        fname = kwargs.get("output_path") or os.path.join(out_dir, "video.mp4")
+        r = requests.get(download_url, timeout=300)
+        r.raise_for_status()
+        with open(fname, "wb") as f:
+            f.write(r.content)
+        print(f"[minimax_video] 已保存: {fname}", file=sys.stderr)
+        return [fname]

package/skills/yiran-skill-media/scripts/video.sh ADDED Viewed

@@ -0,0 +1,47 @@
+#!/bin/bash
+# Text-to-video entry point.
+# Primary model: MiniMax-Hailuo-2.3 | fallback: Jimeng jimeng_t2v_v30
+# Usage: ./video.sh --output-dir /abs/path --name <Chinese name> "description" [--duration 6] [--resolution 768P] [--aspect-ratio 16:9]
+set -euo pipefail
+# Print the usage text (kept on stdout, as before).
+usage() {
+    echo "用法: ./video.sh --output-dir <绝对路径> --name <中文名> \"视频描述\""
+    echo "示例: ./video.sh --output-dir /root/.openclaw/workspace --name 日落延时 \"a sunset timelapse\""
+}
+OUTPUT_DIR=""
+NAME=""
+PROMPT=""
+DURATION=""
+RESOLUTION=""
+ASPECT_RATIO=""
+while [ $# -gt 0 ]; do
+    case "$1" in
+        --output-dir|--name|--duration|--resolution|--aspect-ratio)
+            # With `set -u`, reading "$2" for a trailing option would abort with an
+            # opaque "unbound variable" error; report the real problem instead.
+            if [ $# -lt 2 ]; then
+                echo "错误: $1 缺少参数值" >&2
+                usage
+                exit 1
+            fi
+            case "$1" in
+                --output-dir) OUTPUT_DIR="$2" ;;
+                --name) NAME="$2" ;;
+                --duration) DURATION="$2" ;;
+                --resolution) RESOLUTION="$2" ;;
+                --aspect-ratio) ASPECT_RATIO="$2" ;;
+            esac
+            shift 2
+            ;;
+        *)
+            # The first positional argument is the prompt; later ones are ignored.
+            if [ -z "$PROMPT" ]; then
+                PROMPT="$1"
+            fi
+            shift
+            ;;
+    esac
+done
+if [ -z "$OUTPUT_DIR" ] || [ -z "$NAME" ] || [ -z "$PROMPT" ]; then
+    usage
+    exit 1
+fi
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# Build the argument vector for generate.py; optional flags are forwarded only when set.
+ARGS=("video" "$PROMPT" --output-dir "$OUTPUT_DIR" --name "$NAME")
+if [ -n "$DURATION" ]; then
+    ARGS+=(--duration "$DURATION")
+fi
+if [ -n "$RESOLUTION" ]; then
+    ARGS+=(--resolution "$RESOLUTION")
+fi
+if [ -n "$ASPECT_RATIO" ]; then
+    ARGS+=(--aspect-ratio "$ASPECT_RATIO")
+fi
+python3 "$SCRIPT_DIR/generate.py" "${ARGS[@]}"