@aiyiran/myclaw 1.0.243 → 1.0.244

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -522,6 +522,51 @@
522
522
  }
523
523
  // ═══ 6. 拦截发送按钮 ═══
524
524
 
525
var voiceEnterListening = false;
var voiceEnterTimeout = null;

/**
 * Intercept the Enter key while voice input is recording.
 *
 * Registers, at most once, a capture-phase `keydown` listener on the document.
 * When Enter is pressed inside the chat textarea while `recording` is truthy,
 * the handler suppresses the key's default action, writes a "waiting..."
 * placeholder into the textarea, calls `stopVoice()` and clicks the send
 * button 2 seconds later.
 */
function hookVoiceEnter() {
  // Install the document-level listener only once.
  if (voiceEnterListening) return;
  voiceEnterListening = true;

  document.addEventListener("keydown", function (e) {
    // Only Enter presses are of interest.
    if (e.key !== "Enter") return;
    // An Enter that merely confirms an IME candidate is not a send request.
    if (e.isComposing) return;
    // The event must originate from the chat textarea.
    var ta = e.target && e.target.closest ? e.target.closest(".agent-chat__input textarea") : null;
    if (!ta) return;
    // Outside of voice recording, Enter keeps its normal behavior.
    if (!recording) return;

    // Block the default action (no newline inserted into the textarea)
    // and keep the event away from later handlers.
    e.preventDefault();
    e.stopPropagation();

    // 1) Show the "waiting..." placeholder text.
    setTextareaValue("\u7B49\u5F85\u4E2D...");
    // Move the caret to the end; failures here are harmless and ignored on purpose.
    try {
      ta.setSelectionRange(ta.value.length, ta.value.length);
    } catch (err) {}

    // 2) Stop recording.
    stopVoice();

    // 3) Click the send button after 2 seconds; any pending timer is cancelled first.
    if (voiceEnterTimeout) clearTimeout(voiceEnterTimeout);
    voiceEnterTimeout = setTimeout(function () {
      voiceEnterTimeout = null;
      var sendBtn = document.querySelector("button.chat-send-btn, button[title=\"Send\"]");
      if (sendBtn) {
        sendBtn.click();
      }
      console.log("[myclaw-voice] Enter\u89E6\u53D1\u81EA\u52A8\u53D1\u9001");
    }, 2000);

  }, true); // capture phase, so this runs before the page's own handlers
}
569
+
525
570
  var sendHooked = false;
526
571
 
527
572
  function hookSendButton() {
@@ -853,7 +898,7 @@
853
898
  { label: "\uD83D\uDCAC \u6DFB\u52A0\u5BF9\u8BDD", desc: "\u6253\u5F00\u5DF2\u6709\u4F19\u4F34\u7684\u5BF9\u8BDD\u7A97\u53E3", hasInput: true, inputTitle: "\u6DFB\u52A0\u5BF9\u8BDD", placeholder: "\u8F93\u5165\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 kakaxi", hint: "\u8F93\u5165\u4F60\u7684\u4F19\u4F34\u7684\u540D\u79F0\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u6253\u5F00\u5BF9\u8BDD\u7A97\u53E3", cmd: "mc tui {name}", color: "#10b981" },
854
899
  { label: "\uD83D\uDE80 \u5347\u7EA7", desc: "\u5347\u7EA7 myclaw \u5230\u6700\u65B0\u7248\u672C", hasInput: false, cmd: "mc up", color: "#8b5cf6" },
855
900
  { label: "\uD83D\uDD04 \u91CD\u542F", desc: "\u91CD\u542F\u670D\u52A1\uFF0C\u4FEE\u590D\u5927\u591A\u6570\u95EE\u9898", hasInput: false, cmd: "mc restart", color: "#ef4444" },
856
- { label: "\uD83E\uDD1D \u65B0\u4F19\u4F34", desc: "\u521B\u5EFA\u4E00\u4E2A\u65B0\u7684 AI \u4F19\u4F34", hasInput: true, inputTitle: "\u65B0\u5EFA\u4F19\u4F34", placeholder: "\u8F93\u5165\u65B0\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 my-cat", hint: "\u7ED9\u4F60\u7684\u65B0 AI \u4F19\u4F34\u8D77\u4E2A\u540D\u5B57\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u81EA\u52A8\u521B\u5EFA", cmd: "mc tui {name}", color: "#3b82f6" },
901
+ { label: "\uD83E\uDD1D \u65B0\u4F19\u4F34", desc: "\u521B\u5EFA\u4E00\u4E2A\u65B0\u7684 AI \u4F19\u4F34", hasInput: true, inputTitle: "\u65B0\u5EFA\u4F19\u4F34", placeholder: "\u8F93\u5165\u65B0\u4F19\u4F34\u540D\u79F0\uFF0C\u5982 my-cat", hint: "\u7ED9\u4F60\u7684\u65B0 AI \u4F19\u4F34\u8D77\u4E2A\u540D\u5B57\uFF08\u82F1\u6587\u5B57\u6BCD\u3001\u6570\u5B57\u3001\u8FDE\u5B57\u7B26\uFF09\uFF0C\u70B9\u51FB\u540E\u4F1A\u81EA\u52A8\u521B\u5EFA", cmd: "mc new {name}", color: "#3b82f6" },
857
902
  ];
858
903
 
859
904
  btns.forEach(function (item) {
@@ -908,6 +953,175 @@
908
953
  form.appendChild(row);
909
954
  });
910
955
 
956
+ // ── 删除伙伴按钮 ──
957
+ var delRow = document.createElement("div");
958
+ delRow.style.cssText = [
959
+ "padding:10px 14px",
960
+ "background:#252536",
961
+ "border-radius:6px",
962
+ "cursor:pointer",
963
+ "transition:background 0.15s",
964
+ "display:flex",
965
+ "align-items:center",
966
+ "gap:10px",
967
+ ].join(";");
968
+ delRow.onmouseenter = function () { delRow.style.background = "#2f2f4a"; };
969
+ delRow.onmouseleave = function () { delRow.style.background = "#252536"; };
970
+
971
+ var delBar = document.createElement("div");
972
+ delBar.style.cssText = "width:3px;height:28px;border-radius:2px;background:#ef4444;flex-shrink:0;";
973
+ delRow.appendChild(delBar);
974
+
975
+ var delInfo = document.createElement("div");
976
+ delInfo.style.cssText = "flex:1;display:flex;flex-direction:column;gap:2px;";
977
+ var delName = document.createElement("div");
978
+ delName.textContent = "\uD83D\uDDD1 \u5220\u9664\u4F19\u4F34";
979
+ delName.style.cssText = "font-size:13px;font-weight:bold;color:#ef4444;";
980
+ delInfo.appendChild(delName);
981
+ var delDesc = document.createElement("div");
982
+ delDesc.textContent = "\u5220\u9664\u4E00\u4E2A AI \u4F19\u4F34\uFF0C\u6B64\u64CD\u4F5C\u65E0\u6CD5\u6062\u590D";
983
+ delDesc.style.cssText = "font-size:11px;color:#888;";
984
+ delInfo.appendChild(delDesc);
985
+ delRow.appendChild(delInfo);
986
+
987
+ var delArrow = document.createElement("div");
988
+ delArrow.textContent = "\u25B6";
989
+ delArrow.style.cssText = "color:#555;font-size:10px;";
990
+ delRow.appendChild(delArrow);
991
+
992
+ delRow.onclick = function () {
993
+ showDeleteConfirm();
994
+ };
995
+
996
+ form.appendChild(delRow);
997
+
998
+ // 删除伙伴 - 双重确认弹框
999
/**
 * Show the two-step "delete companion" confirmation dialog.
 *
 * Step 1 asks for the agent ID, step 2 asks the user to type "YES".
 * Only after both steps pass does it call
 * `runCommand("openclaw agents delete <id> --force")` and close the dialog
 * one second later.
 *
 * The ID is validated before use because it is concatenated into a command
 * line: anything other than letters, digits, ".", "_" and "-" (or an ID that
 * starts with a non-alphanumeric character) is rejected, so a typed value can
 * neither inject shell syntax nor be parsed as an extra option.
 */
function showDeleteConfirm() {
  // Conservative whitelist for IDs that end up inside a command line.
  var AGENT_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

  var mask = document.createElement("div");
  mask.style.cssText = [
    "position:fixed",
    "top:0;left:0;width:100vw;height:100vh",
    "background:rgba(0,0,0,0.3)",
    "z-index:999999",
    "display:flex",
    "align-items:center",
    "justify-content:center",
    "animation:myclaw-fade-in 0.15s ease",
  ].join(";");

  var box = document.createElement("div");
  box.style.cssText = [
    "width:360px",
    "background:#1e1e2e",
    "border-radius:8px",
    "overflow:hidden",
    "box-shadow:0 8px 32px rgba(0,0,0,0.5)",
  ].join(";");

  // Title bar with a close ("x") control
  var h = document.createElement("div");
  h.style.cssText = "padding:10px 14px;background:#ef4444;color:#fff;font-size:13px;display:flex;justify-content:space-between;align-items:center;";
  h.innerHTML = '<span>\uD83D\uDDD1 \u5220\u9664\u4F19\u4F34</span>';
  var x = document.createElement("span");
  x.textContent = "\u2715";
  x.style.cssText = "cursor:pointer;padding:2px 6px;border-radius:3px;";
  x.onclick = function () { mask.remove(); };
  h.appendChild(x);
  box.appendChild(h);

  // Dialog body
  var body = document.createElement("div");
  body.style.cssText = "padding:16px;display:flex;flex-direction:column;gap:12px;";

  // Step 1 prompt and input: the agent ID
  var hint1 = document.createElement("div");
  hint1.textContent = "\u8BF7\u8F93\u5165\u8981\u5220\u9664\u7684\u4F19\u4F34 ID\uFF1A";
  hint1.style.cssText = "font-size:12px;color:#888;";
  body.appendChild(hint1);

  var input1 = document.createElement("input");
  input1.type = "text";
  input1.placeholder = "\u4F19\u4F34 ID";
  input1.style.cssText = "padding:8px 10px;background:#252536;border:1px solid #3d3d5c;border-radius:4px;color:#cdd6f4;font-size:13px;font-family:monospace;outline:none;";
  body.appendChild(input1);

  // Irreversibility warning, visible in both steps
  var warn = document.createElement("div");
  warn.textContent = "\u26A0 \u6B64\u64CD\u4F5C\u65E0\u6CD5\u6062\u590D\uFF0C\u786E\u8BA4\u540E\u5C06\u6C38\u4E45\u5220\u9664\uFF01";
  warn.style.cssText = "font-size:11px;color:#ef4444;padding:8px;background:rgba(239,68,68,0.1);border-radius:4px;";
  body.appendChild(warn);

  // Step 2 prompt and input: type "YES" (hidden until step 1 passes)
  var confirmHint = document.createElement("div");
  confirmHint.textContent = '\u8BF7\u8F93\u5165 "YES" \u786E\u8BA4\u5220\u9664\uFF1A';
  confirmHint.style.cssText = "font-size:12px;color:#888;";
  confirmHint.style.display = "none";
  body.appendChild(confirmHint);

  var input2 = document.createElement("input");
  input2.type = "text";
  input2.placeholder = "YES";
  input2.style.cssText = "padding:8px 10px;background:#252536;border:1px solid #3d3d5c;border-radius:4px;color:#cdd6f4;font-size:13px;font-family:monospace;outline:none;";
  input2.style.display = "none";
  body.appendChild(input2);

  var submitBtn = document.createElement("button");
  submitBtn.textContent = "\u7EE7\u7EED";
  submitBtn.style.cssText = "padding:8px 16px;background:#ef4444;border:none;border-radius:4px;color:#fff;font-size:12px;font-family:monospace;cursor:pointer;";
  body.appendChild(submitBtn);

  var cancelBtn = document.createElement("button");
  cancelBtn.textContent = "\u53D6\u6D88";
  cancelBtn.style.cssText = "padding:8px 16px;background:#3d3d5c;border:none;border-radius:4px;color:#cdd6f4;font-size:12px;font-family:monospace;cursor:pointer;";
  body.appendChild(cancelBtn);

  box.appendChild(body);
  mask.appendChild(box);
  document.body.appendChild(mask);

  input1.focus();

  // Empty until step 1 has accepted an ID; a non-empty value means "step 2".
  var agentId = "";
  submitBtn.onclick = function () {
    if (!agentId) {
      // Step 1: read and validate the agent ID
      var candidate = input1.value.trim();
      if (!candidate || !AGENT_ID_PATTERN.test(candidate)) {
        // Stay in step 1: agentId is left empty so the next click re-validates.
        input1.style.borderColor = "#ef4444";
        input1.focus();
        return;
      }
      agentId = candidate;
      // Switch the dialog to the step 2 confirmation
      input1.style.display = "none";
      hint1.style.display = "none";
      confirmHint.style.display = "block";
      input2.style.display = "block";
      submitBtn.textContent = "\u786E\u8BA4\u5220\u9664";
      input2.value = "";
      input2.focus();
    } else {
      // Step 2: require "YES" (case-insensitive) before deleting
      var confirmVal = input2.value.trim();
      if (confirmVal.toUpperCase() !== "YES") {
        input2.style.borderColor = "#ef4444";
        input2.focus();
        return;
      }
      // Disable the button so the command cannot be issued twice.
      submitBtn.disabled = true;
      submitBtn.textContent = "\u6267\u884C\u4E2D...";
      runCommand("openclaw agents delete " + agentId + " --force");
      setTimeout(function () {
        mask.remove();
      }, 1000);
    }
  };

  cancelBtn.onclick = function () {
    mask.remove();
  };

  // Enter in either input behaves like clicking the submit button.
  input1.onkeydown = function (e) {
    if (e.key === "Enter") submitBtn.click();
  };
  input2.onkeydown = function (e) {
    if (e.key === "Enter") submitBtn.click();
  };
}
1124
+
911
1125
  box.appendChild(header);
912
1126
  box.appendChild(form);
913
1127
  overlay.appendChild(box);
@@ -939,6 +1153,9 @@
939
1153
  // 初始化 VoiceInput SDK
940
1154
  initVoice();
941
1155
 
1156
+ // 拦截语音态 Enter 键
1157
+ hookVoiceEnter();
1158
+
942
1159
  // 持续监听 DOM 变化,确保按钮始终在
943
1160
  new MutationObserver(function () {
944
1161
  if (!document.querySelector("#myclaw-voice-btn")) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiyiran/myclaw",
3
- "version": "1.0.243",
3
+ "version": "1.0.244",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: yiran-skill-media
3
- description: 统一多媒体生成技能。支持图片和音乐生成,按资源类型自动路由到最优 provider,支持主备切换。资源生成规范:所有生成的资源必须存放在当前工作目录下,调用时通过 --output-dir 传入当前工作目录的绝对路径,通过 --name 传入资源的中文名称。
3
+ description: 统一多媒体生成技能。支持图片、音乐、文生视频和图生视频,按资源类型自动路由到最优 provider,支持主备切换。资源生成规范:所有生成的资源必须存放在当前工作目录下,调用时通过 --output-dir 传入当前工作目录的绝对路径,通过 --name 传入资源的中文名称。
4
4
  ---
5
5
 
6
6
  # 统一多媒体生成
@@ -12,10 +12,6 @@ description: 统一多媒体生成技能。支持图片和音乐生成,按资
12
12
 
13
13
  **所有生成的资源文件必须存放在当前工作目录下。** 不允许省略,不允许猜测路径。
14
14
 
15
- 例如:
16
- - 你当前在 `/root/.openclaw/workspace` → 传入 `--output-dir /root/.openclaw/workspace`
17
- - 生成一张日落图片 → 传入 `--name 日落风景`
18
-
19
15
  ## 一键脚本
20
16
 
21
17
  ```bash
@@ -24,6 +20,12 @@ description: 统一多媒体生成技能。支持图片和音乐生成,按资
24
20
 
25
21
  # 音乐生成
26
22
  ./music.sh --output-dir "$(pwd)" --name 开场音乐 "epic opening" [--instrumental]
23
+
24
+ # 文生视频 (Text-to-Video)
25
+ ./video.sh --output-dir "$(pwd)" --name 日落延时 "a sunset timelapse" [--duration 6] [--resolution 768P]
26
+
27
+ # 图生视频 (Image-to-Video)
28
+ ./i2v.sh --output-dir "$(pwd)" --name 猫咪跑步 --first-frame-image "https://example.com/cat.jpg" "cat running toward camera"
27
29
  ```
28
30
 
29
31
  ## 参数说明
@@ -47,16 +49,43 @@ description: 统一多媒体生成技能。支持图片和音乐生成,按资
47
49
  | `--lyrics` | 否 | 歌词文本 |
48
50
  | `--instrumental` | 否 | 纯音乐模式 |
49
51
 
52
+ ### video.sh — 文生视频
53
+
54
+ | 参数 | 必填 | 说明 |
55
+ |------|------|------|
56
+ | `--output-dir` | 是 | 输出目录的绝对路径 |
57
+ | `--name` | 是 | 资源中文名称(如:日落延时、产品展示) |
58
+ | `prompt` | 是 | 视频内容描述(支持运镜指令如 `[推进]`、`[左摇]`) |
59
+ | `--duration` | 否 | 视频时长(秒),默认 6,可选 6 或 10 |
60
+ | `--resolution` | 否 | 分辨率,默认 768P,可选 720P/768P/1080P |
61
+
62
+ ### i2v.sh — 图生视频
63
+
64
+ | 参数 | 必填 | 说明 |
65
+ |------|------|------|
66
+ | `--output-dir` | 是 | 输出目录的绝对路径 |
67
+ | `--name` | 是 | 资源中文名称(如:猫咪跑步、舞蹈表演) |
68
+ | `--first-frame-image` | 是 | 首帧图片的公网 URL(JPG/PNG/WebP,<20MB) |
69
+ | `prompt` | 否 | 基于首帧图像的动作/变化描述 |
70
+ | `--duration` | 否 | 视频时长(秒),默认 6,可选 6 或 10 |
71
+ | `--resolution` | 否 | 分辨率,默认 768P,可选 768P/1080P |
72
+
73
+ **注意**:视频生成为异步任务,耗时较长(通常 1-5 分钟),脚本会自动轮询等待完成。
74
+
50
75
  ## 架构
51
76
 
52
77
  ```
53
- image.sh / music.sh 智能体调用的薄壳入口
54
- generate.py 统一路由调度(主备切换)
55
- config.json provider 配置中心(key、模型、地址)
56
- providers/ provider 适配器
57
- minimax_image.py MiniMax 图片(主)
58
- vapi_image.py VAPI 图片(备)
59
- minimax_music.py → MiniMax 音乐(主)
78
+ image.sh 图片生成入口
79
+ music.sh 音乐生成入口
80
+ video.sh 文生视频入口 (Text-to-Video)
81
+ i2v.sh 图生视频入口 (Image-to-Video)
82
+ generate.py 统一路由调度(主备切换)
83
+ config.json provider 配置中心(key、模型、地址)
84
+ providers/
85
+ vapi_image.py → VAPI 图片
86
+ minimax_image.py → MiniMax 图片
87
+ minimax_music.py → MiniMax 音乐
88
+ minimax_video.py → MiniMax 视频(文生+图生)
60
89
  ```
61
90
 
62
91
  ## Provider 配置
@@ -64,8 +93,10 @@ providers/ → 各 provider 适配器
64
93
  编辑 `config.json` 可切换主备 provider、更换模型或 API Key。
65
94
 
66
95
  当前配置:
67
- - **图片**:MiniMax (image-01) → fallback VAPI (nano-banana-pro)
96
+ - **图片**:VAPI (nano-banana-2) → fallback MiniMax (image-01) → fallback 即梦 (jimeng_t2i_v40)
68
97
  - **音乐**:MiniMax (music-2.6)
98
+ - **文生视频**:MiniMax (MiniMax-Hailuo-2.3) → fallback 即梦 (jimeng_t2v_v30)
99
+ - **图生视频**:MiniMax (MiniMax-Hailuo-2.3-Fast)
69
100
 
70
101
  ## 详细 API 参考
71
102
 
@@ -1,26 +1,53 @@
1
1
  {
2
2
  "output_dir": "media",
3
- "image": {
4
- "primary": {
3
+ "image": [
4
+ {
5
5
  "provider": "vapi_image",
6
- "model": "nano-banana-pro",
6
+ "model": "nano-banana-2",
7
7
  "base_url": "https://api.v3.cm/v1",
8
8
  "api_key": "sk-PXPUzqllWKJy2oj011Df510242264219Ba21093e3d2b2335"
9
9
  },
10
- "fallback": {
10
+ {
11
11
  "provider": "minimax_image",
12
12
  "model": "image-01",
13
13
  "base_url": "https://api.minimaxi.com/v1",
14
14
  "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
15
+ },
16
+ {
17
+ "provider": "jimeng_image",
18
+ "model": "jimeng_t2i_v40",
19
+ "access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
20
+ "secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
15
21
  }
16
- },
17
- "music": {
18
- "primary": {
22
+ ],
23
+ "music": [
24
+ {
19
25
  "provider": "minimax_music",
20
26
  "model": "music-2.6",
21
27
  "base_url": "https://api.minimaxi.com/v1",
22
28
  "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
29
+ }
30
+ ],
31
+ "video": [
32
+ {
33
+ "provider": "minimax_video",
34
+ "model": "MiniMax-Hailuo-2.3",
35
+ "base_url": "https://api.minimaxi.com/v1",
36
+ "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
23
37
  },
24
- "fallback": null
25
- }
38
+ {
39
+ "provider": "jimeng_video",
40
+ "model": "jimeng_t2v_v30",
41
+ "access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
42
+ "secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
43
+ }
44
+ ],
45
+ "i2v": [
46
+ {
47
+ "provider": "minimax_video",
48
+ "model": "MiniMax-Hailuo-2.3-Fast",
49
+ "base_url": "https://api.minimaxi.com/v1",
50
+ "api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
51
+ }
52
+ ]
26
53
  }
@@ -102,9 +102,9 @@ def append_log(log_path, entry):
102
102
 
103
103
 
104
104
  def dispatch(resource_type, prompt, **kwargs):
105
- """Route to primary provider, fallback on failure.
105
+ """Route to providers sequentially as a priority array.
106
106
 
107
- Always tries primary first, then fallback if configured.
107
+ Always tries the first provider, then falls back to the next on failure.
108
108
  Returns (files, used_provider_cfg).
109
109
  """
110
110
  cfg = load_config()
@@ -112,9 +112,15 @@ def dispatch(resource_type, prompt, **kwargs):
112
112
  if not resource_cfg:
113
113
  raise ValueError(f"unknown resource type: {resource_type}")
114
114
 
115
- providers = [resource_cfg["primary"]]
116
- if resource_cfg.get("fallback"):
117
- providers.append(resource_cfg["fallback"])
115
+ # 支持旧版 dict(primary/fallback) 过渡到新版 list
116
+ if isinstance(resource_cfg, dict):
117
+ providers = [resource_cfg["primary"]]
118
+ if resource_cfg.get("fallback"):
119
+ providers.append(resource_cfg["fallback"])
120
+ elif isinstance(resource_cfg, list):
121
+ providers = resource_cfg
122
+ else:
123
+ raise ValueError(f"invalid config structure for {resource_type}")
118
124
 
119
125
  sys.path.insert(0, SCRIPT_DIR)
120
126
  from providers import get_adapter
@@ -136,16 +142,23 @@ def dispatch(resource_type, prompt, **kwargs):
136
142
 
137
143
  def main():
138
144
  parser = argparse.ArgumentParser(description="Unified media generation dispatcher")
139
- parser.add_argument("type", choices=["image", "music"], help="Resource type")
140
- parser.add_argument("prompt", help="Generation prompt")
141
- parser.add_argument("--aspect-ratio", default="1:1", help="Image aspect ratio (image only)")
145
+ parser.add_argument("type", choices=["image", "music", "video", "i2v"], help="Resource type")
146
+ parser.add_argument("prompt", nargs="?", default="", help="Generation prompt")
147
+ parser.add_argument("--aspect-ratio", default="16:9", help="Aspect ratio (image: 1:1; video: 16:9/9:16/4:3/1:1)")
142
148
  parser.add_argument("--lyrics", default=None, help="Lyrics text (music only)")
143
149
  parser.add_argument("--instrumental", action="store_true", help="Instrumental mode (music only)")
150
+ parser.add_argument("--duration", type=int, default=None, help="Video duration in seconds (video/i2v, default 6)")
151
+ parser.add_argument("--resolution", default=None, help="Video resolution: 720P/768P/1080P (video/i2v)")
152
+ parser.add_argument("--first-frame-image", default=None, help="First frame image URL (i2v only, required)")
144
153
  parser.add_argument("--output", default=None, help="Output file path")
145
154
  parser.add_argument("--output-dir", required=True, help="Absolute path to output directory (required)")
146
155
  parser.add_argument("--name", required=True, help="Resource name in Chinese (required, e.g. 日落风景)")
147
156
  args = parser.parse_args()
148
157
 
158
+ # i2v 必须有 --first-frame-image
159
+ if args.type == "i2v" and not args.first_frame_image:
160
+ parser.error("i2v (图生视频) 模式必须提供 --first-frame-image 参数")
161
+
149
162
  out_dir = ensure_output_dir(args.output_dir)
150
163
 
151
164
  # Prepare kwargs
@@ -155,6 +168,28 @@ def main():
155
168
  "aspect_ratio": args.aspect_ratio,
156
169
  }
157
170
  ext = "png"
171
+ elif args.type == "video":
172
+ # 文生视频 — 纯文本,无图片
173
+ kwargs = {
174
+ "out_dir": out_dir,
175
+ "aspect_ratio": args.aspect_ratio,
176
+ }
177
+ if args.duration:
178
+ kwargs["duration"] = args.duration
179
+ if args.resolution:
180
+ kwargs["resolution"] = args.resolution
181
+ ext = "mp4"
182
+ elif args.type == "i2v":
183
+ # 图生视频 — 必须有首帧图片
184
+ kwargs = {
185
+ "out_dir": out_dir,
186
+ "first_frame_image": args.first_frame_image,
187
+ }
188
+ if args.duration:
189
+ kwargs["duration"] = args.duration
190
+ if args.resolution:
191
+ kwargs["resolution"] = args.resolution
192
+ ext = "mp4"
158
193
  else:
159
194
  kwargs = {
160
195
  "out_dir": out_dir,
@@ -0,0 +1,47 @@
1
#!/bin/bash
# Image-to-video entry point.
# Model: MiniMax-Hailuo-2.3-Fast
# Usage: ./i2v.sh --output-dir /abs/path --name <name> --first-frame-image URL ["description"]
#
# Parses the command line, checks the required options and forwards
# everything to generate.py (resource type "i2v") located next to this script.
set -euo pipefail

OUTPUT_DIR=""
NAME=""
PROMPT=""
DURATION=""
RESOLUTION=""
FIRST_FRAME=""

# Print the usage text.
usage() {
  echo "用法: ./i2v.sh --output-dir <绝对路径> --name <中文名> --first-frame-image <图片URL> [\"动作描述\"]"
  echo "示例: ./i2v.sh --output-dir \$(pwd) --name 猫咪跑步 --first-frame-image https://example.com/cat.jpg \"cat running toward camera\""
}

while [ $# -gt 0 ]; do
  case "$1" in
    --output-dir|--name|--duration|--resolution|--first-frame-image)
      # Every option takes a value. Without this check a trailing option
      # would abort under `set -u` with an "unbound variable" error
      # instead of a usage message.
      if [ $# -lt 2 ]; then
        echo "错误: 选项 $1 缺少参数值" >&2
        usage
        exit 1
      fi
      case "$1" in
        --output-dir)        OUTPUT_DIR="$2" ;;
        --name)              NAME="$2" ;;
        --duration)          DURATION="$2" ;;
        --resolution)        RESOLUTION="$2" ;;
        --first-frame-image) FIRST_FRAME="$2" ;;
      esac
      shift 2
      ;;
    *)
      # The first bare argument is the prompt; later ones are ignored.
      if [ -z "$PROMPT" ]; then
        PROMPT="$1"
      fi
      shift
      ;;
  esac
done

# The prompt is optional for image-to-video; the other three are required.
if [ -z "$OUTPUT_DIR" ] || [ -z "$NAME" ] || [ -z "$FIRST_FRAME" ]; then
  usage
  exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Build the generate.py argument list; optional values are passed only when set.
ARGS=("i2v")
if [ -n "$PROMPT" ]; then
  ARGS+=("$PROMPT")
fi
ARGS+=(--output-dir "$OUTPUT_DIR")
ARGS+=(--name "$NAME")
ARGS+=(--first-frame-image "$FIRST_FRAME")
if [ -n "$DURATION" ]; then
  ARGS+=(--duration "$DURATION")
fi
if [ -n "$RESOLUTION" ]; then
  ARGS+=(--resolution "$RESOLUTION")
fi

python3 "$SCRIPT_DIR/generate.py" "${ARGS[@]}"
@@ -1,11 +1,17 @@
1
1
  from .vapi_image import VAPIImageAdapter
2
2
  from .minimax_image import MiniMaxImageAdapter
3
3
  from .minimax_music import MiniMaxMusicAdapter
4
+ from .minimax_video import MiniMaxVideoAdapter
5
+ from .jimeng_video import JimengVideoAdapter
6
+ from .jimeng_image import JimengImageAdapter
4
7
 
5
8
  ADAPTERS = {
6
9
  "vapi_image": VAPIImageAdapter(),
7
10
  "minimax_image": MiniMaxImageAdapter(),
8
11
  "minimax_music": MiniMaxMusicAdapter(),
12
+ "minimax_video": MiniMaxVideoAdapter(),
13
+ "jimeng_video": JimengVideoAdapter(),
14
+ "jimeng_image": JimengImageAdapter(),
9
15
  }
10
16
 
11
17
  def get_adapter(name: str):
@@ -13,3 +19,5 @@ def get_adapter(name: str):
13
19
  if not adapter:
14
20
  raise ValueError(f"unknown provider: {name}")
15
21
  return adapter
22
+
23
+
@@ -0,0 +1,75 @@
1
+ """即梦(Jimeng) image adapter — Volcengine SDK Implementation.
2
+ Supports text-to-image via CVProcess."""
3
+ import json
4
+ import os
5
+ import sys
6
+
7
+ from volcengine.visual.VisualService import VisualService
8
+
9
+
10
class JimengImageAdapter:
    """Jimeng text-to-image adapter built on the Volcengine ``VisualService`` SDK."""

    # Pixel size submitted for each recognised aspect ratio.
    # Any other ratio (including the default "1:1") uses _DEFAULT_SIZE.
    _SIZES = {
        "16:9": (2560, 1440),
        "9:16": (1440, 2560),
        "4:3": (2304, 1728),
        "3:4": (1728, 2304),
    }
    _DEFAULT_SIZE = (2048, 2048)

    @staticmethod
    def _sdk_error_detail(exc):
        """Return the JSON payload carried by an SDK exception, or ``None``.

        Only exceptions whose first argument is a ``bytes`` object containing
        UTF-8 encoded JSON yield a payload; everything else returns ``None``.
        """
        raw = exc.args[0] if getattr(exc, "args", None) else None
        if not isinstance(raw, bytes):
            return None
        try:
            return json.loads(raw.decode("utf-8"))
        except ValueError:
            # Covers both UnicodeDecodeError and json.JSONDecodeError.
            return None

    def generate(self, prompt, config, **kwargs):
        """Generate image(s) for ``prompt`` and save them to disk.

        Args:
            prompt: Text description of the image.
            config: Provider config; requires ``access_key`` and ``secret_key``,
                optional ``model`` (used as ``req_key``, default
                ``jimeng_t2i_v40``).
            **kwargs: ``out_dir`` (required), optional ``aspect_ratio``
                (default ``"1:1"``) and ``output_path``.

        Returns:
            List of saved file paths, one per returned image URL.

        Raises:
            RuntimeError: The SDK call failed, the API returned a code other
                than 10000, or no image URLs were returned.
            requests.HTTPError: Downloading an image failed.
        """
        access_key = config["access_key"]
        secret_key = config["secret_key"]
        req_key = config.get("model", "jimeng_t2i_v40")
        out_dir = kwargs["out_dir"]

        aspect_ratio = kwargs.get("aspect_ratio", "1:1")
        width, height = self._SIZES.get(aspect_ratio, self._DEFAULT_SIZE)

        # ═══ Submit the task ═══
        vs = VisualService()
        vs.set_ak(access_key)
        vs.set_sk(secret_key)

        body = {
            "req_key": req_key,
            "prompt": prompt,
            "width": width,
            "height": height,
        }

        print(f"[jimeng_image] 提交文生图任务 req_key={req_key}...", file=sys.stderr)
        try:
            data = vs.cv_process(body)
        except Exception as e:
            # Raise OUTSIDE of any inner try block so the decoded API error is
            # actually reported instead of being swallowed.
            detail = self._sdk_error_detail(e)
            if detail is not None:
                raise RuntimeError(f"即梦 API 错误: {detail}") from e
            raise RuntimeError(f"即梦提交失败: {str(e)}") from e

        if data.get("code") != 10000:
            raise RuntimeError(f"即梦 API 错误: {data.get('message')} (code={data.get('code')})")

        image_urls = (data.get("data") or {}).get("image_urls") or []
        if not image_urls:
            raise RuntimeError(f"即梦未返回图片数据: {data}")

        print(f"[jimeng_image] 生成成功, 下载中...", file=sys.stderr)

        import requests
        output_path = kwargs.get("output_path")
        saved = []
        for i, url in enumerate(image_urls):
            if not output_path:
                fname = os.path.join(out_dir, f"image_{i}.png")
            elif i == 0:
                fname = output_path
            else:
                # Additional images get an index suffix so they do not
                # overwrite the first one at the same explicit path.
                root, ext = os.path.splitext(output_path)
                fname = f"{root}_{i}{ext}"
            r = requests.get(url, timeout=30)
            r.raise_for_status()
            with open(fname, "wb") as f:
                f.write(r.content)
            print(f"[jimeng_image] 已保存: {fname}", file=sys.stderr)
            saved.append(fname)

        return saved
@@ -0,0 +1,115 @@
1
+ """即梦(Jimeng) video adapter — Volcengine SDK Implementation.
2
+ Supports text-to-video via CVSync2AsyncSubmitTask."""
3
+ import json
4
+ import os
5
+ import sys
6
+ import time
7
+
8
+ import requests
9
+ from volcengine.visual.VisualService import VisualService
10
+
11
+
12
class JimengVideoAdapter:
    """Jimeng text-to-video adapter built on the Volcengine ``VisualService`` SDK.

    Submits an async task, polls it until it finishes, then downloads the video.
    """

    POLL_INTERVAL = 10  # seconds between two status queries
    MAX_WAIT = 600      # give up after this many seconds of polling

    # API codes treated as transient while polling.
    _RETRYABLE_CODES = (50500, 50501)

    @staticmethod
    def _sdk_error_detail(exc):
        """Return the JSON payload carried by an SDK exception, or ``None``.

        Only exceptions whose first argument is a ``bytes`` object containing
        UTF-8 encoded JSON yield a payload; everything else returns ``None``.
        """
        raw = exc.args[0] if getattr(exc, "args", None) else None
        if not isinstance(raw, bytes):
            return None
        try:
            return json.loads(raw.decode("utf-8"))
        except ValueError:
            # Covers both UnicodeDecodeError and json.JSONDecodeError.
            return None

    def generate(self, prompt, config, **kwargs):
        """Generate a video for ``prompt`` and save it to disk.

        Args:
            prompt: Text description of the video.
            config: Provider config; requires ``access_key`` and ``secret_key``,
                optional ``model`` (used as ``req_key``, default
                ``jimeng_t2v_v30``).
            **kwargs: ``out_dir`` (required), optional ``duration`` in seconds
                (default 5), ``aspect_ratio`` (default ``"16:9"``) and
                ``output_path``.

        Returns:
            A one-element list with the saved file path.

        Raises:
            RuntimeError: Submit or query failed, the task ended abnormally,
                finished without a video URL, or did not finish within
                ``MAX_WAIT`` seconds.
            requests.HTTPError: Downloading the video failed.
        """
        access_key = config["access_key"]
        secret_key = config["secret_key"]
        req_key = config.get("model", "jimeng_t2v_v30")
        out_dir = kwargs["out_dir"]

        # duration → frames: 24 frames per second plus one (5s=121, 10s=241).
        # NOTE(review): other durations (e.g. 6s → 145 frames) are passed
        # through as-is; confirm the API accepts them.
        duration = int(kwargs.get("duration", 5))
        frames = 24 * duration + 1

        aspect_ratio = kwargs.get("aspect_ratio", "16:9")

        # ═══ Step 1: submit the task ═══
        vs = VisualService()
        vs.set_ak(access_key)
        vs.set_sk(secret_key)

        body = {
            "req_key": req_key,
            "prompt": prompt,
            "seed": -1,
            "frames": frames,
            "aspect_ratio": aspect_ratio,
        }

        print(f"[jimeng_video] 提交文生视频任务 req_key={req_key}...", file=sys.stderr)
        try:
            data = vs.cv_sync2async_submit_task(body)
        except Exception as e:
            # Raise OUTSIDE of any inner try block so the decoded API error is
            # actually reported instead of being swallowed.
            detail = self._sdk_error_detail(e)
            if detail is not None:
                raise RuntimeError(f"即梦 API 错误: {detail}") from e
            raise RuntimeError(f"即梦提交失败: {str(e)}") from e

        if data.get("code") != 10000:
            raise RuntimeError(f"即梦 API 错误: {data.get('message')} (code={data.get('code')})")

        task_id = data["data"]["task_id"]
        print(f"[jimeng_video] task_id={task_id}, 开始轮询...", file=sys.stderr)

        # ═══ Step 2: poll the task status ═══
        video_url = None
        elapsed = 0
        while elapsed < self.MAX_WAIT:
            time.sleep(self.POLL_INTERVAL)
            elapsed += self.POLL_INTERVAL

            query_body = {
                "req_key": req_key,
                "task_id": task_id,
            }
            try:
                qdata = vs.cv_sync2async_get_result(query_body)
            except Exception as e:
                detail = self._sdk_error_detail(e)
                if isinstance(detail, dict) and detail.get("code") in self._RETRYABLE_CODES:
                    continue  # server-side error, retry on the next tick
                if detail is not None:
                    raise RuntimeError(f"即梦查询失败: {detail}") from e
                raise RuntimeError(f"即梦查询异常: {str(e)}") from e

            if qdata.get("code") != 10000:
                print(f"[jimeng_video] 轮询 {elapsed}s: code={qdata.get('code')} msg={qdata.get('message')}", file=sys.stderr)
                if qdata.get("code") in self._RETRYABLE_CODES:
                    continue  # retryable internal error
                raise RuntimeError(f"即梦状态获取失败: {qdata.get('message')} (code={qdata.get('code')})")

            result = qdata.get("data") or {}
            status = result.get("status", "")
            print(f"[jimeng_video] 轮询 {elapsed}s: status={status}", file=sys.stderr)

            if status == "done":
                video_url = result.get("video_url")
                if not video_url:
                    # Finished but unusable: report it as such, not as a timeout.
                    raise RuntimeError(f"即梦任务完成但未返回视频地址: {qdata}")
                break
            elif status in ("not_found", "expired"):
                raise RuntimeError(f"即梦任务异常停机: status={status}")
            # "in_queue" / "generating" / any other status → keep polling

        if not video_url:
            raise RuntimeError(f"即梦视频生成超时 ({self.MAX_WAIT}s), task_id={task_id}")

        print(f"[jimeng_video] 生成完成, 下载中...", file=sys.stderr)

        # ═══ Step 3: download the video ═══
        fname = kwargs.get("output_path") or os.path.join(out_dir, "video.mp4")
        r = requests.get(video_url, timeout=300)
        r.raise_for_status()
        with open(fname, "wb") as f:
            f.write(r.content)

        print(f"[jimeng_video] 已保存: {fname}", file=sys.stderr)
        return [fname]
@@ -0,0 +1,115 @@
1
+ """MiniMax video adapter — POST /video_generation, poll status, download via file_id.
2
+ Supports both Text-to-Video and Image-to-Video modes."""
3
+ import json
4
+ import os
5
+ import sys
6
+ import time
7
+
8
+ import requests
9
+
10
+
11
class MiniMaxVideoAdapter:
    """MiniMax video adapter for text-to-video and image-to-video.

    Flow: POST ``/video_generation`` → poll ``/query/video_generation`` →
    resolve the download URL via ``/files/retrieve`` → download the file.
    """

    # Polling interval and overall timeout
    POLL_INTERVAL = 10  # query once every 10 seconds
    MAX_WAIT = 600      # wait at most 10 minutes

    def generate(self, prompt, config, **kwargs):
        """Generate a video and save it to disk.

        Args:
            prompt: Video description; omitted from the request when empty.
            config: Provider config with ``base_url``, ``api_key`` and ``model``.
            **kwargs: ``out_dir`` (required), optional ``first_frame_image``
                (switches to image-to-video), ``duration``, ``resolution``,
                ``prompt_optimizer`` and ``output_path``.

        Returns:
            A one-element list with the saved file path.

        Raises:
            RuntimeError: The API reported an error, returned no ``task_id``,
                the task failed, finished without a ``file_id`` or download
                URL, or did not finish within ``MAX_WAIT`` seconds.
            requests.HTTPError: An HTTP request returned an error status.
        """
        base_url = config["base_url"].rstrip("/")
        api_key = config["api_key"]
        model = config["model"]
        out_dir = kwargs["out_dir"]

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        # ═══ Step 1: submit the generation task ═══
        payload = {
            "model": model,
        }
        # The prompt is only sent when non-empty.
        if prompt:
            payload["prompt"] = prompt
        # Image-to-video mode: pass the first-frame image.
        if kwargs.get("first_frame_image"):
            payload["first_frame_image"] = kwargs["first_frame_image"]
        # Optional parameters
        if kwargs.get("duration"):
            payload["duration"] = int(kwargs["duration"])
        if kwargs.get("resolution"):
            payload["resolution"] = kwargs["resolution"]
        if kwargs.get("prompt_optimizer") is not None:
            payload["prompt_optimizer"] = kwargs["prompt_optimizer"]

        mode = "图生视频" if "first_frame_image" in payload else "文生视频"
        print(f"[minimax_video] 提交{mode}任务 model={model}...", file=sys.stderr)
        resp = requests.post(
            f"{base_url}/video_generation",
            headers=headers,
            json=payload,
            timeout=60,
        )
        resp.raise_for_status()
        data = resp.json()

        # Check for a business-level error in the response body.
        base_resp = data.get("base_resp", {})
        if base_resp.get("status_code", 0) != 0:
            raise RuntimeError(f"MiniMax error: {base_resp.get('status_msg')}")

        task_id = data.get("task_id")
        if not task_id:
            raise RuntimeError(f"未返回 task_id: {data}")

        print(f"[minimax_video] task_id={task_id}, 开始轮询...", file=sys.stderr)

        # ═══ Step 2: poll the task status ═══
        file_id = None
        elapsed = 0
        while elapsed < self.MAX_WAIT:
            time.sleep(self.POLL_INTERVAL)
            elapsed += self.POLL_INTERVAL

            query_resp = requests.get(
                f"{base_url}/query/video_generation",
                headers={"Authorization": f"Bearer {api_key}"},
                params={"task_id": task_id},
                timeout=30,
            )
            query_resp.raise_for_status()
            qdata = query_resp.json()
            status = qdata.get("status", "")
            print(f"[minimax_video] 轮询 {elapsed}s: status={status}", file=sys.stderr)

            if status == "Success":
                file_id = qdata.get("file_id")
                if not file_id:
                    # Finished but unusable: report it as such, not as a timeout.
                    raise RuntimeError(f"视频生成成功但未返回 file_id: {qdata}")
                break
            elif status == "Fail":
                err_msg = qdata.get("error_message", "未知错误")
                raise RuntimeError(f"视频生成失败: {err_msg}")
            # Any other status → keep waiting

        if not file_id:
            raise RuntimeError(f"视频生成超时 ({self.MAX_WAIT}s), task_id={task_id}")

        print(f"[minimax_video] 生成完成 file_id={file_id}, 下载中...", file=sys.stderr)

        # ═══ Step 3: resolve the download URL and download ═══
        file_resp = requests.get(
            f"{base_url}/files/retrieve",
            headers={"Authorization": f"Bearer {api_key}"},
            params={"file_id": file_id},
            timeout=30,
        )
        file_resp.raise_for_status()
        file_info = file_resp.json()
        download_url = (file_info.get("file") or {}).get("download_url")
        if not download_url:
            # A readable error instead of a bare KeyError.
            raise RuntimeError(f"未返回 download_url: {file_info}")

        fname = kwargs.get("output_path") or os.path.join(out_dir, "video.mp4")
        r = requests.get(download_url, timeout=300)
        r.raise_for_status()
        with open(fname, "wb") as f:
            f.write(r.content)

        print(f"[minimax_video] 已保存: {fname}", file=sys.stderr)
        return [fname]
@@ -0,0 +1,47 @@
1
#!/bin/bash
# Text-to-video entry point.
# Primary model: MiniMax-Hailuo-2.3 | fallback: Jimeng jimeng_t2v_v30
# Usage: ./video.sh --output-dir /abs/path --name <name> "description" [--duration 6] [--resolution 768P] [--aspect-ratio 16:9]
#
# Parses the command line, checks the required arguments and forwards
# everything to generate.py (resource type "video") located next to this script.
set -euo pipefail

OUTPUT_DIR=""
NAME=""
PROMPT=""
DURATION=""
RESOLUTION=""
ASPECT_RATIO=""

# Print the usage text.
usage() {
  echo "用法: ./video.sh --output-dir <绝对路径> --name <中文名> \"视频描述\""
  echo "示例: ./video.sh --output-dir /root/.openclaw/workspace --name 日落延时 \"a sunset timelapse\""
}

while [ $# -gt 0 ]; do
  case "$1" in
    --output-dir|--name|--duration|--resolution|--aspect-ratio)
      # Every option takes a value. Without this check a trailing option
      # would abort under `set -u` with an "unbound variable" error
      # instead of a usage message.
      if [ $# -lt 2 ]; then
        echo "错误: 选项 $1 缺少参数值" >&2
        usage
        exit 1
      fi
      case "$1" in
        --output-dir)   OUTPUT_DIR="$2" ;;
        --name)         NAME="$2" ;;
        --duration)     DURATION="$2" ;;
        --resolution)   RESOLUTION="$2" ;;
        --aspect-ratio) ASPECT_RATIO="$2" ;;
      esac
      shift 2
      ;;
    *)
      # The first bare argument is the prompt; later ones are ignored.
      if [ -z "$PROMPT" ]; then
        PROMPT="$1"
      fi
      shift
      ;;
  esac
done

# Output directory, name and prompt are all required for text-to-video.
if [ -z "$OUTPUT_DIR" ] || [ -z "$NAME" ] || [ -z "$PROMPT" ]; then
  usage
  exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Build the generate.py argument list; optional values are passed only when set.
ARGS=("video" "$PROMPT")
ARGS+=(--output-dir "$OUTPUT_DIR")
ARGS+=(--name "$NAME")
if [ -n "$DURATION" ]; then
  ARGS+=(--duration "$DURATION")
fi
if [ -n "$RESOLUTION" ]; then
  ARGS+=(--resolution "$RESOLUTION")
fi
if [ -n "$ASPECT_RATIO" ]; then
  ARGS+=(--aspect-ratio "$ASPECT_RATIO")
fi

python3 "$SCRIPT_DIR/generate.py" "${ARGS[@]}"