@aiyiran/myclaw 1.0.244 → 1.0.246
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -290,17 +290,27 @@
|
|
|
290
290
|
});
|
|
291
291
|
}
|
|
292
292
|
|
|
293
|
+
function fetchArtifactsFromServerAPI(wsPrefix) {
|
|
294
|
+
var url = window.location.origin + '/cmd/api/preview?path=' + encodeURIComponent(wsPrefix + '/.myclaw/__MY_ARTIFACTS__.json');
|
|
295
|
+
return fetch(url).then(function (res) {
|
|
296
|
+
if (!res.ok) throw new Error('HTTP ' + res.status);
|
|
297
|
+
return res.json();
|
|
298
|
+
});
|
|
299
|
+
}
|
|
300
|
+
|
|
293
301
|
function fetchArtifacts(contentEl) {
|
|
302
|
+
// cachedConfig 未就绪时跳过,等 initConfig 完成后的 startPolling 重试
|
|
303
|
+
if (!cachedConfig) return;
|
|
304
|
+
|
|
294
305
|
var wsPrefix = getWorkspaceId();
|
|
295
306
|
var fetcher;
|
|
296
307
|
|
|
297
308
|
if (envInfo && envInfo.remote) {
|
|
298
|
-
//
|
|
299
|
-
fetcher =
|
|
309
|
+
// 远程服务器 → 走 /cmd/api(服务器直接提供 JSON)
|
|
310
|
+
fetcher = fetchArtifactsFromServerAPI(wsPrefix);
|
|
300
311
|
} else {
|
|
301
|
-
// 本地环境 →
|
|
302
|
-
fetcher =
|
|
303
|
-
.catch(function () { return fetchArtifactsFromCDN(wsPrefix); });
|
|
312
|
+
// 本地环境 → 走 CDN
|
|
313
|
+
fetcher = fetchArtifactsFromCDN(wsPrefix);
|
|
304
314
|
}
|
|
305
315
|
|
|
306
316
|
fetcher
|
package/assets/myclaw-inject.js
CHANGED
|
@@ -27,6 +27,7 @@
|
|
|
27
27
|
var committedText = ""; // 已经提交到 textarea 的文字(上一轮累积)
|
|
28
28
|
var cursorOffset = 0; // 录音开始时光标在 textarea 中的位置
|
|
29
29
|
var injected = false;
|
|
30
|
+
var stopping = false; // 正在等待最终识别结果(stopVoice 的 2 秒窗口)
|
|
30
31
|
|
|
31
32
|
// ═══ 1. 右下角版本标签(点击测试麦克风) ═══
|
|
32
33
|
function createVersionBar() {
|
|
@@ -323,7 +324,8 @@
|
|
|
323
324
|
'</svg>',
|
|
324
325
|
].join("");
|
|
325
326
|
|
|
326
|
-
|
|
327
|
+
btn.addEventListener("click", function () {
|
|
328
|
+
console.log("[myclaw-voice] 按钮点击, recording=", recording);
|
|
327
329
|
if (recording) {
|
|
328
330
|
stopVoice();
|
|
329
331
|
} else {
|
|
@@ -367,8 +369,8 @@
|
|
|
367
369
|
|
|
368
370
|
voice = new window.VoiceInput({
|
|
369
371
|
onResult: function (text) {
|
|
370
|
-
//
|
|
371
|
-
if (!recording) return;
|
|
372
|
+
// 完全停止后才忽略;stopping 期间(2秒等待窗口)仍允许写入
|
|
373
|
+
if (!recording && !stopping) return;
|
|
372
374
|
// 讯飞实时返回识别文字,替换到光标位置
|
|
373
375
|
pendingText = text;
|
|
374
376
|
updateTextAtCursor(pendingText);
|
|
@@ -377,7 +379,7 @@
|
|
|
377
379
|
onStatusChange: function (oldStatus, newStatus) {
|
|
378
380
|
console.log("[myclaw-voice] \u72b6\u6001:", oldStatus, "->", newStatus);
|
|
379
381
|
|
|
380
|
-
if (newStatus === "idle" && recording) {
|
|
382
|
+
if (newStatus === "idle" && recording && !stopping) {
|
|
381
383
|
// 讯飞 60 秒断开,但用户没有点停止 → 自动重连
|
|
382
384
|
// 把当前识别的文字提交,并更新光标位置
|
|
383
385
|
committedText = getTextareaValue();
|
|
@@ -435,23 +437,54 @@
|
|
|
435
437
|
console.log("[myclaw-voice] \u5f00\u59cb\u5f55\u97f3\uff0c\u5149\u6807\u4f4d\u7f6e:", cursorOffset);
|
|
436
438
|
}
|
|
437
439
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
+
|
|
441
|
+
/**
|
|
442
|
+
* 语音录入结束时,等待 2 秒后关闭录音资源
|
|
443
|
+
* @param {Function} [onDone] - 等待完成后执行的回调(如发送)
|
|
444
|
+
*/
|
|
445
|
+
var voiceStopTimer = null;
|
|
446
|
+
|
|
447
|
+
function stopVoice(onDone) {
|
|
448
|
+
console.log("[myclaw-voice] stopVoice called, recording=", recording, "onDone=", onDone ? "yes" : "no");
|
|
449
|
+
if (!recording) {
|
|
450
|
+
console.log("[myclaw-voice] stopVoice early return — not recording");
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// 进入 stopping 态:UI 立即更新,但 onResult 仍允许在 2 秒内写入文字
|
|
455
|
+
stopping = true;
|
|
440
456
|
recording = false;
|
|
441
457
|
updateButtonUI();
|
|
458
|
+
console.log("[myclaw-voice] stopping=true, UI updated, starting 2s timer...");
|
|
442
459
|
|
|
443
|
-
//
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
voice.stop();
|
|
460
|
+
// 延迟 2 秒后关闭录音资源(等讯飞把剩余识别结果全部推过来)
|
|
461
|
+
if (voiceStopTimer) {
|
|
462
|
+
console.log("[myclaw-voice] clearing previous timer");
|
|
463
|
+
clearTimeout(voiceStopTimer);
|
|
448
464
|
}
|
|
465
|
+
voiceStopTimer = setTimeout(function () {
|
|
466
|
+
voiceStopTimer = null;
|
|
467
|
+
stopping = false;
|
|
468
|
+
console.log("[myclaw-voice] 2s timer fired, closing resources...");
|
|
469
|
+
|
|
470
|
+
// 快照当前 textarea 值(2 秒内 onResult 可能已更新)
|
|
471
|
+
var finalText = getTextareaValue();
|
|
472
|
+
console.log("[myclaw-voice] finalText:", JSON.stringify(finalText.substring(0, 50)));
|
|
473
|
+
if (voice) {
|
|
474
|
+
console.log("[myclaw-voice] calling voice.stop()");
|
|
475
|
+
voice.stop();
|
|
476
|
+
}
|
|
477
|
+
committedText = finalText;
|
|
478
|
+
pendingText = "";
|
|
449
479
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
480
|
+
// 等待完成后执行回调(如发送)
|
|
481
|
+
if (onDone) {
|
|
482
|
+
console.log("[myclaw-voice] executing onDone callback...");
|
|
483
|
+
onDone();
|
|
484
|
+
}
|
|
453
485
|
|
|
454
|
-
|
|
486
|
+
console.log("[myclaw-voice] 停止录音完成");
|
|
487
|
+
}, 2000);
|
|
455
488
|
}
|
|
456
489
|
|
|
457
490
|
// ═══ 5. DOM 注入 ═══
|
|
@@ -522,49 +555,26 @@
|
|
|
522
555
|
}
|
|
523
556
|
// ═══ 6. 拦截发送按钮 ═══
|
|
524
557
|
|
|
525
|
-
var voiceEnterListening = false;
|
|
526
|
-
var voiceEnterTimeout = null;
|
|
527
|
-
|
|
528
558
|
/**
|
|
529
|
-
* 拦截 Enter 键:语音态下按回车 →
|
|
559
|
+
* 拦截 Enter 键:语音态下按回车 → 等待 2 秒后发送
|
|
530
560
|
*/
|
|
531
561
|
function hookVoiceEnter() {
|
|
532
|
-
if (voiceEnterListening) return;
|
|
533
|
-
voiceEnterListening = true;
|
|
534
|
-
|
|
535
562
|
document.addEventListener("keydown", function (e) {
|
|
536
|
-
// 只拦截 textarea 上的 Enter
|
|
537
563
|
if (e.key !== "Enter") return;
|
|
538
|
-
var ta = e.target.closest ? e.target.closest(".agent-chat__input textarea") : null;
|
|
539
|
-
if (!ta) return;
|
|
540
564
|
if (!recording) return;
|
|
541
565
|
|
|
542
|
-
//
|
|
566
|
+
// 语音录入中,无论焦点在哪里(textarea 或语音按钮),Enter 统一触发"停止并发送"
|
|
543
567
|
e.preventDefault();
|
|
544
568
|
e.stopPropagation();
|
|
545
569
|
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
try {
|
|
550
|
-
ta.setSelectionRange(ta.value.length, ta.value.length);
|
|
551
|
-
} catch (err) {}
|
|
552
|
-
|
|
553
|
-
// 2) 停止录音
|
|
554
|
-
stopVoice();
|
|
555
|
-
|
|
556
|
-
// 3) 2秒后自动点击发送按钮
|
|
557
|
-
if (voiceEnterTimeout) clearTimeout(voiceEnterTimeout);
|
|
558
|
-
voiceEnterTimeout = setTimeout(function () {
|
|
559
|
-
voiceEnterTimeout = null;
|
|
570
|
+
console.log("[myclaw-voice] Enter按下, recording=", recording);
|
|
571
|
+
stopVoice(function () {
|
|
572
|
+
console.log("[myclaw-voice] Enter stopVoice callback firing...");
|
|
560
573
|
var sendBtn = document.querySelector("button.chat-send-btn, button[title=\"Send\"]");
|
|
561
|
-
if (sendBtn)
|
|
562
|
-
|
|
563
|
-
}
|
|
564
|
-
console.log("[myclaw-voice] Enter\u89E6\u53D1\u81EA\u52A8\u53D1\u9001");
|
|
565
|
-
}, 2000);
|
|
574
|
+
if (sendBtn) sendBtn.click();
|
|
575
|
+
});
|
|
566
576
|
|
|
567
|
-
}, true);
|
|
577
|
+
}, true);
|
|
568
578
|
}
|
|
569
579
|
|
|
570
580
|
var sendHooked = false;
|
|
@@ -581,9 +591,17 @@
|
|
|
581
591
|
var text = getTextareaValue();
|
|
582
592
|
if (!text || !text.trim()) return; // 空文字不处理
|
|
583
593
|
|
|
584
|
-
// 1)
|
|
594
|
+
// 1) 停止语音输入(等待 2 秒后关闭,关闭后触发发送)
|
|
585
595
|
if (recording) {
|
|
586
|
-
|
|
596
|
+
e.preventDefault();
|
|
597
|
+
e.stopPropagation();
|
|
598
|
+
console.log("[myclaw-voice] 发送按钮点击(语音态), recording=", recording);
|
|
599
|
+
stopVoice(function () {
|
|
600
|
+
console.log("[myclaw-voice] 发送按钮 stopVoice callback firing...");
|
|
601
|
+
var sendBtn = document.querySelector("button.chat-send-btn, button[title=\"Send\"]");
|
|
602
|
+
if (sendBtn) sendBtn.click();
|
|
603
|
+
});
|
|
604
|
+
return;
|
|
587
605
|
}
|
|
588
606
|
|
|
589
607
|
// 2) 复制到剪贴板
|
|
@@ -591,7 +609,6 @@
|
|
|
591
609
|
navigator.clipboard.writeText(text).then(function () {
|
|
592
610
|
console.log("[myclaw-send] 📋 已复制到剪贴板:", text.substring(0, 50) + (text.length > 50 ? "..." : ""));
|
|
593
611
|
}).catch(function () {
|
|
594
|
-
// fallback: 老方法
|
|
595
612
|
fallbackCopy(text);
|
|
596
613
|
});
|
|
597
614
|
} catch (ex) {
|
|
@@ -599,9 +616,8 @@
|
|
|
599
616
|
}
|
|
600
617
|
|
|
601
618
|
// 3) 让原生 click 继续走(发送消息)
|
|
602
|
-
// 不 preventDefault,不 stopPropagation
|
|
603
619
|
|
|
604
|
-
// 4) 延迟清空 textarea
|
|
620
|
+
// 4) 延迟清空 textarea
|
|
605
621
|
setTimeout(function () {
|
|
606
622
|
setTextareaValue("");
|
|
607
623
|
committedText = "";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: yiran-skill-media
|
|
3
|
-
description: 统一多媒体生成技能。支持图片、音乐、文生视频和图生视频,按资源类型自动路由到最优 provider
|
|
3
|
+
description: 统一多媒体生成技能。支持图片、音乐、文生视频和图生视频,按资源类型自动路由到最优 provider,支持多级降级。资源生成规范:所有生成的资源必须存放在当前工作目录下,调用时通过 --output-dir 传入当前工作目录的绝对路径,通过 --name 传入资源的中文名称。
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# 统一多媒体生成
|
|
@@ -22,7 +22,7 @@ description: 统一多媒体生成技能。支持图片、音乐、文生视频
|
|
|
22
22
|
./music.sh --output-dir "$(pwd)" --name 开场音乐 "epic opening" [--instrumental]
|
|
23
23
|
|
|
24
24
|
# 文生视频 (Text-to-Video)
|
|
25
|
-
./video.sh --output-dir "$(pwd)" --name 日落延时 "a sunset timelapse" [--duration 6] [--resolution 768P]
|
|
25
|
+
./video.sh --output-dir "$(pwd)" --name 日落延时 "a sunset timelapse" [--duration 6] [--resolution 768P] [--aspect-ratio 16:9]
|
|
26
26
|
|
|
27
27
|
# 图生视频 (Image-to-Video)
|
|
28
28
|
./i2v.sh --output-dir "$(pwd)" --name 猫咪跑步 --first-frame-image "https://example.com/cat.jpg" "cat running toward camera"
|
|
@@ -34,17 +34,17 @@ description: 统一多媒体生成技能。支持图片、音乐、文生视频
|
|
|
34
34
|
|
|
35
35
|
| 参数 | 必填 | 说明 |
|
|
36
36
|
|------|------|------|
|
|
37
|
-
| `--output-dir` | 是 |
|
|
37
|
+
| `--output-dir` | 是 | 输出目录的绝对路径 |
|
|
38
38
|
| `--name` | 是 | 资源中文名称(如:日落风景、产品封面) |
|
|
39
39
|
| `prompt` | 是 | 图片描述 |
|
|
40
|
-
| `--aspect-ratio` | 否 | 比例,默认 1:1
|
|
40
|
+
| `--aspect-ratio` | 否 | 比例,默认 16:9。可选:1:1, 16:9, 9:16, 4:3, 3:4 等 |
|
|
41
41
|
|
|
42
42
|
### music.sh
|
|
43
43
|
|
|
44
44
|
| 参数 | 必填 | 说明 |
|
|
45
45
|
|------|------|------|
|
|
46
|
-
| `--output-dir` | 是 |
|
|
47
|
-
| `--name` | 是 |
|
|
46
|
+
| `--output-dir` | 是 | 输出目录的绝对路径 |
|
|
47
|
+
| `--name` | 是 | 资源中文名称 |
|
|
48
48
|
| `prompt` | 是 | 音乐风格/情绪描述 |
|
|
49
49
|
| `--lyrics` | 否 | 歌词文本 |
|
|
50
50
|
| `--instrumental` | 否 | 纯音乐模式 |
|
|
@@ -54,21 +54,22 @@ description: 统一多媒体生成技能。支持图片、音乐、文生视频
|
|
|
54
54
|
| 参数 | 必填 | 说明 |
|
|
55
55
|
|------|------|------|
|
|
56
56
|
| `--output-dir` | 是 | 输出目录的绝对路径 |
|
|
57
|
-
| `--name` | 是 |
|
|
57
|
+
| `--name` | 是 | 资源中文名称 |
|
|
58
58
|
| `prompt` | 是 | 视频内容描述(支持运镜指令如 `[推进]`、`[左摇]`) |
|
|
59
59
|
| `--duration` | 否 | 视频时长(秒),默认 6,可选 6 或 10 |
|
|
60
|
-
| `--resolution` | 否 |
|
|
60
|
+
| `--resolution` | 否 | MiniMax 分辨率:768P/1080P(默认 768P) |
|
|
61
|
+
| `--aspect-ratio` | 否 | 即梦降级时使用:16:9/9:16/4:3/1:1(默认 16:9) |
|
|
61
62
|
|
|
62
63
|
### i2v.sh — 图生视频
|
|
63
64
|
|
|
64
65
|
| 参数 | 必填 | 说明 |
|
|
65
66
|
|------|------|------|
|
|
66
67
|
| `--output-dir` | 是 | 输出目录的绝对路径 |
|
|
67
|
-
| `--name` | 是 |
|
|
68
|
+
| `--name` | 是 | 资源中文名称 |
|
|
68
69
|
| `--first-frame-image` | 是 | 首帧图片的公网 URL(JPG/PNG/WebP,<20MB) |
|
|
69
70
|
| `prompt` | 否 | 基于首帧图像的动作/变化描述 |
|
|
70
|
-
| `--duration` | 否 |
|
|
71
|
-
| `--resolution` | 否 | 分辨率,默认 768P
|
|
71
|
+
| `--duration` | 否 | 视频时长(秒),默认 6 |
|
|
72
|
+
| `--resolution` | 否 | 分辨率,默认 768P |
|
|
72
73
|
|
|
73
74
|
**注意**:视频生成为异步任务,耗时较长(通常 1-5 分钟),脚本会自动轮询等待完成。
|
|
74
75
|
|
|
@@ -79,24 +80,27 @@ image.sh → 图片生成入口
|
|
|
79
80
|
music.sh → 音乐生成入口
|
|
80
81
|
video.sh → 文生视频入口 (Text-to-Video)
|
|
81
82
|
i2v.sh → 图生视频入口 (Image-to-Video)
|
|
82
|
-
generate.py →
|
|
83
|
-
config.json → provider
|
|
83
|
+
generate.py → 统一路由调度(优先级数组,依次尝试)
|
|
84
|
+
config.json → provider 配置中心(优先级列表)
|
|
84
85
|
providers/
|
|
85
86
|
vapi_image.py → VAPI 图片
|
|
86
87
|
minimax_image.py → MiniMax 图片
|
|
88
|
+
jimeng_image.py → 即梦 图片 4.0(异步)
|
|
87
89
|
minimax_music.py → MiniMax 音乐
|
|
88
90
|
minimax_video.py → MiniMax 视频(文生+图生)
|
|
91
|
+
jimeng_video.py → 即梦 视频 3.0(异步)
|
|
89
92
|
```
|
|
90
93
|
|
|
91
|
-
## Provider
|
|
94
|
+
## Provider 配置(优先级列表)
|
|
92
95
|
|
|
93
|
-
|
|
96
|
+
`config.json` 中每个资源类型是一个数组,按优先级从高到低排列。
|
|
97
|
+
第一个失败自动尝试下一个,直到成功或全部失败。
|
|
94
98
|
|
|
95
99
|
当前配置:
|
|
96
|
-
-
|
|
97
|
-
-
|
|
98
|
-
-
|
|
99
|
-
-
|
|
100
|
+
- **图片**:① 即梦 (jimeng_t2i_v40) → ② VAPI (nano-banana-2) → ③ MiniMax (image-01)
|
|
101
|
+
- **音乐**:① MiniMax (music-2.6)
|
|
102
|
+
- **文生视频**:① MiniMax (MiniMax-Hailuo-2.3) → ② 即梦 (jimeng_t2v_v30)
|
|
103
|
+
- **图生视频**:① MiniMax (MiniMax-Hailuo-2.3-Fast)
|
|
100
104
|
|
|
101
105
|
## 详细 API 参考
|
|
102
106
|
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"output_dir": "media",
|
|
3
3
|
"image": [
|
|
4
|
+
{
|
|
5
|
+
"provider": "jimeng_image",
|
|
6
|
+
"model": "jimeng_t2i_v40",
|
|
7
|
+
"access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
|
|
8
|
+
"secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
|
|
9
|
+
},
|
|
4
10
|
{
|
|
5
11
|
"provider": "vapi_image",
|
|
6
12
|
"model": "nano-banana-2",
|
|
@@ -12,12 +18,6 @@
|
|
|
12
18
|
"model": "image-01",
|
|
13
19
|
"base_url": "https://api.minimaxi.com/v1",
|
|
14
20
|
"api_key": "sk-cp-DC5lWd2Stt9CBFzLIT2awP4K-ZEn5AkYwjl3Cdj-mIBmgjxod518F2LaVF2L9c35Wv5-Eox0F1ctJD5vXtB9p3OmxoWLd9ge9zIUIMrCVuqBYdL_s6kb8Qs"
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"provider": "jimeng_image",
|
|
18
|
-
"model": "jimeng_t2i_v40",
|
|
19
|
-
"access_key": "AKLTYjZkY2FiZmZkYWU5NDkxNmEwZjNlYTRjNmRlZmYwNDI",
|
|
20
|
-
"secret_key": "TjJGbU5HVTBZek14TnpFeE5HWTVOVGhsTURRNE9XRXhNR1JoTm1FeVlqaw=="
|
|
21
21
|
}
|
|
22
22
|
],
|
|
23
23
|
"music": [
|
|
@@ -1,53 +1,71 @@
|
|
|
1
|
-
"""即梦(Jimeng) image adapter — Volcengine SDK
|
|
2
|
-
|
|
1
|
+
"""即梦(Jimeng) image adapter — Volcengine SDK, async workflow.
|
|
2
|
+
Uses CVSync2AsyncSubmitTask → CVSync2AsyncGetResult polling.
|
|
3
|
+
Supports text-to-image via jimeng_t2i_v40."""
|
|
3
4
|
import json
|
|
4
5
|
import os
|
|
5
6
|
import sys
|
|
7
|
+
import time
|
|
6
8
|
|
|
9
|
+
import requests
|
|
7
10
|
from volcengine.visual.VisualService import VisualService
|
|
8
11
|
|
|
9
12
|
|
|
10
13
|
class JimengImageAdapter:
|
|
14
|
+
POLL_INTERVAL = 5 # 图片生成比视频快,5 秒轮询
|
|
15
|
+
MAX_WAIT = 120 # 最长等 2 分钟
|
|
16
|
+
|
|
17
|
+
# 比例 → 推荐的 2K 分辨率(官方文档中的推荐值)
|
|
18
|
+
RATIO_MAP = {
|
|
19
|
+
"1:1": (2048, 2048),
|
|
20
|
+
"4:3": (2304, 1728),
|
|
21
|
+
"3:4": (1728, 2304),
|
|
22
|
+
"3:2": (2496, 1664),
|
|
23
|
+
"2:3": (1664, 2496),
|
|
24
|
+
"16:9": (2560, 1440),
|
|
25
|
+
"9:16": (1440, 2560),
|
|
26
|
+
"21:9": (3024, 1296),
|
|
27
|
+
"9:21": (1296, 3024),
|
|
28
|
+
}
|
|
29
|
+
|
|
11
30
|
def generate(self, prompt, config, **kwargs):
|
|
12
31
|
access_key = config["access_key"]
|
|
13
32
|
secret_key = config["secret_key"]
|
|
14
33
|
req_key = config.get("model", "jimeng_t2i_v40")
|
|
15
34
|
out_dir = kwargs["out_dir"]
|
|
16
|
-
|
|
17
35
|
aspect_ratio = kwargs.get("aspect_ratio", "1:1")
|
|
18
36
|
|
|
19
|
-
# 将常见的比例映射为合适的宽高组合 (即梦支持指定 width 和 height)
|
|
20
|
-
# 根据比例简单给一些通用分辨率
|
|
21
|
-
width, height = 2048, 2048
|
|
22
|
-
if aspect_ratio == "16:9":
|
|
23
|
-
width, height = 2560, 1440
|
|
24
|
-
elif aspect_ratio == "9:16":
|
|
25
|
-
width, height = 1440, 2560
|
|
26
|
-
elif aspect_ratio == "4:3":
|
|
27
|
-
width, height = 2304, 1728
|
|
28
|
-
elif aspect_ratio == "3:4":
|
|
29
|
-
width, height = 1728, 2304
|
|
30
|
-
|
|
31
|
-
# ═══ 提交任务 ═══
|
|
32
37
|
vs = VisualService()
|
|
33
38
|
vs.set_ak(access_key)
|
|
34
39
|
vs.set_sk(secret_key)
|
|
35
40
|
|
|
41
|
+
# ═══ Step 1: 提交任务 ═══
|
|
36
42
|
body = {
|
|
37
43
|
"req_key": req_key,
|
|
38
44
|
"prompt": prompt,
|
|
39
|
-
"
|
|
40
|
-
"height": height,
|
|
45
|
+
"force_single": True, # 强制输出单图,确保智能体调用行为可预测
|
|
41
46
|
}
|
|
42
47
|
|
|
43
|
-
|
|
48
|
+
# 宽高映射
|
|
49
|
+
if aspect_ratio in self.RATIO_MAP:
|
|
50
|
+
w, h = self.RATIO_MAP[aspect_ratio]
|
|
51
|
+
body["width"] = w
|
|
52
|
+
body["height"] = h
|
|
53
|
+
else:
|
|
54
|
+
# 不传 width/height,让模型根据 prompt 自动判断
|
|
55
|
+
body["size"] = 2048 * 2048 # 2K 默认面积
|
|
56
|
+
|
|
57
|
+
print(f"[jimeng_image] 提交文生图任务 req_key={req_key} ratio={aspect_ratio}...", file=sys.stderr)
|
|
44
58
|
try:
|
|
45
|
-
data = vs.
|
|
59
|
+
data = vs.cv_sync2async_submit_task(body)
|
|
46
60
|
except Exception as e:
|
|
47
61
|
if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
|
|
48
62
|
try:
|
|
49
63
|
err_json = json.loads(e.args[0].decode('utf-8'))
|
|
50
|
-
|
|
64
|
+
code = err_json.get("code", 0)
|
|
65
|
+
msg = err_json.get("message", "")
|
|
66
|
+
raise RuntimeError(f"即梦 API 错误: {msg} (code={code})")
|
|
67
|
+
except RuntimeError:
|
|
68
|
+
raise
|
|
51
69
|
except Exception:
|
|
52
70
|
pass
|
|
53
71
|
raise RuntimeError(f"即梦提交失败: {str(e)}")
|
|
@@ -55,21 +73,86 @@ class JimengImageAdapter:
|
|
|
55
73
|
if data.get("code") != 10000:
|
|
56
74
|
raise RuntimeError(f"即梦 API 错误: {data.get('message')} (code={data.get('code')})")
|
|
57
75
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
76
|
+
task_id = data["data"]["task_id"]
|
|
77
|
+
print(f"[jimeng_image] task_id={task_id}, 开始轮询...", file=sys.stderr)
|
|
78
|
+
|
|
79
|
+
# ═══ Step 2: 轮询状态 ═══
|
|
80
|
+
image_urls = None
|
|
81
|
+
binary_data = None
|
|
82
|
+
elapsed = 0
|
|
83
|
+
while elapsed < self.MAX_WAIT:
|
|
84
|
+
time.sleep(self.POLL_INTERVAL)
|
|
85
|
+
elapsed += self.POLL_INTERVAL
|
|
86
|
+
|
|
87
|
+
query_body = {
|
|
88
|
+
"req_key": req_key,
|
|
89
|
+
"task_id": task_id,
|
|
90
|
+
"req_json": json.dumps({"return_url": True}),
|
|
91
|
+
}
|
|
92
|
+
try:
|
|
93
|
+
qdata = vs.cv_sync2async_get_result(query_body)
|
|
94
|
+
except Exception as e:
|
|
95
|
+
if hasattr(e, 'args') and len(e.args) > 0 and isinstance(e.args[0], bytes):
|
|
96
|
+
try:
|
|
97
|
+
err_json = json.loads(e.args[0].decode('utf-8'))
|
|
98
|
+
code = err_json.get("code", 0)
|
|
99
|
+
if code in (50429, 50430, 50500, 50501):
|
|
100
|
+
print(f"[jimeng_image] 轮询 {elapsed}s: 可重试错误 code={code}", file=sys.stderr)
|
|
101
|
+
continue
|
|
102
|
+
raise RuntimeError(f"即梦查询失败: {err_json.get('message')} (code={code})")
|
|
103
|
+
except RuntimeError:
|
|
104
|
+
raise
|
|
105
|
+
except Exception:
|
|
106
|
+
pass
|
|
107
|
+
raise RuntimeError(f"即梦查询异常: {str(e)}")
|
|
61
108
|
|
|
62
|
-
|
|
109
|
+
if qdata.get("code") != 10000:
|
|
110
|
+
print(f"[jimeng_image] 轮询 {elapsed}s: code={qdata.get('code')} msg={qdata.get('message')}", file=sys.stderr)
|
|
111
|
+
if qdata.get("code") in (50429, 50430, 50500, 50501):
|
|
112
|
+
continue
|
|
113
|
+
raise RuntimeError(f"即梦状态获取失败: {qdata.get('message')} (code={qdata.get('code')})")
|
|
63
114
|
|
|
64
|
-
|
|
115
|
+
status = qdata.get("data", {}).get("status", "")
|
|
116
|
+
print(f"[jimeng_image] 轮询 {elapsed}s: status={status}", file=sys.stderr)
|
|
117
|
+
|
|
118
|
+
if status == "done":
|
|
119
|
+
image_urls = qdata["data"].get("image_urls", [])
|
|
120
|
+
binary_data = qdata["data"].get("binary_data_base64", [])
|
|
121
|
+
break
|
|
122
|
+
elif status in ("in_queue", "generating"):
|
|
123
|
+
continue
|
|
124
|
+
elif status in ("not_found", "expired"):
|
|
125
|
+
raise RuntimeError(f"即梦任务异常: status={status}")
|
|
126
|
+
|
|
127
|
+
if not image_urls and not binary_data:
|
|
128
|
+
raise RuntimeError(f"即梦图片生成超时 ({self.MAX_WAIT}s), task_id={task_id}")
|
|
129
|
+
|
|
130
|
+
print(f"[jimeng_image] 生成完成, 下载中...", file=sys.stderr)
|
|
131
|
+
|
|
132
|
+
# ═══ Step 3: 下载图片 ═══
|
|
133
|
+
import base64
|
|
65
134
|
saved = []
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
135
|
+
|
|
136
|
+
if image_urls:
|
|
137
|
+
for i, url in enumerate(image_urls):
|
|
138
|
+
fname = kwargs.get("output_path") or os.path.join(out_dir, f"image_{i}.png")
|
|
139
|
+
r = requests.get(url, timeout=60)
|
|
140
|
+
r.raise_for_status()
|
|
141
|
+
with open(fname, "wb") as f:
|
|
142
|
+
f.write(r.content)
|
|
143
|
+
saved.append(fname)
|
|
144
|
+
print(f"[jimeng_image] 已保存: {fname}", file=sys.stderr)
|
|
145
|
+
elif binary_data:
|
|
146
|
+
for i, b64 in enumerate(binary_data):
|
|
147
|
+
if not b64:
|
|
148
|
+
continue
|
|
149
|
+
fname = kwargs.get("output_path") or os.path.join(out_dir, f"image_{i}.png")
|
|
150
|
+
with open(fname, "wb") as f:
|
|
151
|
+
f.write(base64.b64decode(b64))
|
|
152
|
+
saved.append(fname)
|
|
153
|
+
print(f"[jimeng_image] 已保存 (base64): {fname}", file=sys.stderr)
|
|
154
|
+
|
|
155
|
+
if not saved:
|
|
156
|
+
raise RuntimeError("即梦未返回任何图片数据")
|
|
74
157
|
|
|
75
158
|
return saved
|