ai-world-sdk 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -17
- package/dist/__tests__/example.test.js +66 -52
- package/dist/doubao-image-generation.d.ts +11 -3
- package/dist/doubao-image-generation.js +28 -8
- package/dist/gemini-image-generation.d.ts +1 -23
- package/dist/gemini-image-generation.js +4 -50
- package/dist/index.d.ts +2 -3
- package/dist/index.js +1 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -88,7 +88,7 @@ import { DoubaoImageGenerationClient, GeminiImageGenerationClient } from 'ai-wor
|
|
|
88
88
|
const doubaoClient = new DoubaoImageGenerationClient({});
|
|
89
89
|
const result = await doubaoClient.generate({
|
|
90
90
|
prompt: 'A beautiful sunset over the ocean',
|
|
91
|
-
size: '2K',
|
|
91
|
+
size: '2K', // 或使用 '2K', '4K' 等
|
|
92
92
|
quality: 'hd',
|
|
93
93
|
n: 1,
|
|
94
94
|
});
|
|
@@ -200,7 +200,7 @@ const client = new DoubaoImageGenerationClient({});
|
|
|
200
200
|
const result = await client.generate({
|
|
201
201
|
prompt: 'A beautiful landscape', // 必需
|
|
202
202
|
model: 'doubao-seedream-4-5-251128', // 可选,默认值
|
|
203
|
-
size: '2K',
|
|
203
|
+
size: '2K', // 可选: 像素值(2048x2048, 2560x1440等)或K值(1K, 2K, 4K)
|
|
204
204
|
quality: 'hd', // 可选: standard, hd
|
|
205
205
|
n: 1, // 可选: 1-10
|
|
206
206
|
response_format: 'url', // 可选: url, b64_json
|
|
@@ -248,8 +248,8 @@ const result3 = await aihubmixClient.generate({
|
|
|
248
248
|
response_modalities: ['IMAGE'],
|
|
249
249
|
});
|
|
250
250
|
|
|
251
|
-
//
|
|
252
|
-
const editResult = await client.edit({
|
|
251
|
+
// 单图输入(文本 + 单张图片,实现编辑效果)
|
|
252
|
+
const editResult = await client.generate({
|
|
253
253
|
prompt: 'Add a small wizard hat on the cat\'s head',
|
|
254
254
|
image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片数据或 data URL
|
|
255
255
|
model: 'gemini-2.5-flash-image',
|
|
@@ -283,6 +283,7 @@ const secondResponse = await client.chat({
|
|
|
283
283
|
|------|------|------|--------|
|
|
284
284
|
| `prompt` | `string` | 图像生成提示词(必需) | - |
|
|
285
285
|
| `model` | `string` | 模型名称 | `gemini-2.0-flash-exp-image-generation` |
|
|
286
|
+
| `image` | `string \| string[]` | 输入图片(base64 或 data URL)。可以是单个图片或图片数组(多图输入) | - |
|
|
286
287
|
| `aspect_ratio` | `string` | 宽高比 | - |
|
|
287
288
|
| `image_size` | `string` | 图片大小(仅 gemini-3-pro-image-preview) | - |
|
|
288
289
|
| `number_of_images` | `number` | 生成图片数量 | `1` |
|
|
@@ -290,6 +291,10 @@ const secondResponse = await client.chat({
|
|
|
290
291
|
| `temperature` | `number` | 温度参数 | `0.7` |
|
|
291
292
|
| `max_output_tokens` | `number` | 最大输出 token 数 | `1000` |
|
|
292
293
|
|
|
294
|
+
**多图输入限制:**
|
|
295
|
+
- `gemini-2.5-flash-image`: 最多支持 3 张输入图片
|
|
296
|
+
- `gemini-3-pro-image-preview`: 最多支持 14 张输入图片(其中最多 5 张高保真图片)
|
|
297
|
+
|
|
293
298
|
**支持的宽高比:** `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
|
|
294
299
|
|
|
295
300
|
**Provider 说明:**
|
|
@@ -304,11 +309,7 @@ const secondResponse = await client.chat({
|
|
|
304
309
|
| `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
|
|
305
310
|
| `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
|
|
306
311
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
- **`edit()`** - 图像编辑:使用文本提示编辑图片,支持添加、移除或修改元素
|
|
310
|
-
- 需要提供输入图片(base64 编码或 data URL)
|
|
311
|
-
- 适用于单次编辑操作
|
|
312
|
+
**多轮图片修改:**
|
|
312
313
|
|
|
313
314
|
- **`chat()`** - 多轮图片修改:通过对话迭代式优化图片
|
|
314
315
|
- 首次调用创建新的聊天会话,返回 `chat_id`
|
|
@@ -556,6 +557,27 @@ const response = await modelWithTools.invoke([
|
|
|
556
557
|
|
|
557
558
|
#### 豆包图像生成
|
|
558
559
|
|
|
560
|
+
豆包 Seedream 支持多种图像生成模式:
|
|
561
|
+
|
|
562
|
+
**支持的尺寸选项:**
|
|
563
|
+
|
|
564
|
+
- **像素值(宽x高)**:
|
|
565
|
+
- `2048x2048` (1:1 正方形)
|
|
566
|
+
- `2304x1728` (4:3 横屏)
|
|
567
|
+
- `1728x2304` (3:4 竖屏)
|
|
568
|
+
- `2560x1440` (16:9 横屏)
|
|
569
|
+
- `1440x2560` (9:16 竖屏)
|
|
570
|
+
- `2496x1664` (3:2 横屏)
|
|
571
|
+
- `1664x2496` (2:3 竖屏)
|
|
572
|
+
- `3024x1296` (21:9 超宽屏)
|
|
573
|
+
|
|
574
|
+
- **K 值(根据模型版本)**:
|
|
575
|
+
- `1K` - 仅 4.0 版本支持
|
|
576
|
+
- `2K` - 4.0 和 4.5 版本支持
|
|
577
|
+
- `4K` - 4.0 和 4.5 版本支持
|
|
578
|
+
|
|
579
|
+
**1. 文生图(文本生成图像)**
|
|
580
|
+
|
|
559
581
|
```typescript
|
|
560
582
|
import { DoubaoImageGenerationClient } from 'ai-world-sdk';
|
|
561
583
|
|
|
@@ -564,17 +586,58 @@ const client = new DoubaoImageGenerationClient({});
|
|
|
564
586
|
// 生成单张图像
|
|
565
587
|
const result = await client.generate({
|
|
566
588
|
prompt: 'A futuristic city skyline at sunset',
|
|
567
|
-
size: '2K',
|
|
589
|
+
size: '2048x2048', // 1:1 正方形,或使用 '2K', '4K' 等
|
|
568
590
|
quality: 'hd',
|
|
569
591
|
});
|
|
570
592
|
|
|
571
593
|
console.log('图像 URL:', result.data[0]?.url);
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
**2. 图文生图(单图输入单图输出)**
|
|
572
597
|
|
|
573
|
-
|
|
598
|
+
基于一张参考图片生成新图像:
|
|
599
|
+
|
|
600
|
+
```typescript
|
|
601
|
+
// 使用单张图片作为输入
|
|
602
|
+
const result = await client.generate({
|
|
603
|
+
prompt: '将这张图片转换为水彩画风格',
|
|
604
|
+
image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片或 data URL
|
|
605
|
+
size: '2560x1440', // 16:9 横屏
|
|
606
|
+
quality: 'hd',
|
|
607
|
+
});
|
|
608
|
+
|
|
609
|
+
console.log('生成的图像 URL:', result.data[0]?.url);
|
|
610
|
+
```
|
|
611
|
+
|
|
612
|
+
**3. 多图融合(多图输入单图输出)**
|
|
613
|
+
|
|
614
|
+
融合多张参考图片生成新图像:
|
|
615
|
+
|
|
616
|
+
```typescript
|
|
617
|
+
// 使用多张图片作为输入
|
|
618
|
+
const result = await client.generate({
|
|
619
|
+
prompt: '将图1的服装换为图2的服装风格',
|
|
620
|
+
image: [
|
|
621
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第一张图片
|
|
622
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第二张图片
|
|
623
|
+
],
|
|
624
|
+
size: '2048x2048', // 1:1 正方形
|
|
625
|
+
quality: 'hd',
|
|
626
|
+
});
|
|
627
|
+
|
|
628
|
+
console.log('融合后的图像 URL:', result.data[0]?.url);
|
|
629
|
+
```
|
|
630
|
+
|
|
631
|
+
**4. 组图输出(多图输出)**
|
|
632
|
+
|
|
633
|
+
一次生成多张不同风格的图像:
|
|
634
|
+
|
|
635
|
+
```typescript
|
|
636
|
+
// 生成多张图像(组图输出)
|
|
574
637
|
const multiResult = await client.generate({
|
|
575
638
|
prompt: 'A beautiful landscape',
|
|
576
|
-
n: 3,
|
|
577
|
-
size: '
|
|
639
|
+
n: 3, // 生成 3 张图像
|
|
640
|
+
size: '2560x1440', // 16:9 横屏
|
|
578
641
|
});
|
|
579
642
|
|
|
580
643
|
multiResult.data.forEach((image, index) => {
|
|
@@ -582,6 +645,19 @@ multiResult.data.forEach((image, index) => {
|
|
|
582
645
|
});
|
|
583
646
|
```
|
|
584
647
|
|
|
648
|
+
**组合使用示例**
|
|
649
|
+
|
|
650
|
+
```typescript
|
|
651
|
+
// 图文生图 + 组图输出:基于一张图片生成多张不同风格的图像
|
|
652
|
+
const result = await client.generate({
|
|
653
|
+
prompt: '生成不同风格的艺术作品',
|
|
654
|
+
image: 'data:image/png;base64,iVBORw0KGgo...',
|
|
655
|
+
n: 4, // 生成 4 张不同风格的图像
|
|
656
|
+
size: '2048x2048', // 1:1 正方形
|
|
657
|
+
quality: 'hd',
|
|
658
|
+
});
|
|
659
|
+
```
|
|
660
|
+
|
|
585
661
|
#### Gemini 图像生成
|
|
586
662
|
|
|
587
663
|
```typescript
|
|
@@ -589,7 +665,7 @@ import { GeminiImageGenerationClient } from 'ai-world-sdk';
|
|
|
589
665
|
|
|
590
666
|
const client = new GeminiImageGenerationClient({});
|
|
591
667
|
|
|
592
|
-
//
|
|
668
|
+
// 1. 文生图(仅文本提示)
|
|
593
669
|
const result = await client.generate({
|
|
594
670
|
prompt: 'A beautiful sunset over the ocean',
|
|
595
671
|
model: 'gemini-2.5-flash-image',
|
|
@@ -602,8 +678,32 @@ if (result.text) {
|
|
|
602
678
|
console.log('图像描述:', result.text);
|
|
603
679
|
}
|
|
604
680
|
|
|
605
|
-
//
|
|
681
|
+
// 2. 单图输入(文本 + 单张图片)
|
|
606
682
|
const result2 = await client.generate({
|
|
683
|
+
prompt: 'Create a picture of my cat eating a nano-banana in a fancy restaurant',
|
|
684
|
+
image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片或 data URL
|
|
685
|
+
model: 'gemini-2.5-flash-image',
|
|
686
|
+
aspect_ratio: '16:9',
|
|
687
|
+
});
|
|
688
|
+
|
|
689
|
+
// 3. 多图输入(文本 + 多张图片)
|
|
690
|
+
// gemini-2.5-flash-image 最多支持 3 张图片
|
|
691
|
+
// gemini-3-pro-image-preview 最多支持 14 张图片
|
|
692
|
+
const result3 = await client.generate({
|
|
693
|
+
prompt: 'An office group photo of these people, they are making funny faces.',
|
|
694
|
+
image: [
|
|
695
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第一张图片
|
|
696
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第二张图片
|
|
697
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第三张图片
|
|
698
|
+
],
|
|
699
|
+
model: 'gemini-3-pro-image-preview',
|
|
700
|
+
aspect_ratio: '5:4',
|
|
701
|
+
image_size: '2K',
|
|
702
|
+
response_modalities: ['IMAGE'],
|
|
703
|
+
});
|
|
704
|
+
|
|
705
|
+
// 4. 使用 Gemini 3 Pro(专业模型,支持高分辨率)
|
|
706
|
+
const result4 = await client.generate({
|
|
607
707
|
prompt: 'A futuristic city at night',
|
|
608
708
|
model: 'gemini-3-pro-image-preview',
|
|
609
709
|
aspect_ratio: '21:9', // 超宽屏
|
|
@@ -611,7 +711,7 @@ const result2 = await client.generate({
|
|
|
611
711
|
response_modalities: ['IMAGE'], // 仅返回图片
|
|
612
712
|
});
|
|
613
713
|
|
|
614
|
-
console.log('4K 图像:',
|
|
714
|
+
console.log('4K 图像:', result4.data[0]?.b64_json ? 'Base64 编码' : result4.data[0]?.url);
|
|
615
715
|
```
|
|
616
716
|
|
|
617
717
|
### 视频生成工作流
|
|
@@ -662,7 +762,9 @@ if (result.status === 'succeeded') {
|
|
|
662
762
|
|
|
663
763
|
### 图像生成模型
|
|
664
764
|
|
|
665
|
-
- **豆包 Seedream**:
|
|
765
|
+
- **豆包 Seedream**:
|
|
766
|
+
- `doubao-seedream-4-5-251128` (4.5版本,默认) - 支持 2K、4K 和像素值尺寸
|
|
767
|
+
- `doubao-seedream-4-0` (4.0版本) - 支持 1K、2K、4K 和像素值尺寸
|
|
666
768
|
- **Google Gemini**:
|
|
667
769
|
- `gemini-2.5-flash-image` (Nano Banana) - **推荐**,快速、高效,1024px 分辨率,支持所有宽高比
|
|
668
770
|
- `gemini-3-pro-image-preview` (Nano Banana Pro) - 专业级,支持 1K/2K/4K 分辨率,支持 Google 搜索、思考模式,最多 14 张参考图片
|
|
package/dist/__tests__/example.test.js
CHANGED
|
@@ -267,7 +267,7 @@ describe("Langchain SDK Tests", () => {
|
|
|
267
267
|
expect(result.data).toBeDefined();
|
|
268
268
|
expect(Array.isArray(result.data)).toBe(true);
|
|
269
269
|
expect(result.data.length).toBe(2);
|
|
270
|
-
console.log("✅
|
|
270
|
+
console.log("✅ 多图像生成测试成功(组图输出)");
|
|
271
271
|
console.log(`生成图像数量: ${result.data.length}`);
|
|
272
272
|
result.data.forEach((item, index) => {
|
|
273
273
|
console.log(`图像 ${index + 1}:`, item.url || "Base64 编码");
|
|
@@ -275,7 +275,10 @@ describe("Langchain SDK Tests", () => {
|
|
|
275
275
|
}, 120000);
|
|
276
276
|
test("DoubaoImageGenerationClient - 不同尺寸测试", async () => {
|
|
277
277
|
const imageClient = new index_1.DoubaoImageGenerationClient({});
|
|
278
|
-
const sizes = [
|
|
278
|
+
const sizes = [
|
|
279
|
+
"2K",
|
|
280
|
+
"4K",
|
|
281
|
+
];
|
|
279
282
|
for (const size of sizes) {
|
|
280
283
|
const result = await imageClient.generate({
|
|
281
284
|
prompt: "A futuristic city skyline at sunset",
|
|
@@ -554,7 +557,7 @@ describe("Langchain SDK Tests", () => {
|
|
|
554
557
|
expect(Array.isArray(message.content)).toBe(true);
|
|
555
558
|
console.log("✅ HumanMessage with array content 测试成功");
|
|
556
559
|
});
|
|
557
|
-
test("GeminiImageGenerationClient -
|
|
560
|
+
test("GeminiImageGenerationClient - 基础图像生成(文生图)", async () => {
|
|
558
561
|
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
559
562
|
const result = await imageClient.generate({
|
|
560
563
|
prompt: 'A beautiful sunset over the ocean',
|
|
@@ -574,13 +577,72 @@ describe("Langchain SDK Tests", () => {
|
|
|
574
577
|
expect(item).toBeDefined();
|
|
575
578
|
expect(item.url || item.b64_json).toBeDefined();
|
|
576
579
|
});
|
|
577
|
-
console.log("✅ GeminiImageGenerationClient
|
|
580
|
+
console.log("✅ GeminiImageGenerationClient 基础测试成功(文生图)");
|
|
578
581
|
console.log(`生成图像数量: ${result.data.length}`);
|
|
579
582
|
console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
|
|
580
583
|
if (result.text) {
|
|
581
584
|
console.log("图像描述:", result.text);
|
|
582
585
|
}
|
|
583
586
|
}, 120000);
|
|
587
|
+
test("GeminiImageGenerationClient - 单图输入图像生成", async () => {
|
|
588
|
+
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
589
|
+
// 先生成一张基础图片
|
|
590
|
+
const baseResult = await imageClient.generate({
|
|
591
|
+
prompt: 'A photorealistic picture of a fluffy ginger cat sitting on a wooden floor',
|
|
592
|
+
model: 'gemini-2.5-flash-image',
|
|
593
|
+
aspect_ratio: '1:1',
|
|
594
|
+
response_modalities: ['IMAGE'],
|
|
595
|
+
});
|
|
596
|
+
expect(baseResult.data.length).toBeGreaterThan(0);
|
|
597
|
+
const baseImage = baseResult.data[0]?.b64_json || baseResult.data[0]?.url;
|
|
598
|
+
expect(baseImage).toBeDefined();
|
|
599
|
+
// 使用生成的图片作为输入
|
|
600
|
+
const result = await imageClient.generate({
|
|
601
|
+
prompt: 'Using the provided image of my cat, please add a small, knitted wizard hat on its head',
|
|
602
|
+
image: baseImage,
|
|
603
|
+
model: 'gemini-2.5-flash-image',
|
|
604
|
+
aspect_ratio: '1:1',
|
|
605
|
+
response_modalities: ['IMAGE'],
|
|
606
|
+
});
|
|
607
|
+
expect(result).toBeDefined();
|
|
608
|
+
expect(result.data).toBeDefined();
|
|
609
|
+
expect(result.data.length).toBeGreaterThan(0);
|
|
610
|
+
console.log("✅ GeminiImageGenerationClient 单图输入测试成功");
|
|
611
|
+
}, 120000);
|
|
612
|
+
test("GeminiImageGenerationClient - 多图输入图像生成", async () => {
|
|
613
|
+
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
614
|
+
// 先生成两张基础图片
|
|
615
|
+
const baseResult1 = await imageClient.generate({
|
|
616
|
+
prompt: 'A professional headshot of a woman with brown hair and blue eyes',
|
|
617
|
+
model: 'gemini-2.5-flash-image',
|
|
618
|
+
aspect_ratio: '1:1',
|
|
619
|
+
response_modalities: ['IMAGE'],
|
|
620
|
+
});
|
|
621
|
+
const baseResult2 = await imageClient.generate({
|
|
622
|
+
prompt: 'A simple, modern logo with the letters G and A in a white circle',
|
|
623
|
+
model: 'gemini-2.5-flash-image',
|
|
624
|
+
aspect_ratio: '1:1',
|
|
625
|
+
response_modalities: ['IMAGE'],
|
|
626
|
+
});
|
|
627
|
+
expect(baseResult1.data.length).toBeGreaterThan(0);
|
|
628
|
+
expect(baseResult2.data.length).toBeGreaterThan(0);
|
|
629
|
+
const image1 = baseResult1.data[0]?.b64_json || baseResult1.data[0]?.url;
|
|
630
|
+
const image2 = baseResult2.data[0]?.b64_json || baseResult2.data[0]?.url;
|
|
631
|
+
expect(image1).toBeDefined();
|
|
632
|
+
expect(image2).toBeDefined();
|
|
633
|
+
// 使用多张图片作为输入(gemini-2.5-flash-image 最多支持 3 张)
|
|
634
|
+
const result = await imageClient.generate({
|
|
635
|
+
prompt: 'Take the first image of the woman and add the logo from the second image onto her black t-shirt',
|
|
636
|
+
image: [image1, image2],
|
|
637
|
+
model: 'gemini-2.5-flash-image',
|
|
638
|
+
aspect_ratio: '1:1',
|
|
639
|
+
response_modalities: ['IMAGE'],
|
|
640
|
+
});
|
|
641
|
+
expect(result).toBeDefined();
|
|
642
|
+
expect(result.data).toBeDefined();
|
|
643
|
+
expect(result.data.length).toBeGreaterThan(0);
|
|
644
|
+
console.log("✅ GeminiImageGenerationClient 多图输入测试成功");
|
|
645
|
+
}, 180000);
|
|
584
646
|
test("GeminiImageGenerationClient - 使用 aihubmix provider", async () => {
|
|
585
647
|
const imageClient = new index_1.GeminiImageGenerationClient({
|
|
586
648
|
provider: "aihubmix",
|
|
@@ -610,54 +672,6 @@ describe("Langchain SDK Tests", () => {
|
|
|
610
672
|
console.log("图像描述:", result.text);
|
|
611
673
|
}
|
|
612
674
|
}, 120000);
|
|
613
|
-
test("GeminiImageGenerationClient - 图像编辑", async () => {
|
|
614
|
-
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
615
|
-
// 首先生成一张基础图片
|
|
616
|
-
const generateResult = await imageClient.generate({
|
|
617
|
-
prompt: 'A simple red apple on a white background',
|
|
618
|
-
model: 'gemini-2.5-flash-image',
|
|
619
|
-
aspect_ratio: '1:1',
|
|
620
|
-
response_modalities: ['IMAGE'],
|
|
621
|
-
});
|
|
622
|
-
expect(generateResult).toBeDefined();
|
|
623
|
-
expect(generateResult.data).toBeDefined();
|
|
624
|
-
expect(generateResult.data.length).toBeGreaterThan(0);
|
|
625
|
-
// 获取第一张图片的 base64 数据
|
|
626
|
-
const firstImage = generateResult.data[0];
|
|
627
|
-
expect(firstImage).toBeDefined();
|
|
628
|
-
// 提取 base64 数据(优先使用 b64_json,否则从 text 中提取)
|
|
629
|
-
let base64Image;
|
|
630
|
-
if (firstImage.b64_json) {
|
|
631
|
-
base64Image = firstImage.b64_json;
|
|
632
|
-
}
|
|
633
|
-
else if (firstImage.text && firstImage.text.startsWith('data:')) {
|
|
634
|
-
// 从 data URL 中提取 base64 部分
|
|
635
|
-
base64Image = firstImage.text.split(',')[1];
|
|
636
|
-
}
|
|
637
|
-
else {
|
|
638
|
-
throw new Error('无法获取图片的 base64 数据');
|
|
639
|
-
}
|
|
640
|
-
expect(base64Image).toBeDefined();
|
|
641
|
-
// 使用编辑功能添加元素
|
|
642
|
-
const editResult = await imageClient.edit({
|
|
643
|
-
prompt: 'Add a small green leaf on top of the apple',
|
|
644
|
-
image: base64Image, // 直接使用 base64 字符串
|
|
645
|
-
model: 'gemini-2.5-flash-image',
|
|
646
|
-
aspect_ratio: '1:1',
|
|
647
|
-
response_modalities: ['IMAGE'],
|
|
648
|
-
});
|
|
649
|
-
expect(editResult).toBeDefined();
|
|
650
|
-
expect(editResult.created).toBeDefined();
|
|
651
|
-
expect(editResult.data).toBeDefined();
|
|
652
|
-
expect(Array.isArray(editResult.data)).toBe(true);
|
|
653
|
-
expect(editResult.data.length).toBeGreaterThan(0);
|
|
654
|
-
editResult.data.forEach((item) => {
|
|
655
|
-
expect(item).toBeDefined();
|
|
656
|
-
expect(item.url || item.b64_json).toBeDefined();
|
|
657
|
-
});
|
|
658
|
-
console.log("✅ GeminiImageGenerationClient 图像编辑测试成功");
|
|
659
|
-
console.log(`编辑后图像数量: ${editResult.data.length}`);
|
|
660
|
-
}, 180000);
|
|
661
675
|
test("GeminiImageGenerationClient - 多轮图片修改", async () => {
|
|
662
676
|
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
663
677
|
// 第一轮:创建初始图片
|
|
package/dist/doubao-image-generation.d.ts
CHANGED
|
@@ -6,16 +6,24 @@ export interface DoubaoImageGenerationConfig {
|
|
|
6
6
|
baseUrl?: string;
|
|
7
7
|
headers?: Record<string, string>;
|
|
8
8
|
}
|
|
9
|
+
export type DoubaoImageSize = "2048x2048" | "2304x1728" | "1728x2304" | "2560x1440" | "1440x2560" | "2496x1664" | "1664x2496" | "3024x1296" | "1K" | "2K" | "4K";
|
|
9
10
|
export interface DoubaoImageGenerationRequest {
|
|
10
11
|
model?: string;
|
|
11
12
|
prompt: string;
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
background?: "transparent" | "opaque" | "auto";
|
|
14
|
+
moderation?: "low" | "auto";
|
|
14
15
|
n?: number;
|
|
15
|
-
|
|
16
|
+
output_compression?: number;
|
|
17
|
+
output_format?: "png" | "jpeg" | "webp";
|
|
18
|
+
partial_images?: number;
|
|
19
|
+
quality?: "standard" | "hd" | "low" | "medium" | "high" | "auto";
|
|
16
20
|
response_format?: "url" | "b64_json";
|
|
21
|
+
size?: DoubaoImageSize;
|
|
22
|
+
stream?: false;
|
|
17
23
|
style?: "vivid" | "natural";
|
|
24
|
+
image?: string | string[];
|
|
18
25
|
user?: string;
|
|
26
|
+
watermark?: boolean;
|
|
19
27
|
}
|
|
20
28
|
export interface DoubaoImageData {
|
|
21
29
|
url?: string;
|
|
package/dist/doubao-image-generation.js
CHANGED
|
@@ -27,25 +27,45 @@ class DoubaoImageGenerationClient {
|
|
|
27
27
|
const requestBody = {
|
|
28
28
|
model: request.model || "doubao-seedream-4-5-251128",
|
|
29
29
|
prompt: request.prompt,
|
|
30
|
-
size: request.size || "
|
|
30
|
+
size: request.size || "2048x2048",
|
|
31
31
|
n: request.n || 1,
|
|
32
32
|
};
|
|
33
|
-
//
|
|
34
|
-
if (request.
|
|
35
|
-
requestBody.
|
|
33
|
+
// 添加可选参数(按照 OpenAI SDK 参数顺序)
|
|
34
|
+
if (request.background !== undefined) {
|
|
35
|
+
requestBody.background = request.background;
|
|
36
36
|
}
|
|
37
|
-
if (request.quality) {
|
|
37
|
+
if (request.moderation !== undefined) {
|
|
38
|
+
requestBody.moderation = request.moderation;
|
|
39
|
+
}
|
|
40
|
+
if (request.output_compression !== undefined) {
|
|
41
|
+
requestBody.output_compression = request.output_compression;
|
|
42
|
+
}
|
|
43
|
+
if (request.output_format !== undefined) {
|
|
44
|
+
requestBody.output_format = request.output_format;
|
|
45
|
+
}
|
|
46
|
+
if (request.partial_images !== undefined) {
|
|
47
|
+
requestBody.partial_images = request.partial_images;
|
|
48
|
+
}
|
|
49
|
+
if (request.quality !== undefined) {
|
|
38
50
|
requestBody.quality = request.quality;
|
|
39
51
|
}
|
|
40
|
-
if (request.response_format) {
|
|
52
|
+
if (request.response_format !== undefined) {
|
|
41
53
|
requestBody.response_format = request.response_format;
|
|
42
54
|
}
|
|
43
|
-
if (request.style) {
|
|
55
|
+
if (request.stream !== undefined) {
|
|
56
|
+
requestBody.stream = request.stream;
|
|
57
|
+
}
|
|
58
|
+
if (request.style !== undefined) {
|
|
44
59
|
requestBody.style = request.style;
|
|
45
60
|
}
|
|
46
|
-
if (request.user) {
|
|
61
|
+
if (request.user !== undefined) {
|
|
47
62
|
requestBody.user = request.user;
|
|
48
63
|
}
|
|
64
|
+
// 添加图片输入参数(图文生图或多图融合)
|
|
65
|
+
if (request.image !== undefined) {
|
|
66
|
+
requestBody.image = request.image;
|
|
67
|
+
}
|
|
68
|
+
requestBody.watermark = request.watermark || false;
|
|
49
69
|
const url = `${config_1.sdkConfig.getServerUrl()}/api/doubao-image-proxy/generate`;
|
|
50
70
|
(0, log_1.logRequest)("POST", url, this.headers, requestBody);
|
|
51
71
|
const response = await fetch(url, {
|
|
package/dist/gemini-image-generation.d.ts
CHANGED
|
@@ -10,6 +10,7 @@ export interface GeminiImageGenerationConfig {
|
|
|
10
10
|
export interface GeminiImageGenerationRequest {
|
|
11
11
|
prompt: string;
|
|
12
12
|
model?: string;
|
|
13
|
+
image?: string | string[];
|
|
13
14
|
number_of_images?: number;
|
|
14
15
|
aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
|
|
15
16
|
image_size?: "1K" | "2K" | "4K";
|
|
@@ -28,16 +29,6 @@ export interface GeminiImageGenerationResponse {
|
|
|
28
29
|
data: GeminiImageData[];
|
|
29
30
|
text?: string;
|
|
30
31
|
}
|
|
31
|
-
export interface GeminiImageEditRequest {
|
|
32
|
-
prompt: string;
|
|
33
|
-
image: string;
|
|
34
|
-
model?: string;
|
|
35
|
-
provider?: "aihubmix" | "gemini";
|
|
36
|
-
aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
|
|
37
|
-
image_size?: "1K" | "2K" | "4K";
|
|
38
|
-
response_modalities?: ("TEXT" | "IMAGE")[];
|
|
39
|
-
user?: string;
|
|
40
|
-
}
|
|
41
32
|
export interface GeminiImageChatRequest {
|
|
42
33
|
message: string;
|
|
43
34
|
chat_id?: string;
|
|
@@ -69,19 +60,6 @@ export declare class GeminiImageGenerationClient {
|
|
|
69
60
|
* - number_of_images: 生成图片数量(1-4)
|
|
70
61
|
*/
|
|
71
62
|
generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
|
|
72
|
-
/**
|
|
73
|
-
* Edit images
|
|
74
|
-
* 编辑图像
|
|
75
|
-
*
|
|
76
|
-
* 使用文本提示编辑图片,支持添加、移除或修改元素
|
|
77
|
-
*
|
|
78
|
-
* 支持的参数:
|
|
79
|
-
* - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
|
|
80
|
-
* - aspect_ratio: 宽高比
|
|
81
|
-
* - image_size: 图片大小(仅 gemini-3-pro-image-preview)
|
|
82
|
-
* - response_modalities: 响应模态
|
|
83
|
-
*/
|
|
84
|
-
edit(request: GeminiImageEditRequest): Promise<GeminiImageGenerationResponse>;
|
|
85
63
|
/**
|
|
86
64
|
* Chat with images (multi-turn image editing)
|
|
87
65
|
* 图像多轮对话(用于多轮图片修改)
|
|
package/dist/gemini-image-generation.js
CHANGED
|
@@ -41,6 +41,10 @@ class GeminiImageGenerationClient {
|
|
|
41
41
|
model: request.model || "gemini-2.0-flash-exp-image-generation",
|
|
42
42
|
number_of_images: request.number_of_images || 1,
|
|
43
43
|
};
|
|
44
|
+
// 添加图片输入参数(单图或多图)
|
|
45
|
+
if (request.image) {
|
|
46
|
+
requestBody.image = request.image;
|
|
47
|
+
}
|
|
44
48
|
// 添加可选参数
|
|
45
49
|
if (request.aspect_ratio) {
|
|
46
50
|
requestBody.aspect_ratio = request.aspect_ratio;
|
|
@@ -76,56 +80,6 @@ class GeminiImageGenerationClient {
|
|
|
76
80
|
(0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
|
|
77
81
|
return data;
|
|
78
82
|
}
|
|
79
|
-
/**
|
|
80
|
-
* Edit images
|
|
81
|
-
* 编辑图像
|
|
82
|
-
*
|
|
83
|
-
* 使用文本提示编辑图片,支持添加、移除或修改元素
|
|
84
|
-
*
|
|
85
|
-
* 支持的参数:
|
|
86
|
-
* - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
|
|
87
|
-
* - aspect_ratio: 宽高比
|
|
88
|
-
* - image_size: 图片大小(仅 gemini-3-pro-image-preview)
|
|
89
|
-
* - response_modalities: 响应模态
|
|
90
|
-
*/
|
|
91
|
-
async edit(request) {
|
|
92
|
-
const requestBody = {
|
|
93
|
-
prompt: request.prompt,
|
|
94
|
-
image: request.image,
|
|
95
|
-
model: request.model || "gemini-2.5-flash-image",
|
|
96
|
-
};
|
|
97
|
-
// 添加可选参数
|
|
98
|
-
if (request.aspect_ratio) {
|
|
99
|
-
requestBody.aspect_ratio = request.aspect_ratio;
|
|
100
|
-
}
|
|
101
|
-
if (request.image_size) {
|
|
102
|
-
requestBody.image_size = request.image_size;
|
|
103
|
-
}
|
|
104
|
-
if (request.response_modalities) {
|
|
105
|
-
requestBody.response_modalities = request.response_modalities;
|
|
106
|
-
}
|
|
107
|
-
if (request.user) {
|
|
108
|
-
requestBody.user = request.user;
|
|
109
|
-
}
|
|
110
|
-
if (request.provider) {
|
|
111
|
-
requestBody.provider = request.provider;
|
|
112
|
-
}
|
|
113
|
-
const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/edit`;
|
|
114
|
-
(0, log_1.logRequest)("POST", url, this.headers, { ...requestBody, image: "[base64 data]" });
|
|
115
|
-
const response = await fetch(url, {
|
|
116
|
-
method: "POST",
|
|
117
|
-
headers: this.headers,
|
|
118
|
-
body: JSON.stringify(requestBody),
|
|
119
|
-
});
|
|
120
|
-
if (!response.ok) {
|
|
121
|
-
const errorText = await response.text();
|
|
122
|
-
(0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
|
|
123
|
-
throw new Error(`Gemini image edit API error: ${response.status} ${errorText}`);
|
|
124
|
-
}
|
|
125
|
-
const data = (await response.json());
|
|
126
|
-
(0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
|
|
127
|
-
return data;
|
|
128
|
-
}
|
|
129
83
|
/**
|
|
130
84
|
* Chat with images (multi-turn image editing)
|
|
131
85
|
* 图像多轮对话(用于多轮图片修改)
|
package/dist/index.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* @see https://github.com/langchain-ai/langchainjs
|
|
6
6
|
*/
|
|
7
7
|
import { BaseChatModel, BaseChatModelParams } from "./base";
|
|
8
|
-
import { DoubaoImageGenerationClient, type DoubaoImageGenerationConfig, type DoubaoImageGenerationRequest, type DoubaoImageGenerationResponse } from "./doubao-image-generation";
|
|
8
|
+
import { DoubaoImageGenerationClient, DoubaoImageSize, type DoubaoImageGenerationConfig, type DoubaoImageGenerationRequest, type DoubaoImageGenerationResponse } from "./doubao-image-generation";
|
|
9
9
|
import { GeminiImageGenerationClient, type GeminiImageGenerationConfig, type GeminiImageGenerationRequest, type GeminiImageGenerationResponse } from "./gemini-image-generation";
|
|
10
10
|
import { VideoGenerationClient, type VideoGenerationConfig, type VideoGenerationRequest, type ContentGenerationTaskID, type ContentGenerationTask } from "./video_generation";
|
|
11
11
|
import { sdkConfig } from "./config";
|
|
@@ -19,9 +19,8 @@ export interface LangchainClientConfig {
|
|
|
19
19
|
token?: string;
|
|
20
20
|
headers?: Record<string, string>;
|
|
21
21
|
}
|
|
22
|
-
export { DoubaoImageGenerationClient, type DoubaoImageGenerationConfig, type DoubaoImageGenerationRequest, type DoubaoImageGenerationResponse, };
|
|
22
|
+
export { DoubaoImageGenerationClient, type DoubaoImageGenerationConfig, type DoubaoImageGenerationRequest, type DoubaoImageGenerationResponse, type DoubaoImageSize, };
|
|
23
23
|
export { GeminiImageGenerationClient, type GeminiImageGenerationConfig, type GeminiImageGenerationRequest, type GeminiImageGenerationResponse, };
|
|
24
|
-
export { DoubaoImageGenerationClient as ImageGenerationClient, type DoubaoImageGenerationConfig as ImageGenerationConfig, type DoubaoImageGenerationRequest as ImageGenerationRequest, type DoubaoImageGenerationResponse as ImageGenerationResponse, };
|
|
25
24
|
export { VideoGenerationClient, type VideoGenerationConfig, type VideoGenerationRequest, type ContentGenerationTaskID, type ContentGenerationTask, };
|
|
26
25
|
export { sdkConfig };
|
|
27
26
|
/**
|
package/dist/index.js
CHANGED
|
@@ -6,13 +6,12 @@
|
|
|
6
6
|
* @see https://github.com/langchain-ai/langchainjs
|
|
7
7
|
*/
|
|
8
8
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
-
exports.sdkConfig = exports.VideoGenerationClient = exports.
|
|
9
|
+
exports.sdkConfig = exports.VideoGenerationClient = exports.GeminiImageGenerationClient = exports.DoubaoImageGenerationClient = exports.ChatAnthropic = exports.ChatGoogleGenerativeAI = exports.ChatOpenAI = exports.BaseChatModel = exports.AIMessageChunk = exports.SystemMessage = exports.AIMessage = exports.HumanMessage = void 0;
|
|
10
10
|
exports.createChatModel = createChatModel;
|
|
11
11
|
const openai_1 = require("./chat_models/openai");
|
|
12
12
|
const google_1 = require("./chat_models/google");
|
|
13
13
|
const doubao_image_generation_1 = require("./doubao-image-generation");
|
|
14
14
|
Object.defineProperty(exports, "DoubaoImageGenerationClient", { enumerable: true, get: function () { return doubao_image_generation_1.DoubaoImageGenerationClient; } });
|
|
15
|
-
Object.defineProperty(exports, "ImageGenerationClient", { enumerable: true, get: function () { return doubao_image_generation_1.DoubaoImageGenerationClient; } });
|
|
16
15
|
const gemini_image_generation_1 = require("./gemini-image-generation");
|
|
17
16
|
Object.defineProperty(exports, "GeminiImageGenerationClient", { enumerable: true, get: function () { return gemini_image_generation_1.GeminiImageGenerationClient; } });
|
|
18
17
|
const video_generation_1 = require("./video_generation");
|
package/package.json
CHANGED