ai-world-sdk 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -10
- package/dist/__tests__/example.test.js +61 -50
- package/dist/gemini-image-generation.d.ts +1 -23
- package/dist/gemini-image-generation.js +4 -50
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -248,8 +248,8 @@ const result3 = await aihubmixClient.generate({
|
|
|
248
248
|
response_modalities: ['IMAGE'],
|
|
249
249
|
});
|
|
250
250
|
|
|
251
|
-
//
|
|
252
|
-
const editResult = await client.edit({
|
|
251
|
+
// 单图输入(文本 + 单张图片,实现编辑效果)
|
|
252
|
+
const editResult = await client.generate({
|
|
253
253
|
prompt: 'Add a small wizard hat on the cat\'s head',
|
|
254
254
|
image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片数据或 data URL
|
|
255
255
|
model: 'gemini-2.5-flash-image',
|
|
@@ -283,6 +283,7 @@ const secondResponse = await client.chat({
|
|
|
283
283
|
|------|------|------|--------|
|
|
284
284
|
| `prompt` | `string` | 图像生成提示词(必需) | - |
|
|
285
285
|
| `model` | `string` | 模型名称 | `gemini-2.0-flash-exp-image-generation` |
|
|
286
|
+
| `image` | `string \| string[]` | 输入图片(base64 或 data URL)。可以是单个图片或图片数组(多图输入) | - |
|
|
286
287
|
| `aspect_ratio` | `string` | 宽高比 | - |
|
|
287
288
|
| `image_size` | `string` | 图片大小(仅 gemini-3-pro-image-preview) | - |
|
|
288
289
|
| `number_of_images` | `number` | 生成图片数量 | `1` |
|
|
@@ -290,6 +291,10 @@ const secondResponse = await client.chat({
|
|
|
290
291
|
| `temperature` | `number` | 温度参数 | `0.7` |
|
|
291
292
|
| `max_output_tokens` | `number` | 最大输出 token 数 | `1000` |
|
|
292
293
|
|
|
294
|
+
**多图输入限制:**
|
|
295
|
+
- `gemini-2.5-flash-image`: 最多支持 3 张输入图片
|
|
296
|
+
- `gemini-3-pro-image-preview`: 最多支持 14 张输入图片(其中最多 5 张高保真图片)
|
|
297
|
+
|
|
293
298
|
**支持的宽高比:** `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
|
|
294
299
|
|
|
295
300
|
**Provider 说明:**
|
|
@@ -304,11 +309,7 @@ const secondResponse = await client.chat({
|
|
|
304
309
|
| `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
|
|
305
310
|
| `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
|
|
306
311
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
- **`edit()`** - 图像编辑:使用文本提示编辑图片,支持添加、移除或修改元素
|
|
310
|
-
- 需要提供输入图片(base64 编码或 data URL)
|
|
311
|
-
- 适用于单次编辑操作
|
|
312
|
+
**多轮图片修改:**
|
|
312
313
|
|
|
313
314
|
- **`chat()`** - 多轮图片修改:通过对话迭代式优化图片
|
|
314
315
|
- 首次调用创建新的聊天会话,返回 `chat_id`
|
|
@@ -664,7 +665,7 @@ import { GeminiImageGenerationClient } from 'ai-world-sdk';
|
|
|
664
665
|
|
|
665
666
|
const client = new GeminiImageGenerationClient({});
|
|
666
667
|
|
|
667
|
-
//
|
|
668
|
+
// 1. 文生图(仅文本提示)
|
|
668
669
|
const result = await client.generate({
|
|
669
670
|
prompt: 'A beautiful sunset over the ocean',
|
|
670
671
|
model: 'gemini-2.5-flash-image',
|
|
@@ -677,8 +678,32 @@ if (result.text) {
|
|
|
677
678
|
console.log('图像描述:', result.text);
|
|
678
679
|
}
|
|
679
680
|
|
|
680
|
-
//
|
|
681
|
+
// 2. 单图输入(文本 + 单张图片)
|
|
681
682
|
const result2 = await client.generate({
|
|
683
|
+
prompt: 'Create a picture of my cat eating a nano-banana in a fancy restaurant',
|
|
684
|
+
image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片或 data URL
|
|
685
|
+
model: 'gemini-2.5-flash-image',
|
|
686
|
+
aspect_ratio: '16:9',
|
|
687
|
+
});
|
|
688
|
+
|
|
689
|
+
// 3. 多图输入(文本 + 多张图片)
|
|
690
|
+
// gemini-2.5-flash-image 最多支持 3 张图片
|
|
691
|
+
// gemini-3-pro-image-preview 最多支持 14 张图片
|
|
692
|
+
const result3 = await client.generate({
|
|
693
|
+
prompt: 'An office group photo of these people, they are making funny faces.',
|
|
694
|
+
image: [
|
|
695
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第一张图片
|
|
696
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第二张图片
|
|
697
|
+
'data:image/png;base64,iVBORw0KGgo...', // 第三张图片
|
|
698
|
+
],
|
|
699
|
+
model: 'gemini-3-pro-image-preview',
|
|
700
|
+
aspect_ratio: '5:4',
|
|
701
|
+
image_size: '2K',
|
|
702
|
+
response_modalities: ['IMAGE'],
|
|
703
|
+
});
|
|
704
|
+
|
|
705
|
+
// 4. 使用 Gemini 3 Pro(专业模型,支持高分辨率)
|
|
706
|
+
const result4 = await client.generate({
|
|
682
707
|
prompt: 'A futuristic city at night',
|
|
683
708
|
model: 'gemini-3-pro-image-preview',
|
|
684
709
|
aspect_ratio: '21:9', // 超宽屏
|
|
@@ -686,7 +711,7 @@ const result2 = await client.generate({
|
|
|
686
711
|
response_modalities: ['IMAGE'], // 仅返回图片
|
|
687
712
|
});
|
|
688
713
|
|
|
689
|
-
console.log('4K 图像:',
|
|
714
|
+
console.log('4K 图像:', result4.data[0]?.b64_json ? 'Base64 编码' : result4.data[0]?.url);
|
|
690
715
|
```
|
|
691
716
|
|
|
692
717
|
### 视频生成工作流
|
package/dist/__tests__/example.test.js
CHANGED
|
@@ -557,7 +557,7 @@ describe("Langchain SDK Tests", () => {
|
|
|
557
557
|
expect(Array.isArray(message.content)).toBe(true);
|
|
558
558
|
console.log("✅ HumanMessage with array content 测试成功");
|
|
559
559
|
});
|
|
560
|
-
test("GeminiImageGenerationClient -
|
|
560
|
+
test("GeminiImageGenerationClient - 基础图像生成(文生图)", async () => {
|
|
561
561
|
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
562
562
|
const result = await imageClient.generate({
|
|
563
563
|
prompt: 'A beautiful sunset over the ocean',
|
|
@@ -577,13 +577,72 @@ describe("Langchain SDK Tests", () => {
|
|
|
577
577
|
expect(item).toBeDefined();
|
|
578
578
|
expect(item.url || item.b64_json).toBeDefined();
|
|
579
579
|
});
|
|
580
|
-
console.log("✅ GeminiImageGenerationClient
|
|
580
|
+
console.log("✅ GeminiImageGenerationClient 基础测试成功(文生图)");
|
|
581
581
|
console.log(`生成图像数量: ${result.data.length}`);
|
|
582
582
|
console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
|
|
583
583
|
if (result.text) {
|
|
584
584
|
console.log("图像描述:", result.text);
|
|
585
585
|
}
|
|
586
586
|
}, 120000);
|
|
587
|
+
test("GeminiImageGenerationClient - 单图输入图像生成", async () => {
|
|
588
|
+
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
589
|
+
// 先生成一张基础图片
|
|
590
|
+
const baseResult = await imageClient.generate({
|
|
591
|
+
prompt: 'A photorealistic picture of a fluffy ginger cat sitting on a wooden floor',
|
|
592
|
+
model: 'gemini-2.5-flash-image',
|
|
593
|
+
aspect_ratio: '1:1',
|
|
594
|
+
response_modalities: ['IMAGE'],
|
|
595
|
+
});
|
|
596
|
+
expect(baseResult.data.length).toBeGreaterThan(0);
|
|
597
|
+
const baseImage = baseResult.data[0]?.b64_json || baseResult.data[0]?.url;
|
|
598
|
+
expect(baseImage).toBeDefined();
|
|
599
|
+
// 使用生成的图片作为输入
|
|
600
|
+
const result = await imageClient.generate({
|
|
601
|
+
prompt: 'Using the provided image of my cat, please add a small, knitted wizard hat on its head',
|
|
602
|
+
image: baseImage,
|
|
603
|
+
model: 'gemini-2.5-flash-image',
|
|
604
|
+
aspect_ratio: '1:1',
|
|
605
|
+
response_modalities: ['IMAGE'],
|
|
606
|
+
});
|
|
607
|
+
expect(result).toBeDefined();
|
|
608
|
+
expect(result.data).toBeDefined();
|
|
609
|
+
expect(result.data.length).toBeGreaterThan(0);
|
|
610
|
+
console.log("✅ GeminiImageGenerationClient 单图输入测试成功");
|
|
611
|
+
}, 120000);
|
|
612
|
+
test("GeminiImageGenerationClient - 多图输入图像生成", async () => {
|
|
613
|
+
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
614
|
+
// 先生成两张基础图片
|
|
615
|
+
const baseResult1 = await imageClient.generate({
|
|
616
|
+
prompt: 'A professional headshot of a woman with brown hair and blue eyes',
|
|
617
|
+
model: 'gemini-2.5-flash-image',
|
|
618
|
+
aspect_ratio: '1:1',
|
|
619
|
+
response_modalities: ['IMAGE'],
|
|
620
|
+
});
|
|
621
|
+
const baseResult2 = await imageClient.generate({
|
|
622
|
+
prompt: 'A simple, modern logo with the letters G and A in a white circle',
|
|
623
|
+
model: 'gemini-2.5-flash-image',
|
|
624
|
+
aspect_ratio: '1:1',
|
|
625
|
+
response_modalities: ['IMAGE'],
|
|
626
|
+
});
|
|
627
|
+
expect(baseResult1.data.length).toBeGreaterThan(0);
|
|
628
|
+
expect(baseResult2.data.length).toBeGreaterThan(0);
|
|
629
|
+
const image1 = baseResult1.data[0]?.b64_json || baseResult1.data[0]?.url;
|
|
630
|
+
const image2 = baseResult2.data[0]?.b64_json || baseResult2.data[0]?.url;
|
|
631
|
+
expect(image1).toBeDefined();
|
|
632
|
+
expect(image2).toBeDefined();
|
|
633
|
+
// 使用多张图片作为输入(gemini-2.5-flash-image 最多支持 3 张)
|
|
634
|
+
const result = await imageClient.generate({
|
|
635
|
+
prompt: 'Take the first image of the woman and add the logo from the second image onto her black t-shirt',
|
|
636
|
+
image: [image1, image2],
|
|
637
|
+
model: 'gemini-2.5-flash-image',
|
|
638
|
+
aspect_ratio: '1:1',
|
|
639
|
+
response_modalities: ['IMAGE'],
|
|
640
|
+
});
|
|
641
|
+
expect(result).toBeDefined();
|
|
642
|
+
expect(result.data).toBeDefined();
|
|
643
|
+
expect(result.data.length).toBeGreaterThan(0);
|
|
644
|
+
console.log("✅ GeminiImageGenerationClient 多图输入测试成功");
|
|
645
|
+
}, 180000);
|
|
587
646
|
test("GeminiImageGenerationClient - 使用 aihubmix provider", async () => {
|
|
588
647
|
const imageClient = new index_1.GeminiImageGenerationClient({
|
|
589
648
|
provider: "aihubmix",
|
|
@@ -613,54 +672,6 @@ describe("Langchain SDK Tests", () => {
|
|
|
613
672
|
console.log("图像描述:", result.text);
|
|
614
673
|
}
|
|
615
674
|
}, 120000);
|
|
616
|
-
test("GeminiImageGenerationClient - 图像编辑", async () => {
|
|
617
|
-
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
618
|
-
// 首先生成一张基础图片
|
|
619
|
-
const generateResult = await imageClient.generate({
|
|
620
|
-
prompt: 'A simple red apple on a white background',
|
|
621
|
-
model: 'gemini-2.5-flash-image',
|
|
622
|
-
aspect_ratio: '1:1',
|
|
623
|
-
response_modalities: ['IMAGE'],
|
|
624
|
-
});
|
|
625
|
-
expect(generateResult).toBeDefined();
|
|
626
|
-
expect(generateResult.data).toBeDefined();
|
|
627
|
-
expect(generateResult.data.length).toBeGreaterThan(0);
|
|
628
|
-
// 获取第一张图片的 base64 数据
|
|
629
|
-
const firstImage = generateResult.data[0];
|
|
630
|
-
expect(firstImage).toBeDefined();
|
|
631
|
-
// 提取 base64 数据(优先使用 b64_json,否则从 text 中提取)
|
|
632
|
-
let base64Image;
|
|
633
|
-
if (firstImage.b64_json) {
|
|
634
|
-
base64Image = firstImage.b64_json;
|
|
635
|
-
}
|
|
636
|
-
else if (firstImage.text && firstImage.text.startsWith('data:')) {
|
|
637
|
-
// 从 data URL 中提取 base64 部分
|
|
638
|
-
base64Image = firstImage.text.split(',')[1];
|
|
639
|
-
}
|
|
640
|
-
else {
|
|
641
|
-
throw new Error('无法获取图片的 base64 数据');
|
|
642
|
-
}
|
|
643
|
-
expect(base64Image).toBeDefined();
|
|
644
|
-
// 使用编辑功能添加元素
|
|
645
|
-
const editResult = await imageClient.edit({
|
|
646
|
-
prompt: 'Add a small green leaf on top of the apple',
|
|
647
|
-
image: base64Image, // 直接使用 base64 字符串
|
|
648
|
-
model: 'gemini-2.5-flash-image',
|
|
649
|
-
aspect_ratio: '1:1',
|
|
650
|
-
response_modalities: ['IMAGE'],
|
|
651
|
-
});
|
|
652
|
-
expect(editResult).toBeDefined();
|
|
653
|
-
expect(editResult.created).toBeDefined();
|
|
654
|
-
expect(editResult.data).toBeDefined();
|
|
655
|
-
expect(Array.isArray(editResult.data)).toBe(true);
|
|
656
|
-
expect(editResult.data.length).toBeGreaterThan(0);
|
|
657
|
-
editResult.data.forEach((item) => {
|
|
658
|
-
expect(item).toBeDefined();
|
|
659
|
-
expect(item.url || item.b64_json).toBeDefined();
|
|
660
|
-
});
|
|
661
|
-
console.log("✅ GeminiImageGenerationClient 图像编辑测试成功");
|
|
662
|
-
console.log(`编辑后图像数量: ${editResult.data.length}`);
|
|
663
|
-
}, 180000);
|
|
664
675
|
test("GeminiImageGenerationClient - 多轮图片修改", async () => {
|
|
665
676
|
const imageClient = new index_1.GeminiImageGenerationClient({});
|
|
666
677
|
// 第一轮:创建初始图片
|
package/dist/gemini-image-generation.d.ts
CHANGED
|
@@ -10,6 +10,7 @@ export interface GeminiImageGenerationConfig {
|
|
|
10
10
|
export interface GeminiImageGenerationRequest {
|
|
11
11
|
prompt: string;
|
|
12
12
|
model?: string;
|
|
13
|
+
image?: string | string[];
|
|
13
14
|
number_of_images?: number;
|
|
14
15
|
aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
|
|
15
16
|
image_size?: "1K" | "2K" | "4K";
|
|
@@ -28,16 +29,6 @@ export interface GeminiImageGenerationResponse {
|
|
|
28
29
|
data: GeminiImageData[];
|
|
29
30
|
text?: string;
|
|
30
31
|
}
|
|
31
|
-
export interface GeminiImageEditRequest {
|
|
32
|
-
prompt: string;
|
|
33
|
-
image: string;
|
|
34
|
-
model?: string;
|
|
35
|
-
provider?: "aihubmix" | "gemini";
|
|
36
|
-
aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
|
|
37
|
-
image_size?: "1K" | "2K" | "4K";
|
|
38
|
-
response_modalities?: ("TEXT" | "IMAGE")[];
|
|
39
|
-
user?: string;
|
|
40
|
-
}
|
|
41
32
|
export interface GeminiImageChatRequest {
|
|
42
33
|
message: string;
|
|
43
34
|
chat_id?: string;
|
|
@@ -69,19 +60,6 @@ export declare class GeminiImageGenerationClient {
|
|
|
69
60
|
* - number_of_images: 生成图片数量(1-4)
|
|
70
61
|
*/
|
|
71
62
|
generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
|
|
72
|
-
/**
|
|
73
|
-
* Edit images
|
|
74
|
-
* 编辑图像
|
|
75
|
-
*
|
|
76
|
-
* 使用文本提示编辑图片,支持添加、移除或修改元素
|
|
77
|
-
*
|
|
78
|
-
* 支持的参数:
|
|
79
|
-
* - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
|
|
80
|
-
* - aspect_ratio: 宽高比
|
|
81
|
-
* - image_size: 图片大小(仅 gemini-3-pro-image-preview)
|
|
82
|
-
* - response_modalities: 响应模态
|
|
83
|
-
*/
|
|
84
|
-
edit(request: GeminiImageEditRequest): Promise<GeminiImageGenerationResponse>;
|
|
85
63
|
/**
|
|
86
64
|
* Chat with images (multi-turn image editing)
|
|
87
65
|
* 图像多轮对话(用于多轮图片修改)
|
package/dist/gemini-image-generation.js
CHANGED
|
@@ -41,6 +41,10 @@ class GeminiImageGenerationClient {
|
|
|
41
41
|
model: request.model || "gemini-2.0-flash-exp-image-generation",
|
|
42
42
|
number_of_images: request.number_of_images || 1,
|
|
43
43
|
};
|
|
44
|
+
// 添加图片输入参数(单图或多图)
|
|
45
|
+
if (request.image) {
|
|
46
|
+
requestBody.image = request.image;
|
|
47
|
+
}
|
|
44
48
|
// 添加可选参数
|
|
45
49
|
if (request.aspect_ratio) {
|
|
46
50
|
requestBody.aspect_ratio = request.aspect_ratio;
|
|
@@ -76,56 +80,6 @@ class GeminiImageGenerationClient {
|
|
|
76
80
|
(0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
|
|
77
81
|
return data;
|
|
78
82
|
}
|
|
79
|
-
/**
|
|
80
|
-
* Edit images
|
|
81
|
-
* 编辑图像
|
|
82
|
-
*
|
|
83
|
-
* 使用文本提示编辑图片,支持添加、移除或修改元素
|
|
84
|
-
*
|
|
85
|
-
* 支持的参数:
|
|
86
|
-
* - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
|
|
87
|
-
* - aspect_ratio: 宽高比
|
|
88
|
-
* - image_size: 图片大小(仅 gemini-3-pro-image-preview)
|
|
89
|
-
* - response_modalities: 响应模态
|
|
90
|
-
*/
|
|
91
|
-
async edit(request) {
|
|
92
|
-
const requestBody = {
|
|
93
|
-
prompt: request.prompt,
|
|
94
|
-
image: request.image,
|
|
95
|
-
model: request.model || "gemini-2.5-flash-image",
|
|
96
|
-
};
|
|
97
|
-
// 添加可选参数
|
|
98
|
-
if (request.aspect_ratio) {
|
|
99
|
-
requestBody.aspect_ratio = request.aspect_ratio;
|
|
100
|
-
}
|
|
101
|
-
if (request.image_size) {
|
|
102
|
-
requestBody.image_size = request.image_size;
|
|
103
|
-
}
|
|
104
|
-
if (request.response_modalities) {
|
|
105
|
-
requestBody.response_modalities = request.response_modalities;
|
|
106
|
-
}
|
|
107
|
-
if (request.user) {
|
|
108
|
-
requestBody.user = request.user;
|
|
109
|
-
}
|
|
110
|
-
if (request.provider) {
|
|
111
|
-
requestBody.provider = request.provider;
|
|
112
|
-
}
|
|
113
|
-
const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/edit`;
|
|
114
|
-
(0, log_1.logRequest)("POST", url, this.headers, { ...requestBody, image: "[base64 data]" });
|
|
115
|
-
const response = await fetch(url, {
|
|
116
|
-
method: "POST",
|
|
117
|
-
headers: this.headers,
|
|
118
|
-
body: JSON.stringify(requestBody),
|
|
119
|
-
});
|
|
120
|
-
if (!response.ok) {
|
|
121
|
-
const errorText = await response.text();
|
|
122
|
-
(0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
|
|
123
|
-
throw new Error(`Gemini image edit API error: ${response.status} ${errorText}`);
|
|
124
|
-
}
|
|
125
|
-
const data = (await response.json());
|
|
126
|
-
(0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
|
|
127
|
-
return data;
|
|
128
|
-
}
|
|
129
83
|
/**
|
|
130
84
|
* Chat with images (multi-turn image editing)
|
|
131
85
|
* 图像多轮对话(用于多轮图片修改)
|
package/package.json
CHANGED