ai-world-sdk 1.0.10 → 1.0.12

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -219,7 +219,6 @@ const result = await client.generate({
219
219
  prompt: 'A beautiful landscape', // 必需
220
220
  model: 'gemini-2.5-flash-image', // 推荐:快速、高效
221
221
  aspect_ratio: '16:9', // 可选: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
222
- number_of_images: 1, // 可选: 1-4
223
222
  });
224
223
 
225
224
  // 高级用法(使用 Gemini 3 Pro - 专业模型,支持高分辨率)
@@ -228,7 +227,6 @@ const result2 = await client.generate({
228
227
  model: 'gemini-3-pro-image-preview', // 专业级模型
229
228
  aspect_ratio: '16:9',
230
229
  image_size: '2K', // 可选: 1K, 2K, 4K(仅适用于 gemini-3-pro-image-preview)
231
- number_of_images: 1,
232
230
  response_modalities: ['IMAGE'], // 仅返回图片,不返回文本
233
231
  temperature: 0.7, // 可选: 0.0-2.0
234
232
  max_output_tokens: 1000, // 可选
@@ -244,12 +242,11 @@ const result3 = await aihubmixClient.generate({
244
242
  model: 'gemini-3-pro-image-preview',
245
243
  aspect_ratio: '16:9',
246
244
  image_size: '1K',
247
- number_of_images: 1,
248
245
  response_modalities: ['IMAGE'],
249
246
  });
250
247
 
251
- // 图像编辑(使用文本提示编辑图片)
252
- const editResult = await client.edit({
248
+ // 单图输入(文本 + 单张图片,实现编辑效果)
249
+ const editResult = await client.generate({
253
250
  prompt: 'Add a small wizard hat on the cat\'s head',
254
251
  image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片数据或 data URL
255
252
  model: 'gemini-2.5-flash-image',
@@ -283,13 +280,17 @@ const secondResponse = await client.chat({
283
280
  |------|------|------|--------|
284
281
  | `prompt` | `string` | 图像生成提示词(必需) | - |
285
282
  | `model` | `string` | 模型名称 | `gemini-2.0-flash-exp-image-generation` |
283
+ | `image` | `string \| string[]` | 输入图片(base64 或 data URL)。可以是单个图片或图片数组(多图输入) | - |
286
284
  | `aspect_ratio` | `string` | 宽高比 | - |
287
285
  | `image_size` | `string` | 图片大小(仅 gemini-3-pro-image-preview) | - |
288
- | `number_of_images` | `number` | 生成图片数量 | `1` |
289
286
  | `response_modalities` | `array` | 响应模态 | `['TEXT', 'IMAGE']` |
290
287
  | `temperature` | `number` | 温度参数 | `0.7` |
291
288
  | `max_output_tokens` | `number` | 最大输出 token 数 | `1000` |
292
289
 
290
+ **多图输入限制:**
291
+ - `gemini-2.5-flash-image`: 最多支持 3 张输入图片
292
+ - `gemini-3-pro-image-preview`: 最多支持 14 张输入图片(其中最多 5 张高保真图片)
293
+
293
294
  **支持的宽高比:** `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
294
295
 
295
296
  **Provider 说明:**
@@ -304,11 +305,7 @@ const secondResponse = await client.chat({
304
305
  | `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
305
306
  | `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
306
307
 
307
- **图像编辑和多轮修改:**
308
-
309
- - **`edit()`** - 图像编辑:使用文本提示编辑图片,支持添加、移除或修改元素
310
- - 需要提供输入图片(base64 编码或 data URL)
311
- - 适用于单次编辑操作
308
+ **多轮图片修改:**
312
309
 
313
310
  - **`chat()`** - 多轮图片修改:通过对话迭代式优化图片
314
311
  - 首次调用创建新的聊天会话,返回 `chat_id`
@@ -664,12 +661,11 @@ import { GeminiImageGenerationClient } from 'ai-world-sdk';
664
661
 
665
662
  const client = new GeminiImageGenerationClient({});
666
663
 
667
- // 使用 Gemini 2.5 Flash(快速模型)
664
+ // 1. 文生图(仅文本提示)
668
665
  const result = await client.generate({
669
666
  prompt: 'A beautiful sunset over the ocean',
670
667
  model: 'gemini-2.5-flash-image',
671
668
  aspect_ratio: '16:9',
672
- number_of_images: 1,
673
669
  });
674
670
 
675
671
  console.log('图像 URL:', result.data[0]?.url || 'Base64 编码');
@@ -677,8 +673,32 @@ if (result.text) {
677
673
  console.log('图像描述:', result.text);
678
674
  }
679
675
 
680
- // 使用 Gemini 3 Pro(专业模型,支持高分辨率)
676
+ // 2. 单图输入(文本 + 单张图片)
681
677
  const result2 = await client.generate({
678
+ prompt: 'Create a picture of my cat eating a nano-banana in a fancy restaurant',
679
+ image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片或 data URL
680
+ model: 'gemini-2.5-flash-image',
681
+ aspect_ratio: '16:9',
682
+ });
683
+
684
+ // 3. 多图输入(文本 + 多张图片)
685
+ // gemini-2.5-flash-image 最多支持 3 张图片
686
+ // gemini-3-pro-image-preview 最多支持 14 张图片
687
+ const result3 = await client.generate({
688
+ prompt: 'An office group photo of these people, they are making funny faces.',
689
+ image: [
690
+ 'data:image/png;base64,iVBORw0KGgo...', // 第一张图片
691
+ 'data:image/png;base64,iVBORw0KGgo...', // 第二张图片
692
+ 'data:image/png;base64,iVBORw0KGgo...', // 第三张图片
693
+ ],
694
+ model: 'gemini-3-pro-image-preview',
695
+ aspect_ratio: '5:4',
696
+ image_size: '2K',
697
+ response_modalities: ['IMAGE'],
698
+ });
699
+
700
+ // 4. 使用 Gemini 3 Pro(专业模型,支持高分辨率)
701
+ const result4 = await client.generate({
682
702
  prompt: 'A futuristic city at night',
683
703
  model: 'gemini-3-pro-image-preview',
684
704
  aspect_ratio: '21:9', // 超宽屏
@@ -686,7 +706,7 @@ const result2 = await client.generate({
686
706
  response_modalities: ['IMAGE'], // 仅返回图片
687
707
  });
688
708
 
689
- console.log('4K 图像:', result2.data[0]?.b64_json ? 'Base64 编码' : result2.data[0]?.url);
709
+ console.log('4K 图像:', result4.data[0]?.b64_json ? 'Base64 编码' : result4.data[0]?.url);
690
710
  ```
691
711
 
692
712
  ### 视频生成工作流
@@ -557,14 +557,13 @@ describe("Langchain SDK Tests", () => {
557
557
  expect(Array.isArray(message.content)).toBe(true);
558
558
  console.log("✅ HumanMessage with array content 测试成功");
559
559
  });
560
- test("GeminiImageGenerationClient - 基础图像生成", async () => {
560
+ test("GeminiImageGenerationClient - 基础图像生成(文生图)", async () => {
561
561
  const imageClient = new index_1.GeminiImageGenerationClient({});
562
562
  const result = await imageClient.generate({
563
563
  prompt: 'A beautiful sunset over the ocean',
564
564
  model: 'gemini-3-pro-image-preview',
565
565
  aspect_ratio: '16:9',
566
566
  image_size: '1K', // 仅适用于 gemini-3-pro-image-preview
567
- number_of_images: 1,
568
567
  response_modalities: ['IMAGE'], // 仅返回图片
569
568
  });
570
569
  expect(result).toBeDefined();
@@ -577,13 +576,72 @@ describe("Langchain SDK Tests", () => {
577
576
  expect(item).toBeDefined();
578
577
  expect(item.url || item.b64_json).toBeDefined();
579
578
  });
580
- console.log("✅ GeminiImageGenerationClient 基础测试成功");
579
+ console.log("✅ GeminiImageGenerationClient 基础测试成功(文生图)");
581
580
  console.log(`生成图像数量: ${result.data.length}`);
582
581
  console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
583
582
  if (result.text) {
584
583
  console.log("图像描述:", result.text);
585
584
  }
586
585
  }, 120000);
586
+ test("GeminiImageGenerationClient - 单图输入图像生成", async () => {
587
+ const imageClient = new index_1.GeminiImageGenerationClient({});
588
+ // 先生成一张基础图片
589
+ const baseResult = await imageClient.generate({
590
+ prompt: 'A photorealistic picture of a fluffy ginger cat sitting on a wooden floor',
591
+ model: 'gemini-2.5-flash-image',
592
+ aspect_ratio: '1:1',
593
+ response_modalities: ['IMAGE'],
594
+ });
595
+ expect(baseResult.data.length).toBeGreaterThan(0);
596
+ const baseImage = baseResult.data[0]?.b64_json || baseResult.data[0]?.url;
597
+ expect(baseImage).toBeDefined();
598
+ // 使用生成的图片作为输入
599
+ const result = await imageClient.generate({
600
+ prompt: 'Using the provided image of my cat, please add a small, knitted wizard hat on its head',
601
+ image: baseImage,
602
+ model: 'gemini-2.5-flash-image',
603
+ aspect_ratio: '1:1',
604
+ response_modalities: ['IMAGE'],
605
+ });
606
+ expect(result).toBeDefined();
607
+ expect(result.data).toBeDefined();
608
+ expect(result.data.length).toBeGreaterThan(0);
609
+ console.log("✅ GeminiImageGenerationClient 单图输入测试成功");
610
+ }, 120000);
611
+ test("GeminiImageGenerationClient - 多图输入图像生成", async () => {
612
+ const imageClient = new index_1.GeminiImageGenerationClient({});
613
+ // 先生成两张基础图片
614
+ const baseResult1 = await imageClient.generate({
615
+ prompt: 'A professional headshot of a woman with brown hair and blue eyes',
616
+ model: 'gemini-2.5-flash-image',
617
+ aspect_ratio: '1:1',
618
+ response_modalities: ['IMAGE'],
619
+ });
620
+ const baseResult2 = await imageClient.generate({
621
+ prompt: 'A simple, modern logo with the letters G and A in a white circle',
622
+ model: 'gemini-2.5-flash-image',
623
+ aspect_ratio: '1:1',
624
+ response_modalities: ['IMAGE'],
625
+ });
626
+ expect(baseResult1.data.length).toBeGreaterThan(0);
627
+ expect(baseResult2.data.length).toBeGreaterThan(0);
628
+ const image1 = baseResult1.data[0]?.b64_json || baseResult1.data[0]?.url;
629
+ const image2 = baseResult2.data[0]?.b64_json || baseResult2.data[0]?.url;
630
+ expect(image1).toBeDefined();
631
+ expect(image2).toBeDefined();
632
+ // 使用多张图片作为输入(gemini-2.5-flash-image 最多支持 3 张)
633
+ const result = await imageClient.generate({
634
+ prompt: 'Take the first image of the woman and add the logo from the second image onto her black t-shirt',
635
+ image: [image1, image2],
636
+ model: 'gemini-2.5-flash-image',
637
+ aspect_ratio: '1:1',
638
+ response_modalities: ['IMAGE'],
639
+ });
640
+ expect(result).toBeDefined();
641
+ expect(result.data).toBeDefined();
642
+ expect(result.data.length).toBeGreaterThan(0);
643
+ console.log("✅ GeminiImageGenerationClient 多图输入测试成功");
644
+ }, 180000);
587
645
  test("GeminiImageGenerationClient - 使用 aihubmix provider", async () => {
588
646
  const imageClient = new index_1.GeminiImageGenerationClient({
589
647
  provider: "aihubmix",
@@ -593,7 +651,6 @@ describe("Langchain SDK Tests", () => {
593
651
  model: 'gemini-3-pro-image-preview',
594
652
  aspect_ratio: '16:9',
595
653
  image_size: '1K',
596
- number_of_images: 1,
597
654
  response_modalities: ['IMAGE'], // 仅返回图片
598
655
  });
599
656
  expect(result).toBeDefined();
@@ -613,54 +670,6 @@ describe("Langchain SDK Tests", () => {
613
670
  console.log("图像描述:", result.text);
614
671
  }
615
672
  }, 120000);
616
- test("GeminiImageGenerationClient - 图像编辑", async () => {
617
- const imageClient = new index_1.GeminiImageGenerationClient({});
618
- // 首先生成一张基础图片
619
- const generateResult = await imageClient.generate({
620
- prompt: 'A simple red apple on a white background',
621
- model: 'gemini-2.5-flash-image',
622
- aspect_ratio: '1:1',
623
- response_modalities: ['IMAGE'],
624
- });
625
- expect(generateResult).toBeDefined();
626
- expect(generateResult.data).toBeDefined();
627
- expect(generateResult.data.length).toBeGreaterThan(0);
628
- // 获取第一张图片的 base64 数据
629
- const firstImage = generateResult.data[0];
630
- expect(firstImage).toBeDefined();
631
- // 提取 base64 数据(优先使用 b64_json,否则从 text 中提取)
632
- let base64Image;
633
- if (firstImage.b64_json) {
634
- base64Image = firstImage.b64_json;
635
- }
636
- else if (firstImage.text && firstImage.text.startsWith('data:')) {
637
- // 从 data URL 中提取 base64 部分
638
- base64Image = firstImage.text.split(',')[1];
639
- }
640
- else {
641
- throw new Error('无法获取图片的 base64 数据');
642
- }
643
- expect(base64Image).toBeDefined();
644
- // 使用编辑功能添加元素
645
- const editResult = await imageClient.edit({
646
- prompt: 'Add a small green leaf on top of the apple',
647
- image: base64Image, // 直接使用 base64 字符串
648
- model: 'gemini-2.5-flash-image',
649
- aspect_ratio: '1:1',
650
- response_modalities: ['IMAGE'],
651
- });
652
- expect(editResult).toBeDefined();
653
- expect(editResult.created).toBeDefined();
654
- expect(editResult.data).toBeDefined();
655
- expect(Array.isArray(editResult.data)).toBe(true);
656
- expect(editResult.data.length).toBeGreaterThan(0);
657
- editResult.data.forEach((item) => {
658
- expect(item).toBeDefined();
659
- expect(item.url || item.b64_json).toBeDefined();
660
- });
661
- console.log("✅ GeminiImageGenerationClient 图像编辑测试成功");
662
- console.log(`编辑后图像数量: ${editResult.data.length}`);
663
- }, 180000);
664
673
  test("GeminiImageGenerationClient - 多轮图片修改", async () => {
665
674
  const imageClient = new index_1.GeminiImageGenerationClient({});
666
675
  // 第一轮:创建初始图片
@@ -10,7 +10,7 @@ export interface GeminiImageGenerationConfig {
10
10
  export interface GeminiImageGenerationRequest {
11
11
  prompt: string;
12
12
  model?: string;
13
- number_of_images?: number;
13
+ image?: string | string[];
14
14
  aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
15
15
  image_size?: "1K" | "2K" | "4K";
16
16
  temperature?: number;
@@ -28,16 +28,6 @@ export interface GeminiImageGenerationResponse {
28
28
  data: GeminiImageData[];
29
29
  text?: string;
30
30
  }
31
- export interface GeminiImageEditRequest {
32
- prompt: string;
33
- image: string;
34
- model?: string;
35
- provider?: "aihubmix" | "gemini";
36
- aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
37
- image_size?: "1K" | "2K" | "4K";
38
- response_modalities?: ("TEXT" | "IMAGE")[];
39
- user?: string;
40
- }
41
31
  export interface GeminiImageChatRequest {
42
32
  message: string;
43
33
  chat_id?: string;
@@ -66,22 +56,8 @@ export declare class GeminiImageGenerationClient {
66
56
  * - aspect_ratio: 宽高比,支持 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
67
57
  * - image_size: 图片大小(仅 gemini-3-pro-image-preview),支持 1K, 2K, 4K
68
58
  * - response_modalities: 响应模态,["TEXT", "IMAGE"] 或 ["IMAGE"]
69
- * - number_of_images: 生成图片数量(1-4)
70
59
  */
71
60
  generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
72
- /**
73
- * Edit images
74
- * 编辑图像
75
- *
76
- * 使用文本提示编辑图片,支持添加、移除或修改元素
77
- *
78
- * 支持的参数:
79
- * - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
80
- * - aspect_ratio: 宽高比
81
- * - image_size: 图片大小(仅 gemini-3-pro-image-preview)
82
- * - response_modalities: 响应模态
83
- */
84
- edit(request: GeminiImageEditRequest): Promise<GeminiImageGenerationResponse>;
85
61
  /**
86
62
  * Chat with images (multi-turn image editing)
87
63
  * 图像多轮对话(用于多轮图片修改)
@@ -33,14 +33,16 @@ class GeminiImageGenerationClient {
33
33
  * - aspect_ratio: 宽高比,支持 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
34
34
  * - image_size: 图片大小(仅 gemini-3-pro-image-preview),支持 1K, 2K, 4K
35
35
  * - response_modalities: 响应模态,["TEXT", "IMAGE"] 或 ["IMAGE"]
36
- * - number_of_images: 生成图片数量(1-4)
37
36
  */
38
37
  async generate(request) {
39
38
  const requestBody = {
40
39
  prompt: request.prompt,
41
40
  model: request.model || "gemini-2.0-flash-exp-image-generation",
42
- number_of_images: request.number_of_images || 1,
43
41
  };
42
+ // 添加图片输入参数(单图或多图)
43
+ if (request.image) {
44
+ requestBody.image = request.image;
45
+ }
44
46
  // 添加可选参数
45
47
  if (request.aspect_ratio) {
46
48
  requestBody.aspect_ratio = request.aspect_ratio;
@@ -76,56 +78,6 @@ class GeminiImageGenerationClient {
76
78
  (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
77
79
  return data;
78
80
  }
79
- /**
80
- * Edit images
81
- * 编辑图像
82
- *
83
- * 使用文本提示编辑图片,支持添加、移除或修改元素
84
- *
85
- * 支持的参数:
86
- * - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
87
- * - aspect_ratio: 宽高比
88
- * - image_size: 图片大小(仅 gemini-3-pro-image-preview)
89
- * - response_modalities: 响应模态
90
- */
91
- async edit(request) {
92
- const requestBody = {
93
- prompt: request.prompt,
94
- image: request.image,
95
- model: request.model || "gemini-2.5-flash-image",
96
- };
97
- // 添加可选参数
98
- if (request.aspect_ratio) {
99
- requestBody.aspect_ratio = request.aspect_ratio;
100
- }
101
- if (request.image_size) {
102
- requestBody.image_size = request.image_size;
103
- }
104
- if (request.response_modalities) {
105
- requestBody.response_modalities = request.response_modalities;
106
- }
107
- if (request.user) {
108
- requestBody.user = request.user;
109
- }
110
- if (request.provider) {
111
- requestBody.provider = request.provider;
112
- }
113
- const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/edit`;
114
- (0, log_1.logRequest)("POST", url, this.headers, { ...requestBody, image: "[base64 data]" });
115
- const response = await fetch(url, {
116
- method: "POST",
117
- headers: this.headers,
118
- body: JSON.stringify(requestBody),
119
- });
120
- if (!response.ok) {
121
- const errorText = await response.text();
122
- (0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
123
- throw new Error(`Gemini image edit API error: ${response.status} ${errorText}`);
124
- }
125
- const data = (await response.json());
126
- (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
127
- return data;
128
- }
129
81
  /**
130
82
  * Chat with images (multi-turn image editing)
131
83
  * 图像多轮对话(用于多轮图片修改)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-world-sdk",
3
- "version": "1.0.10",
3
+ "version": "1.0.12",
4
4
  "description": "TypeScript SDK for AI World Platform - Chat Models, Image Generation, and Video Generation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",