ai-world-sdk 1.0.10 → 1.0.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -248,8 +248,8 @@ const result3 = await aihubmixClient.generate({
248
248
  response_modalities: ['IMAGE'],
249
249
  });
250
250
 
251
- // 图像编辑(使用文本提示编辑图片)
252
- const editResult = await client.edit({
251
+ // 单图输入(文本 + 单张图片,实现编辑效果)
252
+ const editResult = await client.generate({
253
253
  prompt: 'Add a small wizard hat on the cat\'s head',
254
254
  image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片数据或 data URL
255
255
  model: 'gemini-2.5-flash-image',
@@ -283,6 +283,7 @@ const secondResponse = await client.chat({
283
283
  |------|------|------|--------|
284
284
  | `prompt` | `string` | 图像生成提示词(必需) | - |
285
285
  | `model` | `string` | 模型名称 | `gemini-2.0-flash-exp-image-generation` |
286
+ | `image` | `string \| string[]` | 输入图片(base64 或 data URL)。可以是单个图片或图片数组(多图输入) | - |
286
287
  | `aspect_ratio` | `string` | 宽高比 | - |
287
288
  | `image_size` | `string` | 图片大小(仅 gemini-3-pro-image-preview) | - |
288
289
  | `number_of_images` | `number` | 生成图片数量 | `1` |
@@ -290,6 +291,10 @@ const secondResponse = await client.chat({
290
291
  | `temperature` | `number` | 温度参数 | `0.7` |
291
292
  | `max_output_tokens` | `number` | 最大输出 token 数 | `1000` |
292
293
 
294
+ **多图输入限制:**
295
+ - `gemini-2.5-flash-image`: 最多支持 3 张输入图片
296
+ - `gemini-3-pro-image-preview`: 最多支持 14 张输入图片(其中最多 5 张高保真图片)
297
+
293
298
  **支持的宽高比:** `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
294
299
 
295
300
  **Provider 说明:**
@@ -304,11 +309,7 @@ const secondResponse = await client.chat({
304
309
  | `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
305
310
  | `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
306
311
 
307
- **图像编辑和多轮修改:**
308
-
309
- - **`edit()`** - 图像编辑:使用文本提示编辑图片,支持添加、移除或修改元素
310
- - 需要提供输入图片(base64 编码或 data URL)
311
- - 适用于单次编辑操作
312
+ **多轮图片修改:**
312
313
 
313
314
  - **`chat()`** - 多轮图片修改:通过对话迭代式优化图片
314
315
  - 首次调用创建新的聊天会话,返回 `chat_id`
@@ -664,7 +665,7 @@ import { GeminiImageGenerationClient } from 'ai-world-sdk';
664
665
 
665
666
  const client = new GeminiImageGenerationClient({});
666
667
 
667
- // 使用 Gemini 2.5 Flash(快速模型)
668
+ // 1. 文生图(仅文本提示)
668
669
  const result = await client.generate({
669
670
  prompt: 'A beautiful sunset over the ocean',
670
671
  model: 'gemini-2.5-flash-image',
@@ -677,8 +678,32 @@ if (result.text) {
677
678
  console.log('图像描述:', result.text);
678
679
  }
679
680
 
680
- // 使用 Gemini 3 Pro(专业模型,支持高分辨率)
681
+ // 2. 单图输入(文本 + 单张图片)
681
682
  const result2 = await client.generate({
683
+ prompt: 'Create a picture of my cat eating a nano-banana in a fancy restaurant',
684
+ image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片或 data URL
685
+ model: 'gemini-2.5-flash-image',
686
+ aspect_ratio: '16:9',
687
+ });
688
+
689
+ // 3. 多图输入(文本 + 多张图片)
690
+ // gemini-2.5-flash-image 最多支持 3 张图片
691
+ // gemini-3-pro-image-preview 最多支持 14 张图片
692
+ const result3 = await client.generate({
693
+ prompt: 'An office group photo of these people, they are making funny faces.',
694
+ image: [
695
+ 'data:image/png;base64,iVBORw0KGgo...', // 第一张图片
696
+ 'data:image/png;base64,iVBORw0KGgo...', // 第二张图片
697
+ 'data:image/png;base64,iVBORw0KGgo...', // 第三张图片
698
+ ],
699
+ model: 'gemini-3-pro-image-preview',
700
+ aspect_ratio: '5:4',
701
+ image_size: '2K',
702
+ response_modalities: ['IMAGE'],
703
+ });
704
+
705
+ // 4. 使用 Gemini 3 Pro(专业模型,支持高分辨率)
706
+ const result4 = await client.generate({
682
707
  prompt: 'A futuristic city at night',
683
708
  model: 'gemini-3-pro-image-preview',
684
709
  aspect_ratio: '21:9', // 超宽屏
@@ -686,7 +711,7 @@ const result2 = await client.generate({
686
711
  response_modalities: ['IMAGE'], // 仅返回图片
687
712
  });
688
713
 
689
- console.log('4K 图像:', result2.data[0]?.b64_json ? 'Base64 编码' : result2.data[0]?.url);
714
+ console.log('4K 图像:', result4.data[0]?.b64_json ? 'Base64 编码' : result4.data[0]?.url);
690
715
  ```
691
716
 
692
717
  ### 视频生成工作流
@@ -557,7 +557,7 @@ describe("Langchain SDK Tests", () => {
557
557
  expect(Array.isArray(message.content)).toBe(true);
558
558
  console.log("✅ HumanMessage with array content 测试成功");
559
559
  });
560
- test("GeminiImageGenerationClient - 基础图像生成", async () => {
560
+ test("GeminiImageGenerationClient - 基础图像生成(文生图)", async () => {
561
561
  const imageClient = new index_1.GeminiImageGenerationClient({});
562
562
  const result = await imageClient.generate({
563
563
  prompt: 'A beautiful sunset over the ocean',
@@ -577,13 +577,72 @@ describe("Langchain SDK Tests", () => {
577
577
  expect(item).toBeDefined();
578
578
  expect(item.url || item.b64_json).toBeDefined();
579
579
  });
580
- console.log("✅ GeminiImageGenerationClient 基础测试成功");
580
+ console.log("✅ GeminiImageGenerationClient 基础测试成功(文生图)");
581
581
  console.log(`生成图像数量: ${result.data.length}`);
582
582
  console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
583
583
  if (result.text) {
584
584
  console.log("图像描述:", result.text);
585
585
  }
586
586
  }, 120000);
587
+ test("GeminiImageGenerationClient - 单图输入图像生成", async () => {
588
+ const imageClient = new index_1.GeminiImageGenerationClient({});
589
+ // 先生成一张基础图片
590
+ const baseResult = await imageClient.generate({
591
+ prompt: 'A photorealistic picture of a fluffy ginger cat sitting on a wooden floor',
592
+ model: 'gemini-2.5-flash-image',
593
+ aspect_ratio: '1:1',
594
+ response_modalities: ['IMAGE'],
595
+ });
596
+ expect(baseResult.data.length).toBeGreaterThan(0);
597
+ const baseImage = baseResult.data[0]?.b64_json || baseResult.data[0]?.url;
598
+ expect(baseImage).toBeDefined();
599
+ // 使用生成的图片作为输入
600
+ const result = await imageClient.generate({
601
+ prompt: 'Using the provided image of my cat, please add a small, knitted wizard hat on its head',
602
+ image: baseImage,
603
+ model: 'gemini-2.5-flash-image',
604
+ aspect_ratio: '1:1',
605
+ response_modalities: ['IMAGE'],
606
+ });
607
+ expect(result).toBeDefined();
608
+ expect(result.data).toBeDefined();
609
+ expect(result.data.length).toBeGreaterThan(0);
610
+ console.log("✅ GeminiImageGenerationClient 单图输入测试成功");
611
+ }, 120000);
612
+ test("GeminiImageGenerationClient - 多图输入图像生成", async () => {
613
+ const imageClient = new index_1.GeminiImageGenerationClient({});
614
+ // 先生成两张基础图片
615
+ const baseResult1 = await imageClient.generate({
616
+ prompt: 'A professional headshot of a woman with brown hair and blue eyes',
617
+ model: 'gemini-2.5-flash-image',
618
+ aspect_ratio: '1:1',
619
+ response_modalities: ['IMAGE'],
620
+ });
621
+ const baseResult2 = await imageClient.generate({
622
+ prompt: 'A simple, modern logo with the letters G and A in a white circle',
623
+ model: 'gemini-2.5-flash-image',
624
+ aspect_ratio: '1:1',
625
+ response_modalities: ['IMAGE'],
626
+ });
627
+ expect(baseResult1.data.length).toBeGreaterThan(0);
628
+ expect(baseResult2.data.length).toBeGreaterThan(0);
629
+ const image1 = baseResult1.data[0]?.b64_json || baseResult1.data[0]?.url;
630
+ const image2 = baseResult2.data[0]?.b64_json || baseResult2.data[0]?.url;
631
+ expect(image1).toBeDefined();
632
+ expect(image2).toBeDefined();
633
+ // 使用多张图片作为输入(gemini-2.5-flash-image 最多支持 3 张)
634
+ const result = await imageClient.generate({
635
+ prompt: 'Take the first image of the woman and add the logo from the second image onto her black t-shirt',
636
+ image: [image1, image2],
637
+ model: 'gemini-2.5-flash-image',
638
+ aspect_ratio: '1:1',
639
+ response_modalities: ['IMAGE'],
640
+ });
641
+ expect(result).toBeDefined();
642
+ expect(result.data).toBeDefined();
643
+ expect(result.data.length).toBeGreaterThan(0);
644
+ console.log("✅ GeminiImageGenerationClient 多图输入测试成功");
645
+ }, 180000);
587
646
  test("GeminiImageGenerationClient - 使用 aihubmix provider", async () => {
588
647
  const imageClient = new index_1.GeminiImageGenerationClient({
589
648
  provider: "aihubmix",
@@ -613,54 +672,6 @@ describe("Langchain SDK Tests", () => {
613
672
  console.log("图像描述:", result.text);
614
673
  }
615
674
  }, 120000);
616
- test("GeminiImageGenerationClient - 图像编辑", async () => {
617
- const imageClient = new index_1.GeminiImageGenerationClient({});
618
- // 首先生成一张基础图片
619
- const generateResult = await imageClient.generate({
620
- prompt: 'A simple red apple on a white background',
621
- model: 'gemini-2.5-flash-image',
622
- aspect_ratio: '1:1',
623
- response_modalities: ['IMAGE'],
624
- });
625
- expect(generateResult).toBeDefined();
626
- expect(generateResult.data).toBeDefined();
627
- expect(generateResult.data.length).toBeGreaterThan(0);
628
- // 获取第一张图片的 base64 数据
629
- const firstImage = generateResult.data[0];
630
- expect(firstImage).toBeDefined();
631
- // 提取 base64 数据(优先使用 b64_json,否则从 text 中提取)
632
- let base64Image;
633
- if (firstImage.b64_json) {
634
- base64Image = firstImage.b64_json;
635
- }
636
- else if (firstImage.text && firstImage.text.startsWith('data:')) {
637
- // 从 data URL 中提取 base64 部分
638
- base64Image = firstImage.text.split(',')[1];
639
- }
640
- else {
641
- throw new Error('无法获取图片的 base64 数据');
642
- }
643
- expect(base64Image).toBeDefined();
644
- // 使用编辑功能添加元素
645
- const editResult = await imageClient.edit({
646
- prompt: 'Add a small green leaf on top of the apple',
647
- image: base64Image, // 直接使用 base64 字符串
648
- model: 'gemini-2.5-flash-image',
649
- aspect_ratio: '1:1',
650
- response_modalities: ['IMAGE'],
651
- });
652
- expect(editResult).toBeDefined();
653
- expect(editResult.created).toBeDefined();
654
- expect(editResult.data).toBeDefined();
655
- expect(Array.isArray(editResult.data)).toBe(true);
656
- expect(editResult.data.length).toBeGreaterThan(0);
657
- editResult.data.forEach((item) => {
658
- expect(item).toBeDefined();
659
- expect(item.url || item.b64_json).toBeDefined();
660
- });
661
- console.log("✅ GeminiImageGenerationClient 图像编辑测试成功");
662
- console.log(`编辑后图像数量: ${editResult.data.length}`);
663
- }, 180000);
664
675
  test("GeminiImageGenerationClient - 多轮图片修改", async () => {
665
676
  const imageClient = new index_1.GeminiImageGenerationClient({});
666
677
  // 第一轮:创建初始图片
@@ -10,6 +10,7 @@ export interface GeminiImageGenerationConfig {
10
10
  export interface GeminiImageGenerationRequest {
11
11
  prompt: string;
12
12
  model?: string;
13
+ image?: string | string[];
13
14
  number_of_images?: number;
14
15
  aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
15
16
  image_size?: "1K" | "2K" | "4K";
@@ -28,16 +29,6 @@ export interface GeminiImageGenerationResponse {
28
29
  data: GeminiImageData[];
29
30
  text?: string;
30
31
  }
31
- export interface GeminiImageEditRequest {
32
- prompt: string;
33
- image: string;
34
- model?: string;
35
- provider?: "aihubmix" | "gemini";
36
- aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
37
- image_size?: "1K" | "2K" | "4K";
38
- response_modalities?: ("TEXT" | "IMAGE")[];
39
- user?: string;
40
- }
41
32
  export interface GeminiImageChatRequest {
42
33
  message: string;
43
34
  chat_id?: string;
@@ -69,19 +60,6 @@ export declare class GeminiImageGenerationClient {
69
60
  * - number_of_images: 生成图片数量(1-4)
70
61
  */
71
62
  generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
72
- /**
73
- * Edit images
74
- * 编辑图像
75
- *
76
- * 使用文本提示编辑图片,支持添加、移除或修改元素
77
- *
78
- * 支持的参数:
79
- * - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
80
- * - aspect_ratio: 宽高比
81
- * - image_size: 图片大小(仅 gemini-3-pro-image-preview)
82
- * - response_modalities: 响应模态
83
- */
84
- edit(request: GeminiImageEditRequest): Promise<GeminiImageGenerationResponse>;
85
63
  /**
86
64
  * Chat with images (multi-turn image editing)
87
65
  * 图像多轮对话(用于多轮图片修改)
@@ -41,6 +41,10 @@ class GeminiImageGenerationClient {
41
41
  model: request.model || "gemini-2.0-flash-exp-image-generation",
42
42
  number_of_images: request.number_of_images || 1,
43
43
  };
44
+ // 添加图片输入参数(单图或多图)
45
+ if (request.image) {
46
+ requestBody.image = request.image;
47
+ }
44
48
  // 添加可选参数
45
49
  if (request.aspect_ratio) {
46
50
  requestBody.aspect_ratio = request.aspect_ratio;
@@ -76,56 +80,6 @@ class GeminiImageGenerationClient {
76
80
  (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
77
81
  return data;
78
82
  }
79
- /**
80
- * Edit images
81
- * 编辑图像
82
- *
83
- * 使用文本提示编辑图片,支持添加、移除或修改元素
84
- *
85
- * 支持的参数:
86
- * - image: base64 编码的图片数据,或 data URL(如 data:image/png;base64,...)
87
- * - aspect_ratio: 宽高比
88
- * - image_size: 图片大小(仅 gemini-3-pro-image-preview)
89
- * - response_modalities: 响应模态
90
- */
91
- async edit(request) {
92
- const requestBody = {
93
- prompt: request.prompt,
94
- image: request.image,
95
- model: request.model || "gemini-2.5-flash-image",
96
- };
97
- // 添加可选参数
98
- if (request.aspect_ratio) {
99
- requestBody.aspect_ratio = request.aspect_ratio;
100
- }
101
- if (request.image_size) {
102
- requestBody.image_size = request.image_size;
103
- }
104
- if (request.response_modalities) {
105
- requestBody.response_modalities = request.response_modalities;
106
- }
107
- if (request.user) {
108
- requestBody.user = request.user;
109
- }
110
- if (request.provider) {
111
- requestBody.provider = request.provider;
112
- }
113
- const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/edit`;
114
- (0, log_1.logRequest)("POST", url, this.headers, { ...requestBody, image: "[base64 data]" });
115
- const response = await fetch(url, {
116
- method: "POST",
117
- headers: this.headers,
118
- body: JSON.stringify(requestBody),
119
- });
120
- if (!response.ok) {
121
- const errorText = await response.text();
122
- (0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
123
- throw new Error(`Gemini image edit API error: ${response.status} ${errorText}`);
124
- }
125
- const data = (await response.json());
126
- (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
127
- return data;
128
- }
129
83
  /**
130
84
  * Chat with images (multi-turn image editing)
131
85
  * 图像多轮对话(用于多轮图片修改)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-world-sdk",
3
- "version": "1.0.10",
3
+ "version": "1.0.11",
4
4
  "description": "TypeScript SDK for AI World Platform - Chat Models, Image Generation, and Video Generation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",