ai-world-sdk 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -247,6 +247,34 @@ const result3 = await aihubmixClient.generate({
247
247
  number_of_images: 1,
248
248
  response_modalities: ['IMAGE'],
249
249
  });
250
+
251
+ // 图像编辑(使用文本提示编辑图片)
252
+ const editResult = await client.edit({
253
+ prompt: 'Add a small wizard hat on the cat\'s head',
254
+ image: 'data:image/png;base64,iVBORw0KGgo...', // base64 编码的图片数据或 data URL
255
+ model: 'gemini-2.5-flash-image',
256
+ aspect_ratio: '1:1',
257
+ response_modalities: ['IMAGE'],
258
+ });
259
+
260
+ // 多轮图片修改(迭代式优化图片)
261
+ // 第一轮:创建初始图片
262
+ const firstResponse = await client.chat({
263
+ message: 'Create a vibrant infographic about photosynthesis',
264
+ model: 'gemini-3-pro-image-preview',
265
+ aspect_ratio: '16:9',
266
+ response_modalities: ['TEXT', 'IMAGE'],
267
+ });
268
+
269
+ // 后续轮次:修改图片(使用返回的 chat_id)
270
+ const secondResponse = await client.chat({
271
+ chat_id: firstResponse.chat_id,
272
+ message: 'Update this infographic to be in Spanish',
273
+ model: 'gemini-3-pro-image-preview',
274
+ aspect_ratio: '16:9',
275
+ image_size: '2K',
276
+ response_modalities: ['TEXT', 'IMAGE'],
277
+ });
250
278
  ```
251
279
 
252
280
  **参数说明:**
@@ -276,6 +304,17 @@ const result3 = await aihubmixClient.generate({
276
304
  | `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
277
305
  | `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
278
306
 
307
+ **图像编辑和多轮修改:**
308
+
309
+ - **`edit()`** - 图像编辑:使用文本提示编辑图片,支持添加、移除或修改元素
310
+ - 需要提供输入图片(base64 编码或 data URL)
311
+ - 适用于单次编辑操作
312
+
313
+ - **`chat()`** - 多轮图片修改:通过对话迭代式优化图片
314
+ - 首次调用创建新的聊天会话,返回 `chat_id`
315
+ - 后续调用使用 `chat_id` 继续对话
316
+ - 推荐使用 `gemini-3-pro-image-preview` 模型进行多轮编辑
317
+
279
318
  ### 视频生成
280
319
 
281
320
  #### VideoGenerationClient
@@ -610,6 +610,92 @@ describe("Langchain SDK Tests", () => {
610
610
  console.log("图像描述:", result.text);
611
611
  }
612
612
  }, 120000);
613
+ test("GeminiImageGenerationClient - 图像编辑", async () => { // Integration test: generate a base image, then modify it through edit()
614
+ const imageClient = new index_1.GeminiImageGenerationClient({});
615
+ // First generate a base image to edit
616
+ const generateResult = await imageClient.generate({
617
+ prompt: 'A simple red apple on a white background',
618
+ model: 'gemini-2.5-flash-image',
619
+ aspect_ratio: '1:1',
620
+ response_modalities: ['IMAGE'],
621
+ });
622
+ expect(generateResult).toBeDefined();
623
+ expect(generateResult.data).toBeDefined();
624
+ expect(generateResult.data.length).toBeGreaterThan(0);
625
+ // Take the first generated image as the edit input
626
+ const firstImage = generateResult.data[0];
627
+ expect(firstImage).toBeDefined();
628
+ // Extract the base64 data (prefer b64_json, otherwise pull it out of text)
629
+ let base64Image;
630
+ if (firstImage.b64_json) {
631
+ base64Image = firstImage.b64_json;
632
+ }
633
+ else if (firstImage.text && firstImage.text.startsWith('data:')) {
634
+ // Keep only the part of the data URL that follows the first comma
635
+ base64Image = firstImage.text.split(',')[1];
636
+ }
637
+ else {
638
+ throw new Error('无法获取图片的 base64 数据');
639
+ }
640
+ expect(base64Image).toBeDefined();
641
+ // Use the edit feature to add an element to the image
642
+ const editResult = await imageClient.edit({
643
+ prompt: 'Add a small green leaf on top of the apple',
644
+ image: base64Image, // pass the raw base64 string directly
645
+ model: 'gemini-2.5-flash-image',
646
+ aspect_ratio: '1:1',
647
+ response_modalities: ['IMAGE'],
648
+ });
649
+ expect(editResult).toBeDefined();
650
+ expect(editResult.created).toBeDefined();
651
+ expect(editResult.data).toBeDefined();
652
+ expect(Array.isArray(editResult.data)).toBe(true);
653
+ expect(editResult.data.length).toBeGreaterThan(0);
654
+ editResult.data.forEach((item) => {
655
+ expect(item).toBeDefined();
656
+ expect(item.url || item.b64_json).toBeDefined(); // fails only when both url and b64_json are falsy
657
+ });
658
+ console.log("✅ GeminiImageGenerationClient 图像编辑测试成功");
659
+ console.log(`编辑后图像数量: ${editResult.data.length}`);
660
+ }, 180000); // third argument is presumably the per-test timeout in ms (jest-style) — TODO confirm
661
+ test("GeminiImageGenerationClient - 多轮图片修改", async () => { // Integration test: two chat() turns that share one chat_id
662
+ const imageClient = new index_1.GeminiImageGenerationClient({});
663
+ // Round 1: create the initial image
664
+ const firstResponse = await imageClient.chat({
665
+ message: 'Create a vibrant infographic that explains photosynthesis as if it were a recipe for a plant\'s favorite food. Show the "ingredients" (sunlight, water, CO2) and the "finished dish" (sugar/energy). The style should be like a page from a colorful kids\' cookbook, suitable for a 4th grader.',
666
+ model: 'gemini-3-pro-image-preview',
667
+ aspect_ratio: '16:9',
668
+ response_modalities: ['TEXT', 'IMAGE'],
669
+ });
670
+ expect(firstResponse).toBeDefined();
671
+ expect(firstResponse.chat_id).toBeDefined();
672
+ expect(typeof firstResponse.chat_id).toBe("string");
673
+ expect(firstResponse.data).toBeDefined();
674
+ expect(Array.isArray(firstResponse.data)).toBe(true);
675
+ expect(firstResponse.data.length).toBeGreaterThan(0);
676
+ console.log("✅ 第一轮图片生成成功");
677
+ console.log(`Chat ID: ${firstResponse.chat_id}`);
678
+ console.log(`生成图像数量: ${firstResponse.data.length}`);
679
+ // Round 2: modify the image (switch its language to Spanish)
680
+ const secondResponse = await imageClient.chat({
681
+ chat_id: firstResponse.chat_id,
682
+ message: 'Update this infographic to be in Spanish. Do not change any other elements of the image.',
683
+ model: 'gemini-3-pro-image-preview',
684
+ aspect_ratio: '16:9',
685
+ image_size: '2K',
686
+ response_modalities: ['TEXT', 'IMAGE'],
687
+ });
688
+ expect(secondResponse).toBeDefined();
689
+ expect(secondResponse.chat_id).toBe(firstResponse.chat_id); // chat_id should stay the same across turns
690
+ expect(secondResponse.data).toBeDefined();
691
+ expect(Array.isArray(secondResponse.data)).toBe(true);
692
+ expect(secondResponse.data.length).toBeGreaterThan(0);
693
+ console.log("✅ 第二轮图片修改成功");
694
+ console.log(`修改后图像数量: ${secondResponse.data.length}`);
695
+ if (secondResponse.text) {
696
+ console.log("文本响应:", secondResponse.text);
697
+ }
698
+ }, 240000); // third argument is presumably the per-test timeout in ms (jest-style) — TODO confirm
613
699
  test("DoubaoImageGenerationClient - quality 参数测试", async () => {
614
700
  const imageClient = new index_1.DoubaoImageGenerationClient({});
615
701
  const qualities = ["standard", "hd"];
@@ -28,6 +28,32 @@ export interface GeminiImageGenerationResponse {
28
28
  data: GeminiImageData[];
29
29
  text?: string;
30
30
  }
31
+ export interface GeminiImageEditRequest { // Request object accepted by GeminiImageGenerationClient.edit()
32
+ prompt: string; // text prompt describing the edit to apply
33
+ image: string; // base64-encoded image data, or a data URL (e.g. data:image/png;base64,...)
34
+ model?: string; // edit() falls back to "gemini-2.5-flash-image" when this is not set
35
+ provider?: "aihubmix" | "gemini"; // sent in the request body when set; NOTE(review): routing semantics are not visible here
36
+ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9"; // aspect ratio
37
+ image_size?: "1K" | "2K" | "4K"; // image size (documented as gemini-3-pro-image-preview only)
38
+ response_modalities?: ("TEXT" | "IMAGE")[]; // response modalities
39
+ user?: string; // sent in the request body when set; presumably an end-user identifier — TODO confirm
40
+ }
41
+ export interface GeminiImageChatRequest { // Request object accepted by GeminiImageGenerationClient.chat()
42
+ message: string; // prompt for the current turn
43
+ chat_id?: string; // chat ID used to continue a multi-turn conversation; not needed on the first request
44
+ model?: string; // chat() falls back to "gemini-3-pro-image-preview" when this is not set
45
+ provider?: "aihubmix" | "gemini"; // sent in the request body when set; NOTE(review): routing semantics are not visible here
46
+ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9"; // aspect ratio
47
+ image_size?: "1K" | "2K" | "4K"; // image size (documented as gemini-3-pro-image-preview only)
48
+ response_modalities?: ("TEXT" | "IMAGE")[]; // response modalities
49
+ user?: string; // sent in the request body when set; presumably an end-user identifier — TODO confirm
50
+ }
51
+ export interface GeminiImageChatResponse { // Result type that GeminiImageGenerationClient.chat() resolves with
52
+ chat_id: string; // pass this back as chat_id on the next chat() call to continue the conversation
53
+ created: number; // NOTE(review): presumably a creation timestamp; units are not visible here — confirm
54
+ data: GeminiImageData[]; // image entries returned for this turn
55
+ text?: string; // optional text part of the response
56
+ }
31
57
  export declare class GeminiImageGenerationClient {
32
58
  private headers;
33
59
  private provider;
@@ -43,4 +69,45 @@ export declare class GeminiImageGenerationClient {
43
69
  * - number_of_images: 生成图片数量(1-4)
44
70
  */
45
71
  generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
72
+ /**
73
+ * Edit images
74
+ * Edits an image from a text prompt; supports adding, removing, or modifying elements.
75
+ *
76
+ * Resolves with a GeminiImageGenerationResponse.
77
+ *
78
+ * Supported parameters:
79
+ * - image: base64-encoded image data, or a data URL (e.g. data:image/png;base64,...)
80
+ * - aspect_ratio: aspect ratio
81
+ * - image_size: image size (gemini-3-pro-image-preview only)
82
+ * - response_modalities: response modalities
83
+ */
84
+ edit(request: GeminiImageEditRequest): Promise<GeminiImageGenerationResponse>;
85
+ /**
86
+ * Chat with images (multi-turn image editing)
87
+ * Multi-turn image conversation, used to modify an image over several rounds.
88
+ *
89
+ * Supports iteratively refining an image through a multi-turn conversation.
90
+ *
91
+ * Usage example:
92
+ * ```typescript
93
+ * // Round 1: create the initial image
94
+ * const firstResponse = await client.chat({
95
+ * message: "Create a vibrant infographic about photosynthesis"
96
+ * });
97
+ *
98
+ * // Later rounds: modify the image (using the returned chat_id)
99
+ * const secondResponse = await client.chat({
100
+ * chat_id: firstResponse.chat_id,
101
+ * message: "Update this infographic to be in Spanish"
102
+ * });
103
+ * ```
104
+ *
105
+ * Supported parameters:
106
+ * - message: prompt for the current turn
107
+ * - chat_id: chat ID (used for multi-turn conversations; not needed on the first request)
108
+ * - aspect_ratio: aspect ratio
109
+ * - image_size: image size (gemini-3-pro-image-preview only)
110
+ * - response_modalities: response modalities
111
+ */
112
+ chat(request: GeminiImageChatRequest): Promise<GeminiImageChatResponse>;
46
113
  }
@@ -76,5 +76,122 @@ class GeminiImageGenerationClient {
76
76
  (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
77
77
  return data;
78
78
  }
79
+ /**
80
+ * Edit images
81
+ * Edits an image from a text prompt; supports adding, removing, or modifying elements.
82
+ *
83
+ * POSTs the request as JSON to `/api/gemini-image-proxy/edit` on the configured server, resolves with the parsed JSON body, and throws an Error containing the status and response text when the response is not ok.
84
+ *
85
+ * Supported parameters:
86
+ * - image: base64-encoded image data, or a data URL (e.g. data:image/png;base64,...)
87
+ * - aspect_ratio: aspect ratio
88
+ * - image_size: image size (gemini-3-pro-image-preview only)
89
+ * - response_modalities: response modalities
90
+ */
91
+ async edit(request) {
92
+ const requestBody = {
93
+ prompt: request.prompt,
94
+ image: request.image,
95
+ model: request.model || "gemini-2.5-flash-image", // default model when request.model is falsy
96
+ };
97
+ // Copy optional parameters only when they are truthy
98
+ if (request.aspect_ratio) {
99
+ requestBody.aspect_ratio = request.aspect_ratio;
100
+ }
101
+ if (request.image_size) {
102
+ requestBody.image_size = request.image_size;
103
+ }
104
+ if (request.response_modalities) {
105
+ requestBody.response_modalities = request.response_modalities;
106
+ }
107
+ if (request.user) {
108
+ requestBody.user = request.user;
109
+ }
110
+ if (request.provider) {
111
+ requestBody.provider = request.provider;
112
+ }
113
+ const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/edit`;
114
+ (0, log_1.logRequest)("POST", url, this.headers, { ...requestBody, image: "[base64 data]" }); // log a placeholder instead of the image payload
115
+ const response = await fetch(url, {
116
+ method: "POST",
117
+ headers: this.headers,
118
+ body: JSON.stringify(requestBody),
119
+ });
120
+ if (!response.ok) { // failed response: log the raw body text, then throw
121
+ const errorText = await response.text();
122
+ (0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
123
+ throw new Error(`Gemini image edit API error: ${response.status} ${errorText}`);
124
+ }
125
+ const data = (await response.json());
126
+ (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
127
+ return data;
128
+ }
129
+ /**
130
+ * Chat with images (multi-turn image editing)
131
+ * Multi-turn image conversation, used to modify an image over several rounds.
132
+ *
133
+ * Supports iteratively refining an image through a multi-turn conversation. POSTs the request as JSON to `/api/gemini-image-proxy/chat`, resolves with the parsed JSON body, and throws an Error containing the status and response text when the response is not ok.
134
+ *
135
+ * Usage example:
136
+ * ```typescript
137
+ * // Round 1: create the initial image
138
+ * const firstResponse = await client.chat({
139
+ * message: "Create a vibrant infographic about photosynthesis"
140
+ * });
141
+ *
142
+ * // Later rounds: modify the image (using the returned chat_id)
143
+ * const secondResponse = await client.chat({
144
+ * chat_id: firstResponse.chat_id,
145
+ * message: "Update this infographic to be in Spanish"
146
+ * });
147
+ * ```
148
+ *
149
+ * Supported parameters:
150
+ * - message: prompt for the current turn
151
+ * - chat_id: chat ID (used for multi-turn conversations; not needed on the first request)
152
+ * - aspect_ratio: aspect ratio
153
+ * - image_size: image size (gemini-3-pro-image-preview only)
154
+ * - response_modalities: response modalities
155
+ */
156
+ async chat(request) {
157
+ const requestBody = {
158
+ message: request.message,
159
+ model: request.model || "gemini-3-pro-image-preview", // default model when request.model is falsy
160
+ };
161
+ // Copy optional parameters only when they are truthy
162
+ if (request.chat_id) {
163
+ requestBody.chat_id = request.chat_id;
164
+ }
165
+ if (request.aspect_ratio) {
166
+ requestBody.aspect_ratio = request.aspect_ratio;
167
+ }
168
+ if (request.image_size) {
169
+ requestBody.image_size = request.image_size;
170
+ }
171
+ if (request.response_modalities) {
172
+ requestBody.response_modalities = request.response_modalities;
173
+ }
174
+ if (request.user) {
175
+ requestBody.user = request.user;
176
+ }
177
+ if (request.provider) {
178
+ requestBody.provider = request.provider;
179
+ }
180
+ const url = `${config_1.sdkConfig.getServerUrl()}/api/gemini-image-proxy/chat`;
181
+ (0, log_1.logRequest)("POST", url, this.headers, requestBody);
182
+ const response = await fetch(url, {
183
+ method: "POST",
184
+ headers: this.headers,
185
+ body: JSON.stringify(requestBody),
186
+ });
187
+ if (!response.ok) { // failed response: log the raw body text, then throw
188
+ const errorText = await response.text();
189
+ (0, log_1.logResponse)(response.status, response.statusText, response.headers, errorText);
190
+ throw new Error(`Gemini image chat API error: ${response.status} ${errorText}`);
191
+ }
192
+ const data = (await response.json());
193
+ (0, log_1.logResponse)(response.status, response.statusText, response.headers, data);
194
+ return data;
195
+ }
79
196
  }
80
197
  exports.GeminiImageGenerationClient = GeminiImageGenerationClient;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-world-sdk",
3
- "version": "1.0.8",
3
+ "version": "1.0.9",
4
4
  "description": "TypeScript SDK for AI World Platform - Chat Models, Image Generation, and Video Generation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",