ai-world-sdk 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,7 +29,7 @@ npm install ai-world-sdk
29
29
  import { sdkConfig } from 'ai-world-sdk';
30
30
 
31
31
  // 设置全局配置(只需一次)
32
- sdkConfig.setBaseUrl('http://localhost:8000');
32
+ sdkConfig.setBaseUrl('your-base-url');
33
33
  sdkConfig.setToken('your-jwt-token');
34
34
  sdkConfig.setDebug(true); // 可选:启用调试模式
35
35
 
@@ -98,7 +98,10 @@ console.log('图像 URL:', result.data[0]?.url);
98
98
  const geminiClient = new GeminiImageGenerationClient({});
99
99
  const geminiResult = await geminiClient.generate({
100
100
  prompt: 'A futuristic city',
101
+ model: 'gemini-3-pro-image-preview', // 使用 Gemini 3 Pro 模型
101
102
  aspect_ratio: '16:9',
103
+ image_size: '2K', // 仅适用于 gemini-3-pro-image-preview
104
+ response_modalities: ['IMAGE'], // 仅返回图片,不返回文本
102
105
  });
103
106
  console.log('图像 URL:', geminiResult.data[0]?.url);
104
107
  ```
@@ -211,15 +214,68 @@ const result = await client.generate({
211
214
  ```typescript
212
215
  const client = new GeminiImageGenerationClient({});
213
216
 
217
+ // 基础用法(使用 Gemini 2.5 Flash - 快速模型)
214
218
  const result = await client.generate({
215
219
  prompt: 'A beautiful landscape', // 必需
216
- model: 'gemini-2.0-flash-exp-image-generation', // 可选,默认值
217
- number_of_images: 1, // 可选
218
- aspect_ratio: '16:9', // 可选: 1:1, 9:16, 16:9, 4:3, 3:4
219
- temperature: 0.7, // 可选
220
+ model: 'gemini-2.5-flash-image', // 推荐:快速、高效
221
+ aspect_ratio: '16:9', // 可选: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
222
+ number_of_images: 1, // 可选: 1-4
223
+ });
224
+
225
+ // 高级用法(使用 Gemini 3 Pro - 专业模型,支持高分辨率)
226
+ const result2 = await client.generate({
227
+ prompt: 'A futuristic city at sunset',
228
+ model: 'gemini-3-pro-image-preview', // 专业级模型
229
+ aspect_ratio: '16:9',
230
+ image_size: '2K', // 可选: 1K, 2K, 4K(仅适用于 gemini-3-pro-image-preview)
231
+ number_of_images: 1,
232
+ response_modalities: ['IMAGE'], // 仅返回图片,不返回文本
233
+ temperature: 0.7, // 可选: 0.0-2.0
234
+ max_output_tokens: 1000, // 可选
235
+ });
236
+
237
+ // 使用 aihubmix provider(通过 aihubmix 代理服务)
238
+ const aihubmixClient = new GeminiImageGenerationClient({
239
+ provider: 'aihubmix', // 使用 aihubmix 代理
240
+ });
241
+
242
+ const result3 = await aihubmixClient.generate({
243
+ prompt: 'A beautiful landscape with mountains',
244
+ model: 'gemini-3-pro-image-preview',
245
+ aspect_ratio: '16:9',
246
+ image_size: '1K',
247
+ number_of_images: 1,
248
+ response_modalities: ['IMAGE'],
220
249
  });
221
250
  ```
222
251
 
252
+ **参数说明:**
253
+
254
+ | 参数 | 类型 | 说明 | 默认值 |
255
+ |------|------|------|--------|
256
+ | `prompt` | `string` | 图像生成提示词(必需) | - |
257
+ | `model` | `string` | 模型名称(默认模型已弃用,建议显式指定 `gemini-2.5-flash-image`) | `gemini-2.0-flash-exp-image-generation` |
258
+ | `aspect_ratio` | `string` | 宽高比 | - |
259
+ | `image_size` | `string` | 图片大小(仅 gemini-3-pro-image-preview) | - |
260
+ | `number_of_images` | `number` | 生成图片数量 | `1` |
261
+ | `response_modalities` | `array` | 响应模态 | `['TEXT', 'IMAGE']` |
262
+ | `temperature` | `number` | 温度参数 | `0.7` |
263
+ | `max_output_tokens` | `number` | 最大输出 token 数 | `1000` |
264
+
265
+ **支持的宽高比:** `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`
266
+
267
+ **Provider 说明:**
268
+
269
+ - `provider: 'gemini'`(默认)- 直接使用 Google Gemini 官方 API
270
+ - `provider: 'aihubmix'` - 通过 aihubmix 代理服务调用,适合需要统一管理多个 API 的场景
271
+
272
+ **模型对比:**
273
+
274
+ | 模型 | 分辨率 | 特点 | 适用场景 |
275
+ |------|--------|------|----------|
276
+ | `gemini-2.5-flash-image` | 1024px | 快速、高效、成本低 | 日常使用、批量生成 |
277
+ | `gemini-3-pro-image-preview` | 1K/2K/4K | 专业级、高分辨率、高级功能 | 专业设计、高分辨率需求 |
278
+
223
279
  ### 视频生成
224
280
 
225
281
  #### VideoGenerationClient
@@ -253,7 +309,7 @@ const result = await client.poll(task.id, {
253
309
  import { sdkConfig } from 'ai-world-sdk';
254
310
 
255
311
  // 设置配置
256
- sdkConfig.setBaseUrl('http://localhost:8000');
312
+ sdkConfig.setBaseUrl('your-base-url');
257
313
  sdkConfig.setToken('your-jwt-token');
258
314
  sdkConfig.setHeaders({ 'X-Custom-Header': 'value' });
259
315
  sdkConfig.setDebug(true);
@@ -308,11 +364,26 @@ const gptModel = new ChatOpenAI({
308
364
 
309
365
  // Gemini 模型(通过 aihubmix)
310
366
  const geminiModel = new ChatGoogleGenerativeAI({
311
- modelName: 'gemini-2.5-flash-image',
367
+ modelName: 'gemini-2.5-flash',
312
368
  temperature: 0.7,
313
369
  provider: 'aihubmix',
314
370
  });
315
371
 
372
+ // Gemini 图像生成(通过 aihubmix)
373
+ import { GeminiImageGenerationClient } from 'ai-world-sdk';
374
+
375
+ const geminiImageClient = new GeminiImageGenerationClient({
376
+ provider: 'aihubmix',
377
+ });
378
+
379
+ const imageResult = await geminiImageClient.generate({
380
+ prompt: 'A beautiful landscape',
381
+ model: 'gemini-3-pro-image-preview',
382
+ aspect_ratio: '16:9',
383
+ image_size: '1K',
384
+ response_modalities: ['IMAGE'],
385
+ });
386
+
316
387
  // Claude 模型(通过 aihubmix)
317
388
  const claudeModel = new ChatAnthropic({
318
389
  modelName: 'claude-3-sonnet-20240229',
@@ -444,6 +515,8 @@ const response = await modelWithTools.invoke([
444
515
 
445
516
  ### 图像生成工作流
446
517
 
518
+ #### 豆包图像生成
519
+
447
520
  ```typescript
448
521
  import { DoubaoImageGenerationClient } from 'ai-world-sdk';
449
522
 
@@ -470,6 +543,38 @@ multiResult.data.forEach((image, index) => {
470
543
  });
471
544
  ```
472
545
 
546
+ #### Gemini 图像生成
547
+
548
+ ```typescript
549
+ import { GeminiImageGenerationClient } from 'ai-world-sdk';
550
+
551
+ const client = new GeminiImageGenerationClient({});
552
+
553
+ // 使用 Gemini 2.5 Flash(快速模型)
554
+ const result = await client.generate({
555
+ prompt: 'A beautiful sunset over the ocean',
556
+ model: 'gemini-2.5-flash-image',
557
+ aspect_ratio: '16:9',
558
+ number_of_images: 1,
559
+ });
560
+
561
+ console.log('图像 URL:', result.data[0]?.url || 'Base64 编码');
562
+ if (result.text) {
563
+ console.log('图像描述:', result.text);
564
+ }
565
+
566
+ // 使用 Gemini 3 Pro(专业模型,支持高分辨率)
567
+ const result2 = await client.generate({
568
+ prompt: 'A futuristic city at night',
569
+ model: 'gemini-3-pro-image-preview',
570
+ aspect_ratio: '21:9', // 超宽屏
571
+ image_size: '4K', // 4K 分辨率
572
+ response_modalities: ['IMAGE'], // 仅返回图片
573
+ });
574
+
575
+ console.log('4K 图像:', result2.data[0]?.b64_json ? 'Base64 编码' : result2.data[0]?.url);
576
+ ```
577
+
473
578
  ### 视频生成工作流
474
579
 
475
580
  ```typescript
@@ -519,7 +624,14 @@ if (result.status === 'succeeded') {
519
624
  ### 图像生成模型
520
625
 
521
626
  - **豆包 Seedream**: `doubao-seedream-4-5-251128` (默认)
522
- - **Google Gemini**: `gemini-2.0-flash-exp-image-generation` (默认)
627
+ - **Google Gemini**:
628
+ - `gemini-2.5-flash-image` (Nano Banana) - **推荐**,快速、高效,1024px 分辨率,支持所有宽高比
629
+ - `gemini-3-pro-image-preview` (Nano Banana Pro) - 专业级,支持 1K/2K/4K 分辨率,支持 Google 搜索、思考模式,最多 14 张参考图片
630
+ - `gemini-2.0-flash-exp-image-generation` (已弃用,建议使用 `gemini-2.5-flash-image`)
631
+
632
+ **模型选择建议:**
633
+ - **日常使用**: `gemini-2.5-flash-image` - 速度快,成本低
634
+ - **专业需求**: `gemini-3-pro-image-preview` - 高分辨率、高级功能(可通过可选的 `image_size` 参数指定 1K/2K/4K 分辨率)
523
635
 
524
636
  ### 视频生成模型
525
637
 
@@ -228,38 +228,6 @@ describe("Langchain SDK Tests", () => {
228
228
  console.log("响应内容:", content);
229
229
  }
230
230
  }, 60000);
231
- test("ChatGoogleGenerativeAI - aihubmix.com 图像生成", async () => {
232
- const gemini = new index_1.ChatGoogleGenerativeAI({
233
- modelName: "gemini-3-pro-image-preview",
234
- temperature: 0.7,
235
- provider: "aihubmix",
236
- });
237
- const response = await gemini.invoke([
238
- new index_1.HumanMessage("请生成一张展示人工智能概念的图片"),
239
- ]);
240
- expect(response).toBeDefined();
241
- expect(response.content).toBeDefined();
242
- // 图像生成响应可能包含文本和图像数据
243
- // 检查响应内容(可能是字符串、数组或包含图像的对象)
244
- const content = response.content;
245
- if (typeof content === "string") {
246
- console.log("✅ aihubmix.com 图像生成测试成功(文本响应)");
247
- console.log("AI:", content);
248
- }
249
- else if (Array.isArray(content)) {
250
- // 多模态响应,可能包含图像
251
- const hasImage = content.some((item) => item?.type === "image_url" ||
252
- item?.image_url ||
253
- (typeof item === "string" && item.startsWith("data:image")));
254
- console.log("✅ aihubmix.com 图像生成测试成功(多模态响应)");
255
- console.log("响应类型: 数组,包含图像:", hasImage);
256
- console.log("响应内容:", JSON.stringify(content, null, 2));
257
- }
258
- else {
259
- console.log("✅ aihubmix.com 图像生成测试成功(其他格式)");
260
- console.log("响应内容:", content);
261
- }
262
- }, 60000);
263
231
  test("DoubaoImageGenerationClient - 基础图像生成", async () => {
264
232
  const imageClient = new index_1.DoubaoImageGenerationClient({});
265
233
  const result = await imageClient.generate({
@@ -589,10 +557,12 @@ describe("Langchain SDK Tests", () => {
589
557
  test("GeminiImageGenerationClient - 基础图像生成", async () => {
590
558
  const imageClient = new index_1.GeminiImageGenerationClient({});
591
559
  const result = await imageClient.generate({
592
- prompt: "A beautiful sunset over the ocean with vibrant colors",
593
- model: "gemini-2.0-flash-exp-image-generation",
560
+ prompt: 'A beautiful sunset over the ocean',
561
+ model: 'gemini-3-pro-image-preview',
562
+ aspect_ratio: '16:9',
563
+ image_size: '1K', // 仅适用于 gemini-3-pro-image-preview
594
564
  number_of_images: 1,
595
- aspect_ratio: "16:9",
565
+ response_modalities: ['IMAGE'], // 仅返回图片
596
566
  });
597
567
  expect(result).toBeDefined();
598
568
  expect(result.created).toBeDefined();
@@ -606,46 +576,40 @@ describe("Langchain SDK Tests", () => {
606
576
  });
607
577
  console.log("✅ GeminiImageGenerationClient 基础测试成功");
608
578
  console.log(`生成图像数量: ${result.data.length}`);
609
- console.log("图像 URL:", result.data[0]?.url || "Base64 编码");
579
+ console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
610
580
  if (result.text) {
611
581
  console.log("图像描述:", result.text);
612
582
  }
613
583
  }, 120000);
614
- test("GeminiImageGenerationClient - 多图像生成", async () => {
615
- const imageClient = new index_1.GeminiImageGenerationClient({});
584
+ test("GeminiImageGenerationClient - 使用 aihubmix provider", async () => {
585
+ const imageClient = new index_1.GeminiImageGenerationClient({
586
+ provider: "aihubmix",
587
+ });
616
588
  const result = await imageClient.generate({
617
- prompt: "A futuristic city skyline",
618
- model: "gemini-2.0-flash-exp-image-generation",
619
- number_of_images: 2,
620
- aspect_ratio: "16:9",
589
+ prompt: 'A beautiful sunset over the ocean',
590
+ model: 'gemini-3-pro-image-preview',
591
+ aspect_ratio: '16:9',
592
+ image_size: '1K',
593
+ number_of_images: 1,
594
+ response_modalities: ['IMAGE'], // 仅返回图片
621
595
  });
622
596
  expect(result).toBeDefined();
597
+ expect(result.created).toBeDefined();
598
+ expect(typeof result.created).toBe("number");
623
599
  expect(result.data).toBeDefined();
624
600
  expect(Array.isArray(result.data)).toBe(true);
625
- expect(result.data.length).toBe(2);
626
- console.log("✅ GeminiImageGenerationClient 多图像生成测试成功");
601
+ expect(result.data.length).toBeGreaterThan(0);
602
+ result.data.forEach((item) => {
603
+ expect(item).toBeDefined();
604
+ expect(item.url || item.b64_json).toBeDefined();
605
+ });
606
+ console.log("✅ GeminiImageGenerationClient (aihubmix provider) 测试成功");
627
607
  console.log(`生成图像数量: ${result.data.length}`);
628
- }, 120000);
629
- test("GeminiImageGenerationClient - 不同宽高比测试", async () => {
630
- const imageClient = new index_1.GeminiImageGenerationClient({});
631
- const aspectRatios = [
632
- "1:1",
633
- "16:9",
634
- "9:16",
635
- ];
636
- for (const aspectRatio of aspectRatios) {
637
- const result = await imageClient.generate({
638
- prompt: "A beautiful landscape",
639
- model: "gemini-2.0-flash-exp-image-generation",
640
- aspect_ratio: aspectRatio,
641
- number_of_images: 1,
642
- });
643
- expect(result).toBeDefined();
644
- expect(result.data).toBeDefined();
645
- expect(result.data.length).toBeGreaterThan(0);
646
- console.log(`✅ Gemini 宽高比 ${aspectRatio} 测试成功`);
608
+ console.log("图像 URL:", result.data[0]?.url || ("Base64 编码" + result.data[0]?.b64_json));
609
+ if (result.text) {
610
+ console.log("图像描述:", result.text);
647
611
  }
648
- }, 180000);
612
+ }, 120000);
649
613
  test("DoubaoImageGenerationClient - quality 参数测试", async () => {
650
614
  const imageClient = new index_1.DoubaoImageGenerationClient({});
651
615
  const qualities = ["standard", "hd"];
@@ -660,7 +624,7 @@ describe("Langchain SDK Tests", () => {
660
624
  expect(result).toBeDefined();
661
625
  expect(result.data).toBeDefined();
662
626
  expect(result.data.length).toBeGreaterThan(0);
663
- console.log(`✅ quality ${quality} 测试成功`);
627
+ console.log(`✅ quality ${quality} 测试成功`, result.data);
664
628
  }
665
629
  }, 180000);
666
630
  test("DoubaoImageGenerationClient - style 参数测试", async () => {
@@ -677,7 +641,7 @@ describe("Langchain SDK Tests", () => {
677
641
  expect(result).toBeDefined();
678
642
  expect(result.data).toBeDefined();
679
643
  expect(result.data.length).toBeGreaterThan(0);
680
- console.log(`✅ style ${style} 测试成功`);
644
+ console.log(`✅ style ${style} 测试成功`, result.data);
681
645
  }
682
646
  }, 180000);
683
647
  test("DoubaoImageGenerationClient - response_format 参数测试", async () => {
@@ -717,7 +681,7 @@ describe("Langchain SDK Tests", () => {
717
681
  expect(result).toBeDefined();
718
682
  expect(result.data).toBeDefined();
719
683
  expect(result.data.length).toBeGreaterThan(0);
720
- console.log("✅ watermark 参数测试成功");
684
+ console.log("✅ watermark 参数测试成功", result.data);
721
685
  }, 120000);
722
686
  test("sdkConfig - 全局配置测试", () => {
723
687
  const originalBaseUrl = index_1.sdkConfig.getServerUrl();
@@ -3,6 +3,7 @@
3
3
  * Google Gemini 图像生成客户端
4
4
  */
5
5
  export interface GeminiImageGenerationConfig {
6
+ provider?: "aihubmix" | "gemini";
6
7
  baseUrl?: string;
7
8
  headers?: Record<string, string>;
8
9
  }
@@ -10,9 +11,11 @@ export interface GeminiImageGenerationRequest {
10
11
  prompt: string;
11
12
  model?: string;
12
13
  number_of_images?: number;
13
- aspect_ratio?: "1:1" | "9:16" | "16:9" | "4:3" | "3:4";
14
+ aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9";
15
+ image_size?: "1K" | "2K" | "4K";
14
16
  temperature?: number;
15
17
  max_output_tokens?: number;
18
+ response_modalities?: ("TEXT" | "IMAGE")[];
16
19
  user?: string;
17
20
  }
18
21
  export interface GeminiImageData {
@@ -27,10 +30,17 @@ export interface GeminiImageGenerationResponse {
27
30
  }
28
31
  export declare class GeminiImageGenerationClient {
29
32
  private headers;
33
+ private provider;
30
34
  constructor(config?: GeminiImageGenerationConfig);
31
35
  /**
32
36
  * Generate images
33
37
  * 生成图像
38
+ *
39
+ * 支持的参数:
40
+ * - aspect_ratio: 宽高比,支持 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
41
+ * - image_size: 图片大小(仅 gemini-3-pro-image-preview),支持 1K, 2K, 4K
42
+ * - response_modalities: 响应模态,["TEXT", "IMAGE"] 或 ["IMAGE"]
43
+ * - number_of_images: 生成图片数量(1-4)
34
44
  */
35
45
  generate(request: GeminiImageGenerationRequest): Promise<GeminiImageGenerationResponse>;
36
46
  }
@@ -9,19 +9,31 @@ const config_1 = require("./config");
9
9
  const log_1 = require("./log");
10
10
  class GeminiImageGenerationClient {
11
11
  constructor(config) {
12
+ // 确定 provider(默认为 gemini)
13
+ this.provider = config?.provider || "gemini";
12
14
  // 合并全局 headers 和配置 headers
13
15
  const globalHeaders = config_1.sdkConfig.getHeaders();
14
16
  this.headers = {
15
17
  "Content-Type": "application/json",
16
18
  "Authorization": `Bearer ${config_1.sdkConfig.getToken()}`,
17
- "X-Base-Url": config?.baseUrl || "",
19
+ "X-Provider": this.provider, // 在 header 中传递 provider,由后端确定 baseUrl
18
20
  ...globalHeaders,
19
21
  ...config?.headers,
20
22
  };
23
+ // 如果明确指定了 baseUrl,也添加到 header 中
24
+ if (config?.baseUrl) {
25
+ this.headers["X-Base-Url"] = config.baseUrl;
26
+ }
21
27
  }
22
28
  /**
23
29
  * Generate images
24
30
  * 生成图像
31
+ *
32
+ * 支持的参数:
33
+ * - aspect_ratio: 宽高比,支持 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
34
+ * - image_size: 图片大小(仅 gemini-3-pro-image-preview),支持 1K, 2K, 4K
35
+ * - response_modalities: 响应模态,["TEXT", "IMAGE"] 或 ["IMAGE"]
36
+ * - number_of_images: 生成图片数量(1-4)
25
37
  */
26
38
  async generate(request) {
27
39
  const requestBody = {
@@ -33,12 +45,18 @@ class GeminiImageGenerationClient {
33
45
  if (request.aspect_ratio) {
34
46
  requestBody.aspect_ratio = request.aspect_ratio;
35
47
  }
48
+ if (request.image_size) {
49
+ requestBody.image_size = request.image_size;
50
+ }
36
51
  if (request.temperature !== undefined) {
37
52
  requestBody.temperature = request.temperature;
38
53
  }
39
54
  if (request.max_output_tokens) {
40
55
  requestBody.max_output_tokens = request.max_output_tokens;
41
56
  }
57
+ if (request.response_modalities) {
58
+ requestBody.response_modalities = request.response_modalities;
59
+ }
42
60
  if (request.user) {
43
61
  requestBody.user = request.user;
44
62
  }
package/dist/index.d.ts CHANGED
@@ -10,7 +10,7 @@ import { GeminiImageGenerationClient, type GeminiImageGenerationConfig, type Gem
10
10
  import { VideoGenerationClient, type VideoGenerationConfig, type VideoGenerationRequest, type ContentGenerationTaskID, type ContentGenerationTask } from "./video_generation";
11
11
  import { sdkConfig } from "./config";
12
12
  export { BaseMessage, HumanMessage, AIMessage, SystemMessage, AIMessageChunk, type MessageContent, type AIMessageChunkData, } from "./messages";
13
- export { BaseChatModel, type BaseChatModelParams, type ToolDefinition, type BindOptions, } from "./base";
13
+ export { BaseChatModel, type BaseChatModelParams, type AIModelProvider, type ToolDefinition, type BindOptions, } from "./base";
14
14
  export { ChatOpenAI } from "./chat_models/openai";
15
15
  export { ChatGoogleGenerativeAI } from "./chat_models/google";
16
16
  export { ChatAnthropic } from "./chat_models/anthropic";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-world-sdk",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "description": "TypeScript SDK for AI World Platform - Chat Models, Image Generation, and Video Generation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",