utilitas 1999.1.93 → 1999.1.95

package/lib/alan.mjs CHANGED
@@ -1,16 +1,15 @@
1
1
  import { checkSearch, distill, search } from './web.mjs';
2
2
  import { create as createUoid } from './uoid.mjs';
3
- import { end, loop } from './event.mjs';
4
3
  import { fileTypeFromBuffer } from 'file-type';
5
4
  import { packPcmToWav } from './media.mjs';
6
5
  import { v4 as uuidv4 } from 'uuid';
7
6
 
8
7
  import {
9
- BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_JSON, MIME_TEXT, MIME_PNG,
10
- MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
11
- MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
12
- MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
13
- MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, STREAM, convert, formatDataURL
8
+ BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
9
+ MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
10
+ MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
11
+ MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
12
+ MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, decodeBase64DataURL,
14
13
  } from './storage.mjs';
15
14
 
16
15
  import {
@@ -43,42 +42,32 @@ You may be provided with some tools(functions) to help you gather information an
43
42
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
44
43
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;
45
44
 
46
- const _NEED = [
47
- '@anthropic-ai/sdk', '@anthropic-ai/vertex-sdk', '@google/genai',
48
- 'js-tiktoken', 'OpenAI',
49
- ];
45
+ const _NEED = ['js-tiktoken', 'OpenAI'];
50
46
 
51
47
  const [
52
- OPENAI, GEMINI, OPENAI_TRAINING, OLLAMA, GEMINI_25_FLASH, NOVA,
53
- DEEPSEEK_R1, MD_CODE, TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE,
54
- CLOUD_SONNET_45, AUDIO, WAV, ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW,
55
- HIGH, GPT_REASONING_EFFORT, THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR,
56
- TOOLS_END, TOOLS, TEXT, THINKING, OK, FUNC, GPT_5, GPT_5_MINI,
57
- REDACTED_THINKING, GEMMA_3_27B, AZURE_OPENAI, ANTHROPIC, VERTEX_ANTHROPIC,
58
- v8k, ais, MAX_TOOL_RECURSION, LOG, name, user, system, assistant, MODEL,
59
- JSON_OBJECT, TOOL, silent, GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio,
60
- GPT_QUERY_LIMIT, CONTENT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, kT, m,
61
- minute, hour, gb, trimTailing, EBD, GEMINI_25_FLASH_IMAGE, IMAGE, JINA,
62
- JINA_DEEPSEARCH, JINA_CLIP, VERTEX, GEMINI_25_PRO, SILICONFLOW,
63
- SF_DEEPSEEK_R1, MAX_TIRE,
48
+ OPENAI, GEMINI, OLLAMA, GEMINI_25_FLASH, NOVA, DEEPSEEK_R1, MD_CODE,
49
+ CLOUD_SONNET_45, AUDIO, WAV, ATTACHMENTS, OPENAI_VOICE,
50
+ GPT_REASONING_EFFORT, THINK, THINK_STR, THINK_END, TOOLS_STR, TOOLS_END,
51
+ TOOLS, TEXT, OK, FUNC, GPT_51, GPT_51_CODEX, GEMMA_3_27B, ANTHROPIC, v8k, ais,
52
+ MAX_TOOL_RECURSION, LOG, name, user, system, assistant, MODEL, JSON_OBJECT,
53
+ tokenSafeRatio, CONTENT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, kT, m, minute,
54
+ hour, gb, trimTailing, GEMINI_25_FLASH_IMAGE, IMAGE, JINA, JINA_DEEPSEARCH,
55
+ GEMINI_25_PRO, SILICONFLOW, SF_DEEPSEEK_R1, MAX_TIRE, OPENROUTER_API,
56
+ OPENROUTER, AUTO, TOOL,
64
57
  ] = [
65
- 'OpenAI', 'Gemini', 'OPENAI_TRAINING', 'Ollama',
66
- 'gemini-2.5-flash-preview-09-2025', 'nova', 'deepseek-r1', '```',
67
- 'text-embedding-3-small', 'text-embedding-3-large',
68
- 'claude-sonnet-4-5@20250929', 'audio', 'wav', '[ATTACHMENTS]', 'CHAT',
69
- 'OPENAI_VOICE', 'medium', 'low', 'high', 'medium', 'think', '<think>',
70
- '</think>', 'AZURE', '<tools>', '</tools>', 'tools', 'text', 'thinking',
71
- 'OK', 'function', 'gpt-5', 'gpt-5-mini', 'redacted_thinking',
72
- 'gemma3:27b', 'Azure Openai', 'Anthropic', 'Vertex Anthropic',
73
- 7680 * 4320, [], 30, { log: true }, 'Alan', 'user', 'system',
74
- 'assistant', 'model', 'json_object', 'tool', true,
75
- 'gemini-embedding-001', 'Invalid file data.', 1.1, 100,
76
- 'Content is required.', 2048 * 2048, x => 1024 * x, x => 1000 * x,
77
- x => 1024 * 1024 * x, x => 60 * x, x => 60 * 60 * x,
78
- x => 1024 * 1024 * 1024 * x, x => x.replace(/[\.\s]*$/, ''),
79
- { embedding: true }, 'gemini-2.5-flash-image', 'image', 'Jina',
80
- 'jina-deepsearch-v1', 'jina-clip-v2', 'Vertex', 'gemini-2.5-pro',
81
- 'SiliconFlow', 'Pro/deepseek-ai/DeepSeek-R1', 768 * 768,
58
+ 'OpenAI', 'Gemini', 'Ollama', 'gemini-2.5-flash-preview-09-2025',
59
+ 'nova', 'deepseek-r1', '```', 'anthropic/claude-sonnet-4.5', 'audio',
60
+ 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE', 'medium', 'think', '<think>',
61
+ '</think>', '<tools>', '</tools>', 'tools', 'text', 'OK', 'function',
62
+ 'gpt-5.1', 'gpt-5.1-codex', 'gemma3:27b', 'Anthropic', 7680 * 4320, [],
63
+ 30, { log: true }, 'Alan', 'user', { role: 'system' }, 'assistant',
64
+ 'model', 'json_object', 1.1, 'Content is required.', 2048 * 2048,
65
+ x => 1024 * x, x => 1000 * x, x => 1024 * 1024 * x, x => 60 * x,
66
+ x => 60 * 60 * x, x => 1024 * 1024 * 1024 * x,
67
+ x => x.replace(/[\.\s]*$/, ''), 'gemini-2.5-flash-image', 'image',
68
+ 'Jina', 'jina-deepsearch-v1', 'gemini-2.5-pro', 'SiliconFlow',
69
+ 'Pro/deepseek-ai/DeepSeek-R1', 768 * 768,
70
+ 'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
82
71
  ];
83
72
 
84
73
  const [tool, messages, text]
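Note on the constant table above: the names and the values are two parallel arrays matched purely by position, so the two halves of this hunk have to stay in lockstep. Reading a few of the added pairs out of the new arrays gives, for example:

```js
// Positional pairing of selected constants, copied from the new arrays above.
const [GPT_51, GPT_51_CODEX, OPENROUTER_API, OPENROUTER, AUTO, TOOL] = [
    'gpt-5.1', 'gpt-5.1-codex',
    'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
];
```

Also worth noticing: `system` now resolves to the object `{ role: 'system' }` rather than the string `'system'`, which is presumably why `buildMessage(options.systemPrompt, system)` later in this diff passes it straight through as the options argument.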
@@ -98,21 +87,21 @@ const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
98
87
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
99
88
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
100
89
  const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
101
- const AzureOpenAI = async opts => new (await libOpenAi(opts)).AzureOpenAI(opts);
102
- const OPENAI_EBD = { ...EBD, maxInputTokens: k(8) - 1 };
103
-
104
90
  const OPENAI_RULES = {
91
+ source: 'openai',
105
92
  contextWindow: kT(400), maxOutputTokens: k(128),
106
93
  imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
107
94
  maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
108
95
  supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
109
96
  supportedDocTypes: [MIME_PDF],
110
- supportedAudioTypes: [MIME_WAV], audio: 'gpt-4o-audio-preview',
97
+ supportedAudioTypes: [MIME_WAV],
98
+ // audio: 'gpt-4o-audio-preview',
111
99
  json: true, tools: true, vision: true,
112
- reasoning: true, defaultProvider: OPENAI,
100
+ reasoning: true, defaultProvider: OPENROUTER,
113
101
  };
114
102
 
115
103
  const GEMINI_RULES = {
104
+ source: 'google',
116
105
  json: true, audioCostTokens: 1000 * 1000 * 1, // 8.4 hours => 1 million tokens
117
106
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
118
107
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
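For a concrete sense of the per-image budgets above, the two `imageCostTokens` expressions evaluate as follows with the constants defined earlier in this file (`OPENAI_HI_RES_SIZE = 2048 * 2048`, `v8k = 7680 * 4320`, `MAX_TIRE = 768 * 768`):

```js
// OPENAI_RULES: 4,194,304 / 589,824 ≈ 7.11 -> ~~(7.11 * 140 + 70)
~~(2048 * 2048 / (768 * 768) * 140 + 70); // => 1065 tokens per image
// GEMINI_RULES: 33,177,600 / 589,824 = 56.25 -> ~~(56.25 * 258)
~~(7680 * 4320 / (768 * 768) * 258);      // => 14512 tokens per image
```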
@@ -125,7 +114,7 @@ const GEMINI_RULES = {
125
114
  ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
126
115
  // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
127
116
  // gemini-2.5-flash-preview-native-audio-dialog
128
- defaultProvider: GEMINI,
117
+ defaultProvider: OPENROUTER,
129
118
  };
130
119
 
131
120
  const DEEPSEEK_R1_RULES = {
@@ -135,9 +124,10 @@ const DEEPSEEK_R1_RULES = {
135
124
 
136
125
  // https://platform.openai.com/docs/models
137
126
  // https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
127
+ // https://openrouter.ai/docs/features/multimodal/audio (only supports input audio)
138
128
  const MODELS = {
139
- [GPT_5]: { ...OPENAI_RULES, },
140
- [GPT_5_MINI]: { ...OPENAI_RULES, fast: true },
129
+ [GPT_51]: { ...OPENAI_RULES, fast: true },
130
+ [GPT_51_CODEX]: { ...OPENAI_RULES },
141
131
  [GEMINI_25_FLASH_IMAGE]: {
142
132
  ...GEMINI_RULES, contextWindow: k(64), maxOutputTokens: k(32),
143
133
  fast: true, image: true,
@@ -166,12 +156,6 @@ const MODELS = {
166
156
  },
167
157
  [DEEPSEEK_R1]: DEEPSEEK_R1_RULES,
168
158
  [SF_DEEPSEEK_R1]: { ...DEEPSEEK_R1_RULES, defaultProvider: SILICONFLOW },
169
- [TEXT_EMBEDDING_3_LARGE]: { ...OPENAI_EBD, dimension: k(3) },
170
- [TEXT_EMBEDDING_3_SMALL]: { ...OPENAI_EBD, dimension: k(1.5) },
171
- [GEMINI_EMBEDDING_M]: { ...EBD, maxInputTokens: k(8), dimension: k(3) },
172
- [JINA_CLIP]: {
173
- maxInputTokens: k(8), maxImageSize: 512 * 512, dimension: k(1),
174
- },
175
159
  [CLOUD_SONNET_45]: {
176
160
  contextWindow: kT(200), maxOutputTokens: kT(64),
177
161
  documentCostTokens: 3000 * 10, maxDocumentFile: m(32),
@@ -179,7 +163,7 @@ const MODELS = {
179
163
  maxImagePerPrompt: 100, maxFileSize: m(5), maxImageSize: 2000 * 2000,
180
164
  supportedMimeTypes: [MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF],
181
165
  json: true, reasoning: true, tools: true, vision: true,
182
- defaultProvider: [ANTHROPIC, VERTEX_ANTHROPIC],
166
+ defaultProvider: OPENROUTER,
183
167
  },
184
168
  // https://docs.anthropic.com/en/docs/build-with-claude/vision
185
169
  // https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/sonnet-4-5
@@ -189,43 +173,70 @@ const MODELS = {
189
173
  let ATTACHMENT_TOKEN_COST = 0;
190
174
  for (const n in MODELS) {
191
175
  MODELS[n]['name'] = n;
192
- if (!MODELS[n].embedding) {
193
- MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
194
- MODELS[n].supportedDocTypes = MODELS[n].supportedDocTypes || [];
195
- MODELS[n].supportedAudioTypes = MODELS[n].supportedAudioTypes || [];
196
- MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
197
- || Math.ceil(MODELS[n].contextWindow * 0.4);
198
- MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
199
- || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
200
- ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
201
- ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
202
- ) : MODELS[n].imageCostTokens;
203
- }
176
+ MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
177
+ MODELS[n].supportedDocTypes = MODELS[n].supportedDocTypes || [];
178
+ MODELS[n].supportedAudioTypes = MODELS[n].supportedAudioTypes || [];
179
+ MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
180
+ || Math.ceil(MODELS[n].contextWindow * 0.4);
181
+ MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
182
+ || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
183
+ ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
184
+ ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
185
+ ) : MODELS[n].imageCostTokens;
204
186
  }
187
+ MODELS[AUTO] = { name: AUTO, defaultProvider: OPENROUTER, };
188
+ for (const n of [GPT_51, GPT_51_CODEX, GEMINI_25_PRO, GEMINI_25_FLASH]) {
189
+ // get the most restrictive limits
190
+ for (const key of [
191
+ 'contextWindow', 'maxInputTokens', 'maxDocumentFile', 'maxAudioLength',
192
+ 'maxImagePerPrompt', 'maxFileSize', 'maxImageSize', 'maxOutputTokens',
193
+ 'maxAudioPerPrompt', 'maxDocumentPages', 'maxUrlSize', 'maxVideoLength',
194
+ 'maxVideoPerPrompt',
195
+ ]) {
196
+ MODELS[AUTO][key] = Math.min(
197
+ MODELS[AUTO][key] || Infinity, MODELS[n][key] || Infinity,
198
+ );
199
+ }
200
+ // get the most permissive costs
201
+ for (const key of [
202
+ 'documentCostTokens', 'imageCostTokens', 'audioCostTokens',
203
+ ]) {
204
+ MODELS[AUTO][key] = Math.max(
205
+ MODELS[AUTO][key] || 0, MODELS[n][key] || 0,
206
+ );
207
+ }
208
+ // combine supported types
209
+ for (const key of [
210
+ 'supportedAudioTypes', 'supportedDocTypes', 'supportedMimeTypes',
211
+ ]) {
212
+ MODELS[AUTO][key] = [...new Set(
213
+ [...MODELS[AUTO][key] || [], ...MODELS[n][key] || []]
214
+ )];
215
+ }
216
+ // for other features, if any model supports it, then AUTO supports it
217
+ for (const key of [
218
+ 'json', 'reasoning', 'tools', 'vision', 'fast', 'deepsearch', 'image',
219
+ ]) {
220
+ MODELS[AUTO][key] = MODELS[AUTO][key] || MODELS[n][key];
221
+ }
222
+ // catch first possible support
223
+ for (const key of ['audio']) {
224
+ MODELS[AUTO][key] = MODELS[AUTO][key] || MODELS[n][key];
225
+ }
226
+ };
205
227
 
206
228
  // Default models for each provider
207
229
  const DEFAULT_MODELS = {
208
- [OPENAI]: GPT_5,
230
+ [OPENROUTER]: AUTO,
209
231
  [SILICONFLOW]: SF_DEEPSEEK_R1,
210
- [GEMINI]: GEMINI_25_PRO,
211
- [ANTHROPIC]: CLOUD_SONNET_45,
212
- [VERTEX_ANTHROPIC]: CLOUD_SONNET_45,
213
232
  [JINA]: JINA_DEEPSEARCH,
214
233
  [OLLAMA]: GEMMA_3_27B,
215
234
  [OPENAI_VOICE]: NOVA,
216
- [OPENAI_TRAINING]: null, // https://platform.openai.com/docs/guides/fine-tuning
217
- };
218
-
219
- const DEFAULT_EMBEDDING = {
220
- [OPENAI]: TEXT_EMBEDDING_3_SMALL,
221
- [GEMINI]: GEMINI_EMBEDDING_M,
222
- [JINA]: JINA_CLIP,
223
235
  };
224
236
 
225
237
  const PROVIDER_ICONS = {
226
- [OPENAI]: '⚛️', [AZURE_OPENAI]: '⚛️', [AZURE]: '☁️', [JINA]: '✴️',
227
- [GEMINI]: '♊️', [VERTEX]: '📖', [OLLAMA]: '🦙',
228
- [ANTHROPIC]: '✳️', [VERTEX_ANTHROPIC]: '✳️', [SILICONFLOW]: '🧬',
238
+ [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GEMINI]: '♊️',
239
+ [OLLAMA]: '🦙', [ANTHROPIC]: '✳️', [SILICONFLOW]: '🧬',
229
240
  };
230
241
 
231
242
  const FEATURE_ICONS = {
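The block above derives the new `MODELS[AUTO]` entry for `openrouter/auto` by folding the four routable models together: limits take the minimum, per-attachment costs take the maximum, supported types are unioned, and boolean capabilities are OR-ed. A self-contained sketch of the same folding strategy with two hypothetical models, just to make the direction of each rule concrete:

```js
// Hypothetical inputs purely for illustration; real values live in MODELS above.
const a = { contextWindow: 400000, imageCostTokens: 1065, supportedMimeTypes: ['image/png'], tools: true };
const b = { contextWindow: 1048576, imageCostTokens: 14512, supportedMimeTypes: ['image/png', 'image/webp'], tools: false };

const auto = {};
for (const model of [a, b]) {
    // limits: the most restrictive value wins
    auto.contextWindow = Math.min(auto.contextWindow || Infinity, model.contextWindow || Infinity);
    // costs: the highest estimate wins, so budgeting never undercounts
    auto.imageCostTokens = Math.max(auto.imageCostTokens || 0, model.imageCostTokens || 0);
    // supported types: union of everything any model accepts
    auto.supportedMimeTypes = [...new Set([...auto.supportedMimeTypes || [], ...model.supportedMimeTypes || []])];
    // capabilities: enabled if any routable model has the feature
    auto.tools = auto.tools || model.tools;
}
// auto => { contextWindow: 400000, imageCostTokens: 14512,
//           supportedMimeTypes: ['image/png', 'image/webp'], tools: true }
```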
@@ -248,8 +259,7 @@ let tokeniser;
248
259
 
249
260
  const unifyProvider = provider => {
250
261
  assert(provider = (provider || '').trim(), 'AI provider is required.');
251
- for (let type of [OPENAI, AZURE_OPENAI, AZURE, GEMINI, VERTEX, ANTHROPIC,
252
- VERTEX_ANTHROPIC, JINA, OLLAMA, SILICONFLOW]) {
262
+ for (let type of [OPENROUTER, JINA, OLLAMA, SILICONFLOW]) {
253
263
  if (insensitiveCompare(provider, type)) { return type; }
254
264
  }
255
265
  throwError(`Invalid AI provider: ${provider}.`);
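With the provider whitelist trimmed to the four entries above, names that previous versions accepted now fail fast. Behaviour of the (module-internal) `unifyProvider` helper after this change, for reference:

```js
// unifyProvider('openrouter')  -> 'OpenRouter'    (matched case-insensitively)
// unifyProvider('SILICONFLOW') -> 'SiliconFlow'
// unifyProvider('OpenAI')      -> throws "Invalid AI provider: OpenAI."
// unifyProvider('Gemini')      -> throws "Invalid AI provider: Gemini."
// unifyProvider('')            -> throws "AI provider is required."
```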
@@ -326,51 +336,37 @@ const toolsOpenAI = async () => {
326
336
  return _tools;
327
337
  };
328
338
 
329
- const toolsClaude = async () => (await toolsOpenAI()).map(x => ({
330
- ...x, def: {
331
- name: x.def.function.name,
332
- description: x.def.function.description,
333
- input_schema: x.def.function.parameters,
334
- }
335
- }));
339
+ const buildAiId = (provider, model) => [
340
+ provider, ...isOpenrouter(provider, model) ? [model.source] : [],
341
+ model?.name
342
+ ].map(x => ensureString(x, { case: 'SNAKE' })).join('_');
336
343
 
337
- const toolsGemini = async (options) => (await toolsOpenAI()).map(x => ({
338
- ...x, def: {
339
- name: x.def.function.name, description: x.def.function.description,
340
- parameters: {
341
- type: 'object',
342
- properties: x.def.function.parameters.properties,
343
- required: x.def.function.parameters.required,
344
- }, ...options?.provider === 'VERTEX' ? {
345
- response: x.def.function?.response ?? {
346
- type: 'string', description: 'It could be a string or JSON',
347
- }
348
- } : {},
349
- }
350
- }));
344
+ const buildAiName = (provider, model) => [
345
+ getProviderIcon(provider), provider,
346
+ `(${isOpenrouter(provider, model) ? `${model.source}/` : ''}${model.name})`
347
+ ].join(' ');
351
348
 
352
- const buildAiId = (provider, model) => [provider, model].map(
353
- x => ensureString(x, { case: 'SNAKE' })
354
- ).join('_');
349
+ const buildAiFeatures = model => Object.entries(FEATURE_ICONS).map(
350
+ x => model[x[0]] ? x[1] : ''
351
+ ).join('');
355
352
 
356
353
  const setupAi = ai => {
357
- const id = buildAiId(ai.provider, ai.model.name);
358
- const icon = getProviderIcon(ai.provider);
359
- const features = Object.entries(FEATURE_ICONS).map(
360
- x => ai.model[x[0]] ? x[1] : ''
361
- ).join('');
354
+ const id = buildAiId(ai.provider, ai.model);
362
355
  ais.push({
363
- id, name: `${icon} ${ai.provider} (${ai.model.name})`,
364
- features, initOrder: ais.length,
356
+ id, name: buildAiName(ai.provider, ai.model),
357
+ features: buildAiFeatures(ai.model), initOrder: ais.length,
365
358
  priority: DEFAULT_MODELS[ai.provider] === ai.model.name ? -1 : 0,
366
- modelEmbedding: MODELS[DEFAULT_EMBEDDING[ai.provider]], ...ai,
367
- prompt: ai.prompt && (async (c, o) => await ai.prompt(id, c, o)),
368
- embedding: ai.embedding && (async (c, o) => await ai.embedding(id, c, o)),
359
+ ...ai, prompt: async (text, opts) => await ai.prompt(id, text, opts),
369
360
  });
370
361
  };
371
362
 
372
363
  const init = async (options = {}) => {
373
- const provider = unifyProvider(options?.provider);
364
+ if (options?.debug) {
365
+ (await need('node:util')).inspect.defaultOptions.depth = null;
366
+ options.logLevel = 'debug';
367
+ }
368
+ options.provider = options.provider || OPENROUTER;
369
+ const provider = unifyProvider(options.provider);
374
370
  let models;
375
371
  if (options.model === '*') { // All models
376
372
  models = Object.values(MODELS).filter(
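The reworked `buildAiId`/`buildAiName` fold the upstream `source` into the id and display name whenever the provider is OpenRouter. Using the `gpt-5.1` entry (source `'openai'`) as an example; the exact id depends on how `ensureString(..., { case: 'SNAKE' })` snake-cases each segment, so treat the first result as approximate:

```js
// buildAiId('OpenRouter', { source: 'openai', name: 'gpt-5.1' })
//   -> roughly 'open_router_openai_gpt_5_1'
// buildAiName('OpenRouter', { source: 'openai', name: 'gpt-5.1' })
//   -> '🔀 OpenRouter (openai/gpt-5.1)'
// buildAiName('Ollama', { name: 'gemma3:27b' })   // not OpenRouter, no source segment
//   -> '🦙 Ollama (gemma3:27b)'
```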
@@ -388,106 +384,22 @@ const init = async (options = {}) => {
388
384
  assert(models.length,
389
385
  `Model name or description is required for provider: ${provider}.`);
390
386
  switch (provider) {
391
- case OPENAI:
392
- assertApiKey(provider, options);
393
- var client = await OpenAI(options);
394
- for (let model of models) {
395
- setupAi({
396
- provider, model, client,
397
- prompt: promptOpenAI, embedding: createOpenAIEmbedding,
398
- });
399
- }
400
- break;
401
- case AZURE_OPENAI:
402
- assertApiKey(provider, options);
403
- assert(options.endpoint,
404
- `${provider} api endpoint and deployment are required.`);
405
- var model = models[0];
406
- var client = await AzureOpenAI({
407
- apiVersion: '2025-01-01-preview',
408
- deployment: model.name, ...options,
409
- });
410
- setupAi({ provider, model, client, prompt: promptOpenAI });
411
- break;
412
- case AZURE:
413
- assertApiKey(provider, options);
414
- assert(options.baseURL, `${provider} api endpoint is required.`);
415
- var model = models[0];
416
- var client = await OpenAI(options);
417
- setupAi({ provider, model, client, prompt: promptOpenAI });
418
- break;
419
- case SILICONFLOW:
420
- assertApiKey(provider, options);
421
- var client = await OpenAI({ baseURL: 'https://api.siliconflow.cn/v1', ...options });
422
- var model = models[0];
423
- setupAi({ provider, model, client, prompt: promptOpenAI });
424
- break;
425
- case GEMINI:
426
- assert(options.apiKey, `${provider} api key is required.`);
427
- var { GoogleGenAI } = await need('@google/genai');
428
- var client = new GoogleGenAI(options);
429
- for (let model of models) {
430
- setupAi({
431
- provider, model, client,
432
- prompt: promptGemini, embedding: createGeminiEmbedding,
433
- });
434
- }
435
- break;
436
- case VERTEX:
437
- assert(options.credentials && options.project,
438
- `${provider} credentials and project id are required.`);
439
- process.env['GOOGLE_APPLICATION_CREDENTIALS'] = options.credentials;
440
- var { GoogleGenAI } = await need('@google/genai');
441
- var client = new GoogleGenAI({
442
- vertexai: true, location: 'global', ...options
443
- });
444
- for (let model of models) {
445
- setupAi({
446
- provider, model, client,
447
- prompt: promptGemini, embedding: createGeminiEmbedding,
448
- });
449
- }
450
- break;
451
- case ANTHROPIC:
452
- assertApiKey(provider, options);
453
- var client = new ((
454
- await need('@anthropic-ai/sdk')
455
- ).Anthropic)(options)
456
- for (let model of models) {
457
- setupAi({ provider, model, client, prompt: promptAnthropic });
458
- }
459
- break;
460
- case VERTEX_ANTHROPIC:
461
- // https://github.com/anthropics/anthropic-sdk-typescript/tree/main/packages/vertex-sdk
462
- assert(options.credentials && options.projectId,
463
- `${provider} credentials and project id are required.`);
464
- process.env['GOOGLE_APPLICATION_CREDENTIALS'] = options.credentials;
465
- process.env['ANTHROPIC_VERTEX_PROJECT_ID'] = options.projectId;
466
- var model = models[0];
467
- var client = new ((
468
- await need('@anthropic-ai/vertex-sdk', { raw: true })
469
- ).AnthropicVertex)({ region: options?.region || 'global' });
470
- setupAi({ provider, model, client, prompt: promptAnthropic });
471
- break;
472
387
  case JINA:
473
388
  assertApiKey(provider, options);
474
- var [client, clientEmbedding] = [await OpenAI({
389
+ var client = await OpenAI({
475
390
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
476
- }), await OpenAI({
477
- baseURL: 'https://api.jina.ai/v1/', ...options,
478
- })];
391
+ });
479
392
  for (let model of models) {
480
- setupAi({
481
- provider, model, client, clientEmbedding,
482
- prompt: promptOpenAI, embedding: createOpenAIEmbedding,
483
- });
393
+ setupAi({ provider, model, client, prompt: promptOpenAI });
484
394
  }
485
395
  break;
486
396
  case OLLAMA:
487
397
  // https://github.com/ollama/ollama/blob/main/docs/openai.md
488
398
  const baseURL = 'http://localhost:11434/v1/';
489
399
  const phLog = m => log(`Ollama preheat: ${m?.message || m}`);
490
- var client = await OpenAI({ baseURL, apiKey: 'ollama', ...options });
400
+ var client = await OpenAI({
401
+ baseURL, apiKey: 'ollama', ...options,
402
+ });
491
403
  for (let model of models) {
492
404
  setupAi({ provider, model, client, prompt: promptOpenAI });
493
405
  ignoreErrFunc(async () => {
@@ -499,8 +411,24 @@ const init = async (options = {}) => {
499
411
  }, { log: phLog });
500
412
  }
501
413
  break;
414
+ case SILICONFLOW:
415
+ assertApiKey(provider, options);
416
+ var client = await OpenAI({
417
+ baseURL: 'https://api.siliconflow.cn/v1', ...options,
418
+ });
419
+ for (let model of models) {
420
+ setupAi({ provider, model, client, prompt: promptOpenAI });
421
+ }
422
+ break;
502
423
  default:
503
- throwError(`Invalid AI provider: ${options.provider || 'null'}.`);
424
+ assertApiKey(provider, options);
425
+ var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
426
+ for (let model of models) {
427
+ setupAi({
428
+ provider: OPENROUTER || provider, model, client,
429
+ prompt: promptOpenAI,
430
+ });
431
+ }
504
432
  }
505
433
  ais.sort((a, b) => a.priority - b.priority || a.initOrder - b.initOrder);
506
434
  return ais;
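Since everything except Jina, Ollama and SiliconFlow now falls through to the OpenRouter default branch, initialisation reduces to supplying an OpenRouter key. A hedged usage sketch against the `init` shown above; the import path follows this file, the env-var name is an assumption, and the `prompt` call shape is assumed since its implementation sits outside this diff:

```js
import { init, prompt } from './alan.mjs';

// OPENROUTER is the default provider; '*' registers every model in MODELS.
const ais = await init({
    apiKey: process.env.OPENROUTER_API_KEY, // assumption: env var name
    model: '*',
});
console.log(ais.map(x => x.name)); // e.g. '🔀 OpenRouter (openai/gpt-5.1)', …

// Assumed call shape for the exported `prompt`; see the export list at the end.
const reply = await prompt('Summarise this diff in one sentence.');
```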
@@ -510,8 +438,7 @@ const packAi = (ais, options = {}) => {
510
438
  const res = options.basic ? ais.map(x => ({
511
439
  id: x.id, name: x.name, features: x.features,
512
440
  initOrder: x.initOrder, priority: x.priority,
513
- provider: x.provider, model: x.model, modelEmbedding: x.modelEmbedding,
514
- prompt: !!x.prompt, embedding: !!x.embedding,
441
+ provider: x.provider, model: x.model,
515
442
  })) : ais;
516
443
  return options.all ? res : res[0];
517
444
  };
@@ -561,6 +488,10 @@ const countTokens = async (input, options) => {
561
488
  );
562
489
  };
563
490
 
491
+ const isOpenrouter = (provider, model) => insensitiveCompare(
492
+ provider, OPENROUTER
493
+ ) && (model ? model?.source : true);
494
+
564
495
  const selectVisionModel = options => {
565
496
  assert(
566
497
  MODELS[options.model]?.vision,
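`isOpenrouter` is the switch that decides whether the `source/` prefix is applied in `buildAiId`, `buildAiName` and the request model string: the provider has to be OpenRouter and, when a model is passed, that model has to carry a `source`. A small self-contained restatement (with `insensitiveCompare` assumed to be a plain case-insensitive string match):

```js
const isOpenrouterSketch = (provider, model) =>
    provider.toLowerCase() === 'openrouter' && (model ? model?.source : true);

isOpenrouterSketch('OpenRouter');                                        // => true
isOpenrouterSketch('OpenRouter', { name: 'gpt-5.1', source: 'openai' }); // => 'openai' (truthy)
isOpenrouterSketch('OpenRouter', { name: 'openrouter/auto' });           // => undefined (AUTO has no source, so no prefix)
isOpenrouterSketch('Ollama', { name: 'gemma3:27b' });                    // => false
```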
@@ -579,7 +510,7 @@ const selectAudioModel = options => {
579
510
  ? MODELS[options.model]?.audio : null;
580
511
  };
581
512
 
582
- const buildGptMessage = (content, options) => {
513
+ const buildMessage = (content, options) => {
583
514
  content = content || '';
584
515
  let alterModel = options?.audioMode && selectAudioModel(options);
585
516
  const attachments = (options?.attachments || []).map(x => {
@@ -620,70 +551,6 @@ const buildGptMessage = (content, options) => {
620
551
  return message;
621
552
  };
622
553
 
623
- const buildGeminiParts = (text, attachments) => {
624
- // Gemini API does not allow empty text, even you prompt with attachments.
625
- const message = [...text?.length || attachments?.length ? [{
626
- text: text?.length ? text : ' '
627
- }] : [], ...attachments || []];
628
- assertContent(message);
629
- return message;
630
- };
631
-
632
- const buildGeminiMessage = (content, options) => {
633
- content = content || '';
634
- // @todo: for future audio mode support
635
- // let alterModel = options?.audioMode && selectAudioModel(options);
636
- const attachments = (
637
- options?.attachments?.length ? options.attachments : []
638
- ).map(x => {
639
- // assert(MODELS[options?.model], 'Model is required.');
640
- // if (MODELS[options.model]?.supportedAudioTypes?.includes?.(x.mime_type)
641
- // && !options.imageMode) {
642
- // alterModel = selectAudioModel(options);
643
- // }
644
- return {
645
- inlineData: { mimeType: x.mime_type, data: x.data }
646
- };
647
- });
648
- // if (alterModel) {
649
- // options.model = alterModel;
650
- // options.audioMode = true;
651
- // }
652
- return String.isString(content) ? (options?.history ? {
653
- role: options?.role || user,
654
- parts: buildGeminiParts(content, attachments),
655
- } : buildGeminiParts(content, attachments)) : content;
656
- };
657
-
658
- const buildClaudeMessage = (text, options) => {
659
- assert(text, 'Text is required.');
660
- const attachments = (options?.attachments?.length ? options?.attachments : []).map(x => {
661
- let type = '';
662
- if ([MIME_PDF].includes(x.mime_type)) {
663
- type = 'document';
664
- } else if ([MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP].includes(x.mime_type)) {
665
- type = 'image';
666
- } else { throwError(`Unsupported mime type: ${x.mime_type}`); }
667
- return {
668
- type, source: {
669
- type: BASE64.toLowerCase(),
670
- media_type: x.mime_type, data: x.data,
671
- },
672
- }
673
- });
674
- return String.isString(text) ? {
675
- role: options?.role || user, content: [...attachments, {
676
- type: TEXT, text, ...options?.cache_control ? {
677
- cache_control: { type: 'ephemeral' },
678
- } : {},
679
- }],
680
- } : text;
681
- };
682
-
683
- const buildGeminiHistory = (text, options) => buildGeminiMessage(
684
- text, { ...options || {}, history: true }
685
- );
686
-
687
554
  const listOpenAIModels = async (aiId, options) => {
688
555
  const { client } = await getAi(aiId);
689
556
  const resp = await client.models.list();
@@ -692,7 +559,8 @@ const listOpenAIModels = async (aiId, options) => {
692
559
 
693
560
  const streamResp = async (resp, options) => {
694
561
  const msg = await packResp(resp, { ...options, processing: true });
695
- return options?.stream && (msg?.text || msg?.audio?.length)
562
+ return options?.stream
563
+ && (msg?.text || msg?.audio?.length || msg?.images?.length)
696
564
  && await ignoreErrFunc(async () => await options.stream(msg), LOG);
697
565
  };
698
566
 
@@ -705,13 +573,14 @@ const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
705
573
  const packResp = async (resp, options) => {
706
574
  if (options?.raw) { return resp; }
707
575
  let [
708
- txt, audio, references, simpleText, referencesMarkdown, end, json,
709
- images
576
+ txt, audio, images, references, simpleText, referencesMarkdown, end,
577
+ json, audioMimeType,
710
578
  ] = [
711
579
  resp.text || '', // ChatGPT / Claude / Gemini / Ollama
712
580
  resp?.audio?.data, // ChatGPT audio mode
581
+ resp?.images || [], // Gemini images via Openrouter
713
582
  resp?.references, // Gemini references
714
- '', '', '', null, resp?.images || [],
583
+ '', '', '', null, MIME_PCM16
715
584
  ];
716
585
  simpleText = txt;
717
586
  while ((end = getInfoEnd(simpleText))) {
@@ -722,16 +591,18 @@ const packResp = async (resp, options) => {
722
591
  const str = simpleText.indexOf(x);
723
592
  str >= 0 && (simpleText = simpleText.slice(0, str).trim());
724
593
  });
725
- audio = await ignoreErrFunc(async () => await packPcmToWav(audio, {
726
- input: Buffer.isBuffer(audio) ? BUFFER : BASE64, expected: BUFFER,
594
+ audio = await ignoreErrFunc(async () => ({
595
+ data: await packPcmToWav(audio, {
596
+ input: BUFFER, expected: BUFFER, suffix: 'pcm.wav', ...options
597
+ }), mime: audioMimeType,
727
598
  }));
728
- if (images?.length) {
729
- for (let i in images) {
730
- images[i].data = await convert(images[i].data, {
731
- input: BASE64, expected: BUFFER,
732
- });
733
- }
734
- }
599
+ images = await Promise.all(
600
+ images.map(async x => ({
601
+ data: await convert(x.buffer, {
602
+ input: BUFFER, expected: BUFFER, ...options
603
+ }), mime: x.mime,
604
+ }))
605
+ );
735
606
  options?.jsonMode && !options?.delta && (json = parseJson(simpleText, null));
736
607
  if (options?.simple && options?.audioMode) { return audio; }
737
608
  else if (options?.simple && options?.jsonMode) { return json; }
@@ -803,8 +674,7 @@ const packResp = async (resp, options) => {
803
674
  ...text(txt), ...options?.jsonMode ? { json } : {},
804
675
  ...references ? { references } : {},
805
676
  ...referencesMarkdown ? { referencesMarkdown } : {},
806
- ...audio ? { audio, audioMimeType: options?.audioMimeType } : {},
807
- ...images?.length ? { images } : {},
677
+ ...audio ? { audio } : {}, ...images?.length ? { images } : {},
808
678
  processing: !!options?.processing,
809
679
  model: options?.model,
810
680
  };
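Putting the audio and image repacking together with the final return object above, a full (non-simple) result now looks roughly like the sketch below. The `text` field assumes the `text(...)` helper destructured near the top of the file simply wraps its argument, and the optional fields are spread in only when present:

```js
// Approximate shape of a multimodal (non-simple) packResp result:
// {
//     text: '…assistant text…',
//     audio: { data: <WAV Buffer>, mime: MIME_PCM16 },   // packed by packPcmToWav above
//     images: [{ data: <Buffer>, mime: 'image/png' }],   // converted to Buffers above
//     // json / references / referencesMarkdown only when applicable
//     processing: false,
//     model: 'gpt-5.1',
// }
```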
@@ -817,13 +687,8 @@ const buildPrompts = async (model, input, options = {}) => {
817
687
  assert(!(
818
688
  options.reasoning && !model?.reasoning
819
689
  ), `This model does not support reasoning: ${options.model}`);
820
- let [
821
- systemPrompt, history, content, prompt, _system, _model, _assistant,
822
- _history,
823
- ] = [
824
- null, null, input, null, { role: system }, { role: MODEL },
825
- { role: assistant }, null,
826
- ];
690
+ let [history, content, prompt, _model, _assistant, _history]
691
+ = [null, input, null, { role: MODEL }, { role: assistant }, null];
827
692
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
828
693
  options.attachments = (
829
694
  options.attachments?.length ? options.attachments : []
@@ -832,70 +697,17 @@ const buildPrompts = async (model, input, options = {}) => {
832
697
  ...model?.supportedDocTypes,
833
698
  ...model?.supportedAudioTypes,
834
699
  ].includes(x.mime_type));
835
- switch (options.flavor) {
836
- case OPENAI:
837
- systemPrompt = buildGptMessage(options.systemPrompt, _system);
838
- break;
839
- case ANTHROPIC:
840
- systemPrompt = options.systemPrompt;
841
- break;
842
- case GEMINI:
843
- const _role = {
844
- role: [GEMINI_25_FLASH_IMAGE].includes(options.model)
845
- ? user : system
846
- };
847
- systemPrompt = buildGeminiHistory(options.systemPrompt, _role);
848
- break;
849
- }
700
+ const systemPrompt = buildMessage(options.systemPrompt, system);
850
701
  const msgBuilder = () => {
851
702
  [history, _history] = [[], []];
852
703
  (options.messages?.length ? options.messages : []).map((x, i) => {
853
- switch (options.flavor) {
854
- case OPENAI:
855
- _history.push(buildGptMessage(x.request));
856
- _history.push(buildGptMessage(x.response, _assistant));
857
- break;
858
- case ANTHROPIC:
859
- _history.push(buildClaudeMessage(x.request));
860
- _history.push(buildClaudeMessage(x.response, _assistant));
861
- break;
862
- case GEMINI:
863
- // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
864
- // Google's bug: history is not allowed while using inline_data?
865
- if (options.attachments?.length) { return; }
866
- _history.push(buildGeminiHistory(x.request));
867
- _history.push(buildGeminiHistory(x.response, _model));
868
- break;
869
- }
704
+ _history.push(buildMessage(x.request));
705
+ _history.push(buildMessage(x.response, _assistant));
870
706
  });
871
- switch (options.flavor) {
872
- case OPENAI:
873
- history = messages([
874
- systemPrompt, ..._history, buildGptMessage(content, options),
875
- ...options.toolsResult?.length ? options.toolsResult : []
876
- ]);
877
- break;
878
- case ANTHROPIC:
879
- history = messages([
880
- ..._history, buildClaudeMessage(content, {
881
- ...options, cache_control: true
882
- }), ...options.toolsResult?.length ? options.toolsResult : []
883
- ]);
884
- break;
885
- case GEMINI:
886
- [history, prompt] = options.toolsResult?.length ? [
887
- [
888
- ..._history,
889
- buildGeminiHistory(content, options),
890
- ...options.toolsResult.slice(0, options.toolsResult.length - 1),
891
- ],
892
- options.toolsResult[options.toolsResult?.length - 1].parts,
893
- ] : [
894
- [..._history],
895
- buildGeminiMessage(content, options),
896
- ];
897
- break;
898
- }
707
+ history = messages([
708
+ systemPrompt, ..._history, buildMessage(content, options),
709
+ ...options.toolsResult?.length ? options.toolsResult : []
710
+ ]);
899
711
  };
900
712
  msgBuilder();
901
713
  await trimPrompt(() => [
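buildPrompts now emits one OpenAI-style message list for every route instead of three flavor-specific shapes. Assuming the `messages(...)` helper from the top of the file just wraps the array under a `messages` key (which is what `client.chat.completions.create({ ...history })` further down expects), the assembled history is roughly:

```js
// {
//     messages: [
//         { role: 'system', … },                    // buildMessage(options.systemPrompt, system)
//         { role: 'user', … }, { role: 'assistant', … },  // one pair per prior exchange
//         { role: 'user', … },                      // current content plus attachments
//         ...toolsResult,                           // pending tool outputs, if any
//     ],
// }
```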
@@ -908,11 +720,7 @@ const buildPrompts = async (model, input, options = {}) => {
908
720
  content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
909
721
  } // @todo: audioCostTokens (needs to calculate the audio length):
910
722
  }, model.maxInputTokens - options.attachments?.length * model.imageCostTokens);
911
- if ([OPENAI].includes(options.flavor)
912
- || [GEMINI_25_FLASH_IMAGE].includes(options.model)) {
913
- systemPrompt = null;
914
- }
915
- return { systemPrompt, history, prompt };
723
+ return { history, prompt };
916
724
  };
917
725
 
918
726
  const handleToolsCall = async (msg, options) => {
@@ -932,34 +740,12 @@ const handleToolsCall = async (msg, options) => {
932
740
  if (calls.length) {
933
741
  preRes.push(msg);
934
742
  for (const fn of calls) {
935
- switch (options?.flavor) {
936
- case ANTHROPIC:
937
- input = fn.input = String.isString(fn?.input)
938
- ? parseJson(fn.input) : fn?.input;
939
- packMsg = (content, is_error) => ({
940
- type: 'tool_result', tool_use_id: fn.id,
941
- content, is_error,
942
- });
943
- break;
944
- case GEMINI:
945
- input = fn?.functionCall?.args;
946
- packMsg = (t, e) => ({
947
- functionResponse: {
948
- name: fn?.functionCall?.name, response: {
949
- name: fn?.functionCall?.name,
950
- content: e ? `[Error] ${t}` : t,
951
- }
952
- }
953
- });
954
- break;
955
- case OPENAI: default:
956
- input = parseJson(fn?.function?.arguments);
957
- packMsg = (content = '', e = false) => ({
958
- role: TOOL, tool_call_id: fn.id,
959
- ...e ? { error: content, content: '' } : { content }
960
- });
961
- break;
962
- }
743
+ input = fn?.functionCall?.args;
744
+ packMsg = (t, e) => ({
745
+ role: 'tool',
746
+ tool_call_id: fn.id,
747
+ content: e ? `[Error] ${t}` : t
748
+ });
963
749
  const name = (fn?.function || fn?.functionCall || fn)?.name;
964
750
  if (!name) { continue; }
965
751
  await resp(`${callIdx++ ? '\n' : ''}Name: ${name}`);
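Tool results are now always packed in the OpenAI chat-completions format, whatever model OpenRouter routes to. For a call with the hypothetical id `'call_123'`, the message appended back into the conversation looks like:

```js
// success:
// { role: 'tool', tool_call_id: 'call_123', content: 'sunny, 21°C' }
// failure:
// { role: 'tool', tool_call_id: 'call_123', content: '[Error] fetch timed out' }
```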
@@ -987,12 +773,7 @@ const handleToolsCall = async (msg, options) => {
987
773
  log(rt);
988
774
  }
989
775
  }
990
- if (content.length) {
991
- switch (options?.flavor) {
992
- case ANTHROPIC: content = [{ role: user, content }]; break;
993
- case GEMINI: content = [{ role: user, parts: content }]; break;
994
- }
995
- }
776
+ content = content.map(x => ({ role: TOOL, ...x }));
996
777
  responded && await resp(TOOLS_END);
997
778
  }
998
779
  return {
@@ -1007,37 +788,51 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
1007
788
 
1008
789
  const promptOpenAI = async (aiId, content, options = {}) => {
1009
790
  let { provider, client, model } = await getAi(aiId);
1010
- let [result, resultAudio, event, resultTools, responded, azure] = [
1011
- options.result ?? '', Buffer.alloc(0), null, [], false,
1012
- provider === AZURE
1013
- ];
1014
- options.flavor = OPENAI;
791
+ let [
792
+ result, resultAudio, resultImages, resultReasoning, event, resultTools,
793
+ responded, modalities, source, reasoningEnd
794
+ ] = [
795
+ options.result ?? '', Buffer.alloc(0), [], '', null, [], false,
796
+ options.modalities, model?.source, false
797
+ ];
1015
798
  options.model = options.model || model.name;
1016
799
  const { history }
1017
800
  = await buildPrompts(MODELS[options.model], content, options);
1018
801
  model = MODELS[options.model];
1019
- model?.reasoning && !azure && !options.reasoning_effort
802
+ model?.reasoning && !options.reasoning_effort
1020
803
  && (options.reasoning_effort = GPT_REASONING_EFFORT);
1021
- const modalities = options.modalities
1022
- || (options.audioMode ? [TEXT, AUDIO] : undefined);
1023
- [options.audioMimeType, options.suffix] = [MIME_PCM16, 'pcm.wav'];
804
+ if (!modalities && options.audioMode) {
805
+ modalities = [TEXT, AUDIO];
806
+ } else if (!modalities && model.image) {
807
+ modalities = [TEXT, IMAGE];
808
+ }
809
+ const googleImageMode = source === 'google' && modalities?.has?.(IMAGE);
810
+ const targetModel = `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}`;
1024
811
  const resp = await client.chat.completions.create({
1025
- model: azure ? undefined : options.model, ...history,
812
+ model: targetModel, ...history,
1026
813
  ...options.jsonMode ? { response_format: { type: JSON_OBJECT } } : {},
1027
814
  ...provider === OLLAMA ? { keep_alive: -1 } : {},
1028
815
  modalities, audio: options.audio || (
1029
816
  modalities?.find?.(x => x === AUDIO)
1030
817
  && { voice: DEFAULT_MODELS[OPENAI_VOICE], format: 'pcm16' }
1031
- ), ...model?.tools && !azure ? {
818
+ ), ...model?.tools && !googleImageMode ? {
1032
819
  tools: options.tools ?? (await toolsOpenAI()).map(x => x.def),
1033
820
  tool_choice: 'auto',
1034
- } : {}, ...azure ? {} : { store: true }, stream: true,
821
+ } : {},
822
+ store: true, stream: true,
1035
823
  reasoning_effort: options.reasoning_effort,
1036
824
  });
1037
825
  for await (event of resp) {
826
+ print(JSON.stringify(event, null, 2));
1038
827
  event = event?.choices?.[0] || {};
1039
828
  const delta = event.delta || {};
1040
- let deltaText = delta.content || delta.audio?.transcript || '';
829
+ let [delteReasoning, deltaText] = [
830
+ delta.reasoning || '',
831
+ delta.content || delta.audio?.transcript || ''
832
+ ];
833
+ const deltaImages = (delta?.images || []).map(
834
+ x => decodeBase64DataURL(x.image_url.url)
835
+ );
1041
836
  const deltaAudio = delta.audio?.data ? await convert(
1042
837
  delta.audio.data, { input: BASE64, expected: BUFFER }
1043
838
  ) : Buffer.alloc(0);
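Two consequences of the setup above: the request model sent to OpenRouter gains a `source/` prefix, and output modalities now default from the model's capabilities instead of being fixed. With the entries in this diff's MODELS table (Gemini entries inherit `source: 'google'` from GEMINI_RULES):

```js
// gpt-5.1 (source 'openai')             -> request model 'openai/gpt-5.1'
// gemini-2.5-pro (source 'google')      -> request model 'google/gemini-2.5-pro'
// gemini-2.5-flash-image (image: true)  -> modalities default to [TEXT, IMAGE]
// options.audioMode                     -> modalities default to [TEXT, AUDIO]
```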
@@ -1055,16 +850,28 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1055
850
  }
1056
851
  options.result && deltaText
1057
852
  && (responded = responded || (deltaText = `\n\n${deltaText}`));
853
+ resultReasoning += delteReasoning;
854
+ delteReasoning && delteReasoning === resultReasoning
855
+ && (delteReasoning = `${THINK_STR}\n${delteReasoning}`);
856
+ resultReasoning && deltaText && !reasoningEnd && (
857
+ reasoningEnd = delteReasoning = `${delteReasoning}${THINK_END}\n\n`
858
+ );
859
+ deltaText = delteReasoning + deltaText;
1058
860
  result += deltaText;
861
+ resultImages.push(...deltaImages);
1059
862
  resultAudio = Buffer.concat([resultAudio, deltaAudio]);
863
+ const respImages = options.delta ? deltaImages : resultImages;
1060
864
  const respAudio = options.delta ? deltaAudio : resultAudio;
1061
- (deltaText || deltaAudio?.length) && await streamResp({
1062
- text: options.delta ? deltaText : result,
1063
- ...respAudio.length ? { audio: { data: respAudio } } : {},
1064
- }, options);
865
+ (deltaText || deltaAudio?.length || deltaImages.length)
866
+ && await streamResp({
867
+ text: options.delta ? deltaText : result,
868
+ ...respAudio.length ? { audio: { data: respAudio } } : {},
869
+ ...respImages.length ? { images: respImages } : {},
870
+ }, options);
1065
871
  }
1066
872
  event = {
1067
873
  role: assistant, text: result, tool_calls: resultTools,
874
+ ...resultImages.length ? { images: resultImages } : {},
1068
875
  ...resultAudio.length ? { audio: { data: resultAudio } } : {},
1069
876
  };
1070
877
  const { toolsResult, toolsResponse }
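The reasoning handling above stitches OpenRouter's separate `delta.reasoning` stream back into the visible text between `<think>` tags. A short trace of the accumulation logic (identifier spellings as in the diff) for a stream of two reasoning deltas followed by normal content:

```js
// delta 1: reasoning 'Let me check.' -> emitted chunk: '<think>\nLet me check.'
// delta 2: reasoning ' Done.'        -> emitted chunk: ' Done.'
// delta 3: content   'Hello!'        -> emitted chunk: '</think>\n\nHello!'
//
// accumulated `result` after the three deltas:
// '<think>\nLet me check. Done.</think>\n\nHello!'
```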
@@ -1079,329 +886,122 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1079
886
  return await packResp(event, options);
1080
887
  };
1081
888
 
1082
- const promptAnthropic = async (aiId, content, options = {}) => {
1083
- const { client, model } = await getAi(aiId);
1084
- let [
1085
- event, text, thinking, signature, result, thinkEnd, tool_use,
1086
- responded, redacted_thinking
1087
- ] = [null, '', '', '', options.result ?? '', '', [], false, []];
1088
- options.model = options.model || model.name;
1089
- options.test_redacted_thinking && !result && (
1090
- content += '\n\nANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_'
1091
- + '46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB'
1092
- ); // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
1093
- const { systemPrompt: system, history }
1094
- = await buildPrompts(model, content, { ...options, flavor: ANTHROPIC });
1095
- const resp = await client.beta.messages.create({
1096
- model: options.model, ...history, system, stream: true,
1097
- max_tokens: options.extendedThinking ? kT(128) : model.maxOutputTokens,
1098
- ...(options.reasoning ?? model.reasoning) ? {
1099
- thinking: options.thinking || {
1100
- type: 'enabled',
1101
- budget_tokens: options?.extendedThinking ? 16000 : 1024,
1102
- },
1103
- } : {}, ...model?.tools ? {
1104
- tools: options.tools ?? (await toolsClaude()).map(x => x.def),
1105
- tool_choice: { type: 'auto' }, betas: [
1106
- // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
1107
- 'token-efficient-tools-2025-02-19',
1108
- 'interleaved-thinking-2025-05-14',
1109
- // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
1110
- ...options?.extendedThinking ? ['output-128k-2025-02-19'] : [],
1111
- ],
1112
- } : {},
1113
- });
1114
- for await (const chunk of resp) {
1115
- event = chunk?.content_block || chunk?.delta || {};
1116
- let [deltaThink, deltaText] = [event.thinking || '', event.text || ''];
1117
- text += deltaText;
1118
- thinking += deltaThink;
1119
- signature = signature || event?.signature || '';
1120
- deltaThink && deltaThink === thinking
1121
- && (deltaThink = `${THINK_STR}\n${deltaThink}`);
1122
- thinking && deltaText && !thinkEnd
1123
- && (thinkEnd = deltaThink = `${deltaThink}\n${THINK_END}\n\n`);
1124
- event?.type === REDACTED_THINKING && redacted_thinking.push(event);
1125
- if (event?.type === 'tool_use') {
1126
- tool_use.push({ ...event, input: '' });
1127
- } else if (event.partial_json) {
1128
- tool_use[tool_use.length - 1].input += event.partial_json;
1129
- }
1130
- deltaText = deltaThink + deltaText;
1131
- options.result && deltaText
1132
- && (responded = responded || (deltaText = `\n\n${deltaText}`));
1133
- result += deltaText;
1134
- deltaText && await streamResp({
1135
- text: options.delta ? deltaText : result,
1136
- }, options);
1137
- }
1138
- event = {
1139
- role: assistant, content: [
1140
- ...thinking ? [{ type: THINKING, thinking, signature }] : [],
1141
- ...redacted_thinking,
1142
- ...text ? [{ type: TEXT, text }] : [], ...tool_use,
1143
- ]
1144
- };
1145
- const { toolsResult, toolsResponse } = await handleToolsCall(
1146
- event, { ...options, result, flavor: ANTHROPIC },
1147
- );
1148
- if (tool_use.length && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
1149
- return await promptAnthropic(aiId, content, {
1150
- ...options, toolsResult: [...options.toolsResult || [],
1151
- ...toolsResult], result: toolsResponse,
1152
- });
1153
- }
1154
- return packResp({ text: mergeMsgs(toolsResponse, tool_use) }, options);
1155
- };
1156
-
1157
- const uploadFile = async (aiId, input, options) => {
1158
- const { client } = await getAi(aiId);
1159
- const { content: file, cleanup } = await convert(input, {
1160
- input: options?.input, ...options || {}, expected: STREAM,
1161
- errorMessage: INVALID_FILE, suffix: options?.suffix,
1162
- withCleanupFunc: true,
1163
- });
1164
- const resp = await client.files.create({ file, ...options?.params || {} });
1165
- await cleanup();
1166
- return resp;
1167
- };
1168
-
1169
- const uploadFileForFineTuning = async (aiId, content, options) => await uploadFile(
1170
- aiId, content, { suffix: 'jsonl', ...options, params: { purpose: 'fine-tune' } }
1171
- );
1172
-
1173
- const listFiles = async (aiId, options) => {
1174
- const { client } = await getAi(aiId);
1175
- const files = [];
1176
- const list = await client.files.list(options?.params || {});
1177
- for await (const file of list) { files.push(file); }
1178
- return files;
1179
- };
1180
-
1181
- const deleteFile = async (aiId, file_id, options) => {
1182
- const { client } = await getAi(aiId);
1183
- return await client.files.del(file_id);
1184
- };
1185
-
1186
- const packGeminiReferences = (chunks, supports) => {
1187
- let references = null;
1188
- if (chunks?.length && supports?.length) {
1189
- references = { segments: [], links: [] };
1190
- supports.map(s => references.segments.push({
1191
- ...s.segment, indices: s.groundingChunkIndices,
1192
- confidence: s.confidenceScores,
1193
- }));
1194
- chunks.map(c => references.links.push(c.web));
1195
- }
1196
- return references;
1197
- };
1198
-
1199
- const promptGemini = async (aiId, content, options = {}) => {
1200
- let { provider, client, model } = await getAi(aiId);
1201
- let [
1202
- event, result, text, thinking, references, functionCalls, responded,
1203
- images, thinkEnd,
1204
- ] = [null, options.result ?? '', '', '', null, [], false, [], false];
1205
- options.model = options.model || model.name;
1206
- model?.image === true && (options.imageMode = true);
1207
- assert(!(options.imageMode && !model.image), 'Image mode is not supported.');
1208
- if (options.imageMode && String.isString(model.image)) {
1209
- options.model = model.image;
1210
- options.imageMode = true;
1211
- model = MODELS[options.model];
1212
- }
1213
- options.flavor = GEMINI;
1214
- const { systemPrompt: systemInstruction, history, prompt }
1215
- = await buildPrompts(model, content, options);
1216
- const responseModalities = options.modalities
1217
- || (options.imageMode ? [TEXT, IMAGE] : undefined)
1218
- || (options.audioMode ? [TEXT, AUDIO] : undefined);
1219
- const chat = client.chats.create({
1220
- model: options.model, history, config: {
1221
- responseMimeType: options.jsonMode ? MIME_JSON : MIME_TEXT,
1222
- ...model.reasoning ? {
1223
- thinkingConfig: { includeThoughts: true },
1224
- } : {}, systemInstruction, responseModalities,
1225
- ...options?.config || {}, ...model?.tools && !options.jsonMode
1226
- && ![GEMINI_25_FLASH_IMAGE].includes(options.model)
1227
- ? (options.tools ?? {
1228
- tools: [
1229
- // @todo: Gemini will failed when using these tools together.
1230
- // https://ai.google.dev/gemini-api/docs/function-calling
1231
- // { codeExecution: {} },
1232
- // { googleSearch: {} },
1233
- // { urlContext: {} },
1234
- // @todo: test these tools in next version 👆
1235
- {
1236
- functionDeclarations: (
1237
- await toolsGemini({ provider })
1238
- ).map(x => x.def)
1239
- },
1240
- ], toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
1241
- }) : {},
1242
- },
1243
- });
1244
- const resp = await chat.sendMessageStream({ message: prompt });
1245
- for await (const chunk of resp) {
1246
- assert(
1247
- !chunk?.promptFeedback?.blockReason,
1248
- chunk?.promptFeedback?.blockReason
1249
- );
1250
- event = chunk?.candidates?.[0];
1251
- let [deltaText, deltaThink, deltaImages] = ['', '', []];
1252
- event?.content?.parts?.map(x => {
1253
- if (x.text && x.thought) { deltaThink = x.text; }
1254
- else if (x.text) { deltaText = x.text; }
1255
- else if (x.functionCall) { functionCalls.push(x); }
1256
- else if (x.inlineData?.mimeType === MIME_PNG) {
1257
- deltaImages.push(x.inlineData);
1258
- images.push(x.inlineData);
1259
- }
1260
- });
1261
- text += deltaText;
1262
- thinking += deltaThink;
1263
- deltaThink && deltaThink === thinking
1264
- && (deltaThink = `${THINK_STR}\n${deltaThink}`);
1265
- thinking && deltaText && !thinkEnd
1266
- && (thinkEnd = deltaThink = `${deltaThink}${THINK_END}\n\n`);
1267
- deltaText = deltaThink + deltaText;
1268
- const rfc = packGeminiReferences(
1269
- event?.groundingMetadata?.groundingChunks,
1270
- event?.groundingMetadata?.groundingSupports
1271
- );
1272
- rfc && (references = rfc);
1273
- options.result && deltaText
1274
- && (responded = responded || (deltaText = `\n\n${deltaText}`));
1275
- result += deltaText;
1276
- (deltaText || deltaImages.length) && await streamResp({
1277
- text: options.delta ? deltaText : result,
1278
- images: options.delta ? deltaImages : images,
1279
- }, options);
1280
- }
1281
- event = {
1282
- role: MODEL, parts: [
1283
- ...thinking ? [{ thought: true, text: thinking }] : [],
1284
- ...text ? [{ text }] : [],
1285
- ...functionCalls,
1286
- ],
1287
- };
1288
- const { toolsResult, toolsResponse } = await handleToolsCall(
1289
- event, { ...options, result, flavor: GEMINI }
1290
- );
1291
- if (toolsResult.length
1292
- && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
1293
- return promptGemini(aiId, content, {
1294
- ...options || {}, result: toolsResponse,
1295
- toolsResult: [...options?.toolsResult || [], ...toolsResult],
1296
- });
1297
- }
1298
- return await packResp({
1299
- text: mergeMsgs(toolsResponse, toolsResult), images, references,
1300
- }, options);
1301
- };
1302
-
1303
- const checkEmbeddingInput = async (input, model) => {
1304
- assert(input, 'Text is required.', 400);
1305
- const arrInput = input.split(' ');
1306
- const getInput = () => arrInput.join(' ');
1307
- const _model = MODELS[model];
1308
- assert(_model, `Invalid model: '${model}'.`);
1309
- await trimPrompt(getInput, arrInput.pop, _model.contextWindow);
1310
- return getInput();
1311
- };
1312
-
1313
- const createOpenAIEmbedding = async (aiId, input, options) => {
1314
- // args from vertex embedding may be useful uere
1315
- // https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings
1316
- // task_type Description
1317
- // RETRIEVAL_QUERY Specifies the given text is a query in a search/ retrieval setting.
1318
- // RETRIEVAL_DOCUMENT Specifies the given text is a document in a search / retrieval setting.
1319
- // SEMANTIC_SIMILARITY Specifies the given text will be used for Semantic Textual Similarity(STS).
1320
- // CLASSIFICATION Specifies that the embeddings will be used for classification.
1321
- // CLUSTERING Specifies that the embeddings will be used for clustering.
1322
- let { client, modelEmbedding, clientEmbedding } = await getAi(aiId);
1323
- const model = options?.model || modelEmbedding.name;
1324
- const resp = await (clientEmbedding || client).embeddings.create({
1325
- model, input: await checkEmbeddingInput(input, model),
1326
- });
1327
- return options?.raw ? resp : resp?.data[0].embedding;
1328
- };
1329
-
1330
- const createGeminiEmbedding = async (aiId, input, options) => {
1331
- const { client, modelEmbedding, clientEmbedding } = await getAi(aiId);
1332
- const model = options?.model || modelEmbedding.name;
1333
- const resp = await (
1334
- clientEmbedding || client
1335
- ).getGenerativeModel({ model }).embedContent(
1336
- await checkEmbeddingInput(input, model)
1337
- );
1338
- return options?.raw ? resp : resp?.embedding.values;
1339
- };
1340
-
1341
- const buildGptTrainingCase = (prompt, response, options) => messages([
1342
- ...options?.systemPrompt ? [
1343
- buildGptMessage(options.systemPrompt, { role: system })
1344
- ] : [], buildGptMessage(prompt),
1345
- buildGptMessage(response, { role: assistant }),
1346
- ]);
1347
-
1348
- const buildGptTrainingCases = (cases, opts) => cases.map(x => JSON.stringify(
1349
- buildGptTrainingCase(x.prompt, x.response, { ...x.options, ...opts })
1350
- )).join('\n');
1351
-
1352
- const createGptFineTuningJob = async (aiId, training_file, options) => {
1353
- const { client } = await getAi(aiId);
1354
- return await client.fineTuning.jobs.create({
1355
- training_file, model: options?.model || DEFAULT_MODELS[OPENAI_TRAINING],
1356
- })
1357
- };
1358
-
1359
- const getGptFineTuningJob = async (aiId, job_id, options) => {
1360
- const { client } = await getAi(aiId);
1361
- // https://platform.openai.com/finetune/[job_id]?filter=all
1362
- return await client.fineTuning.jobs.retrieve(job_id);
1363
- };
1364
-
1365
- const cancelGptFineTuningJob = async (aiId, job_id, options) => {
1366
- const { client } = await getAi(aiId);
1367
- return await client.fineTuning.jobs.cancel(job_id);
1368
- };
1369
-
1370
- const listGptFineTuningJobs = async (aiId, options) => {
1371
- const { client } = await getAi(aiId);
1372
- const resp = await client.fineTuning.jobs.list({
1373
- limit: GPT_QUERY_LIMIT, ...options?.params
1374
- });
1375
- return options?.raw ? resp : resp.data;
1376
- };
1377
-
1378
- const listGptFineTuningEvents = async (aiId, job_id, options) => {
1379
- const { client } = await getAi(aiId);
1380
- const resp = await client.fineTuning.jobs.listEvents(job_id, {
1381
- limit: GPT_QUERY_LIMIT, ...options?.params,
1382
- });
1383
- return options?.raw ? resp : resp.data;
1384
- };
1385
-
1386
- const tailGptFineTuningEvents = async (aiId, job_id, options) => {
1387
- assert(job_id, 'Job ID is required.');
1388
- const [loopName, listOpts] = [`GPT-${job_id}`, {
1389
- ...options, params: { ...options?.params, order: 'ascending' }
1390
- }];
1391
- let lastEvent;
1392
- return await loop(async () => {
1393
- const resp = await listGptFineTuningEvents(aiId, job_id, {
1394
- ...listOpts, params: {
1395
- ...listOpts?.params,
1396
- ...(lastEvent ? { after: lastEvent.id } : {}),
1397
- },
1398
- });
1399
- for (lastEvent of resp) {
1400
- lastEvent.message.includes('completed') && await end(loopName);
1401
- await options?.stream(lastEvent);
1402
- }
1403
- }, 3, 2, 1, loopName, { silent, ...options });
1404
- };
889
+ // const packGeminiReferences = (chunks, supports) => {
890
+ // let references = null;
891
+ // if (chunks?.length && supports?.length) {
892
+ // references = { segments: [], links: [] };
893
+ // supports.map(s => references.segments.push({
894
+ // ...s.segment, indices: s.groundingChunkIndices,
895
+ // confidence: s.confidenceScores,
896
+ // }));
897
+ // chunks.map(c => references.links.push(c.web));
898
+ // }
899
+ // return references;
900
+ // };
901
+
902
+ // const promptGemini = async (aiId, content, options = {}) => {
903
+ // let { provider, client, model } = await getAi(aiId);
904
+ // let [
905
+ // event, result, text, thinking, references, functionCalls, responded,
906
+ // images, thinkEnd,
907
+ // ] = [null, options.result ?? '', '', '', null, [], false, [], false];
908
+ // options.model = options.model || model.name;
909
+ // model?.image === true && (options.imageMode = true);
910
+ // assert(!(options.imageMode && !model.image), 'Image mode is not supported.');
911
+ // if (options.imageMode && String.isString(model.image)) {
912
+ // options.model = model.image;
913
+ // options.imageMode = true;
914
+ // model = MODELS[options.model];
915
+ // }
916
+ // options.flavor = GEMINI;
917
+ // const { systemPrompt: systemInstruction, history, prompt }
918
+ // = await buildPrompts(model, content, options);
919
+ // const responseModalities = options.modalities
920
+ // || (options.imageMode ? [TEXT, IMAGE] : undefined)
921
+ // || (options.audioMode ? [TEXT, AUDIO] : undefined);
922
+ // const chat = client.chats.create({
923
+ // model: options.model, history, config: {
924
+ // responseMimeType: options.jsonMode ? MIME_JSON : MIME_TEXT,
925
+ // ...model.reasoning ? {
926
+ // thinkingConfig: { includeThoughts: true },
927
+ // } : {}, systemInstruction, responseModalities,
928
+ // ...options?.config || {}, ...model?.tools && !options.jsonMode
929
+ // && ![GEMINI_25_FLASH_IMAGE].includes(options.model)
930
+ // ? (options.tools ?? {
931
+ // tools: [
932
+ // // @todo: Gemini will failed when using these tools together.
933
+ // // https://ai.google.dev/gemini-api/docs/function-calling
934
+ // // { codeExecution: {} },
935
+ // // { googleSearch: {} },
936
+ // // { urlContext: {} },
937
+ // // @todo: test these tools in next version 👆
938
+ // {
939
+ // functionDeclarations: (
940
+ // await toolsGemini({ provider })
941
+ // ).map(x => x.def)
942
+ // },
943
+ // ], toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
944
+ // }) : {},
945
+ // },
946
+ // });
947
+ // const resp = await chat.sendMessageStream({ message: prompt });
948
+ // for await (const chunk of resp) {
949
+ // assert(
950
+ // !chunk?.promptFeedback?.blockReason,
951
+ // chunk?.promptFeedback?.blockReason
952
+ // );
953
+ // event = chunk?.candidates?.[0];
954
+ // let [deltaText, deltaThink, deltaImages] = ['', '', []];
955
+ // event?.content?.parts?.map(x => {
956
+ // if (x.text && x.thought) { deltaThink = x.text; }
957
+ // else if (x.text) { deltaText = x.text; }
958
+ // else if (x.functionCall) { functionCalls.push(x); }
959
+ // else if (x.inlineData?.mimeType === MIME_PNG) {
960
+ // deltaImages.push(x.inlineData);
961
+ // images.push(x.inlineData);
962
+ // }
963
+ // });
964
+ // text += deltaText;
965
+ // thinking += deltaThink;
966
+ // deltaThink && deltaThink === thinking
967
+ // && (deltaThink = `${THINK_STR}\n${deltaThink}`);
968
+ // thinking && deltaText && !thinkEnd
969
+ // && (thinkEnd = deltaThink = `${deltaThink}${THINK_END}\n\n`);
970
+ // deltaText = deltaThink + deltaText;
971
+ // const rfc = packGeminiReferences(
972
+ // event?.groundingMetadata?.groundingChunks,
973
+ // event?.groundingMetadata?.groundingSupports
974
+ // );
975
+ // rfc && (references = rfc);
976
+ // options.result && deltaText
977
+ // && (responded = responded || (deltaText = `\n\n${deltaText}`));
978
+ // result += deltaText;
979
+ // (deltaText || deltaImages.length) && await streamResp({
980
+ // text: options.delta ? deltaText : result,
981
+ // images: options.delta ? deltaImages : images,
982
+ // }, options);
983
+ // }
984
+ // event = {
985
+ // role: MODEL, parts: [
986
+ // ...thinking ? [{ thought: true, text: thinking }] : [],
987
+ // ...text ? [{ text }] : [],
988
+ // ...functionCalls,
989
+ // ],
990
+ // };
991
+ // const { toolsResult, toolsResponse } = await handleToolsCall(
992
+ // event, { ...options, result, flavor: GEMINI }
993
+ // );
994
+ // if (toolsResult.length
995
+ // && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
996
+ // return promptGemini(aiId, content, {
997
+ // ...options || {}, result: toolsResponse,
998
+ // toolsResult: [...options?.toolsResult || [], ...toolsResult],
999
+ // });
1000
+ // }
1001
+ // return await packResp({
1002
+ // text: mergeMsgs(toolsResponse, toolsResult), images, references,
1003
+ // }, options);
1004
+ // };
1405
1005
 
1406
1006
  const initChat = async (options = {}) => {
1407
1007
  if (options.sessions) {
@@ -1413,8 +1013,8 @@ const initChat = async (options = {}) => {
1413
1013
  }
1414
1014
  options.instructions && (chatConfig.systemPrompt = options.instructions);
1415
1015
  // Use Gemini instead of ChatGPT because of the longer package.
1416
- const [spTokens, ais] = await Promise.all([countTokens([buildGeminiHistory(
1417
- chatConfig.systemPrompt, { role: system }
1016
+ const [spTokens, ais] = await Promise.all([countTokens([buildMessage(
1017
+ options.systemPrompt, system
1418
1018
  )]), getAi(null, { all: true })]);
1419
1019
  for (const ai of ais) {
1420
1020
  const mxPmpt = ai.model.maxInputTokens / 2;
@@ -1597,42 +1197,25 @@ export {
1597
1197
  FUNCTION,
1598
1198
  GEMINI_25_FLASH,
1599
1199
  GEMINI_25_FLASH_IMAGE,
1600
- GPT_5,
1200
+ GPT_51,
1601
1201
  INSTRUCTIONS,
1602
1202
  MODELS,
1603
1203
  OPENAI_VOICE,
1604
1204
  RETRIEVAL,
1605
- TEXT_EMBEDDING_3_SMALL,
1606
1205
  analyzeSessions,
1607
- buildGptTrainingCase,
1608
- buildGptTrainingCases,
1609
- cancelGptFineTuningJob,
1610
1206
  countTokens,
1611
- createGeminiEmbedding,
1612
- createGptFineTuningJob,
1613
- createOpenAIEmbedding,
1614
- deleteFile,
1615
1207
  distillFile,
1616
1208
  getAi,
1617
1209
  getChatAttachmentCost,
1618
1210
  getChatPromptLimit,
1619
- getGptFineTuningJob,
1620
1211
  getSession,
1621
1212
  init,
1622
1213
  initChat,
1623
1214
  k,
1624
- listFiles,
1625
- listGptFineTuningEvents,
1626
- listGptFineTuningJobs,
1627
1215
  listOpenAIModels,
1628
1216
  prompt,
1629
- promptAnthropic,
1630
- promptGemini,
1631
1217
  promptOpenAI,
1632
1218
  resetSession,
1633
- tailGptFineTuningEvents,
1634
1219
  talk,
1635
1220
  trimPrompt,
1636
- uploadFile,
1637
- uploadFileForFineTuning,
1638
1221
  };