utilitas 1999.1.7 → 1999.1.9

package/lib/alan.mjs CHANGED
@@ -45,36 +45,6 @@ const _NEED = [
  'js-tiktoken', 'OpenAI',
  ];
 
- const [
- OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING, OLLAMA,
- GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
- GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, DEEPSEEK_R1, DEEPSEEK_R1_70B,
- DEEPSEEK_R1_32B, MD_CODE, TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE,
- CLOUD_37_SONNET, AUDIO, WAV, ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW,
- HIGH, GPT_REASONING_EFFORT, THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR,
- TOOLS_END, TOOLS, TEXT, THINKING, OK, FUNC, GPT_45, REDACTED_THINKING,
- GEMMA_3_27B, AZURE_OPENAI, ANTHROPIC, VERTEX_ANTHROPIC, GEMMA327B, size8k,
- ais, MAX_TOOL_RECURSION, LOG, name, user, system, assistant, MODEL,
- JSON_OBJECT, TOOL, silent, GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio,
- GPT_QUERY_LIMIT, minsOfDay, CONTENT_IS_REQUIRED,
- ] = [
- 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
- 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
- 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
- 'gemini-2.0-pro-exp', 'nova', 'deepseek-r1', 'deepseek-r1:70b',
- 'deepseek-r1:32b', '```', 'text-embedding-3-small',
- 'text-embedding-3-large', 'claude-3-7-sonnet@20250219', 'audio', 'wav',
- '[ATTACHMENTS]', 'CHAT', 'OPENAI_VOICE', 'medium', 'low', 'high',
- 'medium', 'think', '<think>', '</think>', 'AZURE', '<tools>',
- '</tools>', 'tools', 'text', 'thinking', 'OK', 'function',
- 'gpt-4.5-preview', 'redacted_thinking', 'gemma-3-27b-it',
- 'Azure Openai', 'Anthropic', 'Vertex Anthropic', 'gemma3:27b',
- 7680 * 4320, [], 10, { log: true }, 'Alan', 'user', 'system',
- 'assistant', 'model', 'json_object', 'tool', true,
- 'gemini-embedding-exp-03-07', 'Invalid file data.', 1.1, 100, 60 * 24,
- 'Content is required.',
- ];
-
  const [
  png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, gif, webp, pdf, aac,
  flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp, mimeJson, mimeText, pcm16,
@@ -88,6 +58,39 @@ const [
  'text/plain', 'audio/x-wav', 'audio/ogg',
  ];
 
+ const [
+ OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING, OLLAMA,
+ GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
+ GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, DEEPSEEK_R1, MD_CODE,
+ TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE, CLOUD_37_SONNET, AUDIO, WAV,
+ ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW, HIGH, GPT_REASONING_EFFORT,
+ THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR, TOOLS_END, TOOLS, TEXT,
+ THINKING, OK, FUNC, GPT_45, REDACTED_THINKING, GEMMA_3_27B, AZURE_OPENAI,
+ ANTHROPIC, VERTEX_ANTHROPIC, GEMMA327B, v8k, ais, MAX_TOOL_RECURSION, LOG,
+ name, user, system, assistant, MODEL, JSON_OBJECT, TOOL, silent,
+ GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio, GPT_QUERY_LIMIT,
+ CONTENT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, kT, m, minute, hour,
+ gb, trimTailing, EBD, GEMINI_20_FLASH_EXP, IMAGE
+ ] = [
+ 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
+ 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
+ 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
+ 'gemini-2.0-pro-exp', 'nova', 'deepseek-r1', '```',
+ 'text-embedding-3-small', 'text-embedding-3-large',
+ 'claude-3-7-sonnet@20250219', 'audio', 'wav', '[ATTACHMENTS]', 'CHAT',
+ 'OPENAI_VOICE', 'medium', 'low', 'high', 'medium', 'think', '<think>',
+ '</think>', 'AZURE', '<tools>', '</tools>', 'tools', 'text', 'thinking',
+ 'OK', 'function', 'gpt-4.5-preview', 'redacted_thinking',
+ 'gemma-3-27b-it', 'Azure Openai', 'Anthropic', 'Vertex Anthropic',
+ 'gemma3:27b', 7680 * 4320, [], 10, { log: true }, 'Alan', 'user',
+ 'system', 'assistant', 'model', 'json_object', 'tool', true,
+ 'gemini-embedding-exp-03-07', 'Invalid file data.', 1.1, 100,
+ 'Content is required.', 2000 * 768, x => 1024 * x, x => 1000 * x,
+ x => 1024 * 1024 * x, x => 60 * x, x => 60 * 60 * x,
+ x => 1024 * 1024 * 1024 * x, x => x.replace(/[\.\s]*$/, ''),
+ { embedding: true }, 'gemini-2.0-flash-exp', 'image',
+ ];
+
  const [tool, messages, text]
  = [type => ({ type }), messages => ({ messages }), text => ({ text })];
  const [CODE_INTERPRETER, RETRIEVAL, FUNCTION]
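
The hunk above folds a set of tiny unit helpers (`k`, `kT`, `m`, `minute`, `hour`, `gb`) and the relocated `trimTailing` into the big destructuring assignment. A minimal standalone sketch of what they evaluate to, with the definitions copied from the diff (the `console.log` lines are illustrative only):

    // Unit helpers as defined in the new destructure above.
    const k = x => 1024 * x;                 // binary kilo: k(16) === 16384
    const kT = x => 1000 * x;                // decimal kilo: kT(128) === 128000
    const m = x => 1024 * 1024 * x;          // mebi: m(20) === 20971520 (20 MB)
    const minute = x => 60 * x;              // seconds in x minutes
    const hour = x => 60 * 60 * x;           // seconds in x hours
    const gb = x => 1024 * 1024 * 1024 * x;  // gibi: gb(2) === 2 GiB in bytes
    const trimTailing = x => x.replace(/[\.\s]*$/, '');

    console.log(kT(128));                    // 128000, the old literal contextWindow
    console.log(trimTailing('Done. '));      // 'Done'
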
@@ -98,7 +101,6 @@ const [newSessionId, newAiId]
  = [sessionType, aiType].map(type => () => createUoid({ type }));
  const chatConfig = { sessions: new Map(), systemPrompt: INSTRUCTIONS };
  const tokenSafe = count => Math.ceil(count * tokenSafeRatio);
- const trimTailing = text => text.replace(/[\.\s]*$/, '');
  const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
  const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
  const assertContent = content => assert(content.length, CONTENT_IS_REQUIRED);
@@ -107,267 +109,129 @@ const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
  const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
  const AzureOpenAI = async opts => new (await libOpenAi(opts)).AzureOpenAI(opts);
-
- const DEFAULT_MODELS = {
- [OPENAI]: GPT_4O,
- [AZURE_OPENAI]: GPT_4O,
- [GEMINI]: GEMINI_20_FLASH,
- [ANTHROPIC]: CLOUD_37_SONNET,
- [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
- [OLLAMA]: GEMMA327B,
- [OPENAI_VOICE]: NOVA,
- [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
- [GEMINI_EMEDDING]: GEMINI_EMBEDDING_M,
- [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ const OPENAI_S1 = { contextWindow: kT(128), maxOutputTokens: k(16) };
+ const OPENAI_S2 = { contextWindow: kT(200), maxOutputTokens: kT(100) };
+ const OPENAI_EBD = { ...EBD, maxInputTokens: k(8) - 1 };
+ const OPENAI_AUDIO_TYPES = { supportedAudioTypes: [wav] };
+ const GPT_4O_AUDIO = { ...OPENAI_AUDIO_TYPES, audio: 'gpt-4o-audio-preview' };
+
+ const OPENAI_RULES = {
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / (512 * 512) * 170 + 85),
+ maxFileSize: m(20), maxImageSize: OPENAI_HI_RES_SIZE,
+ supportedMimeTypes: [png, jpeg, gif, webp],
+ json: true, tools: true, vision: true,
  };
 
- DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
-
- const tokenRatioByWords = Math.min(
- 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
- Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
-
- const tokenRatioByCharacters = Math.max(
- 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
- 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
+ const GEMINI_RULES = {
+ audioCostTokens: 1000 * 1000 * 1, // 8.4 hours => 1 million tokens
+ imageCostTokens: ~~(v8k / (768 * 768) * 258), maxAudioLength: hour(8.4),
+ maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
+ maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
+ maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
+ png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
+ flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
+ ],
+ };
 
- // https://platform.openai.com/docs/models/continuous-model-upgrades
- // https://platform.openai.com/settings/organization/limits // Tier 3
- // https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
+ // https://platform.openai.com/docs/models
  // https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
  const MODELS = {
+ [GPT_4O]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
+ [GPT_45]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
  [GPT_4O_MINI]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- audio: 'gpt-4o-mini-audio-preview',
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
- },
- [GPT_4O]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 20000000,
- tokenLimitsTPM: 2000000,
- audio: 'gpt-4o-audio-preview',
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
+ ...OPENAI_S1, ...OPENAI_RULES, ...OPENAI_AUDIO_TYPES,
+ audio: 'gpt-4o-mini-audio-preview', fast: true,
  },
  [GPT_O1]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 200000000,
- tokenLimitsTPM: 2000000,
- json: true,
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
  reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, gif, webp,
- ],
- trainingData: 'Oct 2023',
  },
  [GPT_O3_MINI]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- fast: true,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
- },
- [GPT_45]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 100000000,
- tokenLimitsTPM: 1000000,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
+ fast: true, reasoning: true,
  },
  [GEMINI_20_FLASH]: {
- // https://ai.google.dev/gemini-api/docs/models/gemini
- // https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts?hl=en#gemini-send-multimodal-samples-pdf-nodejs
- // Audio / Video Comming Soon: https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash
- audioCostTokens: 1000000, // 8.4 hours => 1 million tokens
- contextWindow: 1048576,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxAudioLength: 60 * 60 * 8.4, // 9.5 hours
- maxAudioPerPrompt: 1,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- maxVideoLength: 60 * 50, // 50 minutes
- maxVideoLengthWithAudio: 60 * 50, // 50 minutes
- maxVideoLengthWithoutAudio: 60 * 60, // 1 hour
- maxVideoPerPrompt: 10,
- requestLimitsRPD: 1500,
- requestLimitsRPM: 2000,
- tokenLimitsTPM: 4 * 1000000,
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(8),
+ fast: true, json: true, tools: true,
  },
  [GEMINI_20_FLASH_THINKING]: {
- // https://cloud.google.com/vertex-ai/generative-ai/docs/thinking-mode?hl=en
- contextWindow: 1024 * (8 + 32),
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
  reasoning: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
- trainingData: 'August 2024',
  },
  [GEMINI_20_PRO]: {
- contextWindow: 2097152,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(2), maxOutputTokens: k(8),
  json: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
  },
  [GEMMA_3_27B]: {
- contextWindow: 128 * 1000,
- imageCostTokens: 256,
- maxImageSize: 896 * 896,
- maxOutputTokens: 1024 * 8,
- fast: true,
- json: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
+ contextWindow: kT(128), maxOutputTokens: k(8),
+ imageCostTokens: 256, maxImageSize: 896 * 896,
+ supportedMimeTypes: [png, jpeg, gif],
+ fast: true, json: true, vision: true,
  },
  [DEEPSEEK_R1]: {
- contextWindow: 128 * 1000,
- maxOutputTokens: 32768,
- requestLimitsRPM: Infinity,
- tokenLimitsTPM: Infinity,
+ contextWindow: kT(128), maxOutputTokens: k(32),
  reasoning: true,
- },
- [TEXT_EMBEDDING_3_SMALL]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 1536,
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [TEXT_EMBEDDING_3_LARGE]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 3072, // ERROR: column cannot have more than 2000 dimensions for hnsw index
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [GEMINI_EMBEDDING_M]: {
- contextWindow: 1024 * 8,
- outputDimension: 1024 * 3, // ERROR: column cannot have more than 2000 dimensions for hnsw index
- embedding: true,
- },
- // https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden/claude-3-7-sonnet?authuser=5&inv=1&invt=Abqftg&project=backend-alpha-97077
- [CLOUD_37_SONNET]: {
- contextWindow: 200 * 1000,
- maxOutputTokens: 64 * 1000, // Should be 128 * 1000, but Anthropic SDK limits it to 64 * 1000
- imageCostTokens: size8k / 750,
- documentCostTokens: 3000 * 100, // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
- maxImagePerPrompt: 5, // https://docs.anthropic.com/en/docs/build-with-claude/vision
- maxImageSize: 1092, // by pixels
- maxDocumentPages: 100,
- maxDocumentFile: 1024 * 1024 * 32, // 32MB
- requestLimitsRPM: 50,
- tokenLimitsITPM: 40000,
- tokenLimitsOTPM: 8000,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
+ }, // ERROR: column cannot have more than 2000 dimensions for hnsw index
+ [TEXT_EMBEDDING_3_LARGE]: { ...OPENAI_EBD, dimension: k(3) },
+ [TEXT_EMBEDDING_3_SMALL]: { ...OPENAI_EBD, dimension: k(1.5) },
+ [GEMINI_EMBEDDING_M]: { ...EBD, maxInputTokens: k(8), dimension: k(3) },
+ [CLOUD_37_SONNET]: { // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
+ contextWindow: kT(200), maxOutputTokens: kT(64),
+ documentCostTokens: 3000 * 100, maxDocumentFile: m(32),
+ maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
+ maxImagePerPrompt: 100, maxImageSize: 2000 * 2000,
  supportedMimeTypes: [png, jpeg, gif, webp, pdf],
- trainingData: 'Apr 2024',
- },
- };
+ json: true, reasoning: true, tools: true, vision: true,
+ }, // https://docs.anthropic.com/en/docs/build-with-claude/vision
 
- MODELS[DEEPSEEK_R1_70B] = MODELS[DEEPSEEK_R1];
- MODELS[DEEPSEEK_R1_32B] = MODELS[DEEPSEEK_R1];
- MODELS[GEMMA327B] = MODELS[GEMMA_3_27B];
+ };
 
+ // Unifiy model configurations
+ let ATTACHMENT_TOKEN_COST = 0;
  for (const n in MODELS) {
  MODELS[n]['name'] = n;
- if (MODELS[n].embedding) {
- MODELS[n].maxInputTokens = MODELS[n].contextWindow;
- } else {
+ if (!MODELS[n].embedding) {
  MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
  MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
  || Math.ceil(MODELS[n].contextWindow * 0.4);
  MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
  || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
- MODELS[n].tokenLimitsTPD = MODELS[n].tokenLimitsTPD
- || (MODELS[n].tokenLimitsTPM * minsOfDay);
- MODELS[n].requestLimitsRPD = MODELS[n].requestLimitsRPD
- || (MODELS[n].requestLimitsRPM * minsOfDay);
- MODELS[n].requestCapacityRPM = Math.ceil(Math.min(
- MODELS[n].tokenLimitsTPM / MODELS[n].maxInputTokens,
- MODELS[n].requestLimitsRPM, MODELS[n].requestLimitsRPD / minsOfDay
- ));
+ ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
+ ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
+ ) : MODELS[n].imageCostTokens;
  }
  }
+ MODELS[GEMMA327B] = MODELS[GEMMA_3_27B]; // Ollama Alias
+ MODELS[GEMINI_20_FLASH].image = GEMINI_20_FLASH_EXP;
+ MODELS[GEMINI_20_FLASH_EXP] = {
+ ...MODELS[GEMINI_20_FLASH],
+ name: GEMINI_20_FLASH_EXP, image: true, tools: false,
+ };
+
+ // Default models for each provider
+ const DEFAULT_MODELS = {
+ [OPENAI]: GPT_4O,
+ [AZURE_OPENAI]: GPT_4O,
+ [GEMINI]: GEMINI_20_FLASH,
+ [ANTHROPIC]: CLOUD_37_SONNET,
+ [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
+ [OLLAMA]: GEMMA327B,
+ [OPENAI_VOICE]: NOVA,
+ [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
+ [GEMINI_EMEDDING]: GEMINI_EMBEDDING_M,
+ [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ };
+ DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
+
+ const tokenRatioByWords = Math.min(
+ 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
+ Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );
 
- const MAX_INPUT_TOKENS = MODELS[GPT_4O_MINI].maxInputTokens;
- const ATTACHMENT_TOKEN_COST = Math.max(MODELS[GPT_4O].imageCostTokens, 5000);
- const MAX_TRIM_TRY = MAX_INPUT_TOKENS / 1000;
+ const tokenRatioByCharacters = Math.max(
+ 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
+ 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );
 
 
  let tokeniser;
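
Worth noting in the rewritten `MODELS` table: the per-model rate-limit bookkeeping (`requestLimitsRPM`, `tokenLimitsTPM`, `trainingData`, and friends) is gone, and `ATTACHMENT_TOKEN_COST` changes from a `GPT_4O`-based constant to the maximum `imageCostTokens` across all models, computed in the normalization loop. A worked sketch using only constants from this diff (arithmetic checked by hand, so treat the exact figures as illustrative):

    const OPENAI_HI_RES_SIZE = 2000 * 768;  // 1,536,000 px
    const v8k = 7680 * 4320;                // 33,177,600 px (an 8K frame)

    const openAiImageCost = ~~(OPENAI_HI_RES_SIZE / (512 * 512) * 170 + 85); // 1081
    const geminiImageCost = ~~(v8k / (768 * 768) * 258);                     // 14512
    const claudeImageCost = ~~(v8k / 750);                                   // 44236

    // The loop therefore leaves ATTACHMENT_TOKEN_COST at the Claude figure,
    // and for GPT_4O derives maxInputTokens = kT(128) - k(16) = 111616.
    console.log(Math.max(openAiImageCost, geminiImageCost, claudeImageCost)); // 44236
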
@@ -629,7 +493,7 @@ const buildGptMessage = (content, options) => {
  const attachments = (options?.attachments || []).map(x => {
  assert(MODELS[options?.model], 'Model is required.');
  if (MODELS[options.model]?.supportedMimeTypes?.includes?.(x.mime_type)) {
- return { type: 'image_url', image_url: { url: x.url } };
+ return { type: 'image_url', image_url: { url: x.url, detail: 'high' } };
  } else if (MODELS[options.model]?.supportedAudioTypes?.includes?.(x.mime_type)) {
  alterModel = selectGptAudioModel(options);
  return {
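
The one-line change above opts every supported image attachment into OpenAI's high-detail vision processing. The resulting message part looks like this (standard Chat Completions vision shape; the URL is a placeholder):

    const part = {
        type: 'image_url',
        image_url: { url: 'https://example.com/photo.png', detail: 'high' },
    };

High detail processes the image in 512 × 512 tiles, which is consistent with the hi-res `imageCostTokens` formula in `OPENAI_RULES` above.
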
@@ -719,12 +583,15 @@ const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
  // @todo: escape ``` in think and tools
  const packResp = async (resp, options) => {
  if (options?.raw) { return resp; }
- let [txt, audio, references, simpleText, referencesMarkdown, end, json] = [
- resp.text || '', // ChatGPT / Claude / Gemini / Ollama
- resp?.audio?.data, // ChatGPT audio mode
- resp?.references, // Gemini references
- '', '', '', null,
- ];
+ let [
+ txt, audio, references, simpleText, referencesMarkdown, end, json,
+ images
+ ] = [
+ resp.text || '', // ChatGPT / Claude / Gemini / Ollama
+ resp?.audio?.data, // ChatGPT audio mode
+ resp?.references, // Gemini references
+ '', '', '', null, resp?.images || [],
+ ];
  simpleText = txt;
  while ((end = getInfoEnd(simpleText))) {
  simpleText = simpleText.slice(end).trim();
@@ -741,9 +608,17 @@ const packResp = async (resp, options) => {
  ])) && (audio = await convert(audio, {
  input: BUFFER, expected: BUFFER, ...options || {},
  }));
+ if (images?.length) {
+ for (let i in images) {
+ images[i].data = await convert(images[i].data, {
+ input: BASE64, expected: BUFFER,
+ });
+ }
+ }
  options?.jsonMode && !options?.delta && (json = parseJson(simpleText, null));
  if (options?.simple && options?.audioMode) { return audio; }
  else if (options?.simple && options?.jsonMode) { return json; }
+ else if (options?.simple && options?.imageMode) { return images; }
  else if (options?.simple) { return simpleText; }
  else if (options?.jsonMode) { txt = `\`\`\`json\n${simpleText}\n\`\`\``; }
  // references debug codes:
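
Gemini returns generated images as base64 `inlineData` parts, so `packResp` now converts each one to a Buffer before returning, via the existing `convert` helper with `BASE64`/`BUFFER`. A sketch of one entry before conversion (field names per Google's Generative AI SDK; the payload is a placeholder):

    const image = {
        mimeType: 'image/png',
        data: 'iVBORw0KGgo...',   // base64 payload collected from the stream
    };
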
@@ -796,6 +671,7 @@ const packResp = async (resp, options) => {
  ...references ? { references } : {},
  ...referencesMarkdown ? { referencesMarkdown } : {},
  ...audio ? { audio, audioMimeType: options?.audioMimeType } : {},
+ ...images?.length ? { images } : {},
  processing: !!options?.processing,
  model: options?.model,
  };
@@ -828,7 +704,10 @@ const buildPrompts = async (model, input, options = {}) => {
  prompt = buildClaudeMessage(content, { ...options, cache_control: true });
  break;
  case GEMINI:
- const _role = { role: options.model === GEMMA_3_27B ? user : system };
+ const _role = {
+ role: [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)
+ ? user : system
+ };
  systemPrompt = buildGeminiHistory(options.systemPrompt, _role);
  prompt = options.toolsResult?.[options.toolsResult?.length - 1]?.parts
  || buildGeminiMessage(content, options)
@@ -884,9 +763,10 @@ const buildPrompts = async (model, input, options = {}) => {
  msgBuilder();
  } else {
  content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
- }
- }, model.maxInputTokens - options.attachments?.length * ATTACHMENT_TOKEN_COST);
- if ([OPENAI].includes(options.flavor) || options.model === GEMMA_3_27B) {
+ } // @todo: audioCostTokens (needs to calculate the audio length):
+ }, model.maxInputTokens - options.attachments?.length * model.imageCostTokens);
+ if ([OPENAI].includes(options.flavor)
+ || [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)) {
  systemPrompt = null;
  }
  return { systemPrompt, history, prompt };
@@ -1073,7 +953,7 @@ const promptAnthropic = async (aiId, content, options = {}) => {
  = await buildPrompts(model, content, { ...options, flavor: ANTHROPIC });
  const resp = await client.beta.messages.create({
  model: options.model, ...history, system, stream: true,
- max_tokens: options.extendedThinking ? 128000 : model.maxOutputTokens,
+ max_tokens: options.extendedThinking ? kT(128) : model.maxOutputTokens,
  ...(options.reasoning ?? model.reasoning) ? {
  thinking: options.thinking || {
  type: 'enabled',
@@ -1163,8 +1043,10 @@ const deleteFile = async (aiId, file_id, options) => {
 
  const generationConfig = options => ({
  generationConfig: {
+ responseMimeType: options.jsonMode ? mimeJson : mimeText,
+ responseModalities: options.modalities
+ || (options.imageMode ? [TEXT, IMAGE] : undefined),
  ...options?.generationConfig || {},
- responseMimeType: options?.jsonMode ? mimeJson : mimeText,
  },
  });
 
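Two behavioral points in this hunk: `responseModalities` is populated when `imageMode` is set (using the `TEXT` and `IMAGE` strings destructured earlier), and `responseMimeType` now precedes the `...options?.generationConfig` spread, so a caller-supplied `generationConfig` can override it, where previously the mime type always won. A sketch of the output, assuming `mimeText` is `'text/plain'` as in the MIME list near the top of the file:

    generationConfig({ imageMode: true });
    // => { generationConfig: {
    //        responseMimeType: 'text/plain',
    //        responseModalities: ['text', 'image'],
    //    } }
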
@@ -1182,43 +1064,62 @@ const packGeminiReferences = (chunks, supports) => {
  };
 
  const promptGemini = async (aiId, content, options = {}) => {
- const { client, model } = await getAi(aiId);
- let [result, references, functionCalls, responded]
- = [options.result ?? '', null, null, false];
+ let { client, model } = await getAi(aiId);
+ let [result, references, functionCalls, responded, images]
+ = [options.result ?? '', null, null, false, []];
  options.model = options.model || model.name;
+ model.image && (options.imageMode = true);
+ assert(!(options.imageMode && !model.image), 'Image mode is not supported.');
+ if (String.isString(model.image)) {
+ options.model = model.image;
+ options.imageMode = true;
+ model = MODELS[options.model];
+ }
  const { systemPrompt: systemInstruction, history, prompt }
  = await buildPrompts(model, content, { ...options, flavor: GEMINI });
  const _client = client.getGenerativeModel({
  model: options.model, systemInstruction,
- ...model?.tools && !options.jsonMode ? (options.tools ?? {
- tools: [
- // @todo: Gemini will failed when using these tools together.
- // https://ai.google.dev/gemini-api/docs/function-calling
- // { codeExecution: {} },
- // { googleSearch: {} },
- { functionDeclarations: (await toolsGemini()).map(x => x.def) },
- ],
- toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
- }) : {},
+ ...model?.tools && !options.jsonMode
+ && options.model !== GEMINI_20_FLASH_EXP ? (options.tools ?? {
+ tools: [
+ // @todo: Gemini will failed when using these tools together.
+ // https://ai.google.dev/gemini-api/docs/function-calling
+ // { codeExecution: {} },
+ // { googleSearch: {} },
+ {
+ functionDeclarations: (
+ await toolsGemini()
+ ).map(x => x.def)
+ },
+ ],
+ toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
+ }) : {},
  });
  // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
  // Google's bug: history is not allowed while using inline_data?
  const chat = _client.startChat({ history, ...generationConfig(options) });
  const resp = await chat.sendMessageStream(prompt);
  for await (const chunk of resp.stream) {
- print(chunk);
+ const deltaImages = [];
+ chunk.candidates[0].content?.parts?.filter(
+ x => x?.inlineData?.mimeType === png
+ )?.map?.(x => {
+ deltaImages.push(x.inlineData);
+ images.push(x.inlineData);
+ });
  functionCalls || (functionCalls = chunk.functionCalls);
  const rfc = packGeminiReferences(
  chunk.candidates[0]?.groundingMetadata?.groundingChunks,
  chunk.candidates[0]?.groundingMetadata?.groundingSupports
  );
  rfc && (references = rfc);
- let delta = chunk?.text?.() || '';
- options.result && delta
- && (responded = responded || (delta = `\n\n${delta}`));
- result += delta;
- delta && await streamResp({
- text: options.delta ? delta : result,
+ let deltaText = chunk?.text?.() || '';
+ options.result && deltaText
+ && (responded = responded || (deltaText = `\n\n${deltaText}`));
+ result += deltaText;
+ (deltaText || deltaImages.length) && await streamResp({
+ text: options.delta ? deltaText : result,
+ images: options.delta ? deltaImages : images,
  }, options);
  }
  const _resp = await resp.response;
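
The new image-mode plumbing in `promptGemini` keys off `model.image`: a string value means "delegate image output to this other model", while `true` means the model renders images itself. A sketch of the routing, mirroring the branch above and the `MODELS` wiring earlier in this diff (`String.isString` is a utilitas helper, assumed available):

    // MODELS['gemini-2.0-flash'].image === 'gemini-2.0-flash-exp'  -> reroute
    // MODELS['gemini-2.0-flash-exp'].image === true                -> serve directly
    const route = model => String.isString(model.image)
        ? { model: model.image, imageMode: true }           // swap to the -exp model
        : { model: model.name, imageMode: !!model.image };  // stay put

Note the function-declaration tools block is also skipped for `GEMINI_20_FLASH_EXP`, matching its `tools: false` entry.
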
@@ -1237,7 +1138,7 @@ const promptGemini = async (aiId, content, options = {}) => {
  });
  }
  return await packResp({
- text: mergeMsgs(toolsResponse, toolsResult), references,
+ text: mergeMsgs(toolsResponse, toolsResult), images, references,
  }, options);
  };
 
@@ -1411,7 +1312,7 @@ const talk = async (input, options = {}) => {
  };
  };
 
- const getMaxChatPromptLimit = async (options) => {
+ const getChatPromptLimit = async (options) => {
  let resp = 0;
  (await getAi(null, { all: true })).map(x => {
  if (options?.aiId && options?.aiId !== x.id) { return; }
@@ -1419,7 +1320,17 @@ const getMaxChatPromptLimit = async (options) => {
  resp = resp ? Math.min(resp, maxInputTokens) : maxInputTokens;
  });
  assert(resp > 0, 'Chat engine has not been initialized.');
- return options?.raw ? resp : Math.min(resp, MAX_INPUT_TOKENS);
+ return resp;
+ };
+
+ const getChatAttachmentCost = async (options) => {
+ let resp = 0;
+ (await getAi(null, { all: true })).map(x => {
+ if (options?.aiId && options?.aiId !== x.id) { return; }
+ resp = Math.max(resp, x.model.imageCostTokens || 0);
+ });
+ assert(resp > 0, 'Chat engine has not been initialized.');
+ return resp;
  };
 
  const distillFile = async (attachments, o) => {
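
`getMaxChatPromptLimit` is renamed to `getChatPromptLimit` (and no longer clamps to the removed `MAX_INPUT_TOKENS`), with `getChatAttachmentCost` added alongside it. A hypothetical usage sketch; the import path and `init` options depend on the host app:

    import init, { getChatPromptLimit, getChatAttachmentCost } from './alan.mjs';

    await init({ /* provider credentials, per the host app */ });
    const attachments = [];                           // pending uploads
    const maxPrompt = await getChatPromptLimit();     // min maxInputTokens across engines
    const perImage = await getChatAttachmentCost();   // max imageCostTokens across engines
    const textBudget = maxPrompt - attachments.length * perImage;
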
@@ -1465,7 +1376,7 @@ const prompt = async (input, options = {}) => {
  };
 
  const trimPrompt = async (getPrompt, trimFunc, contextWindow, options) => {
- let [i, maxTry] = [0, ~~options?.maxTry || MAX_TRIM_TRY];
+ let [i, maxTry] = [0, ~~options?.maxTry || kT(128)];
  while ((await countTokens(await getPrompt(), { fast: true }) > contextWindow)
  || (await countTokens(await getPrompt()) > contextWindow)) {
  await trimFunc();
@@ -1515,38 +1426,54 @@ const analyzeSessions = async (sessionIds, options) => {
 
  export default init;
  export {
- ATTACHMENT_TOKEN_COST, CLOUD_37_SONNET, CODE_INTERPRETER, DEEPSEEK_R1,
- DEEPSEEK_R1_32B, DEEPSEEK_R1_70B, DEFAULT_MODELS,
- FUNCTION, GEMINI_20_FLASH, GEMINI_20_FLASH_THINKING,
- GPT_45, GPT_4O, GPT_4O_MINI, GPT_O1, GPT_O3_MINI, INSTRUCTIONS, MODELS,
- OPENAI_VOICE, RETRIEVAL,
- TEXT_EMBEDDING_3_SMALL, _NEED, analyzeSessions,
+ _NEED,
+ CLOUD_37_SONNET,
+ CODE_INTERPRETER,
+ DEEPSEEK_R1,
+ DEFAULT_MODELS,
+ FUNCTION,
+ GEMINI_20_FLASH_THINKING,
+ GEMINI_20_FLASH,
+ GPT_45,
+ GPT_4O_MINI,
+ GPT_4O,
+ GPT_O1,
+ GPT_O3_MINI,
+ INSTRUCTIONS,
+ MODELS,
+ OPENAI_VOICE,
+ RETRIEVAL,
+ analyzeSessions,
  buildGptTrainingCase,
  buildGptTrainingCases,
  cancelGptFineTuningJob,
  countTokens,
- createGeminiEmbedding, createGptFineTuningJob,
+ createGeminiEmbedding,
+ createGptFineTuningJob,
  createOpenAIEmbedding,
  deleteFile,
  distillFile,
+ getAi,
+ getChatAttachmentCost,
+ getChatPromptLimit,
  getGptFineTuningJob,
- getMaxChatPromptLimit,
  getSession,
  init,
  initChat,
  jpeg,
- getAi,
  listFiles,
  listGptFineTuningEvents,
  listGptFineTuningJobs,
  listOpenAIModels,
  ogg,
- prompt, promptOpenAI,
+ prompt,
  promptAnthropic,
  promptGemini,
+ promptOpenAI,
  resetSession,
  tailGptFineTuningEvents,
  talk,
+ TEXT_EMBEDDING_3_SMALL,
  trimPrompt,
  uploadFile,
  uploadFileForFineTuning,