utilitas 1999.1.7 → 1999.1.8

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
package/lib/alan.mjs CHANGED
@@ -45,36 +45,6 @@ const _NEED = [
  'js-tiktoken', 'OpenAI',
  ];

- const [
- OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING, OLLAMA,
- GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
- GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, DEEPSEEK_R1, DEEPSEEK_R1_70B,
- DEEPSEEK_R1_32B, MD_CODE, TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE,
- CLOUD_37_SONNET, AUDIO, WAV, ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW,
- HIGH, GPT_REASONING_EFFORT, THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR,
- TOOLS_END, TOOLS, TEXT, THINKING, OK, FUNC, GPT_45, REDACTED_THINKING,
- GEMMA_3_27B, AZURE_OPENAI, ANTHROPIC, VERTEX_ANTHROPIC, GEMMA327B, size8k,
- ais, MAX_TOOL_RECURSION, LOG, name, user, system, assistant, MODEL,
- JSON_OBJECT, TOOL, silent, GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio,
- GPT_QUERY_LIMIT, minsOfDay, CONTENT_IS_REQUIRED,
- ] = [
- 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
- 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
- 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
- 'gemini-2.0-pro-exp', 'nova', 'deepseek-r1', 'deepseek-r1:70b',
- 'deepseek-r1:32b', '```', 'text-embedding-3-small',
- 'text-embedding-3-large', 'claude-3-7-sonnet@20250219', 'audio', 'wav',
- '[ATTACHMENTS]', 'CHAT', 'OPENAI_VOICE', 'medium', 'low', 'high',
- 'medium', 'think', '<think>', '</think>', 'AZURE', '<tools>',
- '</tools>', 'tools', 'text', 'thinking', 'OK', 'function',
- 'gpt-4.5-preview', 'redacted_thinking', 'gemma-3-27b-it',
- 'Azure Openai', 'Anthropic', 'Vertex Anthropic', 'gemma3:27b',
- 7680 * 4320, [], 10, { log: true }, 'Alan', 'user', 'system',
- 'assistant', 'model', 'json_object', 'tool', true,
- 'gemini-embedding-exp-03-07', 'Invalid file data.', 1.1, 100, 60 * 24,
- 'Content is required.',
- ];
-
  const [
  png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, gif, webp, pdf, aac,
  flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp, mimeJson, mimeText, pcm16,
@@ -88,6 +58,39 @@ const [
  'text/plain', 'audio/x-wav', 'audio/ogg',
  ];

+ const [
+ OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING, OLLAMA,
+ GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
+ GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, DEEPSEEK_R1, MD_CODE,
+ TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE, CLOUD_37_SONNET, AUDIO, WAV,
+ ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW, HIGH, GPT_REASONING_EFFORT,
+ THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR, TOOLS_END, TOOLS, TEXT,
+ THINKING, OK, FUNC, GPT_45, REDACTED_THINKING, GEMMA_3_27B, AZURE_OPENAI,
+ ANTHROPIC, VERTEX_ANTHROPIC, GEMMA327B, v8k, ais, MAX_TOOL_RECURSION, LOG,
+ name, user, system, assistant, MODEL, JSON_OBJECT, TOOL, silent,
+ GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio, GPT_QUERY_LIMIT,
+ CONTENT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, kT, m, minute, hour,
+ gb, trimTailing, EBD, GEMINI_20_FLASH_EXP, IMAGE
+ ] = [
+ 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
+ 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
+ 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
+ 'gemini-2.0-pro-exp', 'nova', 'deepseek-r1', '```',
+ 'text-embedding-3-small', 'text-embedding-3-large',
+ 'claude-3-7-sonnet@20250219', 'audio', 'wav', '[ATTACHMENTS]', 'CHAT',
+ 'OPENAI_VOICE', 'medium', 'low', 'high', 'medium', 'think', '<think>',
+ '</think>', 'AZURE', '<tools>', '</tools>', 'tools', 'text', 'thinking',
+ 'OK', 'function', 'gpt-4.5-preview', 'redacted_thinking',
+ 'gemma-3-27b-it', 'Azure Openai', 'Anthropic', 'Vertex Anthropic',
+ 'gemma3:27b', 7680 * 4320, [], 10, { log: true }, 'Alan', 'user',
+ 'system', 'assistant', 'model', 'json_object', 'tool', true,
+ 'gemini-embedding-exp-03-07', 'Invalid file data.', 1.1, 100,
+ 'Content is required.', 2000 * 768, x => 1024 * x, x => 1000 * x,
+ x => 1024 * 1024 * x, x => 60 * x, x => 60 * 60 * x,
+ x => 1024 * 1024 * 1024 * x, x => x.replace(/[\.\s]*$/, ''),
+ { embedding: true }, 'gemini-2.0-flash-exp', 'image',
+ ];
+
  const [tool, messages, text]
  = [type => ({ type }), messages => ({ messages }), text => ({ text })];
  const [CODE_INTERPRETER, RETRIEVAL, FUNCTION]
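
Note: the new v8k (an 8K frame, 7680 x 4320 px), OPENAI_HI_RES_SIZE, and unit-helper constants added above replace the magic numbers used throughout the old MODELS table; trimTailing is also hoisted into this destructure (its standalone definition is removed in the next hunk). A minimal sketch of what the helpers evaluate to, with values taken straight from this diff:

    const k = x => 1024 * x;                // k(16) === 16384; binary "K"
    const kT = x => 1000 * x;               // kT(128) === 128000; decimal "K" for token counts
    const m = x => 1024 * 1024 * x;         // m(20) === 20971520 (20 MB)
    const minute = x => 60 * x;             // minute(45) === 2700 seconds
    const hour = x => 60 * 60 * x;          // hour(8.4) === 30240 seconds
    const gb = x => 1024 * 1024 * 1024 * x; // gb(2) === 2147483648 (2 GB)
    const trimTailing = x => x.replace(/[\.\s]*$/, ''); // strip trailing dots/whitespace
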
@@ -98,7 +101,6 @@ const [newSessionId, newAiId]
  = [sessionType, aiType].map(type => () => createUoid({ type }));
  const chatConfig = { sessions: new Map(), systemPrompt: INSTRUCTIONS };
  const tokenSafe = count => Math.ceil(count * tokenSafeRatio);
- const trimTailing = text => text.replace(/[\.\s]*$/, '');
  const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
  const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
  const assertContent = content => assert(content.length, CONTENT_IS_REQUIRED);
@@ -107,267 +109,128 @@ const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
  const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
  const AzureOpenAI = async opts => new (await libOpenAi(opts)).AzureOpenAI(opts);
-
- const DEFAULT_MODELS = {
- [OPENAI]: GPT_4O,
- [AZURE_OPENAI]: GPT_4O,
- [GEMINI]: GEMINI_20_FLASH,
- [ANTHROPIC]: CLOUD_37_SONNET,
- [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
- [OLLAMA]: GEMMA327B,
- [OPENAI_VOICE]: NOVA,
- [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
- [GEMINI_EMEDDING]: GEMINI_EMBEDDING_M,
- [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ const OPENAI_S1 = { contextWindow: kT(128), maxOutputTokens: k(16) };
+ const OPENAI_S2 = { contextWindow: kT(200), maxOutputTokens: kT(100) };
+ const OPENAI_EBD = { ...EBD, maxInputTokens: k(8) - 1 };
+ const OPENAI_AUDIO_TYPES = { supportedAudioTypes: [wav] };
+ const GPT_4O_AUDIO = { ...OPENAI_AUDIO_TYPES, audio: 'gpt-4o-audio-preview' };
+
+ const OPENAI_RULES = {
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / (512 * 512) * 170 + 85),
+ maxFileSize: m(20), maxImageSize: OPENAI_HI_RES_SIZE,
+ supportedMimeTypes: [png, jpeg, gif, webp],
+ json: true, tools: true, vision: true,
  };

- DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
-
- const tokenRatioByWords = Math.min(
- 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
- Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
-
- const tokenRatioByCharacters = Math.max(
- 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
- 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
+ const GEMINI_RULES = {
+ audioCostTokens: 1000 * 1000 * 1, // 8.4 hours => 1 million tokens
+ imageCostTokens: ~~(v8k / (768 * 768) * 258), maxAudioLength: hour(8.4),
+ maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
+ maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
+ maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
+ png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
+ flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
+ ],
+ };

- // https://platform.openai.com/docs/models/continuous-model-upgrades
- // https://platform.openai.com/settings/organization/limits // Tier 3
- // https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
+ // https://platform.openai.com/docs/models
  // https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
  const MODELS = {
+ [GPT_4O]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
+ [GPT_45]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
  [GPT_4O_MINI]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- audio: 'gpt-4o-mini-audio-preview',
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
- },
- [GPT_4O]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 20000000,
- tokenLimitsTPM: 2000000,
- audio: 'gpt-4o-audio-preview',
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
+ ...OPENAI_S1, ...OPENAI_RULES, ...OPENAI_AUDIO_TYPES,
+ audio: 'gpt-4o-mini-audio-preview', fast: true,
  },
  [GPT_O1]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 200000000,
- tokenLimitsTPM: 2000000,
- json: true,
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
  reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, gif, webp,
- ],
- trainingData: 'Oct 2023',
  },
  [GPT_O3_MINI]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- fast: true,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
- },
- [GPT_45]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 100000000,
- tokenLimitsTPM: 1000000,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
+ fast: true, reasoning: true,
  },
  [GEMINI_20_FLASH]: {
- // https://ai.google.dev/gemini-api/docs/models/gemini
- // https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts?hl=en#gemini-send-multimodal-samples-pdf-nodejs
- // Audio / Video Comming Soon: https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash
- audioCostTokens: 1000000, // 8.4 hours => 1 million tokens
- contextWindow: 1048576,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxAudioLength: 60 * 60 * 8.4, // 9.5 hours
- maxAudioPerPrompt: 1,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- maxVideoLength: 60 * 50, // 50 minutes
- maxVideoLengthWithAudio: 60 * 50, // 50 minutes
- maxVideoLengthWithoutAudio: 60 * 60, // 1 hour
- maxVideoPerPrompt: 10,
- requestLimitsRPD: 1500,
- requestLimitsRPM: 2000,
- tokenLimitsTPM: 4 * 1000000,
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(8),
+ fast: true, json: true, tools: true,
  },
  [GEMINI_20_FLASH_THINKING]: {
- // https://cloud.google.com/vertex-ai/generative-ai/docs/thinking-mode?hl=en
- contextWindow: 1024 * (8 + 32),
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
  reasoning: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
- trainingData: 'August 2024',
  },
  [GEMINI_20_PRO]: {
- contextWindow: 2097152,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(2), maxOutputTokens: k(8),
  json: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
  },
  [GEMMA_3_27B]: {
- contextWindow: 128 * 1000,
- imageCostTokens: 256,
- maxImageSize: 896 * 896,
- maxOutputTokens: 1024 * 8,
- fast: true,
- json: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
+ contextWindow: kT(128), maxOutputTokens: k(8),
+ imageCostTokens: 256, maxImageSize: 896 * 896,
+ supportedMimeTypes: [png, jpeg, gif],
+ fast: true, json: true, vision: true,
  },
  [DEEPSEEK_R1]: {
- contextWindow: 128 * 1000,
- maxOutputTokens: 32768,
- requestLimitsRPM: Infinity,
- tokenLimitsTPM: Infinity,
+ contextWindow: kT(128), maxOutputTokens: k(32),
  reasoning: true,
- },
- [TEXT_EMBEDDING_3_SMALL]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 1536,
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [TEXT_EMBEDDING_3_LARGE]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 3072, // ERROR: column cannot have more than 2000 dimensions for hnsw index
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [GEMINI_EMBEDDING_M]: {
- contextWindow: 1024 * 8,
- outputDimension: 1024 * 3, // ERROR: column cannot have more than 2000 dimensions for hnsw index
- embedding: true,
- },
- // https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden/claude-3-7-sonnet?authuser=5&inv=1&invt=Abqftg&project=backend-alpha-97077
- [CLOUD_37_SONNET]: {
- contextWindow: 200 * 1000,
- maxOutputTokens: 64 * 1000, // Should be 128 * 1000, but Anthropic SDK limits it to 64 * 1000
- imageCostTokens: size8k / 750,
- documentCostTokens: 3000 * 100, // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
- maxImagePerPrompt: 5, // https://docs.anthropic.com/en/docs/build-with-claude/vision
- maxImageSize: 1092, // by pixels
- maxDocumentPages: 100,
- maxDocumentFile: 1024 * 1024 * 32, // 32MB
- requestLimitsRPM: 50,
- tokenLimitsITPM: 40000,
- tokenLimitsOTPM: 8000,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
+ }, // ERROR: column cannot have more than 2000 dimensions for hnsw index
+ [TEXT_EMBEDDING_3_LARGE]: { ...OPENAI_EBD, dimension: k(3) },
+ [TEXT_EMBEDDING_3_SMALL]: { ...OPENAI_EBD, dimension: k(1.5) },
+ [GEMINI_EMBEDDING_M]: { ...EBD, maxInputTokens: k(8), dimension: k(3) },
+ [CLOUD_37_SONNET]: { // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
+ contextWindow: kT(200), maxOutputTokens: kT(64),
+ documentCostTokens: 3000 * 100, maxDocumentFile: m(32),
+ maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
+ maxImagePerPrompt: 100, maxImageSize: 2000 * 2000,
  supportedMimeTypes: [png, jpeg, gif, webp, pdf],
- trainingData: 'Apr 2024',
- },
- };
+ json: true, reasoning: true, tools: true, vision: true,
+ }, // https://docs.anthropic.com/en/docs/build-with-claude/vision

- MODELS[DEEPSEEK_R1_70B] = MODELS[DEEPSEEK_R1];
- MODELS[DEEPSEEK_R1_32B] = MODELS[DEEPSEEK_R1];
- MODELS[GEMMA327B] = MODELS[GEMMA_3_27B];
+ };

+ // Unify model configurations
+ let ATTACHMENT_TOKEN_COST = 0;
  for (const n in MODELS) {
  MODELS[n]['name'] = n;
- if (MODELS[n].embedding) {
- MODELS[n].maxInputTokens = MODELS[n].contextWindow;
- } else {
+ if (!MODELS[n].embedding) {
  MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
  MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
  || Math.ceil(MODELS[n].contextWindow * 0.4);
  MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
  || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
- MODELS[n].tokenLimitsTPD = MODELS[n].tokenLimitsTPD
- || (MODELS[n].tokenLimitsTPM * minsOfDay);
- MODELS[n].requestLimitsRPD = MODELS[n].requestLimitsRPD
- || (MODELS[n].requestLimitsRPM * minsOfDay);
- MODELS[n].requestCapacityRPM = Math.ceil(Math.min(
- MODELS[n].tokenLimitsTPM / MODELS[n].maxInputTokens,
- MODELS[n].requestLimitsRPM, MODELS[n].requestLimitsRPD / minsOfDay
- ));
+ ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
+ ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
+ ) : MODELS[n].imageCostTokens;
  }
  }
+ MODELS[GEMMA327B] = MODELS[GEMMA_3_27B]; // Ollama Alias
+ MODELS[GEMINI_20_FLASH].image = GEMINI_20_FLASH_EXP;
+ MODELS[GEMINI_20_FLASH_EXP] = {
+ ...MODELS[GEMINI_20_FLASH], image: true, tools: false,
+ };
+
+ // Default models for each provider
+ const DEFAULT_MODELS = {
+ [OPENAI]: GPT_4O,
+ [AZURE_OPENAI]: GPT_4O,
+ [GEMINI]: GEMINI_20_FLASH,
+ [ANTHROPIC]: CLOUD_37_SONNET,
+ [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
+ [OLLAMA]: GEMMA327B,
+ [OPENAI_VOICE]: NOVA,
+ [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
+ [GEMINI_EMEDDING]: GEMINI_EMBEDDING_M,
+ [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ };
+ DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
+
+ const tokenRatioByWords = Math.min(
+ 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
+ Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );

- const MAX_INPUT_TOKENS = MODELS[GPT_4O_MINI].maxInputTokens;
- const ATTACHMENT_TOKEN_COST = Math.max(MODELS[GPT_4O].imageCostTokens, 5000);
- const MAX_TRIM_TRY = MAX_INPUT_TOKENS / 1000;
+ const tokenRatioByCharacters = Math.max(
+ 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
+ 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );


  let tokeniser;
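
Note: the rewritten MODELS table composes each entry from the shared OPENAI_S1/OPENAI_S2, OPENAI_RULES, GEMINI_RULES, and audio bases via object spread; spreads merge left to right, so later keys override earlier ones. A sketch of what the GPT_4O entry resolves to under the constants above (an illustration, not additional code in the diff):

    // { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO } yields:
    // contextWindow: kT(128) = 128000, maxOutputTokens: k(16) = 16384,
    // supportedMimeTypes: [png, jpeg, gif, webp], json/tools/vision: true,
    // supportedAudioTypes: [wav], audio: 'gpt-4o-audio-preview'
    // Gemini image cost, worked out: v8k = 7680 * 4320 = 33177600 px, so
    // imageCostTokens = ~~(33177600 / (768 * 768) * 258) = 14512
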
@@ -629,7 +492,7 @@ const buildGptMessage = (content, options) => {
  const attachments = (options?.attachments || []).map(x => {
  assert(MODELS[options?.model], 'Model is required.');
  if (MODELS[options.model]?.supportedMimeTypes?.includes?.(x.mime_type)) {
- return { type: 'image_url', image_url: { url: x.url } };
+ return { type: 'image_url', image_url: { url: x.url, detail: 'high' } };
  } else if (MODELS[options.model]?.supportedAudioTypes?.includes?.(x.mime_type)) {
  alterModel = selectGptAudioModel(options);
  return {
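
Note: detail: 'high' pins OpenAI vision input to the hi-res path, which is what the new OPENAI_RULES cost formula budgets for. Worked out with this diff's constants (the rate of 170 tokens per 512 x 512 tile plus an 85-token base is OpenAI's documented high-detail pricing):

    // OPENAI_HI_RES_SIZE = 2000 * 768 = 1536000 px
    // imageCostTokens = ~~(1536000 / (512 * 512) * 170 + 85) = ~~1081.09 = 1081
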
@@ -719,12 +582,15 @@ const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
  // @todo: escape ``` in think and tools
  const packResp = async (resp, options) => {
  if (options?.raw) { return resp; }
- let [txt, audio, references, simpleText, referencesMarkdown, end, json] = [
- resp.text || '', // ChatGPT / Claude / Gemini / Ollama
- resp?.audio?.data, // ChatGPT audio mode
- resp?.references, // Gemini references
- '', '', '', null,
- ];
+ let [
+ txt, audio, references, simpleText, referencesMarkdown, end, json,
+ images
+ ] = [
+ resp.text || '', // ChatGPT / Claude / Gemini / Ollama
+ resp?.audio?.data, // ChatGPT audio mode
+ resp?.references, // Gemini references
+ '', '', '', null, resp?.images || [],
+ ];
  simpleText = txt;
  while ((end = getInfoEnd(simpleText))) {
  simpleText = simpleText.slice(end).trim();
@@ -741,9 +607,17 @@ const packResp = async (resp, options) => {
  ])) && (audio = await convert(audio, {
  input: BUFFER, expected: BUFFER, ...options || {},
  }));
+ if (images?.length) {
+ for (let i in images) {
+ images[i].data = await convert(images[i].data, {
+ input: BASE64, expected: BUFFER,
+ });
+ }
+ }
  options?.jsonMode && !options?.delta && (json = parseJson(simpleText, null));
  if (options?.simple && options?.audioMode) { return audio; }
  else if (options?.simple && options?.jsonMode) { return json; }
+ else if (options?.simple && options?.imageMode) { return images; }
  else if (options?.simple) { return simpleText; }
  else if (options?.jsonMode) { txt = `\`\`\`json\n${simpleText}\n\`\`\``; }
  // references debug codes:
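
Note: the new loop decodes each inline image from base64 into a Buffer via the library's convert helper. A self-contained equivalent in plain Node (illustrative only; the real helper also handles files, streams, and other encodings):

    // Equivalent of convert(data, { input: BASE64, expected: BUFFER }):
    const base64ToBuffer = data => Buffer.from(data, 'base64');
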
@@ -796,6 +670,7 @@ const packResp = async (resp, options) => {
  ...references ? { references } : {},
  ...referencesMarkdown ? { referencesMarkdown } : {},
  ...audio ? { audio, audioMimeType: options?.audioMimeType } : {},
+ ...images?.length ? { images } : {},
  processing: !!options?.processing,
  model: options?.model,
  };
@@ -828,7 +703,10 @@ const buildPrompts = async (model, input, options = {}) => {
  prompt = buildClaudeMessage(content, { ...options, cache_control: true });
  break;
  case GEMINI:
- const _role = { role: options.model === GEMMA_3_27B ? user : system };
+ const _role = {
+ role: [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)
+ ? user : system
+ };
  systemPrompt = buildGeminiHistory(options.systemPrompt, _role);
  prompt = options.toolsResult?.[options.toolsResult?.length - 1]?.parts
  || buildGeminiMessage(content, options)
@@ -884,9 +762,10 @@ const buildPrompts = async (model, input, options = {}) => {
  msgBuilder();
  } else {
  content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
- }
- }, model.maxInputTokens - options.attachments?.length * ATTACHMENT_TOKEN_COST);
- if ([OPENAI].includes(options.flavor) || options.model === GEMMA_3_27B) {
+ } // @todo: audioCostTokens (needs to calculate the audio length):
+ }, model.maxInputTokens - options.attachments?.length * model.imageCostTokens);
+ if ([OPENAI].includes(options.flavor)
+ || [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)) {
  systemPrompt = null;
  }
  return { systemPrompt, history, prompt };
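
Note: the input budget now charges attachments at the current model's own imageCostTokens instead of the removed global ATTACHMENT_TOKEN_COST. Worked out for GPT-4o with the values above (a sketch of the arithmetic, not code in the diff):

    // maxInputTokens = contextWindow - maxOutputTokens = 128000 - 16384 = 111616
    // with 2 attachments: 111616 - 2 * 1081 = 109454 tokens left for text
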
@@ -1073,7 +952,7 @@ const promptAnthropic = async (aiId, content, options = {}) => {
  = await buildPrompts(model, content, { ...options, flavor: ANTHROPIC });
  const resp = await client.beta.messages.create({
  model: options.model, ...history, system, stream: true,
- max_tokens: options.extendedThinking ? 128000 : model.maxOutputTokens,
+ max_tokens: options.extendedThinking ? kT(128) : model.maxOutputTokens,
  ...(options.reasoning ?? model.reasoning) ? {
  thinking: options.thinking || {
  type: 'enabled',
@@ -1163,8 +1042,10 @@ const deleteFile = async (aiId, file_id, options) => {

  const generationConfig = options => ({
  generationConfig: {
+ responseMimeType: options.jsonMode ? mimeJson : mimeText,
+ responseModalities: options.modalities
+ || (options.imageMode ? [TEXT, IMAGE] : undefined),
  ...options?.generationConfig || {},
- responseMimeType: options?.jsonMode ? mimeJson : mimeText,
  },
  });

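Note: responseMimeType and responseModalities now sit before the options.generationConfig spread, so a caller-supplied generationConfig can override both (previously responseMimeType came after the spread and always won). With imageMode set, the produced config is roughly, constants substituted (a sketch):

    // generationConfig: {
    //   responseMimeType: 'text/plain',        // mimeText
    //   responseModalities: ['text', 'image'], // [TEXT, IMAGE]
    // }
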
@@ -1182,43 +1063,61 @@ const packGeminiReferences = (chunks, supports) => {
  };

  const promptGemini = async (aiId, content, options = {}) => {
- const { client, model } = await getAi(aiId);
- let [result, references, functionCalls, responded]
- = [options.result ?? '', null, null, false];
+ let { client, model } = await getAi(aiId);
+ let [result, references, functionCalls, responded, images]
+ = [options.result ?? '', null, null, false, []];
  options.model = options.model || model.name;
+ assert(!(options.imageMode && !model.image), 'Image mode is not supported.');
+ if (String.isString(model.image)) {
+ options.model = model.image;
+ options.imageMode = true;
+ model = MODELS[options.model];
+ }
  const { systemPrompt: systemInstruction, history, prompt }
  = await buildPrompts(model, content, { ...options, flavor: GEMINI });
  const _client = client.getGenerativeModel({
  model: options.model, systemInstruction,
- ...model?.tools && !options.jsonMode ? (options.tools ?? {
- tools: [
- // @todo: Gemini will fail when using these tools together.
- // https://ai.google.dev/gemini-api/docs/function-calling
- // { codeExecution: {} },
- // { googleSearch: {} },
- { functionDeclarations: (await toolsGemini()).map(x => x.def) },
- ],
- toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
- }) : {},
+ ...model?.tools && !options.jsonMode
+ && options.model !== GEMINI_20_FLASH_EXP ? (options.tools ?? {
+ tools: [
+ // @todo: Gemini will fail when using these tools together.
+ // https://ai.google.dev/gemini-api/docs/function-calling
+ // { codeExecution: {} },
+ // { googleSearch: {} },
+ {
+ functionDeclarations: (
+ await toolsGemini()
+ ).map(x => x.def)
+ },
+ ],
+ toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
+ }) : {},
  });
  // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
  // Google's bug: history is not allowed while using inline_data?
  const chat = _client.startChat({ history, ...generationConfig(options) });
  const resp = await chat.sendMessageStream(prompt);
  for await (const chunk of resp.stream) {
- print(chunk);
+ const deltaImages = [];
+ chunk.candidates[0].content?.parts?.filter(
+ x => x?.inlineData?.mimeType === png
+ )?.map?.(x => {
+ deltaImages.push(x.inlineData);
+ images.push(x.inlineData);
+ });
  functionCalls || (functionCalls = chunk.functionCalls);
  const rfc = packGeminiReferences(
  chunk.candidates[0]?.groundingMetadata?.groundingChunks,
  chunk.candidates[0]?.groundingMetadata?.groundingSupports
  );
  rfc && (references = rfc);
- let delta = chunk?.text?.() || '';
- options.result && delta
- && (responded = responded || (delta = `\n\n${delta}`));
- result += delta;
- delta && await streamResp({
- text: options.delta ? delta : result,
+ let deltaText = chunk?.text?.() || '';
+ options.result && deltaText
+ && (responded = responded || (deltaText = `\n\n${deltaText}`));
+ result += deltaText;
+ (deltaText || deltaImages.length) && await streamResp({
+ text: options.delta ? deltaText : result,
+ images: options.delta ? deltaImages : images,
  }, options);
  }
  const _resp = await resp.response;
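
Note: with GEMINI_20_FLASH, model.image is the string 'gemini-2.0-flash-exp', so an imageMode call transparently swaps to that model (whose entry has tools disabled). A hypothetical usage sketch; aiId is whatever the init/getAi flow produced:

    const resp = await promptGemini(aiId, 'Draw a red circle on white.', {
        imageMode: true, // asserts model.image, then switches to GEMINI_20_FLASH_EXP
    });
    // resp.images => [{ mimeType: 'image/png', data: <Buffer> }, ...]
    // or pass { imageMode: true, simple: true } to get the images array directly
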
@@ -1237,7 +1136,7 @@ const promptGemini = async (aiId, content, options = {}) => {
  });
  }
  return await packResp({
- text: mergeMsgs(toolsResponse, toolsResult), references,
+ text: mergeMsgs(toolsResponse, toolsResult), images, references,
  }, options);
  };

@@ -1411,7 +1310,7 @@ const talk = async (input, options = {}) => {
  };
  };

- const getMaxChatPromptLimit = async (options) => {
+ const getChatPromptLimit = async (options) => {
  let resp = 0;
  (await getAi(null, { all: true })).map(x => {
  if (options?.aiId && options?.aiId !== x.id) { return; }
@@ -1419,7 +1318,17 @@ const getMaxChatPromptLimit = async (options) => {
  resp = resp ? Math.min(resp, maxInputTokens) : maxInputTokens;
  });
  assert(resp > 0, 'Chat engine has not been initialized.');
- return options?.raw ? resp : Math.min(resp, MAX_INPUT_TOKENS);
+ return resp;
+ };
+
+ const getChatAttachmentCost = async (options) => {
+ let resp = 0;
+ (await getAi(null, { all: true })).map(x => {
+ if (options?.aiId && options?.aiId !== x.id) { return; }
+ resp = Math.max(resp, x.model.imageCostTokens || 0);
+ });
+ assert(resp > 0, 'Chat engine has not been initialized.');
+ return resp;
  };

  const distillFile = async (attachments, o) => {
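
Note: getChatPromptLimit (renamed from getMaxChatPromptLimit) now returns the raw minimum maxInputTokens across initialized engines, with no MAX_INPUT_TOKENS clamp or raw option, and the new getChatAttachmentCost returns the largest per-image token cost among them. A usage sketch, assuming the chat engines have been initialized and attachments is the caller's list:

    const limit = await getChatPromptLimit();       // min maxInputTokens across engines
    const perImage = await getChatAttachmentCost(); // max imageCostTokens across engines
    const textBudget = limit - attachments.length * perImage;
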
@@ -1465,7 +1374,7 @@ const prompt = async (input, options = {}) => {
  };

  const trimPrompt = async (getPrompt, trimFunc, contextWindow, options) => {
- let [i, maxTry] = [0, ~~options?.maxTry || MAX_TRIM_TRY];
+ let [i, maxTry] = [0, ~~options?.maxTry || kT(128)];
  while ((await countTokens(await getPrompt(), { fast: true }) > contextWindow)
  || (await countTokens(await getPrompt()) > contextWindow)) {
  await trimFunc();
@@ -1515,38 +1424,54 @@ const analyzeSessions = async (sessionIds, options) => {

  export default init;
  export {
- ATTACHMENT_TOKEN_COST, CLOUD_37_SONNET, CODE_INTERPRETER, DEEPSEEK_R1,
- DEEPSEEK_R1_32B, DEEPSEEK_R1_70B, DEFAULT_MODELS,
- FUNCTION, GEMINI_20_FLASH, GEMINI_20_FLASH_THINKING,
- GPT_45, GPT_4O, GPT_4O_MINI, GPT_O1, GPT_O3_MINI, INSTRUCTIONS, MODELS,
- OPENAI_VOICE, RETRIEVAL,
- TEXT_EMBEDDING_3_SMALL, _NEED, analyzeSessions,
+ _NEED,
+ CLOUD_37_SONNET,
+ CODE_INTERPRETER,
+ DEEPSEEK_R1,
+ DEFAULT_MODELS,
+ FUNCTION,
+ GEMINI_20_FLASH_THINKING,
+ GEMINI_20_FLASH,
+ GPT_45,
+ GPT_4O_MINI,
+ GPT_4O,
+ GPT_O1,
+ GPT_O3_MINI,
+ INSTRUCTIONS,
+ MODELS,
+ OPENAI_VOICE,
+ RETRIEVAL,
+ analyzeSessions,
  buildGptTrainingCase,
  buildGptTrainingCases,
  cancelGptFineTuningJob,
  countTokens,
- createGeminiEmbedding, createGptFineTuningJob,
+ createGeminiEmbedding,
+ createGptFineTuningJob,
  createOpenAIEmbedding,
  deleteFile,
  distillFile,
+ getAi,
+ getChatAttachmentCost,
+ getChatPromptLimit,
  getGptFineTuningJob,
- getMaxChatPromptLimit,
  getSession,
  init,
  initChat,
  jpeg,
- getAi,
  listFiles,
  listGptFineTuningEvents,
  listGptFineTuningJobs,
  listOpenAIModels,
  ogg,
- prompt, promptOpenAI,
+ prompt,
  promptAnthropic,
  promptGemini,
+ promptOpenAI,
  resetSession,
  tailGptFineTuningEvents,
  talk,
+ TEXT_EMBEDDING_3_SMALL,
  trimPrompt,
  uploadFile,
  uploadFileForFineTuning,
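
Note: the export list is reflowed to one identifier per line in case-insensitive order. Downstream code must track the rename and the removals (ATTACHMENT_TOKEN_COST, DEEPSEEK_R1_32B, and DEEPSEEK_R1_70B are no longer exported). A sketch of the consumer-side change; the relative specifier is an assumption about how this module is imported:

    // before: import { getMaxChatPromptLimit } from './lib/alan.mjs';
    import { getChatPromptLimit, getChatAttachmentCost } from './lib/alan.mjs';
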