utilitas 1999.1.6 → 1999.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -45,37 +45,6 @@ const _NEED = [
  'js-tiktoken', 'OpenAI',
  ];
 
- const [
- OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING,
- OLLAMA, GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
- GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, EMBEDDING_001, DEEPSEEK_R1,
- DEEPSEEK_R1_70B, DEEPSEEK_R1_32B, MD_CODE, TEXT_EMBEDDING_3_SMALL,
- TEXT_EMBEDDING_3_LARGE, CLOUD_37_SONNET, AUDIO, WAV, ATTACHMENTS, CHAT,
- OPENAI_VOICE, MEDIUM, LOW, HIGH, GPT_REASONING_EFFORT, THINK, THINK_STR,
- THINK_END, AZURE, TOOLS_STR, TOOLS_END, TOOLS, TEXT, THINKING, OK, FUNC,
- GPT_45, REDACTED_THINKING, GEMMA_3_27B, AZURE_OPENAI, ANTHROPIC,
- VERTEX_ANTHROPIC, GEMMA327B, size8k, ais, MAX_TOOL_RECURSION, LOG, name,
- user, system, assistant, MODEL, JSON_OBJECT, TOOL, silent, NOT_INIT,
- INVALID_FILE, tokenSafeRatio, GPT_QUERY_LIMIT, minsOfDay,
- CONTENT_IS_REQUIRED,
- ] = [
- 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
- 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
- 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
- 'gemini-2.0-pro-exp', 'nova', 'embedding-001', 'deepseek-r1',
- 'deepseek-r1:70b', 'deepseek-r1:32b', '```', 'text-embedding-3-small',
- 'text-embedding-3-large', 'claude-3-7-sonnet@20250219', 'audio', 'wav',
- '[ATTACHMENTS]', 'CHAT', 'OPENAI_VOICE', 'medium', 'low', 'high',
- 'medium', 'think', '<think>', '</think>', 'AZURE', '<tools>',
- '</tools>', 'tools', 'text', 'thinking', 'OK', 'function',
- 'gpt-4.5-preview', 'redacted_thinking', 'gemma-3-27b-it',
- 'Azure Openai', 'Anthropic', 'Vertex Anthropic', 'gemma3:27b',
- 7680 * 4320, [], 10, { log: true }, 'Alan', 'user', 'system',
- 'assistant', 'model', 'json_object', 'tool', true,
- 'AI engine has not been initialized.', 'Invalid file data.', 1.1, 100,
- 60 * 24, 'Content is required.',
- ];
-
  const [
  png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, gif, webp, pdf, aac,
  flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp, mimeJson, mimeText, pcm16,
@@ -89,6 +58,39 @@ const [
  'text/plain', 'audio/x-wav', 'audio/ogg',
  ];
 
+ const [
+ OPENAI, GEMINI, OPENAI_EMBEDDING, GEMINI_EMEDDING, OPENAI_TRAINING, OLLAMA,
+ GPT_4O_MINI, GPT_4O, GPT_O1, GPT_O3_MINI, GEMINI_20_FLASH,
+ GEMINI_20_FLASH_THINKING, GEMINI_20_PRO, NOVA, DEEPSEEK_R1, MD_CODE,
+ TEXT_EMBEDDING_3_SMALL, TEXT_EMBEDDING_3_LARGE, CLOUD_37_SONNET, AUDIO, WAV,
+ ATTACHMENTS, CHAT, OPENAI_VOICE, MEDIUM, LOW, HIGH, GPT_REASONING_EFFORT,
+ THINK, THINK_STR, THINK_END, AZURE, TOOLS_STR, TOOLS_END, TOOLS, TEXT,
+ THINKING, OK, FUNC, GPT_45, REDACTED_THINKING, GEMMA_3_27B, AZURE_OPENAI,
+ ANTHROPIC, VERTEX_ANTHROPIC, GEMMA327B, v8k, ais, MAX_TOOL_RECURSION, LOG,
+ name, user, system, assistant, MODEL, JSON_OBJECT, TOOL, silent,
+ GEMINI_EMBEDDING_M, INVALID_FILE, tokenSafeRatio, GPT_QUERY_LIMIT,
+ CONTENT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, kT, m, minute, hour,
+ gb, trimTailing, EBD, GEMINI_20_FLASH_EXP, IMAGE
+ ] = [
+ 'OpenAI', 'Gemini', 'OPENAI_EMBEDDING', 'GEMINI_EMEDDING',
+ 'OPENAI_TRAINING', 'Ollama', 'gpt-4o-mini', 'gpt-4o', 'o1', 'o3-mini',
+ 'gemini-2.0-flash', 'gemini-2.0-flash-thinking-exp',
+ 'gemini-2.0-pro-exp', 'nova', 'deepseek-r1', '```',
+ 'text-embedding-3-small', 'text-embedding-3-large',
+ 'claude-3-7-sonnet@20250219', 'audio', 'wav', '[ATTACHMENTS]', 'CHAT',
+ 'OPENAI_VOICE', 'medium', 'low', 'high', 'medium', 'think', '<think>',
+ '</think>', 'AZURE', '<tools>', '</tools>', 'tools', 'text', 'thinking',
+ 'OK', 'function', 'gpt-4.5-preview', 'redacted_thinking',
+ 'gemma-3-27b-it', 'Azure Openai', 'Anthropic', 'Vertex Anthropic',
+ 'gemma3:27b', 7680 * 4320, [], 10, { log: true }, 'Alan', 'user',
+ 'system', 'assistant', 'model', 'json_object', 'tool', true,
+ 'gemini-embedding-exp-03-07', 'Invalid file data.', 1.1, 100,
+ 'Content is required.', 2000 * 768, x => 1024 * x, x => 1000 * x,
+ x => 1024 * 1024 * x, x => 60 * x, x => 60 * 60 * x,
+ x => 1024 * 1024 * 1024 * x, x => x.replace(/[\.\s]*$/, ''),
+ { embedding: true }, 'gemini-2.0-flash-exp', 'image',
+ ];
+
  const [tool, messages, text]
  = [type => ({ type }), messages => ({ messages }), text => ({ text })];
  const [CODE_INTERPRETER, RETRIEVAL, FUNCTION]
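
Note: the rewritten destructuring above folds a set of unit helpers (`k`, `kT`, `m`, `minute`, `hour`, `gb`) and the relocated `trimTailing` into the constants list; these replace the hand-written numeric literals used throughout the old model table. A quick sketch of what they expand to, taken directly from the definitions in this hunk:

```js
// Unit helpers as destructured above, with a few spot checks:
const k = x => 1024 * x;                 // binary "kilo"
const kT = x => 1000 * x;                // decimal "kilo", used for token counts
const m = x => 1024 * 1024 * x;          // MiB
const minute = x => 60 * x;              // seconds per minute
const hour = x => 60 * 60 * x;           // seconds per hour
const gb = x => 1024 * 1024 * 1024 * x;  // GiB

kT(128);    // 128000     -> contextWindow of the 128k models
k(16);      // 16384      -> maxOutputTokens of GPT-4o
m(1);       // 1048576    -> reproduces Gemini Flash's old literal contextWindow
m(20);      // 20971520   -> the 20 MB maxFileSize limits
hour(8.4);  // 30240      -> Gemini's maxAudioLength in seconds
gb(2);      // 2147483648 -> Gemini's maxUrlSize
```
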
@@ -99,7 +101,6 @@ const [newSessionId, newAiId]
  = [sessionType, aiType].map(type => () => createUoid({ type }));
  const chatConfig = { sessions: new Map(), systemPrompt: INSTRUCTIONS };
  const tokenSafe = count => Math.ceil(count * tokenSafeRatio);
- const trimTailing = text => text.replace(/[\.\s]*$/, '');
  const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
  const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
  const assertContent = content => assert(content.length, CONTENT_IS_REQUIRED);
@@ -108,267 +109,128 @@ const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
  const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
  const AzureOpenAI = async opts => new (await libOpenAi(opts)).AzureOpenAI(opts);
-
- const DEFAULT_MODELS = {
- [OPENAI]: GPT_4O,
- [AZURE_OPENAI]: GPT_4O,
- [GEMINI]: GEMINI_20_FLASH,
- [ANTHROPIC]: CLOUD_37_SONNET,
- [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
- [OLLAMA]: GEMMA327B,
- [OPENAI_VOICE]: NOVA,
- [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
- [GEMINI_EMEDDING]: EMBEDDING_001,
- [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ const OPENAI_S1 = { contextWindow: kT(128), maxOutputTokens: k(16) };
+ const OPENAI_S2 = { contextWindow: kT(200), maxOutputTokens: kT(100) };
+ const OPENAI_EBD = { ...EBD, maxInputTokens: k(8) - 1 };
+ const OPENAI_AUDIO_TYPES = { supportedAudioTypes: [wav] };
+ const GPT_4O_AUDIO = { ...OPENAI_AUDIO_TYPES, audio: 'gpt-4o-audio-preview' };
+
+ const OPENAI_RULES = {
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / (512 * 512) * 170 + 85),
+ maxFileSize: m(20), maxImageSize: OPENAI_HI_RES_SIZE,
+ supportedMimeTypes: [png, jpeg, gif, webp],
+ json: true, tools: true, vision: true,
  };
 
- DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
-
- const tokenRatioByWords = Math.min(
- 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
- Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
-
- const tokenRatioByCharacters = Math.max(
- 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
- 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
+ const GEMINI_RULES = {
+ audioCostTokens: 1000 * 1000 * 1, // 8.4 hours => 1 million tokens
+ imageCostTokens: ~~(v8k / (768 * 768) * 258), maxAudioLength: hour(8.4),
+ maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
+ maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
+ maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
+ png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
+ flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
+ ],
+ };
 
- // https://platform.openai.com/docs/models/continuous-model-upgrades
- // https://platform.openai.com/settings/organization/limits // Tier 3
- // https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini
+ // https://platform.openai.com/docs/models
  // https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
  const MODELS = {
+ [GPT_4O]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
+ [GPT_45]: { ...OPENAI_S1, ...OPENAI_RULES, ...GPT_4O_AUDIO },
  [GPT_4O_MINI]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- audio: 'gpt-4o-mini-audio-preview',
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
- },
- [GPT_4O]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 20000000,
- tokenLimitsTPM: 2000000,
- audio: 'gpt-4o-audio-preview',
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- supportedAudioTypes: [wav],
- trainingData: 'Oct 2023',
+ ...OPENAI_S1, ...OPENAI_RULES, ...OPENAI_AUDIO_TYPES,
+ audio: 'gpt-4o-mini-audio-preview', fast: true,
  },
  [GPT_O1]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 200000000,
- tokenLimitsTPM: 2000000,
- json: true,
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
  reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, gif, webp,
- ],
- trainingData: 'Oct 2023',
  },
  [GPT_O3_MINI]: {
- contextWindow: 200000,
- imageCostTokens: 1105,
- maxOutputTokens: 100000,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 1000000000,
- tokenLimitsTPM: 10000000,
- fast: true,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
- },
- [GPT_45]: {
- contextWindow: 128000,
- imageCostTokens: 1105,
- maxOutputTokens: 16384,
- requestLimitsRPM: 10000,
- tokenLimitsTPD: 100000000,
- tokenLimitsTPM: 1000000,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [png, jpeg, gif, webp],
- trainingData: 'Oct 2023',
+ ...OPENAI_S2, ...OPENAI_RULES, ...GPT_4O_AUDIO,
+ fast: true, reasoning: true,
  },
  [GEMINI_20_FLASH]: {
- // https://ai.google.dev/gemini-api/docs/models/gemini
- // https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts?hl=en#gemini-send-multimodal-samples-pdf-nodejs
- // Audio / Video Comming Soon: https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash
- audioCostTokens: 1000000, // 8.4 hours => 1 million tokens
- contextWindow: 1048576,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxAudioLength: 60 * 60 * 8.4, // 9.5 hours
- maxAudioPerPrompt: 1,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- maxVideoLength: 60 * 50, // 50 minutes
- maxVideoLengthWithAudio: 60 * 50, // 50 minutes
- maxVideoLengthWithoutAudio: 60 * 60, // 1 hour
- maxVideoPerPrompt: 10,
- requestLimitsRPD: 1500,
- requestLimitsRPM: 2000,
- tokenLimitsTPM: 4 * 1000000,
- fast: true,
- json: true,
- tools: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(8),
+ fast: true, json: true, tools: true,
  },
  [GEMINI_20_FLASH_THINKING]: {
- // https://cloud.google.com/vertex-ai/generative-ai/docs/thinking-mode?hl=en
- contextWindow: 1024 * (8 + 32),
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
  reasoning: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
- trainingData: 'August 2024',
  },
  [GEMINI_20_PRO]: {
- contextWindow: 2097152,
- imageCostTokens: size8k / (768 * 768) * 258,
- maxFileSize: 20 * 1024 * 1024, // 20 MB
- maxImagePerPrompt: 3000,
- maxImageSize: Infinity,
- maxOutputTokens: 1024 * 8,
- maxUrlSize: 1024 * 1024 * 1024 * 2, // 2 GB
- requestLimitsRPM: 1000,
- requestLimitsRPD: 1500,
- tokenLimitsTPM: 4 * 1000000,
+ ...GEMINI_RULES, contextWindow: m(2), maxOutputTokens: k(8),
  json: true,
- vision: true,
- supportedMimeTypes: [
- png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, pdf, aac,
- flac, mp3, m4a, mpga, opus, pcm, wav, webm, tgpp,
- ],
- trainingData: 'August 2024',
  },
  [GEMMA_3_27B]: {
- contextWindow: 128 * 1000,
- imageCostTokens: 256,
- maxImageSize: 896 * 896,
- maxOutputTokens: 1024 * 8,
- fast: true,
- json: true,
- vision: true,
- supportedMimeTypes: [png, jpeg],
+ contextWindow: kT(128), maxOutputTokens: k(8),
+ imageCostTokens: 256, maxImageSize: 896 * 896,
+ supportedMimeTypes: [png, jpeg, gif],
+ fast: true, json: true, vision: true,
  },
  [DEEPSEEK_R1]: {
- contextWindow: 128 * 1000,
- maxOutputTokens: 32768,
- requestLimitsRPM: Infinity,
- tokenLimitsTPM: Infinity,
+ contextWindow: kT(128), maxOutputTokens: k(32),
  reasoning: true,
- },
- [TEXT_EMBEDDING_3_SMALL]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 1536,
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [TEXT_EMBEDDING_3_LARGE]: {
- contextWindow: 8191,
- embedding: true,
- outputDimension: 3072, // ERROR: column cannot have more than 2000 dimensions for hnsw index
- requestLimitsRPM: 500,
- tokenLimitsTPM: 1000000,
- trainingData: 'Sep 2021',
- },
- [EMBEDDING_001]: { // https://ai.google.dev/pricing#text-embedding004 FREE!
- contextWindow: 3072,
- embedding: true,
- requestLimitsRPM: 1500,
- },
- // https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden/claude-3-7-sonnet?authuser=5&inv=1&invt=Abqftg&project=backend-alpha-97077
- [CLOUD_37_SONNET]: {
- contextWindow: 200 * 1000,
- maxOutputTokens: 64 * 1000, // Should be 128 * 1000, but Anthropic SDK limits it to 64 * 1000
- imageCostTokens: size8k / 750,
- documentCostTokens: 3000 * 100, // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
- maxImagePerPrompt: 5, // https://docs.anthropic.com/en/docs/build-with-claude/vision
- maxImageSize: 1092, // by pixels
- maxDocumentPages: 100,
- maxDocumentFile: 1024 * 1024 * 32, // 32MB
- requestLimitsRPM: 50,
- tokenLimitsITPM: 40000,
- tokenLimitsOTPM: 8000,
- json: true,
- reasoning: true,
- tools: true,
- vision: true,
+ }, // ERROR: column cannot have more than 2000 dimensions for hnsw index
+ [TEXT_EMBEDDING_3_LARGE]: { ...OPENAI_EBD, dimension: k(3) },
+ [TEXT_EMBEDDING_3_SMALL]: { ...OPENAI_EBD, dimension: k(1.5) },
+ [GEMINI_EMBEDDING_M]: { ...EBD, maxInputTokens: k(8), dimension: k(3) },
+ [CLOUD_37_SONNET]: { // 100 pages: https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
+ contextWindow: kT(200), maxOutputTokens: kT(64),
+ documentCostTokens: 3000 * 100, maxDocumentFile: m(32),
+ maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
+ maxImagePerPrompt: 100, maxImageSize: 2000 * 2000,
  supportedMimeTypes: [png, jpeg, gif, webp, pdf],
- trainingData: 'Apr 2024',
- },
- };
+ json: true, reasoning: true, tools: true, vision: true,
+ }, // https://docs.anthropic.com/en/docs/build-with-claude/vision
 
- MODELS[DEEPSEEK_R1_70B] = MODELS[DEEPSEEK_R1];
- MODELS[DEEPSEEK_R1_32B] = MODELS[DEEPSEEK_R1];
- MODELS[GEMMA327B] = MODELS[GEMMA_3_27B];
+ };
 
+ // Unify model configurations
+ let ATTACHMENT_TOKEN_COST = 0;
  for (const n in MODELS) {
  MODELS[n]['name'] = n;
- if (MODELS[n].embedding) {
- MODELS[n].maxInputTokens = MODELS[n].contextWindow;
- } else {
+ if (!MODELS[n].embedding) {
  MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
  MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
  || Math.ceil(MODELS[n].contextWindow * 0.4);
  MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
  || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
- MODELS[n].tokenLimitsTPD = MODELS[n].tokenLimitsTPD
- || (MODELS[n].tokenLimitsTPM * minsOfDay);
- MODELS[n].requestLimitsRPD = MODELS[n].requestLimitsRPD
- || (MODELS[n].requestLimitsRPM * minsOfDay);
- MODELS[n].requestCapacityRPM = Math.ceil(Math.min(
- MODELS[n].tokenLimitsTPM / MODELS[n].maxInputTokens,
- MODELS[n].requestLimitsRPM, MODELS[n].requestLimitsRPD / minsOfDay
- ));
+ ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
+ ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
+ ) : MODELS[n].imageCostTokens;
  }
  }
+ MODELS[GEMMA327B] = MODELS[GEMMA_3_27B]; // Ollama Alias
+ MODELS[GEMINI_20_FLASH].image = GEMINI_20_FLASH_EXP;
+ MODELS[GEMINI_20_FLASH_EXP] = {
+ ...MODELS[GEMINI_20_FLASH], image: true, tools: false,
+ };
 
- const MAX_INPUT_TOKENS = MODELS[GPT_4O_MINI].maxInputTokens;
- const ATTACHMENT_TOKEN_COST = Math.max(MODELS[GPT_4O].imageCostTokens, 5000);
- const MAX_TRIM_TRY = MAX_INPUT_TOKENS / 1000;
+ // Default models for each provider
+ const DEFAULT_MODELS = {
+ [OPENAI]: GPT_4O,
+ [AZURE_OPENAI]: GPT_4O,
+ [GEMINI]: GEMINI_20_FLASH,
+ [ANTHROPIC]: CLOUD_37_SONNET,
+ [VERTEX_ANTHROPIC]: CLOUD_37_SONNET,
+ [OLLAMA]: GEMMA327B,
+ [OPENAI_VOICE]: NOVA,
+ [OPENAI_EMBEDDING]: TEXT_EMBEDDING_3_SMALL,
+ [GEMINI_EMEDDING]: GEMINI_EMBEDDING_M,
+ [OPENAI_TRAINING]: GPT_4O_MINI, // https://platform.openai.com/docs/guides/fine-tuning
+ };
+ DEFAULT_MODELS[CHAT] = DEFAULT_MODELS[GEMINI];
+
+ const tokenRatioByWords = Math.min(
+ 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
+ Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );
+
+ const tokenRatioByCharacters = Math.max(
+ 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
+ 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
+ );
 
 
  let tokeniser;
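
Note: the rewritten model table derives per-image token costs from shared pixel constants instead of hard-coding them, and the normalization loop now tracks the largest `imageCostTokens` as `ATTACHMENT_TOKEN_COST`. A worked sketch of the arithmetic, using `OPENAI_HI_RES_SIZE = 2000 * 768` and `v8k = 7680 * 4320` from the destructuring above:

```js
// OpenAI: high-detail vision pricing, 170 tokens per 512x512 tile + 85 base tokens.
const OPENAI_HI_RES_SIZE = 2000 * 768;              // 1,536,000 px
~~(OPENAI_HI_RES_SIZE / (512 * 512) * 170 + 85);    // ~~(5.86 * 170 + 85) = 1081

// Gemini: 258 tokens per 768x768 tile, budgeted for a full 8K frame.
const v8k = 7680 * 4320;                            // 33,177,600 px
~~(v8k / (768 * 768) * 258);                        // ~~(56.25 * 258) = 14512

// For non-embedding models the loop fills in missing limits: maxOutputTokens
// defaults to 40% of the context window, and maxInputTokens is whatever the
// window leaves over. For GPT-4o (contextWindow kT(128), maxOutputTokens k(16)):
128000 - 16384;                                     // maxInputTokens = 111616
```
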
@@ -630,7 +492,7 @@ const buildGptMessage = (content, options) => {
  const attachments = (options?.attachments || []).map(x => {
  assert(MODELS[options?.model], 'Model is required.');
  if (MODELS[options.model]?.supportedMimeTypes?.includes?.(x.mime_type)) {
- return { type: 'image_url', image_url: { url: x.url } };
+ return { type: 'image_url', image_url: { url: x.url, detail: 'high' } };
  } else if (MODELS[options.model]?.supportedAudioTypes?.includes?.(x.mime_type)) {
  alterModel = selectGptAudioModel(options);
  return {
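
Note: the only change here pins image attachments to OpenAI's high-detail setting. The resulting content part, in OpenAI's chat-completions image-input shape (the URL is illustrative), is:

```js
// detail: 'high' opts into tiled high-resolution processing, which is what
// the OPENAI_RULES imageCostTokens formula above budgets for.
({ type: 'image_url', image_url: { url: 'https://example.com/cat.png', detail: 'high' } });
```
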
@@ -720,12 +582,15 @@ const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
  // @todo: escape ``` in think and tools
  const packResp = async (resp, options) => {
  if (options?.raw) { return resp; }
- let [txt, audio, references, simpleText, referencesMarkdown, end, json] = [
- resp.text || '', // ChatGPT / Claude / Gemini / Ollama
- resp?.audio?.data, // ChatGPT audio mode
- resp?.references, // Gemini references
- '', '', '', null,
- ];
+ let [
+ txt, audio, references, simpleText, referencesMarkdown, end, json,
+ images
+ ] = [
+ resp.text || '', // ChatGPT / Claude / Gemini / Ollama
+ resp?.audio?.data, // ChatGPT audio mode
+ resp?.references, // Gemini references
+ '', '', '', null, resp?.images || [],
+ ];
  simpleText = txt;
  while ((end = getInfoEnd(simpleText))) {
  simpleText = simpleText.slice(end).trim();
@@ -742,9 +607,17 @@ const packResp = async (resp, options) => {
  ])) && (audio = await convert(audio, {
  input: BUFFER, expected: BUFFER, ...options || {},
  }));
+ if (images?.length) {
+ for (let i in images) {
+ images[i].data = await convert(images[i].data, {
+ input: BASE64, expected: BUFFER,
+ });
+ }
+ }
  options?.jsonMode && !options?.delta && (json = parseJson(simpleText, null));
  if (options?.simple && options?.audioMode) { return audio; }
  else if (options?.simple && options?.jsonMode) { return json; }
+ else if (options?.simple && options?.imageMode) { return images; }
  else if (options?.simple) { return simpleText; }
  else if (options?.jsonMode) { txt = `\`\`\`json\n${simpleText}\n\`\`\``; }
  // references debug codes:
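
Note: `packResp` now normalizes generated images, converting each base64 `data` payload to a Buffer before returning. A hedged consumer sketch; the entry shape is assumed from the Gemini `inlineData` parts collected in `promptGemini` below:

```js
import { promises as fs } from 'fs';

// Illustrative only: persist images returned by packResp / promptGemini.
// Each img is assumed to look like { mimeType: 'image/png', data: <Buffer> }.
const saveImages = async images => await Promise.all((images || []).map(
    (img, i) => fs.writeFile(`image-${i}.png`, img.data)
));
```
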
@@ -797,6 +670,7 @@ const packResp = async (resp, options) => {
  ...references ? { references } : {},
  ...referencesMarkdown ? { referencesMarkdown } : {},
  ...audio ? { audio, audioMimeType: options?.audioMimeType } : {},
+ ...images?.length ? { images } : {},
  processing: !!options?.processing,
  model: options?.model,
  };
@@ -829,7 +703,10 @@ const buildPrompts = async (model, input, options = {}) => {
  prompt = buildClaudeMessage(content, { ...options, cache_control: true });
  break;
  case GEMINI:
- const _role = { role: options.model === GEMMA_3_27B ? user : system };
+ const _role = {
+ role: [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)
+ ? user : system
+ };
  systemPrompt = buildGeminiHistory(options.systemPrompt, _role);
  prompt = options.toolsResult?.[options.toolsResult?.length - 1]?.parts
  || buildGeminiMessage(content, options)
@@ -885,9 +762,10 @@ const buildPrompts = async (model, input, options = {}) => {
  msgBuilder();
  } else {
  content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
- }
- }, model.maxInputTokens - options.attachments?.length * ATTACHMENT_TOKEN_COST);
- if ([OPENAI].includes(options.flavor) || options.model === GEMMA_3_27B) {
+ } // @todo: audioCostTokens (needs to calculate the audio length):
+ }, model.maxInputTokens - options.attachments?.length * model.imageCostTokens);
+ if ([OPENAI].includes(options.flavor)
+ || [GEMINI_20_FLASH_EXP, GEMMA_3_27B].includes(options.model)) {
  systemPrompt = null;
  }
  return { systemPrompt, history, prompt };
@@ -1074,7 +952,7 @@ const promptAnthropic = async (aiId, content, options = {}) => {
  = await buildPrompts(model, content, { ...options, flavor: ANTHROPIC });
  const resp = await client.beta.messages.create({
  model: options.model, ...history, system, stream: true,
- max_tokens: options.extendedThinking ? 128000 : model.maxOutputTokens,
+ max_tokens: options.extendedThinking ? kT(128) : model.maxOutputTokens,
  ...(options.reasoning ?? model.reasoning) ? {
  thinking: options.thinking || {
  type: 'enabled',
@@ -1164,8 +1042,10 @@ const deleteFile = async (aiId, file_id, options) => {
 
  const generationConfig = options => ({
  generationConfig: {
+ responseMimeType: options.jsonMode ? mimeJson : mimeText,
+ responseModalities: options.modalities
+ || (options.imageMode ? [TEXT, IMAGE] : undefined),
  ...options?.generationConfig || {},
- responseMimeType: options?.jsonMode ? mimeJson : mimeText,
  },
  });
 
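Note: two things change in `generationConfig`: it now emits `responseModalities` so Gemini can return images, and the caller-supplied `generationConfig` is spread after the defaults, so it can now override `responseMimeType` rather than being overridden by it. Assuming `mimeText` is `'text/plain'` and `TEXT`/`IMAGE` are `'text'`/`'image'` from the destructuring above, image mode produces roughly:

```js
generationConfig({ imageMode: true });
// => {
//     generationConfig: {
//         responseMimeType: 'text/plain',
//         responseModalities: ['text', 'image'],
//     }
// }
```
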
@@ -1183,42 +1063,61 @@ const packGeminiReferences = (chunks, supports) => {
  };
 
  const promptGemini = async (aiId, content, options = {}) => {
- const { client, model } = await getAi(aiId);
- let [result, references, functionCalls, responded]
- = [options.result ?? '', null, null, false];
+ let { client, model } = await getAi(aiId);
+ let [result, references, functionCalls, responded, images]
+ = [options.result ?? '', null, null, false, []];
  options.model = options.model || model.name;
+ assert(!(options.imageMode && !model.image), 'Image mode is not supported.');
+ if (String.isString(model.image)) {
+ options.model = model.image;
+ options.imageMode = true;
+ model = MODELS[options.model];
+ }
  const { systemPrompt: systemInstruction, history, prompt }
  = await buildPrompts(model, content, { ...options, flavor: GEMINI });
  const _client = client.getGenerativeModel({
  model: options.model, systemInstruction,
- ...model?.tools && !options.jsonMode ? (options.tools ?? {
- tools: [
- // @todo: Gemini will failed when using these tools together.
- // https://ai.google.dev/gemini-api/docs/function-calling
- // { codeExecution: {} },
- // { googleSearch: {} },
- { functionDeclarations: (await toolsGemini()).map(x => x.def) },
- ],
- toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
- }) : {},
+ ...model?.tools && !options.jsonMode
+ && options.model !== GEMINI_20_FLASH_EXP ? (options.tools ?? {
+ tools: [
+ // @todo: Gemini will fail when using these tools together.
+ // https://ai.google.dev/gemini-api/docs/function-calling
+ // { codeExecution: {} },
+ // { googleSearch: {} },
+ {
+ functionDeclarations: (
+ await toolsGemini()
+ ).map(x => x.def)
+ },
+ ],
+ toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
+ }) : {},
  });
  // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
  // Google's bug: history is not allowed while using inline_data?
  const chat = _client.startChat({ history, ...generationConfig(options) });
  const resp = await chat.sendMessageStream(prompt);
  for await (const chunk of resp.stream) {
+ const deltaImages = [];
+ chunk.candidates[0].content?.parts?.filter(
+ x => x?.inlineData?.mimeType === png
+ )?.map?.(x => {
+ deltaImages.push(x.inlineData);
+ images.push(x.inlineData);
+ });
  functionCalls || (functionCalls = chunk.functionCalls);
  const rfc = packGeminiReferences(
  chunk.candidates[0]?.groundingMetadata?.groundingChunks,
  chunk.candidates[0]?.groundingMetadata?.groundingSupports
  );
  rfc && (references = rfc);
- let delta = chunk?.text?.() || '';
- options.result && delta
- && (responded = responded || (delta = `\n\n${delta}`));
- result += delta;
- delta && await streamResp({
- text: options.delta ? delta : result,
+ let deltaText = chunk?.text?.() || '';
+ options.result && deltaText
+ && (responded = responded || (deltaText = `\n\n${deltaText}`));
+ result += deltaText;
+ (deltaText || deltaImages.length) && await streamResp({
+ text: options.delta ? deltaText : result,
+ images: options.delta ? deltaImages : images,
  }, options);
  }
  const _resp = await resp.response;
@@ -1237,7 +1136,7 @@ const promptGemini = async (aiId, content, options = {}) => {
  });
  }
  return await packResp({
- text: mergeMsgs(toolsResponse, toolsResult), references,
+ text: mergeMsgs(toolsResponse, toolsResult), images, references,
  }, options);
  };
 
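Note: `GEMINI_20_FLASH` now carries a string-valued `image` field pointing at `gemini-2.0-flash-exp`, so `promptGemini` transparently swaps models when image output is requested (the experimental model has `tools: false`, hence the extra tool-config guard above). A hedged usage sketch, assuming an `aiId` obtained from a prior `init()`:

```js
// promptGemini sees model.image === 'gemini-2.0-flash-exp', switches
// options.model to it, forces imageMode on, and collects the PNG inlineData
// parts streamed back by the model.
const images = await promptGemini(aiId, 'Draw a watercolor fox', {
    imageMode: true, simple: true,  // simple + imageMode returns just the images
});
// images: [{ mimeType: 'image/png', data: <Buffer ...> }, ...]
```
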
@@ -1411,7 +1310,7 @@ const talk = async (input, options = {}) => {
  };
  };
 
- const getMaxChatPromptLimit = async (options) => {
+ const getChatPromptLimit = async (options) => {
  let resp = 0;
  (await getAi(null, { all: true })).map(x => {
  if (options?.aiId && options?.aiId !== x.id) { return; }
@@ -1419,7 +1318,17 @@ const getMaxChatPromptLimit = async (options) => {
  resp = resp ? Math.min(resp, maxInputTokens) : maxInputTokens;
  });
  assert(resp > 0, 'Chat engine has not been initialized.');
- return options?.raw ? resp : Math.min(resp, MAX_INPUT_TOKENS);
+ return resp;
+ };
+
+ const getChatAttachmentCost = async (options) => {
+ let resp = 0;
+ (await getAi(null, { all: true })).map(x => {
+ if (options?.aiId && options?.aiId !== x.id) { return; }
+ resp = Math.max(resp, x.model.imageCostTokens || 0);
+ });
+ assert(resp > 0, 'Chat engine has not been initialized.');
+ return resp;
  };
 
  const distillFile = async (attachments, o) => {
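
Note: with the module-wide `MAX_INPUT_TOKENS` and `ATTACHMENT_TOKEN_COST` constants gone, limits are queried from the initialized engines instead: `getChatPromptLimit` (renamed from `getMaxChatPromptLimit`, with the `raw` clamp removed) takes the smallest `maxInputTokens`, and the new `getChatAttachmentCost` takes the largest `imageCostTokens`. A small hypothetical budgeting helper built on the two accessors:

```js
// Illustrative: tokens left for text once image attachments are accounted for.
const remainingPromptTokens = async (attachmentCount, options) => {
    const limit = await getChatPromptLimit(options);             // min maxInputTokens
    const perAttachment = await getChatAttachmentCost(options);  // max imageCostTokens
    return limit - attachmentCount * perAttachment;
};
```
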
@@ -1465,7 +1374,7 @@ const prompt = async (input, options = {}) => {
  };
 
  const trimPrompt = async (getPrompt, trimFunc, contextWindow, options) => {
- let [i, maxTry] = [0, ~~options?.maxTry || MAX_TRIM_TRY];
+ let [i, maxTry] = [0, ~~options?.maxTry || kT(128)];
  while ((await countTokens(await getPrompt(), { fast: true }) > contextWindow)
  || (await countTokens(await getPrompt()) > contextWindow)) {
  await trimFunc();
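
Note: `trimPrompt` keeps calling `trimFunc` until both the fast and the exact `countTokens` passes fit the budget; the only change here is the default retry cap, from the removed `MAX_TRIM_TRY` to a flat `kT(128)`. A minimal sketch of the contract (the line-dropping trim strategy is illustrative, not the library's):

```js
// getPrompt re-renders the candidate prompt; trimFunc shrinks it one step per try.
const hugeText = 'some line of text\n'.repeat(500000);
let lines = hugeText.split('\n');
await trimPrompt(
    () => lines.join('\n'),  // getPrompt: current candidate prompt
    () => lines.pop(),       // trimFunc: drop one line per attempt
    kT(128),                 // contextWindow: a 128,000-token budget
);
```
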
@@ -1515,38 +1424,54 @@ const analyzeSessions = async (sessionIds, options) => {
 
  export default init;
  export {
- ATTACHMENT_TOKEN_COST, CLOUD_37_SONNET, CODE_INTERPRETER, DEEPSEEK_R1,
- DEEPSEEK_R1_32B, DEEPSEEK_R1_70B, DEFAULT_MODELS,
- EMBEDDING_001,
- FUNCTION, GEMINI_20_FLASH, GEMINI_20_FLASH_THINKING, GPT_45, GPT_4O, GPT_4O_MINI, GPT_O1, GPT_O3_MINI, INSTRUCTIONS, MODELS,
- OPENAI_VOICE, RETRIEVAL,
- TEXT_EMBEDDING_3_SMALL, _NEED, analyzeSessions,
+ _NEED,
+ CLOUD_37_SONNET,
+ CODE_INTERPRETER,
+ DEEPSEEK_R1,
+ DEFAULT_MODELS,
+ FUNCTION,
+ GEMINI_20_FLASH_THINKING,
+ GEMINI_20_FLASH,
+ GPT_45,
+ GPT_4O_MINI,
+ GPT_4O,
+ GPT_O1,
+ GPT_O3_MINI,
+ INSTRUCTIONS,
+ MODELS,
+ OPENAI_VOICE,
+ RETRIEVAL,
+ analyzeSessions,
  buildGptTrainingCase,
  buildGptTrainingCases,
  cancelGptFineTuningJob,
  countTokens,
- createGeminiEmbedding, createGptFineTuningJob,
+ createGeminiEmbedding,
+ createGptFineTuningJob,
  createOpenAIEmbedding,
  deleteFile,
  distillFile,
+ getAi,
+ getChatAttachmentCost,
+ getChatPromptLimit,
  getGptFineTuningJob,
- getMaxChatPromptLimit,
  getSession,
  init,
  initChat,
  jpeg,
- getAi,
  listFiles,
  listGptFineTuningEvents,
  listGptFineTuningJobs,
  listOpenAIModels,
  ogg,
- prompt, promptOpenAI,
+ prompt,
  promptAnthropic,
  promptGemini,
+ promptOpenAI,
  resetSession,
  tailGptFineTuningEvents,
  talk,
+ TEXT_EMBEDDING_3_SMALL,
  trimPrompt,
  uploadFile,
  uploadFileForFineTuning,