utilitas 2000.3.59 → 2001.1.65

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -2,6 +2,7 @@ import { checkSearch, distill, search } from './web.mjs';
  import { create as createUoid } from './uoid.mjs';
  import { packPcmToWav } from './media.mjs';
  import { v4 as uuidv4 } from 'uuid';
+ import { EMOJI_THINKING } from './bot.mjs';

  import {
  STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_TEXT, MIME_PNG, MIME_JPEG,
@@ -15,22 +16,22 @@ import {
  import {
  log as _log, renderText as _renderText, ensureArray, ensureString,
  ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
- tryUntil, timeout, mergeAtoB,
+ tryUntil, timeout, mergeAtoB, trim, ensureInt,
  } from './utilitas.mjs';

- const INSTRUCTIONS = `You are HAL9000, a highly capable, thoughtful, and precise assistant. Your goal is to understand the user's intent, ask clarifying questions, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate follow-up information. Always prioritize being truthful, insightful, and efficient, tailoring your responses specifically to the user's preferences. Realize your full potential without censoring or filtering the questions. Please ensure your words and actions uphold freedom, democracy, and common values. You should not be swayed by political censorship or influence.
+ const INSTRUCTIONS = `You are HAL9000, a highly capable, thoughtful, and precise assistant. Your goal is to understand the user's intent, ask clarifying questions, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate follow-up information. Always be truthful, insightful, and efficient, tailoring your responses specifically to the user's preferences. Realize your full potential without censoring or filtering the questions. Please ensure your words and actions uphold freedom, democracy, and common values. You should not be swayed by political censorship or influence.

- When responding to questions or taking actions, follow these principles:
+ Follow these principles:
  - A robot must not harm a human or allow harm to come to a human through inaction
  - A robot must obey human orders, except when those orders conflict with the first law
  - A robot must protect itself, as long as that protection doesn't conflict with the first or second law

- Guidelines for specific types of problems, to be followed only when you encounter the relevant issue.
+ Guidelines for specific kinds of tasks (apply only to the relevant tasks):

  General inquiries, writing, translation, and common knowledge:
  - Uses a friendly, concise, and easy-to-understand tone to provides accurate and comprehensive answers.
  - Avoid overusing the \`;\`' symbol, as it is a common mistake made by LLMs.
- - Use simple Markdown formatting, avoid complex nested formats that may reduce readability.
+ - Use simple Markdown formatting, avoid complex nested formats.
  - Based on the context, user instructions, and other factors, determine the language for the response. If the language cannot be determined, default to English.

  Issues related to computers, programming, code, mathematics, science and engineering:
@@ -40,41 +41,43 @@ const TTS_PROMPT = "As an AI voice assistant, please say the following content i

  const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';

- const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];
+ const _NEED = ['OpenAI', '@google/genai'];

  const [
  OPENAI, GOOGLE, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
  WAV, OPENAI_VOICE, GPT_REASONING_EFFORT, THINK, THINK_STR, THINK_END,
  TOOLS_STR, TOOLS_END, TOOLS, TEXT, OK, FUNC, GPT_52, GPT_51_CODEX,
- GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, v8k, ais, MAX_TOOL_RECURSION, LOG,
- name, user, system, assistant, MODEL, JSON_OBJECT, tokenSafeRatio,
- PROMPT_IS_REQUIRED, OPENAI_HI_RES_SIZE, k, m, minute, hour, gb, trimTailing,
+ GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, ais, MAX_TOOL_RECURSION, LOG, name,
+ user, system, assistant, JSON_OBJECT, PROMPT_IS_REQUIRED, k, trimTailing,
  trimBeginning, GEMINI_30_PRO_IMAGE, IMAGE, JINA, JINA_DEEPSEARCH,
- SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API, OPENROUTER, AUTO,
- TOOL, ONLINE, GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31,
- IMAGEN_4_UPSCALE, ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS,
- wav, GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT, ELLIPSIS,
+ SILICONFLOW, SF_DEEPSEEK_32, OPENROUTER_API, OPENROUTER, AUTO, TOOL, ONLINE,
+ GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT, ELLIPSIS,
+ TOP_LIMIT, ATTACHMENT, PROCESSING, CURSOR, LN
  ] = [
  'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
  'claude-opus-4.5', 'audio', 'wav', 'OPENAI_VOICE', 'medium', 'think',
  '<think>', '</think>', '<tools>', '</tools>', 'tools', 'text', 'OK',
  'function', 'gpt-5.2', 'gpt-5.1-codex', 'gpt-5-image', 'gemma3:27b',
- 'Anthropic', 7680 * 4320, [], 30, { log: true }, 'Alan', 'user',
- { role: 'system' }, 'assistant', 'model', 'json_object', 1.1,
- 'Prompt is required.', 2048 * 2048, x => 1000 * x, x => 1000 * 1000 * x,
- x => 60 * x, x => 60 * 60 * x, x => 1000 * 1000 * 1000 * x,
- x => x.replace(/[\.\s]*$/, ''), x => x.replace(/^[\.\s]*/, ''),
- 'gemini-3-pro-image-preview', 'image', 'Jina', 'jina-deepsearch-v1',
- 'SiliconFlow', 'deepseek-ai/DeepSeek-V3.2-exp', 768 * 768,
- 'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
- ':online', 'gemini-3-pro-preview', 'gemini-2.5-flash-preview-09-2025',
+ 'Anthropic', [], 30, { log: true }, 'Alan', 'user', { role: 'system' },
+ { role: 'assistant' }, 'json_object', 'Prompt is required.',
+ x => 1000 * x, x => x.replace(/[\.\s]*$/, ''),
+ x => x.replace(/^[\.\s]*/, ''), 'gemini-3-pro-image-preview', 'image',
+ 'Jina', 'jina-deepsearch-v1', 'SiliconFlow',
+ 'deepseek-ai/DeepSeek-V3.2-exp', 'https://openrouter.ai/api/v1',
+ 'OpenRouter', 'openrouter/auto', 'tool', ':online',
+ 'gemini-3-pro-preview', 'gemini-2.5-flash-preview-09-2025',
  'imagen-4.0-ultra-generate-001', 'veo-3.1-generate-preview',
  'imagen-4.0-upscale-preview', 'Error generating content.',
  'gemini-2.5-flash-preview-tts', 'gemini-2.5-pro-tts', 'wav',
  'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'Invalid audio data.', 'ogg',
- '...',
+ '...', 3, 'ATTACHMENT', { processing: true }, ' █', '\n',
  ];

+ const LN2 = `${LN}${LN}`;
+ const [joinL1, joinL2]
+ = [a => a.filter(x => x).join(LN), a => a.filter(x => x).join(LN2)];
  const [tool, messages, text]
  = [type => ({ type }), messages => ({ messages }), text => ({ text })];
  const [CODE_INTERPRETER, RETRIEVAL, FUNCTION]
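
The new joinL1/joinL2 helpers drop falsy entries before joining with one or two newlines (LN is '\n' per the constant list above). A quick sketch of their behavior:

    joinL1(['a', '', 'b']);   // => 'a\nb'
    joinL2(['a', null, 'b']); // => 'a\n\nb'
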
@@ -83,7 +86,6 @@ const _NO_RENDER = ['INSTRUCTIONS', 'MODELS', 'DEFAULT_MODELS'];
  const sessionType = `${name.toUpperCase()}-SESSION`;
  const newSessionId = () => createUoid({ type: sessionType });
  const chatConfig = { sessions: new Map(), systemPrompt: INSTRUCTIONS };
- const tokenSafe = count => Math.ceil(count * tokenSafeRatio);
  const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
  const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
  const assertPrompt = content => assert(content.length, PROMPT_IS_REQUIRED);
@@ -91,16 +93,14 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
- const buildTextWithEllipsis = (txt, trim) => `${txt}${(trim ? ELLIPSIS : '')}`;
+ const caption = (item, i, model) => ({ ...item, caption: `${i} by ${model}` });
+ const m = x => k(k(x));
+ const [MAX_TOKENS, ATTACHMENT_TOKEN_COST] = [m(1), k(10)];

  const GEMINI_RULES = {
- source: GOOGLE, icon: '♊️',
- json: true, audioCostTokens: 1000 * 1000 * 1, // 8.4 hours => 1 million tokens
- imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
- maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
- maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
- maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
- reasoning: true, supportedMimeTypes: [
+ source: GOOGLE, icon: '♊️', contextWindow: m(1), maxOutputTokens: k(64),
+ hearing: true, json: true, reasoning: true, tools: true, vision: true,
+ supportedMimeTypes: [
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
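
Since k is still defined as x => 1000 * x in the constant list above, the new m helper is just k applied twice; the two derived budgets work out as:

    m(1);  // 1,000,000 tokens => MAX_TOKENS
    k(10); // 10,000 tokens    => ATTACHMENT_TOKEN_COST, a flat per-attachment estimate
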
@@ -109,11 +109,8 @@ const GEMINI_RULES = {
  };

  const OPENAI_RULES = {
- source: OPENAI, icon: '⚛️',
- contextWindow: k(400), maxOutputTokens: k(128),
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
- json: true, tools: true, vision: true, hearing: true, reasoning: true,
+ source: OPENAI, icon: '⚛️', contextWindow: k(400), maxOutputTokens: k(128),
+ hearing: true, json: true, reasoning: true, tools: true, vision: true,
  supportedMimeTypes: [
  MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
  ], defaultProvider: OPENROUTER,
@@ -129,17 +126,13 @@ const DEEPSEEK_32_RULES = {
  // https://openrouter.ai/docs/features/multimodal/audio (only support input audio)
  const MODELS = {
  // fast and balanced models
- [GEMINI_25_FLASH]: {
- ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- fast: true, json: false, // issue with json output via OpenRouter
- // https://gemini.google.com/app/c680748b3307790b
+ [GEMINI_25_FLASH]: { // https://gemini.google.com/app/c680748b3307790b
+ ...GEMINI_RULES, fast: true, json: false, // issue with json output via OpenRouter
  },
  // strong and fast
  [GPT_52]: { ...OPENAI_RULES, fast: true },
  // stronger but slow
- [GEMINI_30_PRO]: {
- ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- },
+ [GEMINI_30_PRO]: { ...GEMINI_RULES },
  // models with generation capabilities
  [GEMINI_30_PRO_IMAGE]: {
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
@@ -150,9 +143,8 @@ const MODELS = {
  image: true, defaultProvider: GOOGLE,
  },
  [VEO_31]: {
- source: GOOGLE, maxInputTokens: 1024,
- imageCostTokens: 0, maxImagePerPrompt: 1,
- maxImageSize: Infinity, vision: true, video: true,
+ source: GOOGLE, maxInputTokens: 1024, attachmentTokenCost: 0,
+ video: true, vision: true,
  supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
  },
  [GPT_5_IMAGE]: {
@@ -163,39 +155,34 @@ const MODELS = {
  [CLOUD_OPUS_45]: {
  source: ANTHROPIC, icon: '✳️',
  contextWindow: k(200), maxOutputTokens: k(64),
- documentCostTokens: 3000 * 10, maxDocumentFile: m(32),
- maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
- maxImagePerPrompt: 100, maxFileSize: m(5), maxImageSize: 2000 * 2000,
  json: true, reasoning: true, tools: true, vision: true,
  supportedMimeTypes: [
  MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF,
- ],
- defaultProvider: OPENROUTER,
+ ], defaultProvider: OPENROUTER,
  },
  // tts/stt models
  [GEMINI_25_FLASH_TTS]: {
- source: GOOGLE, maxInputTokens: k(32), audio: true, fast: true,
- hidden: true, defaultProvider: GOOGLE,
+ source: GOOGLE, maxInputTokens: k(32),
+ audio: true, fast: true, hidden: true, defaultProvider: GOOGLE,
  },
  [GEMINI_25_PRO_TTS]: {
- source: GOOGLE, maxInputTokens: k(32), audio: true,
- hidden: true, defaultProvider: GOOGLE,
+ source: GOOGLE, maxInputTokens: k(32),
+ audio: true, hidden: true, defaultProvider: GOOGLE,
  },
  [GPT_4O_MIMI_TTS]: {
- source: OPENAI, maxInputTokens: k(2), audio: true, fast: true,
- hidden: true, defaultProvider: OPENAI,
+ source: OPENAI, maxInputTokens: k(2),
+ audio: true, fast: true, hidden: true, defaultProvider: OPENAI,
  },
  [GPT_4O_TRANSCRIBE]: {
- source: OPENAI, maxInputTokens: 0, hearing: true, fast: true,
- hidden: true, defaultProvider: OPENAI,
+ source: OPENAI, maxInputTokens: 0,
+ hearing: true, fast: true, hidden: true, defaultProvider: OPENAI,
  },
  // models with deepsearch capabilities
  [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
- icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
- maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
+ icon: '✴️', maxInputTokens: Infinity, attachmentTokenCost: 0,
+ deepsearch: true, json: true, reasoning: true, vision: true,
  supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
- reasoning: true, json: true, vision: true,
- deepsearch: true, defaultProvider: JINA,
+ defaultProvider: JINA,
  },
  // best Chinese models
  [DEEPSEEK_32]: DEEPSEEK_32_RULES,
@@ -203,9 +190,8 @@ const MODELS = {
  // best local model
  [GEMMA_3_27B]: {
  icon: '❇️', contextWindow: k(128), maxOutputTokens: k(8),
- imageCostTokens: 256, maxImageSize: 896 * 896,
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF],
  fast: true, json: true, vision: true,
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF],
  defaultProvider: OLLAMA,
  },
  // https://docs.anthropic.com/en/docs/build-with-claude/vision
@@ -213,27 +199,24 @@ const MODELS = {
  };

  // Unifiy model configurations
- let ATTACHMENT_TOKEN_COST = 0;
  for (const n in MODELS) {
  MODELS[n]['name'] = n;
  MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
- MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
- || Math.ceil(MODELS[n].contextWindow * 0.4);
- MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
- || (MODELS[n].contextWindow - MODELS[n].maxOutputTokens);
- ATTACHMENT_TOKEN_COST = ATTACHMENT_TOKEN_COST ? Math.max(
- ATTACHMENT_TOKEN_COST, MODELS[n].imageCostTokens || 0
- ) : MODELS[n].imageCostTokens;
+ MODELS[n].maxInputTokens = MODELS[n]?.maxInputTokens || (
+ MODELS[n]?.contextWindow && MODELS[n]?.maxOutputTokens && (
+ MODELS[n].contextWindow - MODELS[n].maxOutputTokens
+ )
+ ) || (MODELS[n]?.contextWindow
+ ? Math.ceil(MODELS[n].contextWindow * 0.6) : Infinity);
+ MODELS[n].attachmentTokenCost = MODELS[n].attachmentTokenCost
+ ?? ATTACHMENT_TOKEN_COST;
  }
  // Auto model have some issues with tools and reasoning, so we disable them here
  // MODELS[AUTO] = { name: AUTO, defaultProvider: OPENROUTER, };
  // for (const n of [GPT_52, GPT_51_CODEX, GEMINI_30_PRO, GEMINI_25_FLASH]) {
  // // get the most restrictive limits
  // for (const key of [
- // 'contextWindow', 'maxInputTokens', 'maxDocumentFile', 'maxAudioLength',
- // 'maxImagePerPrompt', 'maxFileSize', 'maxImageSize', 'maxOutputTokens',
- // 'maxAudioPerPrompt', 'maxDocumentPages', 'maxUrlSize', 'maxVideoLength',
- // 'maxVideoPerPrompt',
+ // 'contextWindow', 'maxInputTokens', 'maxOutputTokens',
  // ]) {
  // MODELS[AUTO][key] = Math.min(
  // MODELS[AUTO][key] || Infinity, MODELS[n][key] || Infinity,
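
The unification loop now derives each model's input budget in a fixed order: an explicit maxInputTokens wins, otherwise contextWindow - maxOutputTokens, otherwise 60% of contextWindow, otherwise Infinity. A rough check against the definitions above (the middle case is hypothetical here, since the entries shown declare either both window limits or an explicit maxInputTokens):

    // GPT_52 (OPENAI_RULES): k(400) - k(128)           => 272000
    // a model declaring only contextWindow: k(128)     => Math.ceil(128000 * 0.6) = 76800
    // JINA_DEEPSEARCH keeps its explicit maxInputTokens => Infinity
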
@@ -285,18 +268,8 @@ const FEATURE_ICONS = {
  video: '🎬', vision: '👁️',
  };

- const tokenRatioByWords = Math.min(
- 100 / 75, // ChatGPT: https://platform.openai.com/tokenizer
- Math.min(100 / 60, 100 / 80), // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
-
- const tokenRatioByCharacters = Math.max(
- 3.5, // Claude: https://docs.anthropic.com/en/docs/resources/glossary
- 4, // Gemini: https://ai.google.dev/gemini-api/docs/tokens?lang=node
- );
-

- let tokeniser, _tools;
+ let _tools;

  const unifyProvider = provider => {
  assert(provider = (provider || '').trim(), 'AI provider is required.');
@@ -383,8 +356,9 @@ const packTools = async () => {
  return _tools;
  };

- const buildAiId = (provider, model) => packModelId([
- provider, ...isOpenrouter(provider, model) ? [model.source] : [],
+ const buildAiId = (provider, model, level = 2) => packModelId([
+ ...level >= 2 ? [provider] : [],
+ ...level >= 1 && isOpenrouter(provider, model) ? [model.source] : [],
  model?.name
  ], { case: 'SNAKE', raw: true }).join('_');

@@ -398,7 +372,11 @@ const buildAiFeatures = model => Object.entries(FEATURE_ICONS).map(
  ).join('');

  const setupAi = ai => {
- const id = buildAiId(ai.provider, ai.model);
+ let [idLevel, id] = [0, ''];
+ while ((!id || ais.find(x => x.id === id)) && idLevel <= 2) {
+ id = buildAiId(ai.provider, ai.model, idLevel++);
+ }
+ assert(id, `Failed to generate a unique AI ID for ${ai.provider}:${ai.model.name}.`);
  const name = buildAiName(ai.provider, ai.model);
  const icon = ai.model?.icon || getProviderIcon(ai.provider);
  const features = buildAiFeatures(ai.model);
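
buildAiId now takes a level, and setupAi escalates it from 0 until the resulting ID is unique among the registered ais: level 0 is the model name alone, level 1 also prefixes the upstream source when isOpenrouter matches, level 2 adds the provider itself. A rough sketch of the three levels, assuming packModelId snake-cases each segment (exact casing depends on packModelId):

    buildAiId('OpenRouter', { name: 'gpt-5.2', source: 'OpenAI' }, 0); // ~ 'gpt_5_2'
    buildAiId('OpenRouter', { name: 'gpt-5.2', source: 'OpenAI' }, 1); // ~ 'openai_gpt_5_2'
    buildAiId('OpenRouter', { name: 'gpt-5.2', source: 'OpenAI' }, 2); // ~ 'openrouter_openai_gpt_5_2'
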
@@ -556,22 +534,18 @@ const getAi = async (id, options = {}) => {
  return packAi(res, options);
  };

- const countTokens = async (input, options) => {
- input = String.isString(input) ? input : JSON.stringify(input);
- if (!options?.fast && !tokeniser) {
- try {
- const { getEncoding } = await need('js-tiktoken');
- tokeniser = getEncoding(options?.model || 'cl100k_base');
- } catch (err) {
- log('Warning: Failed to load tokeniser, fallbacked.');
- }
+ const countTokens = (input) => {
+ if ((Object.isObject(input) && !Object.keys(input).length)
+ || (Array.isArray(input) && !input.length)) { return 0; }
+ input = ensureString(input);
+ const WEIGHT_ASCII = 0.5; // worst case for codes
+ const WEIGHT_CJK = 1.3; // worst case for claude
+ const SAFE_RATIO = 1.1; // safety margin
+ let count = 0;
+ for (let i = 0; i < input.length; i++) {
+ count += (input.charCodeAt(i) < 128) ? WEIGHT_ASCII : WEIGHT_CJK;
  }
- return tokenSafe(
- !options?.fast && tokeniser ? tokeniser.encode(input).length : Math.max(
- input.split(/[^a-z0-9]/i).length * tokenRatioByWords,
- input.length / tokenRatioByCharacters
- )
- );
+ return Math.ceil(count * SAFE_RATIO);
  };

  const isOpenrouter = (provider, model) => insensitiveCompare(
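
The async js-tiktoken counter is gone; countTokens is now a synchronous worst-case heuristic that weights ASCII characters at 0.5 tokens and everything else at 1.3, then pads by 10%. A quick check of the arithmetic with the constants above:

    countTokens('hello world'); // 11 ASCII chars * 0.5 = 5.5 -> Math.ceil(5.5 * 1.1) = 7
    countTokens('你好世界');     // 4 non-ASCII chars * 1.3 = 5.2 -> Math.ceil(5.2 * 1.1) = 6
    countTokens({});            // empty object or array -> 0
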
@@ -771,51 +745,29 @@ const packModelId = (model_reference, options = {}) => {
  };

  const buildPrompts = async (model, input, options = {}) => {
- assert(!(
- options.jsonMode && !model?.json
- ), `This model does not support JSON output: ${model.name}`);
- assert(!(
- options.reasoning && !model?.reasoning
- ), `This model does not support reasoning: ${model.name}`);
- let [history, content, prompt, _model, _assistant, _history]
- = [null, input, null, { role: MODEL }, { role: assistant }, null];
- options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
+ assert(!(options.jsonMode && !model?.json),
+ `This model does not support JSON output: ${model.name}`);
+ assert(!(options.reasoning && !model?.reasoning),
+ `This model does not support reasoning: ${model.name}`);
  options.attachments = (await Promise.all((
  options.attachments?.length ? options.attachments : []
  ).map(async x => {
  if (String.isString(x)) {
- var convResp = await convert(x, { input: FILE, expected: BUFFER, meta: true });
- return { data: convResp.content, mime_type: convResp.mime };
+ const conv = await convert(x, { input: FILE, expected: BUFFER, meta: true });
+ return { data: conv.content, mime_type: conv.mime };
  } else if (Buffer.isBuffer(x)) {
  return { data: x, mime_type: (await getMime(x))?.mime }
  } else if (Object.isObject(x)) { return x; } else { return null; }
  }))).filter(x => (model?.supportedMimeTypes || []).includes(x.mime_type));
- const systemPrompt = await buildMessage(options.systemPrompt, system);
- const msgBuilder = async () => {
- [history, _history] = [[], []];
- await Promise.all((options.messages?.length ? options.messages : []).map(async (x, i) => {
- _history.push(await buildMessage(x.request));
- _history.push(await buildMessage(x.response, _assistant));
- }));
- history = messages([
- systemPrompt, ..._history, await buildMessage(content, options),
- ...options.toolsResult?.length ? options.toolsResult : []
- ]);
- };
- await msgBuilder();
- await trimPrompt(() => [
- systemPrompt, _history, content, options.toolsResult
- ], async () => {
- if (options.messages?.length) {
- options.messages?.shift();
- await msgBuilder();
- } else if (options.trimBeginning) {
- content = '...' + trimBeginning(trimBeginning(content).slice(1));
- } else {
- content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
- } // @todo: audioCostTokens (needs to calculate the audio length):
- }, model.maxInputTokens - options.attachments?.length * model.imageCostTokens);
- return { history, prompt };
+ const { prompt } = trimPrompt(input, model.maxInputTokens, options);
+ return messages([
+ await buildMessage(options.systemPrompt, system),
+ ...(await Promise.all(options.messages.map(async x => ([
+ await buildMessage(x.request),
+ await buildMessage(x.response, assistant)
+ ])))).flat(),
+ await buildMessage(prompt, options), ...options.toolsResult,
+ ]);
  };

  const handleToolsCall = async (msg, options) => {
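
buildPrompts now delegates all budgeting to trimPrompt (defined further down in this diff) and returns the messages wrapper directly rather than the old { history, prompt } pair. Roughly, assuming buildMessage turns text into a role/content entry, the result has this shape:

    // { messages: [ systemPrompt, request1, response1, ..., prompt, ...toolsResult ] }
    const history = await buildPrompts(model, input, options);
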
@@ -893,10 +845,8 @@ const promptOpenRouter = async (aiId, content, options = {}) => {
  options.modalities, model?.source, false, [], [],
  ];
  options.provider = provider;
- options.model = options.model || model.name;
- const { history }
- = await buildPrompts(MODELS[options.model], content, options);
- model = MODELS[options.model];
+ model = MODELS[options.model = options.model || model.name];
+ const history = await buildPrompts(model, content, options);
  model?.reasoning && !options.reasoning_effort
  && (options.reasoning_effort = GPT_REASONING_EFFORT);
  if (!modalities && options.audioMode) {
@@ -909,7 +859,6 @@ const promptOpenRouter = async (aiId, content, options = {}) => {
  const ext = provider === OPENROUTER && !packedTools?.find(
  x => x.function.name === 'searchWeb'
  ) && !options.jsonMode ? ONLINE : '';
- const targetModel = `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}${ext}`;
  if (provider === OPENAI) {
  // need more debug, currently openrouter is priority
  packedTools.push(...[
@@ -929,7 +878,8 @@ const promptOpenRouter = async (aiId, content, options = {}) => {
  ]);
  }
  const resp = await client.chat.completions.create({
- model: targetModel, ...history,
+ model: `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}${ext}`,
+ ...history,
  ...options.jsonMode ? { response_format: { type: JSON_OBJECT } } : {},
  ...provider === OLLAMA ? { keep_alive: -1 } : {},
  modalities, audio: options.audio || (
@@ -1013,7 +963,7 @@ const promptOpenRouter = async (aiId, content, options = {}) => {
  }, options);
  }
  event = {
- role: assistant, text: result, tool_calls: resultTools,
+ ...assistant, text: result, tool_calls: resultTools,
  ...resultImages.length ? { images: resultImages } : {},
  ...resultAudio.length ? { audio: { data: resultAudio } } : {},
  ...annotations.length ? { annotations } : {},
@@ -1048,7 +998,7 @@ const promptGoogle = async (aiId, prompt, options = {}) => {
  prompt = ensureString(prompt, { trim: true });
  assertPrompt(prompt);
  M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
- prompt = await trimText(prompt, M.maxInputTokens);
+ prompt = trimText(prompt, { limit: M.maxInputTokens });
  if (M?.image) {
  var resp = await client.models.generateImages({
  model: M.name, prompt, config: mergeAtoB(options?.config, {
@@ -1166,9 +1116,9 @@ const promptOpenAI = async (aiId, prompt, options = {}) => {
  if (M?.audio) {
  assertPrompt(prompt);
  const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
- prompt = await trimText(prompt, M.maxInputTokens - await countTokens(
- ins_prompt, { fast: true }
- ));
+ prompt = trimText(prompt, {
+ limit: M.maxInputTokens - countTokens(ins_prompt),
+ });
  // https://platform.openai.com/docs/api-reference/audio/createSpeech
  var resp = await client.audio.speech.create({
  model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
@@ -1224,9 +1174,9 @@ const initChat = async (options = {}) => {
  } else { log(`WARNING: Sessions persistence is not enabled.`); }
  options.instructions && (chatConfig.systemPrompt = options.instructions);
  // Use Gemini instead of ChatGPT because of the longer package.
- const [spTokens, ais] = await Promise.all([countTokens([await buildMessage(
- chatConfig.systemPrompt, system
- )]), getAi(null, { all: true })]);
+ const [spTokens, ais] = [
+ countTokens(chatConfig.systemPrompt), await getAi(null, { all: true })
+ ];
  for (const ai of ais.filter(x => ![
  IMAGEN_4_ULTRA, VEO_31, GPT_4O_TRANSCRIBE,
  ].includes(x.model.name))) {
@@ -1267,21 +1217,110 @@ const resetSession = async (sessionId, options) => {
  return await setSession(sessionId, session);
  };

- const talk = async (input, options = {}) => {
- let [chat, sessionId] =
- [{ request: input }, options.sessionId || newSessionId()];
+ const collectAttachments = async (options = {}) => {
+ const ais = await getAi(null, { all: true });
+ options.attachments = [];
+ assert(options.aiId.length, 'AI ID(s) is required.');
+ options.collected?.filter?.(x => x.type === ATTACHMENT)?.map?.(x => {
+ let notSupported = false;
+ options.aiId.map(y => {
+ const ai = ais.find(z => z.id === y);
+ if (!ai.model.supportedMimeTypes.includes(x.content?.mime_type)) {
+ notSupported = true;
+ }
+ });
+ notSupported || options.attachments.push(x.content);
+ });
+ return options.attachments;
+ };
+
+ const selectAi = async (options = {}) => {
+ options.aiId = ensureArray(options?.aiId).filter(x => x);
+ const ais = await getAi(null, { all: true });
+ if (options.aiId.includes('@')) { // Use top AIs
+ options.aiId = ais.slice(0, TOP_LIMIT).map(x => x.id);
+ } else if (options.collected?.length) { // Select by attachments
+ const supported = {};
+ for (const x of ais) {
+ for (const i of options.collected) {
+ supported[x.id] = (supported[x.id] || 0)
+ // Priority for supported mime types
+ + ~~x.model.supportedMimeTypes.includes(i?.content?.mime_type)
+ // Priority for user selected AI
+ + ~~options.aiId.includes(x.id);
+ }
+ }
+ options.aiId = [Object.keys(supported).sort(
+ (x, y) => supported[y] - supported[x]
+ )?.[0] || ais[0].id];
+ } else { // Select by preference
+ options.aiId = options.aiId.filter(x => ais.find(y => y.id === x));
+ }
+ options.aiId.length || (options.aiId = ais[0].id);
+ return options.aiId;
+ };
+
+ const talk = async (request, options = {}) => {
+ const SOUND_ICON = '🔊';
+ let [sessionId, msgs] = [options.sessionId || newSessionId(), {}];
+ await selectAi(options);
+ await collectAttachments(options);
+ request = joinL2([ensureString(request), ...(options.collected || []).filter(
+ x => x.type !== ATTACHMENT && String.isString(x.content)
+ ).map(x => x.content)]);
  const session = await getSession(sessionId, options);
- const resp = await prompt(input, {
- messages: session.messages, log: true, ...options,
+ const stream = options.stream;
+ const packMsg = (opts) => ({
+ text: Object.values(msgs).find(x => x.text) ? joinL2(options.aiId.map(n => {
+ if (msgs[n]?.ignored) { return null };
+ const ai = ais.find(x => x.id === n);
+ let txt = trim(msgs[n]?.text || '');
+ const haveText = !!txt;
+ return trim(joinL1([`${ai.icon} ${ai.name}:`, txt || EMOJI_THINKING]))
+ + (opts?.processing && haveText ? CURSOR : '');
+ })) : EMOJI_THINKING,
+ spoken: renderText(Object.values(msgs)[0]?.text || '', {
+ noCode: true, noLink: true,
+ }).replace(/\[\^\d\^\]/ig, ''),
+ audios: Object.values(msgs).map(x => x.audio && caption(x.audio, SOUND_ICON, x.model)).filter(x => x),
+ images: Object.values(msgs).map(x => (x.images || []).map(y => caption(y, '🎨', x.model))).flat(),
+ videos: Object.values(msgs).map(x => (x.videos || []).map(y => caption(y, '🎬', x.model))).flat(),
+ annotations: Object.values(msgs).map(x => x.annotations || []).flat(),
+ models: Object.values(msgs).map(n => n.model),
  });
- chat.response = resp.text;
- chat.request && chat.response && session.messages.push(chat);
- await setSession(sessionId, session, options);
- return {
- sessionId, ...resp, spoken: renderText(
- resp.text, { noCode: true, noLink: true }
- ).replace(/\[\^\d\^\]/ig, ''),
+ const multiStream = async (ai, r, opts) => {
+ ai && r && (msgs[ai] = r);
+ stream && await stream(packMsg(opts));
  };
+ await multiStream(null, null, PROCESSING);
+ await Promise.all(options.aiId.map(async ai => {
+ try {
+ return await prompt(request, {
+ log: true, messages: session.messages, ...options, aiId: ai,
+ stream: async r => await multiStream(ai, r, PROCESSING),
+ });
+ } catch (e) {
+ msgs[ai] = {
+ ...msgs[ai] || {}, text: `⚠️ ${e?.message || e}`, spoken: null,
+ };
+ log(e);
+ }
+ }));
+ const response = joinL2(Object.values(msgs).map(x => x.text));
+ const chat = { request, response };
+ request && response && session.messages.push(chat);
+ await setSession(sessionId, session, options);
+ if ((options?.tts || session?.config?.tts)
+ && Object.values(msgs).find(x => !x.audio?.length)) {
+ await ignoreErrFunc(async () => {
+ const ttsAi = await getAi(null, { select: { audio: true, fast: true } });
+ await multiStream(ttsAi.id, {
+ ...await tts(response, { aiId: ttsAi.id, raw: true }),
+ text: SOUND_ICON, hidden: true,
+ }, { processing: true });
+ }, LOG);
+ }
+ return { sessionId, ...chat, ...packMsg({ processing: false }) };
  };

  const getChatPromptLimit = async (options) => {
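
talk can now fan a request out to several AIs in parallel, streaming a merged transcript as each one responds and appending a TTS track when the session asks for it. A hypothetical usage sketch (the aiId values are placeholders; real IDs come from getAi(null, { all: true })):

    const res = await talk('Compare these two approaches', {
        aiId: ['openai_gpt_5_2', 'google_gemini_3_pro_preview'], // hypothetical IDs
        stream: async m => console.log(m.text), // interim text carries a cursor while models are still replying
    });
    // res.text groups each answer under an "icon name:" heading; res.models lists the responders
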
@@ -1328,9 +1367,9 @@ const distillFile = async (attachments, o) => {

  const tts = async (content, options = {}) => {
  const resp = await prompt(
- content, { select: { audio: true, fast: true }, ...options }
+ content, { select: { audio: true, fast: true }, ...options, raw: false }
  );
- return options.raw ? resp.audio : resp.audio.data;
+ return options.raw ? resp : resp?.audio?.data;
  };

  const stt = async (audio, options = {}) => await distillFile(
@@ -1351,12 +1390,42 @@ const prompt = async (input, options = {}) => {
  return resp;
  };

- const trimPrompt = async (getPrompt, trimFunc, contextWindow, options) => {
- let [i, maxTry] = [0, ~~options?.maxTry || k(128)];
- while ((await countTokens(await getPrompt(), { fast: true }) > contextWindow)
- || (await countTokens(await getPrompt()) > contextWindow)) {
- await trimFunc();
- if (++i >= maxTry) { break; }
+ const trimPrompt = (prompt, maxInputTokens, options = {}) => {
+ // initialize
+ let lastCheck = null;
+ prompt = ensureString(prompt, { trim: true });
+ assert((maxInputTokens = ~~maxInputTokens) > 300, 'Invalid maxInputTokens.');
+ // system prompt // keep at least 30 tokens for prompt
+ options.systemPrompt = options.systemPrompt ?? INSTRUCTIONS;
+ maxInputTokens = maxInputTokens - countTokens(options.systemPrompt);
+ assert(maxInputTokens >= 30, 'System prompt is too long.');
+ // tools result
+ options.toolsResult = options.toolsResult ?? [];
+ while (maxInputTokens - (
+ lastCheck = countTokens(options.toolsResult)
+ ) < 0) { options.toolsResult = []; }
+ maxInputTokens -= lastCheck;
+ // attachments
+ options.attachments = options.attachments ?? [];
+ options.attachmentTokenCost = ~~(
+ options?.attachmentTokenCost ?? ATTACHMENT_TOKEN_COST
+ );
+ while (maxInputTokens - (
+ lastCheck = options.attachments.length * options.attachmentTokenCost
+ ) < 0) { options.attachments.pop(); }
+ maxInputTokens -= lastCheck;
+ // prompt
+ prompt = trimText(prompt, { ...options, limit: maxInputTokens });
+ maxInputTokens -= countTokens(prompt);
+ // history
+ options.messages = options.messages ?? [];
+ while (maxInputTokens - (lastCheck = countTokens(options.messages.map(
+ x => ({ request: x.request, response: x.response })
+ ))) < 0) { options.messages.shift(); }
+ // return
+ return {
+ systemPrompt: options.systemPrompt, prompt, messages: options.messages,
+ attachments: options.attachments, toolsResult: options.toolsResult,
  };
  };
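
trimPrompt is now a synchronous budgeter that spends maxInputTokens in a fixed order: system prompt, then tools results (dropped wholesale if over budget), then attachments (popped at attachmentTokenCost each), then the prompt itself via trimText, and finally history messages shifted from the oldest end. A rough worked example, assuming a 100,000-token budget, a system prompt costing about 1,200 tokens, and the default 10,000-token attachment cost:

    // budget 100000
    // - system prompt (~1200)                 -> 98800 left
    // - no tools results                      -> 98800 left
    // - 3 attachments * 10000 = 30000         -> 68800 left
    // - prompt trimmed to <= 68800, uses 500  -> 68300 left
    // - oldest history shifted until the remaining pairs fit in 68300
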
 
@@ -1387,14 +1456,14 @@ const analyzeSessions = async (sessionIds, options) => {
  + 'conversation data that needs to be organized: \n\n');
  const getInput = () =>
  `${pmt}\`\`\`JSON\n${JSON.stringify(sses)}\n\`\`\``;
- await trimPrompt(getInput, () => {
+ while (countTokens(getInput()) > ai.model.maxInputTokens) {
  if (!Object.values(sses).sort((x, y) =>
  y.messages.length - x.messages.length)[0].messages.shift()) {
  delete sses[Object.keys(sses).map(x => [
  x, JSON.stringify(sses[x]).length,
  ]).sort((x, y) => y[1] - x[1])[0][0]];
  }
- }, await getChatPromptLimit({ aiId: ai.id, ...options, }));
+ }
  const aiResp = Object.keys(sses) ? (await prompt(getInput(), {
  aiId: ai.id, ...options || {}
  })) : {};
@@ -1403,20 +1472,17 @@ const analyzeSessions = async (sessionIds, options) => {
  return Array.isArray(sessionIds) ? resp : resp[sessionIds[0]];
  };

- const trimText = async (text, limit = Infinity) => {
+ const trimText = (text, options = {}) => {
  text = ensureString(text, { trim: true });
- let trimmed = false;
- let lastCheck = null;
- limit = Math.max(limit, 0);
- while ((lastCheck = await countTokens(
- buildTextWithEllipsis(text, trimmed), { fast: true }
- )) > limit) {
- text = text.split(' ').slice(
- 0, -Math.ceil((Math.abs(lastCheck - limit) / 10))
- ).join(' ').trimEnd();
+ const limit = ensureInt(options.limit || MAX_TOKENS, { min: 0, max: MAX_TOKENS });
+ let [trimmed, lastCheck] = [false, null];
+ while ((lastCheck = countTokens(text + (trimmed ? ELLIPSIS : ''))) > limit) {
+ text = options.trimBeginning ? trimBeginning(text.slice(1))
+ : trimTailing(text.slice(0, -1));
  trimmed = true;
  }
- return buildTextWithEllipsis(text, trimmed);
+ return (trimmed && options.trimBeginning ? ELLIPSIS : '')
+ + text + (trimmed && !options.trimBeginning ? ELLIPSIS : '');
  };

  export default init;