utilitas 2000.3.26 → 2000.3.28

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
package/index.mjs CHANGED
@@ -15,7 +15,6 @@ import * as email from './lib/email.mjs';
  import * as embedding from './lib/embedding.mjs';
  import * as encryption from './lib/encryption.mjs';
  import * as event from './lib/event.mjs';
- import * as gen from './lib/gen.mjs';
  import * as media from './lib/media.mjs';
  import * as memory from './lib/memory.mjs';
  import * as network from './lib/network.mjs';
@@ -40,8 +39,8 @@ export {
  fileType, math, uuid,
  // features
  alan, bee, bot, boxes, cache, callosum, color, dbio, email, embedding,
- encryption, event, gen, manifest, media, memory, network, sentinel, shell,
- sms, speech, ssl, storage, tape, uoid, utilitas, vision, web
+ encryption, event, manifest, media, memory, network, sentinel, shell, sms,
+ speech, ssl, storage, tape, uoid, utilitas, vision, web
  };

  if (utilitas.inBrowser() && !globalThis.utilitas) {
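This release drops the `gen` module from both the root imports and the export list above, so the `gen` named export disappears from the package surface. A minimal consumer-side sketch of the break (the error text is standard Node.js ESM wording, shown for illustration):

    // utilitas 2000.3.26 and earlier:
    import { gen } from 'utilitas'; // worked
    // utilitas 2000.3.28:
    // SyntaxError: The requested module 'utilitas' does not provide an
    // export named 'gen'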
package/lib/alan.mjs CHANGED
@@ -5,17 +5,18 @@ import { packPcmToWav } from './media.mjs';
  import { v4 as uuidv4 } from 'uuid';

  import {
- BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
- MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
- MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
- MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
- MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, decodeBase64DataURL,
+ STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
+ MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
+ MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
+ MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
+ MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
+ getTempPath, decodeBase64DataURL,
  } from './storage.mjs';

  import {
- log as _log, renderText as _renderText, base64Encode, ensureArray,
- ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
- parseJson, throwError
+ log as _log, renderText as _renderText, ensureArray, ensureString, extract,
+ ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
+ tryUntil, timeout, mergeAtoB,
  } from './utilitas.mjs';

  const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -42,10 +43,12 @@ You may be provided with some tools(functions) to help you gather information an
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;

- const _NEED = ['js-tiktoken', 'OpenAI'];
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
+
+ const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];

  const [
- OPENAI, GEMINI, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
+ OPENAI, GOOGLE, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
  WAV, ATTACHMENTS, OPENAI_VOICE, GPT_REASONING_EFFORT, THINK, THINK_STR,
  THINK_END, TOOLS_STR, TOOLS_END, TOOLS, TEXT, OK, FUNC, GPT_51,
  GPT_51_CODEX, GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, v8k, ais,
@@ -54,9 +57,11 @@ const [
  hour, gb, trimTailing, trimBeginning, GEMINI_30_PRO_IMAGE, IMAGE, JINA,
  JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
  OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
- GEMINI_30_PRO, GEMINI_25_FLASH,
+ GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
  ] = [
- 'OpenAI', 'Gemini', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
+ 'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
  'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
  'medium', 'think', '<think>', '</think>', '<tools>', '</tools>',
  'tools', 'text', 'OK', 'function', 'gpt-5.1', 'gpt-5.1-codex',
@@ -70,7 +75,11 @@ const [
  'deepseek-ai/DeepSeek-V3.2-exp', 768 * 768,
  'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
  'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
- 'gemini-2.5-flash-preview-09-2025',
+ 'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
+ 'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
+ 'Error generating content.', 'gemini-2.5-flash-preview-tts',
+ 'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
+ 'Invalid audio data.', 'ogg',
  ];

  const [tool, messages, text]
@@ -89,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
- const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
- const OPENAI_RULES = {
- source: S_OPENAI, icon: '⚛️',
- contextWindow: kT(400), maxOutputTokens: k(128),
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
- supportedDocTypes: [MIME_PDF],
- supportedAudioTypes: [MIME_WAV],
- // audio: 'gpt-4o-audio-preview',
- json: true, tools: true, vision: true,
- reasoning: true, defaultProvider: OPENROUTER,
- };

  const GEMINI_RULES = {
  source: S_GOOGLE, icon: '♊️',
@@ -109,15 +105,24 @@ const GEMINI_RULES = {
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
  maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
- maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
+ maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
+ reasoning: true, supportedMimeTypes: [
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
- MIME_WAV, MIME_WEBM, MIME_TGPP,
- ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
- // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
- // gemini-2.5-flash-preview-native-audio-dialog
- defaultProvider: OPENROUTER,
+ MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
+ ], defaultProvider: OPENROUTER,
+ };
+
+ const OPENAI_RULES = {
+ source: S_OPENAI, icon: '⚛️',
+ contextWindow: kT(400), maxOutputTokens: k(128),
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
+ maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
+ json: true, tools: true, vision: true, hearing: true, reasoning: true,
+ supportedMimeTypes: [
+ MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
+ ], defaultProvider: OPENROUTER,
  };

  const DEEPSEEK_32_RULES = {
@@ -132,8 +137,7 @@ const MODELS = {
  // fast and balanced models
  [GEMINI_25_FLASH]: {
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- fast: true, reasoning: true, tools: true,
- json: false, // issue with json output via OpenRouter
+ fast: true, json: false, // issue with json output via OpenRouter
  // https://gemini.google.com/app/c680748b3307790b
  },
  // strong and fast
@@ -141,25 +145,27 @@
  // stronger but slow
  [GEMINI_30_PRO]: {
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- reasoning: true, tools: true,
  },
- // models with unique capabilities
+ // models with generation capabilities
  [GEMINI_30_PRO_IMAGE]: {
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
- contextWindow: k(64), maxOutputTokens: k(32),
- fast: true, image: true,
+ contextWindow: k(64), maxOutputTokens: k(32), image: true,
  },
- [GPT_51_CODEX]: { ...OPENAI_RULES },
- [GPT_5_IMAGE]: { ...OPENAI_RULES, image: true },
- [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
- icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
- maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
- reasoning: true, json: true, vision: true,
- deepsearch: true, defaultProvider: JINA,
+ [IMAGEN_4_ULTRA]: {
+ source: S_GOOGLE, maxInputTokens: 480,
+ image: true, defaultProvider: GOOGLE,
  },
- [DEEPSEEK_32]: DEEPSEEK_32_RULES,
- [SF_DEEPSEEK_32]: { ...DEEPSEEK_32_RULES, defaultProvider: SILICONFLOW },
+ [VEO_31]: {
+ source: S_GOOGLE, maxInputTokens: 1024,
+ imageCostTokens: 0, maxImagePerPrompt: 1,
+ maxImageSize: Infinity, vision: true, video: true,
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
+ },
+ [GPT_5_IMAGE]: {
+ ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
+ },
+ // models with code capabilities
+ [GPT_51_CODEX]: { ...OPENAI_RULES },
  [CLOUD_OPUS_45]: {
  source: S_ANTHROPIC, icon: '✳️',
  contextWindow: kT(200), maxOutputTokens: kT(64),
@@ -170,6 +176,35 @@ const MODELS = {
  json: true, reasoning: true, tools: true, vision: true,
  defaultProvider: OPENROUTER,
  },
+ // tts/stt models
+ [GEMINI_25_FLASH_TTS]: {
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
+ audio: true, fast: true, defaultProvider: GOOGLE,
+ },
+ [GEMINI_25_PRO_TTS]: {
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
+ audio: true, defaultProvider: GOOGLE,
+ },
+ [GPT_4O_MIMI_TTS]: {
+ source: S_OPENAI, maxInputTokens: kT(2), func: 'generateAudio',
+ audio: true, fast: true, defaultProvider: OPENAI,
+ },
+ [GPT_4O_TRANSCRIBE]: {
+ source: S_OPENAI, maxInputTokens: 0,
+ func: 'transcribeAudio', hearing: true, fast: true,
+ defaultProvider: OPENAI,
+ },
+ // models with deepsearch capabilities
+ [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
+ icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
+ maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
+ reasoning: true, json: true, vision: true,
+ deepsearch: true, defaultProvider: JINA,
+ },
+ // best Chinese models
+ [DEEPSEEK_32]: DEEPSEEK_32_RULES,
+ [SF_DEEPSEEK_32]: { ...DEEPSEEK_32_RULES, defaultProvider: SILICONFLOW },
  // best local model
  [GEMMA_3_27B]: {
  icon: '❇️', contextWindow: kT(128), maxOutputTokens: k(8),
@@ -249,13 +284,14 @@ const DEFAULT_MODELS = {
  };

  const PROVIDER_ICONS = {
- [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GEMINI]: '♊️',
+ [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GOOGLE]: '♊️',
  [OLLAMA]: '🦙', [ANTHROPIC]: '✳️', [SILICONFLOW]: '🧬',
  };

  const FEATURE_ICONS = {
- audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', image: '🎨',
- json: '📊', reasoning: '🧠', tools: '🧰', vision: '👁️',
+ audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', hearing: '👂',
+ image: '🎨', json: '📊', reasoning: '🧠', tools: '🧰', video: '🎬',
+ vision: '👁️',
  };

  const tokenRatioByWords = Math.min(
@@ -273,7 +309,7 @@ let tokeniser, _tools;

  const unifyProvider = provider => {
  assert(provider = (provider || '').trim(), 'AI provider is required.');
- for (let type of [OPENROUTER, JINA, OLLAMA, SILICONFLOW]) {
+ for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
  if (insensitiveCompare(provider, type)) { return type; }
  }
  throwError(`Invalid AI provider: ${provider}.`);
@@ -380,6 +416,11 @@ const setupAi = ai => {
  });
  };

+ const OpenAI = async opts => {
+ const lib = await libOpenAi(opts);
+ return { toFile: lib.toFile, client: new (lib).OpenAI(opts) };
+ };
+
  const init = async (options = {}) => {
  if (options?.debug) {
  (await need('node:util')).inspect.defaultOptions.depth = null;
@@ -406,14 +447,34 @@ const init = async (options = {}) => {
  `Model name or description is required for provider: ${provider}.`);
  _tools || (_tools = await packTools());
  switch (provider) {
+ case GOOGLE:
+ assertApiKey(provider, options);
+ const { GoogleGenAI } = await need('@google/genai');
+ var client = new GoogleGenAI({ vertexai: false, ...options });
+ for (let model of models) {
+ setupAi({
+ provider, model, client, prompt: promptGoogle, priority,
+ });
+ }
+ break;
+ case OPENAI:
+ assertApiKey(provider, options);
+ var { client, toFile } = await OpenAI({ ...options });
+ for (let model of models) {
+ setupAi({
+ provider, model, client, toFile,
+ prompt: promptOpenAI, priority,
+ });
+ }
+ break;
  case JINA:
  assertApiKey(provider, options);
- var client = await OpenAI({
+ var { client } = await OpenAI({
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  }
  break;
@@ -426,7 +487,7 @@ const init = async (options = {}) => {
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  ignoreErrFunc(async () => {
  phLog(await (await fetch(`${baseURL}completions`, {
@@ -444,17 +505,19 @@
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  }
  break;
  default:
  assertApiKey(provider, options);
- var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
+ var { client } = await OpenAI({
+ baseURL: OPENROUTER_API, ...options || {},
+ });
  for (let model of models) {
  setupAi({
  provider: OPENROUTER || provider, model, client,
- prompt: promptOpenAI, priority,
+ prompt: promptOpenRouter, priority,
  });
  }
  }
@@ -588,7 +651,9 @@ const listOpenAIModels = async (aiId, options) => {
  };

  const streamResp = async (resp, options) => {
- const msg = await packResp(resp, { ...options, processing: true });
+ const msg = options?.noPack ? resp : await packResp(
+ resp, { ...options, processing: true }
+ );
  return options?.stream
  && (msg?.text || msg?.audio?.length || msg?.images?.length)
  && await ignoreErrFunc(async () => await options.stream(msg), LOG);
@@ -606,13 +671,13 @@ const packResp = async (resp, options) => {
  if (options?.raw) { return resp; }
  let [
  txt, audio, images, annotations, simpleText, annotationsMarkdown, end,
- json, audioMimeType, catched,
+ json, audioMimeType,
  ] = [
  resp.text || '', // ChatGPT / Claude / Gemini / Ollama
  resp?.audio?.data, // ChatGPT audio mode
  resp?.images || [], // Gemini images via Openrouter
  resp?.references, // Gemini references
- '', '', '', null, MIME_PCM16, new Set(),
+ '', '', '', null, MIME_PCM16,
  ];
  simpleText = txt;
  while ((end = getInfoEnd(simpleText))) {
@@ -698,18 +763,23 @@ const packResp = async (resp, options) => {
  ...annotationsMarkdown ? { annotationsMarkdown } : {},
  ...audio ? { audio } : {}, ...images?.length ? { images } : {},
  processing: !!options?.processing,
- model: [
+ model: packModelLabel([
  options.provider, options?.router?.provider,
  options?.router?.model || options?.model,
- ].join('/').split('/').map(x => {
- const key = ensureString(x, { case: 'UP' });
- if (catched.has(key)) { return null; }
- catched.add(key);
- return x;
- }).filter(x => x).join('/'),
+ ]),
  };
  };

+ const packModelLabel = (model_reference) => {
+ const catched = new Set();
+ return model_reference.join('/').split('/').map(x => {
+ const key = ensureString(x, { case: 'UP' });
+ if (catched.has(key)) { return null; }
+ catched.add(key);
+ return x;
+ }).filter(x => x).join('/');
+ };
+
  const buildPrompts = async (model, input, options = {}) => {
  assert(!(
  options.jsonMode && !model?.json
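The dedup logic deleted from `packResp` above now lives in the new `packModelLabel` helper, which joins the provider/model segments with '/', re-splits them, and drops case-insensitive duplicate segments while keeping the first spelling. A small sketch of the expected behavior (the input segments are illustrative, not taken from this diff):

    // e.g. provider 'OpenRouter', router provider 'google',
    // routed model 'google/gemini-3-pro-preview':
    packModelLabel(['OpenRouter', 'google', 'google/gemini-3-pro-preview']);
    // → 'OpenRouter/google/gemini-3-pro-preview' (duplicate 'google' dropped)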
@@ -720,9 +790,23 @@ const buildPrompts = async (model, input, options = {}) => {
  let [history, content, prompt, _model, _assistant, _history]
  = [null, input, null, { role: MODEL }, { role: assistant }, null];
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
- options.attachments = (
+ options.attachments = (await Promise.all((
  options.attachments?.length ? options.attachments : []
- ).filter(x => [
+ ).map(async x => {
+ if (String.isString(x)) {
+ var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
+ return {
+ url: convResp.content,
+ mime_type: convResp.mime,
+ }
+ } else if (Buffer.isBuffer(x)) {
+ var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
+ return {
+ url: convResp.content,
+ mime_type: convResp.mime,
+ }
+ } else if (Object.isObject(x)) { return x; } else { return null; }
+ }))).filter(x => x && [
  ...model?.supportedMimeTypes,
  ...model?.supportedDocTypes,
  ...model?.supportedAudioTypes,
@@ -819,7 +903,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
  `⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
  ] : []].map(x => x.trim()).join('\n\n');

- const promptOpenAI = async (aiId, content, options = {}) => {
+ const promptOpenRouter = async (aiId, content, options = {}) => {
  let { provider, client, model } = await getAi(aiId);
  let [
  result, resultAudio, resultImages, resultReasoning, event, resultTools,
@@ -847,6 +931,18 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  x => x.function.name === 'searchWeb'
  ) && !options.jsonMode ? ONLINE : '';
  const targetModel = `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}${ext}`;
+ if (provider === OPENAI) {
+ // need more debug, currently openrouter is priority
+ packedTools.push(...[
+ // https://platform.openai.com/docs/guides/tools?tool-type=web-search
+ { type: 'web_search', },
+ // https://platform.openai.com/docs/guides/tools-image-generation?lang=javascript
+ // https://platform.openai.com/docs/api-reference/responses/create#responses-create-tools
+ { type: 'image_generation', input_fidelity: 'high', partial_images: 3, quality: 'high', size: '1536x1024' },
+ // https://platform.openai.com/docs/guides/tools-code-interpreter
+ { type: 'code_interpreter', container: { type: 'auto', memory_limit: '8g' } },
+ ]);
+ }
  if (source === S_GOOGLE) {
  packedTools.push(...[
  { googleSearch: {} }, { codeExecution: {} }, { urlContext: {} },
@@ -958,7 +1054,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  = await handleToolsCall(event, { ...options, result });
  if (toolsResult.length
  && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
- return promptOpenAI(aiId, content, {
+ return promptOpenRouter(aiId, content, {
  ...options, toolsResult, result: toolsResponse,
  });
  }
@@ -966,6 +1062,181 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  return await packResp(event, options);
  };

+ const promptGoogle = async (aiId, prompt, options = {}) => {
+ let { provider, client, model } = await getAi(aiId);
+ const target_model = options?.model || model.name;
+ const M = MODELS[target_model];
+ prompt = ensureString(prompt, { trim: true });
+ assert(prompt.length, 'Prompt is required.');
+ M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
+ assert(await countTokens(prompt, { fast: true })
+ <= M.maxInputTokens,
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
+ );
+ if (M?.image) {
+ var resp = await client.models.generateImages({
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
+ numberOfImages: options?.n || 4, sampleImageSize: '2K',
+ includeRaiReason: true,
+ // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
+ aspectRatio: '16:9', personGeneration: 'allow_adult',
+ }),
+ });
+ var generated = resp?.generatedImages;
+ assert(!resp?.error && generated?.filter(
+ x => !x.raiFilteredReason
+ ).length, resp?.error?.message || generated?.find(
+ x => x.raiFilteredReason
+ )?.raiFilteredReason || ERROR_GENERATING);
+ if (!options?.raw) {
+ resp = {
+ text: '', images: await Promise.all((
+ resp?.generatedImages || []
+ ).map(async x => ({
+ data: await convert(x.image.imageBytes, {
+ input: BASE64, suffix: 'png', ...options || {}
+ }), mimeType: x.image.mimeType,
+ }))), model: packModelLabel([provider, M.source, M.name]),
+ }
+ }
+ } else if (M?.video) {
+ var resp = await client.models.generateVideos({
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
+ aspectRatio: '16:9', numberOfVideos: 1,
+ // personGeneration: 'allow_adult',
+ enablePromptRewriting: true, addWatermark: false,
+ includeRaiReason: true,
+ }),
+ });
+ assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
+ if (options?.generateRaw) { return resp; }
+ await tryUntil(async () => {
+ resp = await client.operations.getVideosOperation({
+ operation: resp,
+ });
+ assert(
+ resp?.done,
+ `Waiting for Google video generation: ${resp.name}`,
+ );
+ }, { maxTry: 60 * 10, log });
+ assert(!resp?.error && resp?.response?.generatedVideos?.filter(
+ x => !x.raiFilteredReason
+ ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
+ x => x.raiFilteredReason
+ )?.raiFilteredReason || ERROR_GENERATING);
+ if (options?.videoRaw) {
+ resp = resp?.response?.generatedVideos;
+ } else if (!options?.videoRaw) {
+ resp = {
+ text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
+ x => x?.video?.uri
+ ).map(async x => {
+ const downloadPath = `${getTempPath({
+ seed: x?.video?.uri
+ })}.mp4`;
+ // @todo: fix this
+ // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
+ await client.files.download({ file: x, downloadPath });
+ await timeout(1000 * 10); // hack to wait for file to be downloaded
+ return {
+ data: await convert(downloadPath, {
+ input: FILE, suffix: 'mp4', ...options || {}
+ }), mimeType: MIME_MP4, jobId: resp.name,
+ };
+ })), model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else if (M?.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
+ var resp = await client.models.generateContent({
+ model: M.name, contents: prompt,
+ config: mergeAtoB(options?.config, {
+ responseModalities: ['AUDIO'],
+ speechConfig: {
+ voiceConfig: {
+ prebuiltVoiceConfig: {
+ voiceName: options?.voice || 'Zephyr',
+ },
+ },
+ },
+ }),
+ });
+ const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
+ assert(rawAudio, ERROR_GENERATING, 500);
+ if (!options?.raw) {
+ resp = {
+ text: '', audio: {
+ data: await packPcmToWav(rawAudio?.data, {
+ input: BASE64, suffix: wav, ...options || {},
+ }), mimeType: MIME_WAV,
+ }, model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else {
+ throwError('Unsupported model.');
+ }
+ // await streamResp(
+ // { ...resp, processing: true }, { ...options, noPack: true }
+ // );
+ return { ...resp, processing: false };
+ };
+
+ const promptOpenAI = async (aiId, prompt, options = {}) => {
+ let { provider, client, toFile, model } = await getAi(aiId);
+ const target_model = options?.model || model.name;
+ const M = MODELS[target_model];
+ prompt = ensureString(prompt, { trim: true });
+ if (M?.audio) {
+ assert(prompt.length, 'Prompt is required.');
+ const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
+ assert(await countTokens(
+ JSON.stringify([ins_prompt, prompt]), { fast: true }
+ ) <= M.maxInputTokens,
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
+ );
+ // https://platform.openai.com/docs/api-reference/audio/createSpeech
+ var resp = await client.audio.speech.create({
+ model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
+ instructions: ins_prompt, response_format: 'opus',
+ input: prompt, ...options?.params || {},
+ });
+ if (!options?.raw) {
+ resp = {
+ text: '', audio: {
+ data: await convert(Buffer.from(
+ await resp.arrayBuffer()
+ ), { suffix: OGG_EXT, ...options || {} }),
+ mimeType: MIME_OGG,
+ }, model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else if (M?.hearing) {
+ const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
+ assert(audio, 'Audio attachment is required.');
+ const input = ensureString(options?.input, { case: 'UP' });
+ const { content, cleanup } = await convert(audio, {
+ input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
+ suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
+ withCleanupFunc: true,
+ });
+ var resp = await client.audio.transcriptions.create({
+ file: await toFile(content), model: M.name,
+ response_format: 'text', ...options?.params || {},
+ });
+ await cleanup();
+ if (!options?.raw) {
+ resp = {
+ text: resp.trim(),
+ model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else {
+ throwError('Unsupported model.');
+ }
+ // await streamResp(
+ // { ...resp, processing: true }, { ...options, noPack: true }
+ // );
+ return { ...resp, processing: false };
+ };

  const initChat = async (options = {}) => {
  if (options.sessions) {
@@ -1078,7 +1349,6 @@ const distillFile = async (attachments, o) => {
  const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
  return {
  url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
- data: base64Encode(buf, true),
  mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
  };
  })();
@@ -1159,13 +1429,18 @@ export {
  DEFAULT_MODELS,
  FEATURE_ICONS,
  FUNCTION,
+ GEMINI_25_FLASH_TTS,
  GEMINI_25_FLASH,
+ GEMINI_25_PRO_TTS,
  GEMINI_30_PRO_IMAGE,
+ GPT_5_IMAGE,
  GPT_51,
+ IMAGEN_4_ULTRA,
  INSTRUCTIONS,
  MODELS,
  OPENAI_VOICE,
  RETRIEVAL,
+ VEO_31,
  analyzeSessions,
  countTokens,
  distillFile,
@@ -1178,7 +1453,7 @@ export {
  k,
  listOpenAIModels,
  prompt,
- promptOpenAI,
+ promptOpenRouter,
  resetSession,
  talk,
  trimPrompt,
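Net effect of the alan.mjs changes: the old `promptOpenAI` chat path is renamed `promptOpenRouter`, and two direct-provider paths are added: `promptGoogle` (Imagen image generation, Veo video, Gemini TTS via `@google/genai`) and a new `promptOpenAI` (gpt-4o-mini-tts speech, gpt-4o-transcribe transcription via the official OpenAI client). A minimal consumer sketch of reaching the new TTS path, assuming `init` and `prompt` accept the option names shown (this diff does not show their full signatures):

    import { alan } from 'utilitas';

    // Hypothetical wiring based on the GOOGLE case added to `init` above;
    // the `provider`/`apiKey`/`model` option names are assumptions.
    await alan.init({ provider: 'Google', apiKey: process.env.GOOGLE_API_KEY });
    const resp = await alan.prompt('Hello from utilitas.', {
        model: 'gemini-2.5-flash-preview-tts', // GEMINI_25_FLASH_TTS
    });
    // resp.audio.data would be the WAV payload packed by packPcmToWav
    // inside promptGoogle.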
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
  const manifest = {
  "name": "utilitas",
  "description": "Just another common utility for JavaScript.",
- "version": "2000.3.26",
+ "version": "2000.3.28",
  "private": false,
  "homepage": "https://github.com/Leask/utilitas",
  "main": "index.mjs",