utilitas 2000.3.27 → 2000.3.29

This diff shows the changes between two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the content of each version as it appears in that registry.
package/lib/alan.mjs CHANGED
@@ -5,18 +5,18 @@ import { packPcmToWav } from './media.mjs';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
 
7
7
  import {
8
- FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
9
- MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
10
- MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
11
- MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
12
- MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, getTempPath,
13
- decodeBase64DataURL,
8
+ STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
9
+ MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
10
+ MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
11
+ MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
12
+ MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
13
+ getTempPath, decodeBase64DataURL,
14
14
  } from './storage.mjs';
15
15
 
16
16
  import {
17
- log as _log, renderText as _renderText, base64Encode, ensureArray,
18
- ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
19
- parseJson, throwError, tryUntil, timeout,
17
+ log as _log, renderText as _renderText, ensureArray, ensureString, extract,
18
+ ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
19
+ tryUntil, timeout, mergeAtoB,
20
20
  } from './utilitas.mjs';
21
21
 
22
22
  const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -43,6 +43,8 @@ You may be provided with some tools(functions) to help you gather information an
43
43
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
44
44
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;
45
45
 
46
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
47
+
46
48
  const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];
47
49
 
48
50
  const [
@@ -56,7 +58,8 @@ const [
56
58
  JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
57
59
  OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
58
60
  GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
59
- ERROR_GENERATING,
61
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
62
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
60
63
  ] = [
61
64
  'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
62
65
  'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
@@ -74,7 +77,9 @@ const [
74
77
  'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
75
78
  'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
76
79
  'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
77
- 'Error generating content.',
80
+ 'Error generating content.', 'gemini-2.5-flash-preview-tts',
81
+ 'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
82
+ 'Invalid audio data.', 'ogg',
78
83
  ];
79
84
 
80
85
  const [tool, messages, text]
@@ -93,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
93
98
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
94
99
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
95
100
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
96
- const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
97
- const OPENAI_RULES = {
98
- source: S_OPENAI, icon: '⚛️',
99
- contextWindow: kT(400), maxOutputTokens: k(128),
100
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
101
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
102
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
103
- supportedDocTypes: [MIME_PDF],
104
- supportedAudioTypes: [MIME_WAV],
105
- // audio: 'gpt-4o-audio-preview',
106
- json: true, tools: true, vision: true,
107
- reasoning: true, defaultProvider: OPENROUTER,
108
- };
109
101
 
110
102
  const GEMINI_RULES = {
111
103
  source: S_GOOGLE, icon: '♊️',
@@ -113,15 +105,24 @@ const GEMINI_RULES = {
113
105
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
114
106
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
115
107
  maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
116
- maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
108
+ maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
109
+ reasoning: true, supportedMimeTypes: [
117
110
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
118
111
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
119
112
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
120
- MIME_WAV, MIME_WEBM, MIME_TGPP,
121
- ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
122
- // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
123
- // gemini-2.5-flash-preview-native-audio-dialog
124
- defaultProvider: OPENROUTER,
113
+ MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
114
+ ], defaultProvider: OPENROUTER,
115
+ };
116
+
117
+ const OPENAI_RULES = {
118
+ source: S_OPENAI, icon: '⚛️',
119
+ contextWindow: kT(400), maxOutputTokens: k(128),
120
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
121
+ maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
122
+ json: true, tools: true, vision: true, hearing: true, reasoning: true,
123
+ supportedMimeTypes: [
124
+ MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
125
+ ], defaultProvider: OPENROUTER,
125
126
  };
126
127
 
127
128
  const DEEPSEEK_32_RULES = {
@@ -136,8 +137,7 @@ const MODELS = {
136
137
  // fast and balanced models
137
138
  [GEMINI_25_FLASH]: {
138
139
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
139
- fast: true, reasoning: true, tools: true,
140
- json: false, // issue with json output via OpenRouter
140
+ fast: true, json: false, // issue with json output via OpenRouter
141
141
  // https://gemini.google.com/app/c680748b3307790b
142
142
  },
143
143
  // strong and fast
@@ -145,23 +145,21 @@ const MODELS = {
145
145
  // stronger but slow
146
146
  [GEMINI_30_PRO]: {
147
147
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
148
- reasoning: true, tools: true,
149
148
  },
150
149
  // models with generation capabilities
151
150
  [GEMINI_30_PRO_IMAGE]: {
152
151
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
153
- contextWindow: k(64), maxOutputTokens: k(32),
154
- fast: true, image: true,
152
+ contextWindow: k(64), maxOutputTokens: k(32), image: true,
155
153
  },
156
154
  [IMAGEN_4_ULTRA]: {
157
- source: S_GOOGLE, icon: '🎨', maxInputTokens: 480,
155
+ source: S_GOOGLE, maxInputTokens: 480,
158
156
  image: true, defaultProvider: GOOGLE,
159
157
  },
160
158
  [VEO_31]: {
161
- source: S_GOOGLE, icon: '🎥', maxInputTokens: 1024,
159
+ source: S_GOOGLE, maxInputTokens: 1024,
162
160
  imageCostTokens: 0, maxImagePerPrompt: 1,
163
- maxImageSize: Infinity, supportedMimeTypes: [MIME_PNG, MIME_JPEG],
164
- vision: true, image: true, defaultProvider: GOOGLE,
161
+ maxImageSize: Infinity, vision: true, video: true,
162
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
165
163
  },
166
164
  [GPT_5_IMAGE]: {
167
165
  ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
@@ -174,10 +172,29 @@ const MODELS = {
174
172
  documentCostTokens: 3000 * 10, maxDocumentFile: m(32),
175
173
  maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
176
174
  maxImagePerPrompt: 100, maxFileSize: m(5), maxImageSize: 2000 * 2000,
177
- supportedMimeTypes: [MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF],
178
175
  json: true, reasoning: true, tools: true, vision: true,
176
+ supportedMimeTypes: [
177
+ MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF,
178
+ ],
179
179
  defaultProvider: OPENROUTER,
180
180
  },
181
+ // tts/stt models
182
+ [GEMINI_25_FLASH_TTS]: {
183
+ source: S_GOOGLE, maxInputTokens: kT(32), audio: true, fast: true,
184
+ hidden: true, defaultProvider: GOOGLE,
185
+ },
186
+ [GEMINI_25_PRO_TTS]: {
187
+ source: S_GOOGLE, maxInputTokens: kT(32), audio: true,
188
+ hidden: true, defaultProvider: GOOGLE,
189
+ },
190
+ [GPT_4O_MIMI_TTS]: {
191
+ source: S_OPENAI, maxInputTokens: kT(2), audio: true, fast: true,
192
+ hidden: true, defaultProvider: OPENAI,
193
+ },
194
+ [GPT_4O_TRANSCRIBE]: {
195
+ source: S_OPENAI, maxInputTokens: 0, hearing: true, fast: true,
196
+ hidden: true, defaultProvider: OPENAI,
197
+ },
181
198
  // models with deepsearch capabilities
182
199
  [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
183
200
  icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
@@ -273,8 +290,9 @@ const PROVIDER_ICONS = {
273
290
  };
274
291
 
275
292
  const FEATURE_ICONS = {
276
- audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', image: '🎨',
277
- json: '📊', reasoning: '🧠', tools: '🧰', vision: '👁️',
293
+ audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', hearing: '👂',
294
+ hidden: '🙈', image: '🎨', json: '📊', reasoning: '🧠', tools: '🧰',
295
+ video: '🎬', vision: '👁️',
278
296
  };
279
297
 
280
298
  const tokenRatioByWords = Math.min(
@@ -292,7 +310,7 @@ let tokeniser, _tools;
292
310
 
293
311
  const unifyProvider = provider => {
294
312
  assert(provider = (provider || '').trim(), 'AI provider is required.');
295
- for (let type of [OPENROUTER, GOOGLE, JINA, OLLAMA, SILICONFLOW]) {
313
+ for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
296
314
  if (insensitiveCompare(provider, type)) { return type; }
297
315
  }
298
316
  throwError(`Invalid AI provider: ${provider}.`);
@@ -399,6 +417,11 @@ const setupAi = ai => {
399
417
  });
400
418
  };
401
419
 
420
+ const OpenAI = async opts => {
421
+ const lib = await libOpenAi(opts);
422
+ return { toFile: lib.toFile, client: new (lib).OpenAI(opts) };
423
+ };
424
+
402
425
  const init = async (options = {}) => {
403
426
  if (options?.debug) {
404
427
  (await need('node:util')).inspect.defaultOptions.depth = null;
@@ -435,14 +458,24 @@ const init = async (options = {}) => {
435
458
  });
436
459
  }
437
460
  break;
461
+ case OPENAI:
462
+ assertApiKey(provider, options);
463
+ var { client, toFile } = await OpenAI({ ...options });
464
+ for (let model of models) {
465
+ setupAi({
466
+ provider, model, client, toFile,
467
+ prompt: promptOpenAI, priority,
468
+ });
469
+ }
470
+ break;
438
471
  case JINA:
439
472
  assertApiKey(provider, options);
440
- var client = await OpenAI({
473
+ var { client } = await OpenAI({
441
474
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
442
475
  });
443
476
  for (let model of models) {
444
477
  setupAi({
445
- provider, model, client, prompt: promptOpenAI, priority,
478
+ provider, model, client, prompt: promptOpenRouter, priority,
446
479
  });
447
480
  }
448
481
  break;
@@ -455,7 +488,7 @@ const init = async (options = {}) => {
455
488
  });
456
489
  for (let model of models) {
457
490
  setupAi({
458
- provider, model, client, prompt: promptOpenAI, priority,
491
+ provider, model, client, prompt: promptOpenRouter, priority,
459
492
  });
460
493
  ignoreErrFunc(async () => {
461
494
  phLog(await (await fetch(`${baseURL}completions`, {
@@ -473,17 +506,19 @@ const init = async (options = {}) => {
473
506
  });
474
507
  for (let model of models) {
475
508
  setupAi({
476
- provider, model, client, prompt: promptOpenAI, priority,
509
+ provider, model, client, prompt: promptOpenRouter, priority,
477
510
  });
478
511
  }
479
512
  break;
480
513
  default:
481
514
  assertApiKey(provider, options);
482
- var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
515
+ var { client } = await OpenAI({
516
+ baseURL: OPENROUTER_API, ...options || {},
517
+ });
483
518
  for (let model of models) {
484
519
  setupAi({
485
520
  provider: OPENROUTER || provider, model, client,
486
- prompt: promptOpenAI, priority,
521
+ prompt: promptOpenRouter, priority,
487
522
  });
488
523
  }
489
524
  }
@@ -492,12 +527,16 @@ const init = async (options = {}) => {
492
527
  };
493
528
 
494
529
  const packAi = (ais, options = {}) => {
495
- const res = options.basic ? ais.map(x => ({
530
+ let res = options.basic ? ais.map(x => ({
496
531
  id: x.id, name: x.name, features: x.features,
497
532
  initOrder: x.initOrder, priority: x.priority,
498
533
  provider: x.provider, model: x.model,
499
534
  })) : ais;
500
- return options.all ? res : res[0];
535
+ if (options.all && !Object.keys(options.select).length && !options.withHidden) {
536
+ res = res.filter(x => !x.model.hidden);
537
+ } else if (options.withHidden) { } else { res = res[0]; }
538
+ assert(res?.length || res?.id, 'AI not found.');
539
+ return res;
501
540
  };
502
541
 
503
542
  const getAi = async (id, options = {}) => {
@@ -507,26 +546,22 @@ const getAi = async (id, options = {}) => {
507
546
  const ai = ais.find(x => x.id === id);
508
547
  assert(ai, `AI not found: ${id}.`);
509
548
  return options?.client ? ai?.client : ai;
510
- } else if (options?.select) {
511
- const res = [];
512
- for (let x of ais) {
513
- let select = true;
514
- for (let i in options.select) {
515
- if (options.select[i] && i !== 'fast' && !x.model[i]) {
516
- select = false; break;
517
- }
549
+ }
550
+ const res = [];
551
+ for (let x of ais) {
552
+ let select = true;
553
+ for (let i in options.select) {
554
+ if (options.select[i] && i !== 'fast' && !x.model[i]) {
555
+ select = false; break;
518
556
  }
519
- select && (res.push(x));
520
557
  }
521
- const best = options.select?.fast ? res.filter(x => x.model.fast) : res;
522
- if (best.length) { return packAi(best, options); }
523
- assert(res.length, 'AI not found.');
524
- log(`Best match AI not found, fallbacked: ${JSON.stringify(options.select)}.`);
525
- return packAi(res, options);
558
+ select && (res.push(x));
526
559
  }
527
- const result = packAi(ais, options);
528
- assert(result?.length || result?.id, 'AI not found.');
529
- return result;
560
+ const best = options.select?.fast ? res.filter(x => x.model.fast) : res;
561
+ if (best.length) { return packAi(best, options); }
562
+ assert(res.length, 'AI not found.');
563
+ log(`Best match AI not found, fallbacked: ${JSON.stringify(options.select)}.`);
564
+ return packAi(res, options);
530
565
  };
531
566
 
532
567
  const countTokens = async (input, options) => {
@@ -756,9 +791,23 @@ const buildPrompts = async (model, input, options = {}) => {
756
791
  let [history, content, prompt, _model, _assistant, _history]
757
792
  = [null, input, null, { role: MODEL }, { role: assistant }, null];
758
793
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
759
- options.attachments = (
794
+ options.attachments = (await Promise.all((
760
795
  options.attachments?.length ? options.attachments : []
761
- ).filter(x => [
796
+ ).map(async x => {
797
+ if (String.isString(x)) {
798
+ var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
799
+ return {
800
+ url: convResp.content,
801
+ mime_type: convResp.mime,
802
+ }
803
+ } else if (Buffer.isBuffer(x)) {
804
+ var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
805
+ return {
806
+ url: convResp.content,
807
+ mime_type: convResp.mime,
808
+ }
809
+ } else if (Object.isObject(x)) { return x; } else { return null; }
810
+ }))).filter(x => x && [
762
811
  ...model?.supportedMimeTypes,
763
812
  ...model?.supportedDocTypes,
764
813
  ...model?.supportedAudioTypes,
@@ -855,7 +904,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
855
904
  `⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
856
905
  ] : []].map(x => x.trim()).join('\n\n');
857
906
 
858
- const promptOpenAI = async (aiId, content, options = {}) => {
907
+ const promptOpenRouter = async (aiId, content, options = {}) => {
859
908
  let { provider, client, model } = await getAi(aiId);
860
909
  let [
861
910
  result, resultAudio, resultImages, resultReasoning, event, resultTools,
@@ -1006,7 +1055,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1006
1055
  = await handleToolsCall(event, { ...options, result });
1007
1056
  if (toolsResult.length
1008
1057
  && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
1009
- return promptOpenAI(aiId, content, {
1058
+ return promptOpenRouter(aiId, content, {
1010
1059
  ...options, toolsResult, result: toolsResponse,
1011
1060
  });
1012
1061
  }
@@ -1016,99 +1065,177 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1016
1065
 
1017
1066
  const promptGoogle = async (aiId, prompt, options = {}) => {
1018
1067
  let { provider, client, model } = await getAi(aiId);
1019
- const M = MODELS[model.name];
1068
+ const target_model = options?.model || model.name;
1069
+ const M = MODELS[target_model];
1020
1070
  prompt = ensureString(prompt, { trim: true });
1071
+ assert(prompt.length, 'Prompt is required.');
1072
+ M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
1021
1073
  assert(await countTokens(prompt, { fast: true })
1022
1074
  <= M.maxInputTokens,
1023
1075
  `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1024
1076
  );
1025
- switch (model?.name) {
1026
- case IMAGEN_4_ULTRA:
1027
- var resp = await client.models.generateImages({
1028
- model: model.name, prompt, config: {
1029
- numberOfImages: options?.n || 4, sampleImageSize: '2K',
1030
- includeRaiReason: true,
1031
- // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1032
- aspectRatio: '16:9', personGeneration: 'allow_adult',
1033
- ...options?.config || {},
1034
- },
1035
- });
1036
- var generated = resp?.generatedImages;
1037
- assert(!resp?.error && generated?.filter(
1038
- x => !x.raiFilteredReason
1039
- ).length, resp?.error?.message || generated?.find(
1040
- x => x.raiFilteredReason
1041
- )?.raiFilteredReason || ERROR_GENERATING);
1042
- if (!options?.raw) {
1043
- resp = {
1044
- text: '', images: await Promise.all((
1045
- resp?.generatedImages || []
1046
- ).map(async x => ({
1047
- data: await convert(x.image.imageBytes, {
1048
- input: BASE64, suffix: 'png', ...options || {}
1049
- }), mimeType: x.image.mimeType,
1050
- }))), model: packModelLabel([
1051
- provider, M.source, model.name,
1052
- ]),
1053
- }
1077
+ if (M?.image) {
1078
+ var resp = await client.models.generateImages({
1079
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1080
+ numberOfImages: options?.n || 4, sampleImageSize: '2K',
1081
+ includeRaiReason: true,
1082
+ // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1083
+ aspectRatio: '16:9', personGeneration: 'allow_adult',
1084
+ }),
1085
+ });
1086
+ var generated = resp?.generatedImages;
1087
+ assert(!resp?.error && generated?.filter(
1088
+ x => !x.raiFilteredReason
1089
+ ).length, resp?.error?.message || generated?.find(
1090
+ x => x.raiFilteredReason
1091
+ )?.raiFilteredReason || ERROR_GENERATING);
1092
+ if (!options?.raw) {
1093
+ resp = {
1094
+ text: '', images: await Promise.all((
1095
+ resp?.generatedImages || []
1096
+ ).map(async x => ({
1097
+ data: await convert(x.image.imageBytes, {
1098
+ input: BASE64, suffix: 'png', ...options || {}
1099
+ }), mimeType: x.image.mimeType,
1100
+ }))), model: packModelLabel([provider, M.source, M.name]),
1054
1101
  }
1055
- break;
1056
- case VEO_31:
1057
- var resp = await client.models.generateVideos({
1058
- model: model.name, prompt, config: {
1059
- aspectRatio: '16:9', numberOfVideos: 1,
1060
- // personGeneration: 'allow_adult',
1061
- enablePromptRewriting: true, addWatermark: false,
1062
- includeRaiReason: true, ...options?.config || {},
1063
- },
1102
+ }
1103
+ } else if (M?.video) {
1104
+ var resp = await client.models.generateVideos({
1105
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1106
+ aspectRatio: '16:9', numberOfVideos: 1,
1107
+ // personGeneration: 'allow_adult',
1108
+ enablePromptRewriting: true, addWatermark: false,
1109
+ includeRaiReason: true,
1110
+ }),
1111
+ });
1112
+ assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1113
+ if (options?.generateRaw) { return resp; }
1114
+ await tryUntil(async () => {
1115
+ resp = await client.operations.getVideosOperation({
1116
+ operation: resp,
1064
1117
  });
1065
- assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1066
- if (options?.generateRaw) { return resp; }
1067
- await tryUntil(async () => {
1068
- resp = await client.operations.getVideosOperation({
1069
- operation: resp,
1070
- });
1071
- assert(
1072
- resp?.done,
1073
- `Waiting for Google video generation: ${resp.name}`,
1074
- );
1075
- }, { maxTry: 60 * 10, log });
1076
- assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1077
- x => !x.raiFilteredReason
1078
- ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1079
- x => x.raiFilteredReason
1080
- )?.raiFilteredReason || ERROR_GENERATING);
1081
- if (options?.videoRaw) {
1082
- resp = resp?.response?.generatedVideos;
1083
- } else if (!options?.videoRaw) {
1084
- resp = {
1085
- text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1086
- x => x?.video?.uri
1087
- ).map(async x => {
1088
- const downloadPath = `${getTempPath({
1089
- seed: x?.video?.uri
1090
- })}.mp4`;
1091
- // @todo: fix this
1092
- // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1093
- await client.files.download({ file: x, downloadPath });
1094
- await timeout(1000 * 10); // hack to wait for file to be downloaded
1095
- return {
1096
- data: await convert(downloadPath, {
1097
- input: FILE, suffix: 'mp4', ...options || {}
1098
- }), mimeType: MIME_MP4, jobId: resp.name,
1099
- };
1100
- })), model: packModelLabel([
1101
- provider, M.source, model.name,
1102
- ]),
1103
- };
1104
- }
1105
- break;
1106
- default:
1107
- throw new Error('Unsupported model.');
1118
+ assert(
1119
+ resp?.done,
1120
+ `Waiting for Google video generation: ${resp.name}`,
1121
+ );
1122
+ }, { maxTry: 60 * 10, log });
1123
+ assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1124
+ x => !x.raiFilteredReason
1125
+ ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1126
+ x => x.raiFilteredReason
1127
+ )?.raiFilteredReason || ERROR_GENERATING);
1128
+ if (options?.videoRaw) {
1129
+ resp = resp?.response?.generatedVideos;
1130
+ } else if (!options?.videoRaw) {
1131
+ resp = {
1132
+ text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1133
+ x => x?.video?.uri
1134
+ ).map(async x => {
1135
+ const downloadPath = `${getTempPath({
1136
+ seed: x?.video?.uri
1137
+ })}.mp4`;
1138
+ // @todo: fix this
1139
+ // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1140
+ await client.files.download({ file: x, downloadPath });
1141
+ await timeout(1000 * 10); // hack to wait for file to be downloaded
1142
+ return {
1143
+ data: await convert(downloadPath, {
1144
+ input: FILE, suffix: 'mp4', ...options || {}
1145
+ }), mimeType: MIME_MP4, jobId: resp.name,
1146
+ };
1147
+ })), model: packModelLabel([provider, M.source, M.name]),
1148
+ };
1149
+ }
1150
+ } else if (M?.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
1151
+ var resp = await client.models.generateContent({
1152
+ model: M.name, contents: prompt,
1153
+ config: mergeAtoB(options?.config, {
1154
+ responseModalities: ['AUDIO'],
1155
+ speechConfig: {
1156
+ voiceConfig: {
1157
+ prebuiltVoiceConfig: {
1158
+ voiceName: options?.voice || 'Zephyr',
1159
+ },
1160
+ },
1161
+ },
1162
+ }),
1163
+ });
1164
+ const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
1165
+ assert(rawAudio, ERROR_GENERATING, 500);
1166
+ if (!options?.raw) {
1167
+ resp = {
1168
+ text: '', audio: {
1169
+ data: await packPcmToWav(rawAudio?.data, {
1170
+ input: BASE64, suffix: wav, ...options || {},
1171
+ }), mimeType: MIME_WAV,
1172
+ }, model: packModelLabel([provider, M.source, M.name]),
1173
+ };
1174
+ }
1175
+ } else {
1176
+ throwError('Unsupported model.');
1108
1177
  }
1109
- await streamResp(
1110
- { ...resp, processing: true }, { ...options, noPack: true }
1111
- );
1178
+ // await streamResp(
1179
+ // { ...resp, processing: true }, { ...options, noPack: true }
1180
+ // );
1181
+ return { ...resp, processing: false };
1182
+ };
1183
+
1184
+ const promptOpenAI = async (aiId, prompt, options = {}) => {
1185
+ let { provider, client, toFile, model } = await getAi(aiId);
1186
+ const target_model = options?.model || model.name;
1187
+ const M = MODELS[target_model];
1188
+ prompt = ensureString(prompt, { trim: true });
1189
+ if (M?.audio) {
1190
+ assert(prompt.length, 'Prompt is required.');
1191
+ const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
1192
+ assert(await countTokens(
1193
+ JSON.stringify([ins_prompt, prompt]), { fast: true }
1194
+ ) <= M.maxInputTokens,
1195
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1196
+ );
1197
+ // https://platform.openai.com/docs/api-reference/audio/createSpeech
1198
+ var resp = await client.audio.speech.create({
1199
+ model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
1200
+ instructions: ins_prompt, response_format: 'opus',
1201
+ input: prompt, ...options?.params || {},
1202
+ });
1203
+ if (!options?.raw) {
1204
+ resp = {
1205
+ text: '', audio: {
1206
+ data: await convert(Buffer.from(
1207
+ await resp.arrayBuffer()
1208
+ ), { suffix: OGG_EXT, ...options || {} }),
1209
+ mimeType: MIME_OGG,
1210
+ }, model: packModelLabel([provider, M.source, M.name]),
1211
+ };
1212
+ }
1213
+ } else if (M?.hearing) {
1214
+ const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
1215
+ assert(audio, 'Audio attachment is required.');
1216
+ const input = ensureString(options?.input, { case: 'UP' });
1217
+ const { content, cleanup } = await convert(audio, {
1218
+ input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
1219
+ suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
1220
+ withCleanupFunc: true,
1221
+ });
1222
+ var resp = await client.audio.transcriptions.create({
1223
+ file: await toFile(content), model: M.name,
1224
+ response_format: 'text', ...options?.params || {},
1225
+ });
1226
+ await cleanup();
1227
+ if (!options?.raw) {
1228
+ resp = {
1229
+ text: resp.trim(),
1230
+ model: packModelLabel([provider, M.source, M.name]),
1231
+ };
1232
+ }
1233
+ } else {
1234
+ throwError('Unsupported model.');
1235
+ }
1236
+ // await streamResp(
1237
+ // { ...resp, processing: true }, { ...options, noPack: true }
1238
+ // );
1112
1239
  return { ...resp, processing: false };
1113
1240
  };
1114
1241
 
@@ -1223,7 +1350,6 @@ const distillFile = async (attachments, o) => {
1223
1350
  const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
1224
1351
  return {
1225
1352
  url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
1226
- data: base64Encode(buf, true),
1227
1353
  mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
1228
1354
  };
1229
1355
  })();
@@ -1304,7 +1430,9 @@ export {
1304
1430
  DEFAULT_MODELS,
1305
1431
  FEATURE_ICONS,
1306
1432
  FUNCTION,
1433
+ GEMINI_25_FLASH_TTS,
1307
1434
  GEMINI_25_FLASH,
1435
+ GEMINI_25_PRO_TTS,
1308
1436
  GEMINI_30_PRO_IMAGE,
1309
1437
  GPT_5_IMAGE,
1310
1438
  GPT_51,
@@ -1326,7 +1454,7 @@ export {
1326
1454
  k,
1327
1455
  listOpenAIModels,
1328
1456
  prompt,
1329
- promptOpenAI,
1457
+ promptOpenRouter,
1330
1458
  resetSession,
1331
1459
  talk,
1332
1460
  trimPrompt,