utilitas 2000.3.27 → 2000.3.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -5,18 +5,18 @@ import { packPcmToWav } from './media.mjs';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
 
7
7
  import {
8
- FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
9
- MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
10
- MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
11
- MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
12
- MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, getTempPath,
13
- decodeBase64DataURL,
8
+ STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
9
+ MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
10
+ MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
11
+ MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
12
+ MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
13
+ getTempPath, decodeBase64DataURL,
14
14
  } from './storage.mjs';
15
15
 
16
16
  import {
17
- log as _log, renderText as _renderText, base64Encode, ensureArray,
18
- ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
19
- parseJson, throwError, tryUntil, timeout,
17
+ log as _log, renderText as _renderText, ensureArray, ensureString, extract,
18
+ ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
19
+ tryUntil, timeout, mergeAtoB,
20
20
  } from './utilitas.mjs';
21
21
 
22
22
  const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -43,6 +43,8 @@ You may be provided with some tools(functions) to help you gather information an
43
43
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
44
44
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;
45
45
 
46
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
47
+
46
48
  const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];
47
49
 
48
50
  const [
@@ -56,7 +58,8 @@ const [
56
58
  JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
57
59
  OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
58
60
  GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
59
- ERROR_GENERATING,
61
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
62
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
60
63
  ] = [
61
64
  'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
62
65
  'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
@@ -74,7 +77,9 @@ const [
74
77
  'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
75
78
  'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
76
79
  'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
77
- 'Error generating content.',
80
+ 'Error generating content.', 'gemini-2.5-flash-preview-tts',
81
+ 'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
82
+ 'Invalid audio data.', 'ogg',
78
83
  ];
79
84
 
80
85
  const [tool, messages, text]
@@ -93,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
93
98
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
94
99
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
95
100
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
96
- const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
97
- const OPENAI_RULES = {
98
- source: S_OPENAI, icon: '⚛️',
99
- contextWindow: kT(400), maxOutputTokens: k(128),
100
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
101
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
102
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
103
- supportedDocTypes: [MIME_PDF],
104
- supportedAudioTypes: [MIME_WAV],
105
- // audio: 'gpt-4o-audio-preview',
106
- json: true, tools: true, vision: true,
107
- reasoning: true, defaultProvider: OPENROUTER,
108
- };
109
101
 
110
102
  const GEMINI_RULES = {
111
103
  source: S_GOOGLE, icon: '♊️',
@@ -113,15 +105,24 @@ const GEMINI_RULES = {
113
105
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
114
106
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
115
107
  maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
116
- maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
108
+ maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
109
+ reasoning: true, supportedMimeTypes: [
117
110
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
118
111
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
119
112
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
120
- MIME_WAV, MIME_WEBM, MIME_TGPP,
121
- ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
122
- // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
123
- // gemini-2.5-flash-preview-native-audio-dialog
124
- defaultProvider: OPENROUTER,
113
+ MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
114
+ ], defaultProvider: OPENROUTER,
115
+ };
116
+
117
+ const OPENAI_RULES = {
118
+ source: S_OPENAI, icon: '⚛️',
119
+ contextWindow: kT(400), maxOutputTokens: k(128),
120
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
121
+ maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
122
+ json: true, tools: true, vision: true, hearing: true, reasoning: true,
123
+ supportedMimeTypes: [
124
+ MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
125
+ ], defaultProvider: OPENROUTER,
125
126
  };
126
127
 
127
128
  const DEEPSEEK_32_RULES = {
@@ -136,8 +137,7 @@ const MODELS = {
136
137
  // fast and balanced models
137
138
  [GEMINI_25_FLASH]: {
138
139
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
139
- fast: true, reasoning: true, tools: true,
140
- json: false, // issue with json output via OpenRouter
140
+ fast: true, json: false, // issue with json output via OpenRouter
141
141
  // https://gemini.google.com/app/c680748b3307790b
142
142
  },
143
143
  // strong and fast
@@ -145,23 +145,21 @@ const MODELS = {
145
145
  // stronger but slow
146
146
  [GEMINI_30_PRO]: {
147
147
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
148
- reasoning: true, tools: true,
149
148
  },
150
149
  // models with generation capabilities
151
150
  [GEMINI_30_PRO_IMAGE]: {
152
151
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
153
- contextWindow: k(64), maxOutputTokens: k(32),
154
- fast: true, image: true,
152
+ contextWindow: k(64), maxOutputTokens: k(32), image: true,
155
153
  },
156
154
  [IMAGEN_4_ULTRA]: {
157
- source: S_GOOGLE, icon: '🎨', maxInputTokens: 480,
155
+ source: S_GOOGLE, maxInputTokens: 480,
158
156
  image: true, defaultProvider: GOOGLE,
159
157
  },
160
158
  [VEO_31]: {
161
- source: S_GOOGLE, icon: '🎥', maxInputTokens: 1024,
159
+ source: S_GOOGLE, maxInputTokens: 1024,
162
160
  imageCostTokens: 0, maxImagePerPrompt: 1,
163
- maxImageSize: Infinity, supportedMimeTypes: [MIME_PNG, MIME_JPEG],
164
- vision: true, image: true, defaultProvider: GOOGLE,
161
+ maxImageSize: Infinity, vision: true, video: true,
162
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
165
163
  },
166
164
  [GPT_5_IMAGE]: {
167
165
  ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
@@ -178,6 +176,24 @@ const MODELS = {
178
176
  json: true, reasoning: true, tools: true, vision: true,
179
177
  defaultProvider: OPENROUTER,
180
178
  },
179
+ // tts/stt models
180
+ [GEMINI_25_FLASH_TTS]: {
181
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
182
+ audio: true, fast: true, defaultProvider: GOOGLE,
183
+ },
184
+ [GEMINI_25_PRO_TTS]: {
185
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
186
+ audio: true, defaultProvider: GOOGLE,
187
+ },
188
+ [GPT_4O_MIMI_TTS]: {
189
+ source: S_OPENAI, maxInputTokens: kT(2), func: 'generateAudio',
190
+ audio: true, fast: true, defaultProvider: OPENAI,
191
+ },
192
+ [GPT_4O_TRANSCRIBE]: {
193
+ source: S_OPENAI, maxInputTokens: 0,
194
+ func: 'transcribeAudio', hearing: true, fast: true,
195
+ defaultProvider: OPENAI,
196
+ },
181
197
  // models with deepsearch capabilities
182
198
  [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
183
199
  icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
@@ -273,8 +289,9 @@ const PROVIDER_ICONS = {
273
289
  };
274
290
 
275
291
  const FEATURE_ICONS = {
276
- audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', image: '🎨',
277
- json: '📊', reasoning: '🧠', tools: '🧰', vision: '👁️',
292
+ audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', hearing: '👂',
293
+ image: '🎨', json: '📊', reasoning: '🧠', tools: '🧰', video: '🎬',
294
+ vision: '👁️',
278
295
  };
279
296
 
280
297
  const tokenRatioByWords = Math.min(
@@ -292,7 +309,7 @@ let tokeniser, _tools;
292
309
 
293
310
  const unifyProvider = provider => {
294
311
  assert(provider = (provider || '').trim(), 'AI provider is required.');
295
- for (let type of [OPENROUTER, GOOGLE, JINA, OLLAMA, SILICONFLOW]) {
312
+ for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
296
313
  if (insensitiveCompare(provider, type)) { return type; }
297
314
  }
298
315
  throwError(`Invalid AI provider: ${provider}.`);
@@ -399,6 +416,11 @@ const setupAi = ai => {
399
416
  });
400
417
  };
401
418
 
419
+ const OpenAI = async opts => {
420
+ const lib = await libOpenAi(opts);
421
+ return { toFile: lib.toFile, client: new (lib).OpenAI(opts) };
422
+ };
423
+
402
424
  const init = async (options = {}) => {
403
425
  if (options?.debug) {
404
426
  (await need('node:util')).inspect.defaultOptions.depth = null;
@@ -435,14 +457,24 @@ const init = async (options = {}) => {
435
457
  });
436
458
  }
437
459
  break;
460
+ case OPENAI:
461
+ assertApiKey(provider, options);
462
+ var { client, toFile } = await OpenAI({ ...options });
463
+ for (let model of models) {
464
+ setupAi({
465
+ provider, model, client, toFile,
466
+ prompt: promptOpenAI, priority,
467
+ });
468
+ }
469
+ break;
438
470
  case JINA:
439
471
  assertApiKey(provider, options);
440
- var client = await OpenAI({
472
+ var { client } = await OpenAI({
441
473
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
442
474
  });
443
475
  for (let model of models) {
444
476
  setupAi({
445
- provider, model, client, prompt: promptOpenAI, priority,
477
+ provider, model, client, prompt: promptOpenRouter, priority,
446
478
  });
447
479
  }
448
480
  break;
@@ -455,7 +487,7 @@ const init = async (options = {}) => {
455
487
  });
456
488
  for (let model of models) {
457
489
  setupAi({
458
- provider, model, client, prompt: promptOpenAI, priority,
490
+ provider, model, client, prompt: promptOpenRouter, priority,
459
491
  });
460
492
  ignoreErrFunc(async () => {
461
493
  phLog(await (await fetch(`${baseURL}completions`, {
@@ -473,17 +505,19 @@ const init = async (options = {}) => {
473
505
  });
474
506
  for (let model of models) {
475
507
  setupAi({
476
- provider, model, client, prompt: promptOpenAI, priority,
508
+ provider, model, client, prompt: promptOpenRouter, priority,
477
509
  });
478
510
  }
479
511
  break;
480
512
  default:
481
513
  assertApiKey(provider, options);
482
- var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
514
+ var { client } = await OpenAI({
515
+ baseURL: OPENROUTER_API, ...options || {},
516
+ });
483
517
  for (let model of models) {
484
518
  setupAi({
485
519
  provider: OPENROUTER || provider, model, client,
486
- prompt: promptOpenAI, priority,
520
+ prompt: promptOpenRouter, priority,
487
521
  });
488
522
  }
489
523
  }
@@ -756,9 +790,23 @@ const buildPrompts = async (model, input, options = {}) => {
756
790
  let [history, content, prompt, _model, _assistant, _history]
757
791
  = [null, input, null, { role: MODEL }, { role: assistant }, null];
758
792
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
759
- options.attachments = (
793
+ options.attachments = (await Promise.all((
760
794
  options.attachments?.length ? options.attachments : []
761
- ).filter(x => [
795
+ ).map(async x => {
796
+ if (String.isString(x)) {
797
+ var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
798
+ return {
799
+ url: convResp.content,
800
+ mime_type: convResp.mime,
801
+ }
802
+ } else if (Buffer.isBuffer(x)) {
803
+ var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
804
+ return {
805
+ url: convResp.content,
806
+ mime_type: convResp.mime,
807
+ }
808
+ } else if (Object.isObject(x)) { return x; } else { return null; }
809
+ }))).filter(x => x && [
762
810
  ...model?.supportedMimeTypes,
763
811
  ...model?.supportedDocTypes,
764
812
  ...model?.supportedAudioTypes,
@@ -855,7 +903,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
855
903
  `⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
856
904
  ] : []].map(x => x.trim()).join('\n\n');
857
905
 
858
- const promptOpenAI = async (aiId, content, options = {}) => {
906
+ const promptOpenRouter = async (aiId, content, options = {}) => {
859
907
  let { provider, client, model } = await getAi(aiId);
860
908
  let [
861
909
  result, resultAudio, resultImages, resultReasoning, event, resultTools,
@@ -1006,7 +1054,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1006
1054
  = await handleToolsCall(event, { ...options, result });
1007
1055
  if (toolsResult.length
1008
1056
  && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
1009
- return promptOpenAI(aiId, content, {
1057
+ return promptOpenRouter(aiId, content, {
1010
1058
  ...options, toolsResult, result: toolsResponse,
1011
1059
  });
1012
1060
  }
@@ -1016,99 +1064,177 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1016
1064
 
1017
1065
  const promptGoogle = async (aiId, prompt, options = {}) => {
1018
1066
  let { provider, client, model } = await getAi(aiId);
1019
- const M = MODELS[model.name];
1067
+ const target_model = options?.model || model.name;
1068
+ const M = MODELS[target_model];
1020
1069
  prompt = ensureString(prompt, { trim: true });
1070
+ assert(prompt.length, 'Prompt is required.');
1071
+ M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
1021
1072
  assert(await countTokens(prompt, { fast: true })
1022
1073
  <= M.maxInputTokens,
1023
1074
  `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1024
1075
  );
1025
- switch (model?.name) {
1026
- case IMAGEN_4_ULTRA:
1027
- var resp = await client.models.generateImages({
1028
- model: model.name, prompt, config: {
1029
- numberOfImages: options?.n || 4, sampleImageSize: '2K',
1030
- includeRaiReason: true,
1031
- // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1032
- aspectRatio: '16:9', personGeneration: 'allow_adult',
1033
- ...options?.config || {},
1034
- },
1035
- });
1036
- var generated = resp?.generatedImages;
1037
- assert(!resp?.error && generated?.filter(
1038
- x => !x.raiFilteredReason
1039
- ).length, resp?.error?.message || generated?.find(
1040
- x => x.raiFilteredReason
1041
- )?.raiFilteredReason || ERROR_GENERATING);
1042
- if (!options?.raw) {
1043
- resp = {
1044
- text: '', images: await Promise.all((
1045
- resp?.generatedImages || []
1046
- ).map(async x => ({
1047
- data: await convert(x.image.imageBytes, {
1048
- input: BASE64, suffix: 'png', ...options || {}
1049
- }), mimeType: x.image.mimeType,
1050
- }))), model: packModelLabel([
1051
- provider, M.source, model.name,
1052
- ]),
1053
- }
1076
+ if (M?.image) {
1077
+ var resp = await client.models.generateImages({
1078
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1079
+ numberOfImages: options?.n || 4, sampleImageSize: '2K',
1080
+ includeRaiReason: true,
1081
+ // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1082
+ aspectRatio: '16:9', personGeneration: 'allow_adult',
1083
+ }),
1084
+ });
1085
+ var generated = resp?.generatedImages;
1086
+ assert(!resp?.error && generated?.filter(
1087
+ x => !x.raiFilteredReason
1088
+ ).length, resp?.error?.message || generated?.find(
1089
+ x => x.raiFilteredReason
1090
+ )?.raiFilteredReason || ERROR_GENERATING);
1091
+ if (!options?.raw) {
1092
+ resp = {
1093
+ text: '', images: await Promise.all((
1094
+ resp?.generatedImages || []
1095
+ ).map(async x => ({
1096
+ data: await convert(x.image.imageBytes, {
1097
+ input: BASE64, suffix: 'png', ...options || {}
1098
+ }), mimeType: x.image.mimeType,
1099
+ }))), model: packModelLabel([provider, M.source, M.name]),
1054
1100
  }
1055
- break;
1056
- case VEO_31:
1057
- var resp = await client.models.generateVideos({
1058
- model: model.name, prompt, config: {
1059
- aspectRatio: '16:9', numberOfVideos: 1,
1060
- // personGeneration: 'allow_adult',
1061
- enablePromptRewriting: true, addWatermark: false,
1062
- includeRaiReason: true, ...options?.config || {},
1063
- },
1101
+ }
1102
+ } else if (M?.video) {
1103
+ var resp = await client.models.generateVideos({
1104
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1105
+ aspectRatio: '16:9', numberOfVideos: 1,
1106
+ // personGeneration: 'allow_adult',
1107
+ enablePromptRewriting: true, addWatermark: false,
1108
+ includeRaiReason: true,
1109
+ }),
1110
+ });
1111
+ assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1112
+ if (options?.generateRaw) { return resp; }
1113
+ await tryUntil(async () => {
1114
+ resp = await client.operations.getVideosOperation({
1115
+ operation: resp,
1064
1116
  });
1065
- assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1066
- if (options?.generateRaw) { return resp; }
1067
- await tryUntil(async () => {
1068
- resp = await client.operations.getVideosOperation({
1069
- operation: resp,
1070
- });
1071
- assert(
1072
- resp?.done,
1073
- `Waiting for Google video generation: ${resp.name}`,
1074
- );
1075
- }, { maxTry: 60 * 10, log });
1076
- assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1077
- x => !x.raiFilteredReason
1078
- ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1079
- x => x.raiFilteredReason
1080
- )?.raiFilteredReason || ERROR_GENERATING);
1081
- if (options?.videoRaw) {
1082
- resp = resp?.response?.generatedVideos;
1083
- } else if (!options?.videoRaw) {
1084
- resp = {
1085
- text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1086
- x => x?.video?.uri
1087
- ).map(async x => {
1088
- const downloadPath = `${getTempPath({
1089
- seed: x?.video?.uri
1090
- })}.mp4`;
1091
- // @todo: fix this
1092
- // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1093
- await client.files.download({ file: x, downloadPath });
1094
- await timeout(1000 * 10); // hack to wait for file to be downloaded
1095
- return {
1096
- data: await convert(downloadPath, {
1097
- input: FILE, suffix: 'mp4', ...options || {}
1098
- }), mimeType: MIME_MP4, jobId: resp.name,
1099
- };
1100
- })), model: packModelLabel([
1101
- provider, M.source, model.name,
1102
- ]),
1103
- };
1104
- }
1105
- break;
1106
- default:
1107
- throw new Error('Unsupported model.');
1117
+ assert(
1118
+ resp?.done,
1119
+ `Waiting for Google video generation: ${resp.name}`,
1120
+ );
1121
+ }, { maxTry: 60 * 10, log });
1122
+ assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1123
+ x => !x.raiFilteredReason
1124
+ ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1125
+ x => x.raiFilteredReason
1126
+ )?.raiFilteredReason || ERROR_GENERATING);
1127
+ if (options?.videoRaw) {
1128
+ resp = resp?.response?.generatedVideos;
1129
+ } else if (!options?.videoRaw) {
1130
+ resp = {
1131
+ text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1132
+ x => x?.video?.uri
1133
+ ).map(async x => {
1134
+ const downloadPath = `${getTempPath({
1135
+ seed: x?.video?.uri
1136
+ })}.mp4`;
1137
+ // @todo: fix this
1138
+ // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1139
+ await client.files.download({ file: x, downloadPath });
1140
+ await timeout(1000 * 10); // hack to wait for file to be downloaded
1141
+ return {
1142
+ data: await convert(downloadPath, {
1143
+ input: FILE, suffix: 'mp4', ...options || {}
1144
+ }), mimeType: MIME_MP4, jobId: resp.name,
1145
+ };
1146
+ })), model: packModelLabel([provider, M.source, M.name]),
1147
+ };
1148
+ }
1149
+ } else if (M?.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
1150
+ var resp = await client.models.generateContent({
1151
+ model: M.name, contents: prompt,
1152
+ config: mergeAtoB(options?.config, {
1153
+ responseModalities: ['AUDIO'],
1154
+ speechConfig: {
1155
+ voiceConfig: {
1156
+ prebuiltVoiceConfig: {
1157
+ voiceName: options?.voice || 'Zephyr',
1158
+ },
1159
+ },
1160
+ },
1161
+ }),
1162
+ });
1163
+ const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
1164
+ assert(rawAudio, ERROR_GENERATING, 500);
1165
+ if (!options?.raw) {
1166
+ resp = {
1167
+ text: '', audio: {
1168
+ data: await packPcmToWav(rawAudio?.data, {
1169
+ input: BASE64, suffix: wav, ...options || {},
1170
+ }), mimeType: MIME_WAV,
1171
+ }, model: packModelLabel([provider, M.source, M.name]),
1172
+ };
1173
+ }
1174
+ } else {
1175
+ throwError('Unsupported model.');
1108
1176
  }
1109
- await streamResp(
1110
- { ...resp, processing: true }, { ...options, noPack: true }
1111
- );
1177
+ // await streamResp(
1178
+ // { ...resp, processing: true }, { ...options, noPack: true }
1179
+ // );
1180
+ return { ...resp, processing: false };
1181
+ };
1182
+
1183
+ const promptOpenAI = async (aiId, prompt, options = {}) => {
1184
+ let { provider, client, toFile, model } = await getAi(aiId);
1185
+ const target_model = options?.model || model.name;
1186
+ const M = MODELS[target_model];
1187
+ prompt = ensureString(prompt, { trim: true });
1188
+ if (M?.audio) {
1189
+ assert(prompt.length, 'Prompt is required.');
1190
+ const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
1191
+ assert(await countTokens(
1192
+ JSON.stringify([ins_prompt, prompt]), { fast: true }
1193
+ ) <= M.maxInputTokens,
1194
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1195
+ );
1196
+ // https://platform.openai.com/docs/api-reference/audio/createSpeech
1197
+ var resp = await client.audio.speech.create({
1198
+ model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
1199
+ instructions: ins_prompt, response_format: 'opus',
1200
+ input: prompt, ...options?.params || {},
1201
+ });
1202
+ if (!options?.raw) {
1203
+ resp = {
1204
+ text: '', audio: {
1205
+ data: await convert(Buffer.from(
1206
+ await resp.arrayBuffer()
1207
+ ), { suffix: OGG_EXT, ...options || {} }),
1208
+ mimeType: MIME_OGG,
1209
+ }, model: packModelLabel([provider, M.source, M.name]),
1210
+ };
1211
+ }
1212
+ } else if (M?.hearing) {
1213
+ const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
1214
+ assert(audio, 'Audio attachment is required.');
1215
+ const input = ensureString(options?.input, { case: 'UP' });
1216
+ const { content, cleanup } = await convert(audio, {
1217
+ input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
1218
+ suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
1219
+ withCleanupFunc: true,
1220
+ });
1221
+ var resp = await client.audio.transcriptions.create({
1222
+ file: await toFile(content), model: M.name,
1223
+ response_format: 'text', ...options?.params || {},
1224
+ });
1225
+ await cleanup();
1226
+ if (!options?.raw) {
1227
+ resp = {
1228
+ text: resp.trim(),
1229
+ model: packModelLabel([provider, M.source, M.name]),
1230
+ };
1231
+ }
1232
+ } else {
1233
+ throwError('Unsupported model.');
1234
+ }
1235
+ // await streamResp(
1236
+ // { ...resp, processing: true }, { ...options, noPack: true }
1237
+ // );
1112
1238
  return { ...resp, processing: false };
1113
1239
  };
1114
1240
 
@@ -1223,7 +1349,6 @@ const distillFile = async (attachments, o) => {
1223
1349
  const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
1224
1350
  return {
1225
1351
  url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
1226
- data: base64Encode(buf, true),
1227
1352
  mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
1228
1353
  };
1229
1354
  })();
@@ -1304,7 +1429,9 @@ export {
1304
1429
  DEFAULT_MODELS,
1305
1430
  FEATURE_ICONS,
1306
1431
  FUNCTION,
1432
+ GEMINI_25_FLASH_TTS,
1307
1433
  GEMINI_25_FLASH,
1434
+ GEMINI_25_PRO_TTS,
1308
1435
  GEMINI_30_PRO_IMAGE,
1309
1436
  GPT_5_IMAGE,
1310
1437
  GPT_51,
@@ -1326,7 +1453,7 @@ export {
1326
1453
  k,
1327
1454
  listOpenAIModels,
1328
1455
  prompt,
1329
- promptOpenAI,
1456
+ promptOpenRouter,
1330
1457
  resetSession,
1331
1458
  talk,
1332
1459
  trimPrompt,
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  const manifest = {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.27",
4
+ "version": "2000.3.28",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",