utilitas 2000.3.27 → 2000.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -11
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/alan.mjs +266 -139
- package/lib/manifest.mjs +1 -1
- package/lib/speech.mjs +15 -139
- package/lib/storage.mjs +6 -4
- package/package.json +1 -1
package/lib/alan.mjs
CHANGED
|
@@ -5,18 +5,18 @@ import { packPcmToWav } from './media.mjs';
|
|
|
5
5
|
import { v4 as uuidv4 } from 'uuid';
|
|
6
6
|
|
|
7
7
|
import {
|
|
8
|
-
FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
|
|
9
|
-
MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
|
|
10
|
-
MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
|
|
11
|
-
MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
|
|
12
|
-
MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
|
|
13
|
-
decodeBase64DataURL,
|
|
8
|
+
STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
|
|
9
|
+
MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
|
|
10
|
+
MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
|
|
11
|
+
MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
|
|
12
|
+
MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
|
|
13
|
+
getTempPath, decodeBase64DataURL,
|
|
14
14
|
} from './storage.mjs';
|
|
15
15
|
|
|
16
16
|
import {
|
|
17
|
-
log as _log, renderText as _renderText,
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
log as _log, renderText as _renderText, ensureArray, ensureString, extract,
|
|
18
|
+
ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
|
|
19
|
+
tryUntil, timeout, mergeAtoB,
|
|
20
20
|
} from './utilitas.mjs';
|
|
21
21
|
|
|
22
22
|
const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
|
|
@@ -43,6 +43,8 @@ You may be provided with some tools(functions) to help you gather information an
|
|
|
43
43
|
- When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
|
|
44
44
|
- Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;
|
|
45
45
|
|
|
46
|
+
const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
|
|
47
|
+
|
|
46
48
|
const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];
|
|
47
49
|
|
|
48
50
|
const [
|
|
@@ -56,7 +58,8 @@ const [
|
|
|
56
58
|
JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
|
|
57
59
|
OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
|
|
58
60
|
GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
|
|
59
|
-
ERROR_GENERATING,
|
|
61
|
+
ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
|
|
62
|
+
GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
|
|
60
63
|
] = [
|
|
61
64
|
'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
|
|
62
65
|
'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
|
|
@@ -74,7 +77,9 @@ const [
|
|
|
74
77
|
'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
|
|
75
78
|
'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
|
|
76
79
|
'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
|
|
77
|
-
'Error generating content.',
|
|
80
|
+
'Error generating content.', 'gemini-2.5-flash-preview-tts',
|
|
81
|
+
'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
|
|
82
|
+
'Invalid audio data.', 'ogg',
|
|
78
83
|
];
|
|
79
84
|
|
|
80
85
|
const [tool, messages, text]
|
|
@@ -93,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
|
|
|
93
98
|
const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
|
|
94
99
|
const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
|
|
95
100
|
const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
|
|
96
|
-
const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
|
|
97
|
-
const OPENAI_RULES = {
|
|
98
|
-
source: S_OPENAI, icon: '⚛️',
|
|
99
|
-
contextWindow: kT(400), maxOutputTokens: k(128),
|
|
100
|
-
imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
|
|
101
|
-
maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
|
|
102
|
-
supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
|
|
103
|
-
supportedDocTypes: [MIME_PDF],
|
|
104
|
-
supportedAudioTypes: [MIME_WAV],
|
|
105
|
-
// audio: 'gpt-4o-audio-preview',
|
|
106
|
-
json: true, tools: true, vision: true,
|
|
107
|
-
reasoning: true, defaultProvider: OPENROUTER,
|
|
108
|
-
};
|
|
109
101
|
|
|
110
102
|
const GEMINI_RULES = {
|
|
111
103
|
source: S_GOOGLE, icon: '♊️',
|
|
@@ -113,15 +105,24 @@ const GEMINI_RULES = {
|
|
|
113
105
|
imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
|
|
114
106
|
maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
|
|
115
107
|
maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
|
|
116
|
-
maxVideoPerPrompt: 10, vision: true,
|
|
108
|
+
maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
|
|
109
|
+
reasoning: true, supportedMimeTypes: [
|
|
117
110
|
MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
|
|
118
111
|
MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
|
|
119
112
|
MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
|
|
120
|
-
MIME_WAV, MIME_WEBM, MIME_TGPP,
|
|
121
|
-
],
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
113
|
+
MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
|
|
114
|
+
], defaultProvider: OPENROUTER,
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
// Shared capability/limit defaults spread into every OpenAI-sourced entry of
// MODELS. Individual models override fields after spreading this record.
const OPENAI_RULES = {
    source: S_OPENAI,
    icon: '⚛️',
    // Token limits.
    contextWindow: kT(400),
    maxOutputTokens: k(128),
    // Per-image token cost derived from the hi-res size and the tile size.
    imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
    // Attachment size limits.
    maxFileSize: m(50),
    maxImageSize: OPENAI_HI_RES_SIZE,
    // Capability flags.
    json: true,
    tools: true,
    vision: true,
    hearing: true,
    reasoning: true,
    // Images, documents and audio are declared in one list.
    supportedMimeTypes: [
        MIME_PNG,
        MIME_JPEG,
        MIME_GIF,
        MIME_WEBP,
        MIME_PDF,
        MIME_WAV,
    ],
    defaultProvider: OPENROUTER,
};
|
|
126
127
|
|
|
127
128
|
const DEEPSEEK_32_RULES = {
|
|
@@ -136,8 +137,7 @@ const MODELS = {
|
|
|
136
137
|
// fast and balanced models
|
|
137
138
|
[GEMINI_25_FLASH]: {
|
|
138
139
|
...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
|
|
139
|
-
fast: true,
|
|
140
|
-
json: false, // issue with json output via OpenRouter
|
|
140
|
+
fast: true, json: false, // issue with json output via OpenRouter
|
|
141
141
|
// https://gemini.google.com/app/c680748b3307790b
|
|
142
142
|
},
|
|
143
143
|
// strong and fast
|
|
@@ -145,23 +145,21 @@ const MODELS = {
|
|
|
145
145
|
// stronger but slow
|
|
146
146
|
[GEMINI_30_PRO]: {
|
|
147
147
|
...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
|
|
148
|
-
reasoning: true, tools: true,
|
|
149
148
|
},
|
|
150
149
|
// models with generation capabilities
|
|
151
150
|
[GEMINI_30_PRO_IMAGE]: {
|
|
152
151
|
...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
|
|
153
|
-
contextWindow: k(64), maxOutputTokens: k(32),
|
|
154
|
-
fast: true, image: true,
|
|
152
|
+
contextWindow: k(64), maxOutputTokens: k(32), image: true,
|
|
155
153
|
},
|
|
156
154
|
[IMAGEN_4_ULTRA]: {
|
|
157
|
-
source: S_GOOGLE,
|
|
155
|
+
source: S_GOOGLE, maxInputTokens: 480,
|
|
158
156
|
image: true, defaultProvider: GOOGLE,
|
|
159
157
|
},
|
|
160
158
|
[VEO_31]: {
|
|
161
|
-
source: S_GOOGLE,
|
|
159
|
+
source: S_GOOGLE, maxInputTokens: 1024,
|
|
162
160
|
imageCostTokens: 0, maxImagePerPrompt: 1,
|
|
163
|
-
maxImageSize: Infinity,
|
|
164
|
-
|
|
161
|
+
maxImageSize: Infinity, vision: true, video: true,
|
|
162
|
+
supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
|
|
165
163
|
},
|
|
166
164
|
[GPT_5_IMAGE]: {
|
|
167
165
|
...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
|
|
@@ -178,6 +176,24 @@ const MODELS = {
|
|
|
178
176
|
json: true, reasoning: true, tools: true, vision: true,
|
|
179
177
|
defaultProvider: OPENROUTER,
|
|
180
178
|
},
|
|
179
|
+
// tts/stt models
|
|
180
|
+
[GEMINI_25_FLASH_TTS]: {
|
|
181
|
+
source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
|
|
182
|
+
audio: true, fast: true, defaultProvider: GOOGLE,
|
|
183
|
+
},
|
|
184
|
+
[GEMINI_25_PRO_TTS]: {
|
|
185
|
+
source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
|
|
186
|
+
audio: true, defaultProvider: GOOGLE,
|
|
187
|
+
},
|
|
188
|
+
[GPT_4O_MIMI_TTS]: {
|
|
189
|
+
source: S_OPENAI, maxInputTokens: kT(2), func: 'generateAudio',
|
|
190
|
+
audio: true, fast: true, defaultProvider: OPENAI,
|
|
191
|
+
},
|
|
192
|
+
[GPT_4O_TRANSCRIBE]: {
|
|
193
|
+
source: S_OPENAI, maxInputTokens: 0,
|
|
194
|
+
func: 'transcribeAudio', hearing: true, fast: true,
|
|
195
|
+
defaultProvider: OPENAI,
|
|
196
|
+
},
|
|
181
197
|
// models with deepsearch capabilities
|
|
182
198
|
[JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
|
|
183
199
|
icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
|
|
@@ -273,8 +289,9 @@ const PROVIDER_ICONS = {
|
|
|
273
289
|
};
|
|
274
290
|
|
|
275
291
|
// Emoji shown for each model capability flag; keys match the boolean feature
// names used on MODELS entries (kept in alphabetical order).
const FEATURE_ICONS = {
    audio: '📣',
    deepsearch: '🔍',
    fast: '⚡️',
    finetune: '🔧',
    hearing: '👂',
    image: '🎨',
    json: '📊',
    reasoning: '🧠',
    tools: '🧰',
    video: '🎬',
    vision: '👁️',
};
|
|
279
296
|
|
|
280
297
|
const tokenRatioByWords = Math.min(
|
|
@@ -292,7 +309,7 @@ let tokeniser, _tools;
|
|
|
292
309
|
|
|
293
310
|
const unifyProvider = provider => {
|
|
294
311
|
assert(provider = (provider || '').trim(), 'AI provider is required.');
|
|
295
|
-
for (let type of [OPENROUTER, GOOGLE, JINA, OLLAMA, SILICONFLOW]) {
|
|
312
|
+
for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
|
|
296
313
|
if (insensitiveCompare(provider, type)) { return type; }
|
|
297
314
|
}
|
|
298
315
|
throwError(`Invalid AI provider: ${provider}.`);
|
|
@@ -399,6 +416,11 @@ const setupAi = ai => {
|
|
|
399
416
|
});
|
|
400
417
|
};
|
|
401
418
|
|
|
419
|
+
/**
 * Load the `openai` package and build a client from it.
 *
 * @param {object} opts - Passed both to the library loader and to the OpenAI
 *   client constructor.
 * @returns {Promise<{toFile: Function, client: object}>} The library's
 *   `toFile` helper together with the constructed client.
 */
const OpenAI = async opts => {
    const { toFile, OpenAI: Client } = await libOpenAi(opts);
    const client = new Client(opts);
    return { toFile, client };
};
|
|
423
|
+
|
|
402
424
|
const init = async (options = {}) => {
|
|
403
425
|
if (options?.debug) {
|
|
404
426
|
(await need('node:util')).inspect.defaultOptions.depth = null;
|
|
@@ -435,14 +457,24 @@ const init = async (options = {}) => {
|
|
|
435
457
|
});
|
|
436
458
|
}
|
|
437
459
|
break;
|
|
460
|
+
case OPENAI:
|
|
461
|
+
assertApiKey(provider, options);
|
|
462
|
+
var { client, toFile } = await OpenAI({ ...options });
|
|
463
|
+
for (let model of models) {
|
|
464
|
+
setupAi({
|
|
465
|
+
provider, model, client, toFile,
|
|
466
|
+
prompt: promptOpenAI, priority,
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
break;
|
|
438
470
|
case JINA:
|
|
439
471
|
assertApiKey(provider, options);
|
|
440
|
-
var client = await OpenAI({
|
|
472
|
+
var { client } = await OpenAI({
|
|
441
473
|
baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
|
|
442
474
|
});
|
|
443
475
|
for (let model of models) {
|
|
444
476
|
setupAi({
|
|
445
|
-
provider, model, client, prompt:
|
|
477
|
+
provider, model, client, prompt: promptOpenRouter, priority,
|
|
446
478
|
});
|
|
447
479
|
}
|
|
448
480
|
break;
|
|
@@ -455,7 +487,7 @@ const init = async (options = {}) => {
|
|
|
455
487
|
});
|
|
456
488
|
for (let model of models) {
|
|
457
489
|
setupAi({
|
|
458
|
-
provider, model, client, prompt:
|
|
490
|
+
provider, model, client, prompt: promptOpenRouter, priority,
|
|
459
491
|
});
|
|
460
492
|
ignoreErrFunc(async () => {
|
|
461
493
|
phLog(await (await fetch(`${baseURL}completions`, {
|
|
@@ -473,17 +505,19 @@ const init = async (options = {}) => {
|
|
|
473
505
|
});
|
|
474
506
|
for (let model of models) {
|
|
475
507
|
setupAi({
|
|
476
|
-
provider, model, client, prompt:
|
|
508
|
+
provider, model, client, prompt: promptOpenRouter, priority,
|
|
477
509
|
});
|
|
478
510
|
}
|
|
479
511
|
break;
|
|
480
512
|
default:
|
|
481
513
|
assertApiKey(provider, options);
|
|
482
|
-
var client = await OpenAI({
|
|
514
|
+
var { client } = await OpenAI({
|
|
515
|
+
baseURL: OPENROUTER_API, ...options || {},
|
|
516
|
+
});
|
|
483
517
|
for (let model of models) {
|
|
484
518
|
setupAi({
|
|
485
519
|
provider: OPENROUTER || provider, model, client,
|
|
486
|
-
prompt:
|
|
520
|
+
prompt: promptOpenRouter, priority,
|
|
487
521
|
});
|
|
488
522
|
}
|
|
489
523
|
}
|
|
@@ -756,9 +790,23 @@ const buildPrompts = async (model, input, options = {}) => {
|
|
|
756
790
|
let [history, content, prompt, _model, _assistant, _history]
|
|
757
791
|
= [null, input, null, { role: MODEL }, { role: assistant }, null];
|
|
758
792
|
options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
|
|
759
|
-
options.attachments = (
|
|
793
|
+
options.attachments = (await Promise.all((
|
|
760
794
|
options.attachments?.length ? options.attachments : []
|
|
761
|
-
).
|
|
795
|
+
).map(async x => {
|
|
796
|
+
if (String.isString(x)) {
|
|
797
|
+
var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
|
|
798
|
+
return {
|
|
799
|
+
url: convResp.content,
|
|
800
|
+
mime_type: convResp.mime,
|
|
801
|
+
}
|
|
802
|
+
} else if (Buffer.isBuffer(x)) {
|
|
803
|
+
var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
|
|
804
|
+
return {
|
|
805
|
+
url: convResp.content,
|
|
806
|
+
mime_type: convResp.mime,
|
|
807
|
+
}
|
|
808
|
+
} else if (Object.isObject(x)) { return x; } else { return null; }
|
|
809
|
+
}))).filter(x => x && [
|
|
762
810
|
...model?.supportedMimeTypes,
|
|
763
811
|
...model?.supportedDocTypes,
|
|
764
812
|
...model?.supportedAudioTypes,
|
|
@@ -855,7 +903,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
|
|
|
855
903
|
`⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
|
|
856
904
|
] : []].map(x => x.trim()).join('\n\n');
|
|
857
905
|
|
|
858
|
-
const
|
|
906
|
+
const promptOpenRouter = async (aiId, content, options = {}) => {
|
|
859
907
|
let { provider, client, model } = await getAi(aiId);
|
|
860
908
|
let [
|
|
861
909
|
result, resultAudio, resultImages, resultReasoning, event, resultTools,
|
|
@@ -1006,7 +1054,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
|
|
|
1006
1054
|
= await handleToolsCall(event, { ...options, result });
|
|
1007
1055
|
if (toolsResult.length
|
|
1008
1056
|
&& countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
|
|
1009
|
-
return
|
|
1057
|
+
return promptOpenRouter(aiId, content, {
|
|
1010
1058
|
...options, toolsResult, result: toolsResponse,
|
|
1011
1059
|
});
|
|
1012
1060
|
}
|
|
@@ -1016,99 +1064,177 @@ const promptOpenAI = async (aiId, content, options = {}) => {
|
|
|
1016
1064
|
|
|
1017
1065
|
/**
 * Run a Google GenAI generation model. The branch taken depends on the
 * capability flags of the selected MODELS entry: `image` -> generateImages,
 * `video` -> generateVideos (then polled until done), `audio` -> speech via
 * generateContent with an AUDIO response modality.
 *
 * @param {string} aiId - Id of an initialised AI, resolved through getAi().
 * @param {string} prompt - Text prompt (for TTS models: the text to speak).
 * @param {object} [options]
 * @param {string} [options.model] - Model name overriding the AI's own model.
 * @param {object} [options.config] - Merged with the per-branch default config
 *   through mergeAtoB (presumably caller values win — confirm in utilitas.mjs).
 * @param {number} [options.n] - Number of images to request (default 4).
 * @param {string} [options.voice] - Prebuilt voice name for TTS (default 'Zephyr').
 * @param {string} [options.prompt] - Replaces TTS_PROMPT as the TTS instruction.
 * @param {boolean} [options.raw] - Image/audio: keep the SDK response as is.
 * @param {boolean} [options.generateRaw] - Video: return the initial operation
 *   immediately, without polling.
 * @param {boolean} [options.videoRaw] - Video: keep `generatedVideos` as is.
 * @returns {Promise<object>} The (packed or raw) response spread into a new
 *   object with `processing: false`.
 * @throws If the model is unknown/unsupported, the prompt is empty or too
 *   long, or the service reports an error / filters every result.
 */
const promptGoogle = async (aiId, prompt, options = {}) => {
    const { provider, client, model } = await getAi(aiId);
    const M = MODELS[options?.model || model.name];
    // Fail with the intended message instead of a TypeError on `M.xxx` below.
    if (!M) { throwError('Unsupported model.'); }
    prompt = ensureString(prompt, { trim: true });
    assert(prompt.length, 'Prompt is required.');
    // The TTS entries visible in MODELS are flagged `audio`, not `tts`; accept
    // either so the speaking-style instruction is actually prepended.
    if (M.tts || M.audio) {
        prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`;
    }
    assert(await countTokens(prompt, { fast: true })
        <= M.maxInputTokens,
        `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
    );
    const modelLabel = () => packModelLabel([provider, M.source, M.name]);
    let resp;
    if (M.image) {
        resp = await client.models.generateImages({
            model: M.name, prompt, config: mergeAtoB(options?.config, {
                numberOfImages: options?.n || 4, sampleImageSize: '2K',
                includeRaiReason: true,
                // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
                aspectRatio: '16:9', personGeneration: 'allow_adult',
            }),
        });
        const generated = resp?.generatedImages;
        // At least one image must have survived the RAI filter.
        assert(
            !resp?.error && generated?.some(x => !x.raiFilteredReason),
            resp?.error?.message || generated?.find(
                x => x.raiFilteredReason
            )?.raiFilteredReason || ERROR_GENERATING
        );
        if (!options?.raw) {
            resp = {
                // Skip entries without image bytes (e.g. RAI-filtered ones) so
                // one filtered result does not crash the whole batch.
                text: '', images: await Promise.all((generated || []).filter(
                    x => x?.image?.imageBytes
                ).map(async x => ({
                    data: await convert(x.image.imageBytes, {
                        input: BASE64, suffix: 'png', ...options || {}
                    }), mimeType: x.image.mimeType,
                }))), model: modelLabel(),
            };
        }
    } else if (M.video) {
        resp = await client.models.generateVideos({
            model: M.name, prompt, config: mergeAtoB(options?.config, {
                aspectRatio: '16:9', numberOfVideos: 1,
                // personGeneration: 'allow_adult',
                enablePromptRewriting: true, addWatermark: false,
                includeRaiReason: true,
            }),
        });
        assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
        if (options?.generateRaw) { return resp; }
        // Poll the long-running operation; the failing assert triggers a retry.
        await tryUntil(async () => {
            resp = await client.operations.getVideosOperation({
                operation: resp,
            });
            assert(
                resp?.done,
                `Waiting for Google video generation: ${resp?.name}`,
            );
        }, { maxTry: 60 * 10, log });
        const generated = resp?.response?.generatedVideos;
        assert(
            !resp?.error && generated?.some(x => !x.raiFilteredReason),
            resp?.error?.message || generated?.find(
                x => x.raiFilteredReason
            )?.raiFilteredReason || ERROR_GENERATING
        );
        if (options?.videoRaw) {
            resp = generated;
        } else {
            const jobId = resp.name;
            resp = {
                text: '', videos: await Promise.all(generated.filter(
                    x => x?.video?.uri
                ).map(async x => {
                    const downloadPath = `${getTempPath({
                        seed: x?.video?.uri
                    })}.mp4`;
                    // @todo: fix this
                    // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
                    await client.files.download({ file: x, downloadPath });
                    await timeout(1000 * 10); // hack to wait for file to be downloaded
                    return {
                        data: await convert(downloadPath, {
                            input: FILE, suffix: 'mp4', ...options || {}
                        }), mimeType: MIME_MP4, jobId,
                    };
                })), model: modelLabel(),
            };
        }
    } else if (M.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
        resp = await client.models.generateContent({
            model: M.name, contents: prompt,
            config: mergeAtoB(options?.config, {
                responseModalities: ['AUDIO'],
                speechConfig: {
                    voiceConfig: {
                        prebuiltVoiceConfig: {
                            voiceName: options?.voice || 'Zephyr',
                        },
                    },
                },
            }),
        });
        const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
        assert(rawAudio, ERROR_GENERATING, 500);
        if (!options?.raw) {
            resp = {
                text: '', audio: {
                    // The inline data is wrapped into a WAV container here.
                    data: await packPcmToWav(rawAudio?.data, {
                        input: BASE64, suffix: wav, ...options || {},
                    }), mimeType: MIME_WAV,
                }, model: modelLabel(),
            };
        }
    } else {
        throwError('Unsupported model.');
    }
    // await streamResp(
    //     { ...resp, processing: true }, { ...options, noPack: true }
    // );
    return { ...resp, processing: false };
};
|
|
1182
|
+
|
|
1183
|
+
/**
 * Run an OpenAI audio model: text-to-speech when the MODELS entry is flagged
 * `audio`, speech-to-text when it is flagged `hearing`.
 *
 * @param {string} aiId - Id of an initialised AI, resolved through getAi().
 * @param {string} prompt - Text to speak (TTS only; ignored for transcription).
 * @param {object} [options]
 * @param {string} [options.model] - Model name overriding the AI's own model.
 * @param {string} [options.prompt] - TTS: replaces the default instruction.
 * @param {object} [options.params] - Extra request fields; they are spread
 *   last, so they override the defaults built here.
 * @param {Array} [options.attachments] - STT: the first item (or its `data`)
 *   is the audio to transcribe.
 * @param {string} [options.input] - STT: input kind forwarded to convert().
 * @param {boolean} [options.raw] - Keep the SDK response as is.
 * @returns {Promise<object>} The (packed or raw) response spread into a new
 *   object with `processing: false`.
 * @throws If the model is unsupported, the TTS prompt is empty or too long,
 *   or no audio attachment is supplied for transcription.
 */
const promptOpenAI = async (aiId, prompt, options = {}) => {
    const { provider, client, toFile, model } = await getAi(aiId);
    const M = MODELS[options?.model || model.name];
    prompt = ensureString(prompt, { trim: true });
    let resp;
    if (M?.audio) {
        assert(prompt.length, 'Prompt is required.');
        const instructions = options?.prompt || `${TTS_PROMPT}.`;
        // Instruction and input are budgeted together against the model limit.
        assert(await countTokens(
            JSON.stringify([instructions, prompt]), { fast: true }
        ) <= M.maxInputTokens,
            `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
        );
        // https://platform.openai.com/docs/api-reference/audio/createSpeech
        resp = await client.audio.speech.create({
            model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
            instructions, response_format: 'opus',
            input: prompt, ...options?.params || {},
        });
        if (!options?.raw) {
            resp = {
                text: '', audio: {
                    data: await convert(Buffer.from(
                        await resp.arrayBuffer()
                    ), { suffix: OGG_EXT, ...options || {} }),
                    mimeType: MIME_OGG,
                }, model: packModelLabel([provider, M.source, M.name]),
            };
        }
    } else if (M?.hearing) {
        const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
        assert(audio, 'Audio attachment is required.');
        const input = ensureString(options?.input, { case: 'UP' });
        // NOTE(review): `INVALID_AUDIO` is passed as a shorthand key, i.e.
        // `{ INVALID_AUDIO: 'Invalid audio data.' }` — confirm whether convert()
        // expects it under another option name (e.g. an error-message option).
        const { content, cleanup } = await convert(audio, {
            input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
            suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
            withCleanupFunc: true,
        });
        try {
            resp = await client.audio.transcriptions.create({
                file: await toFile(content), model: M.name,
                response_format: 'text', ...options?.params || {},
            });
        } finally {
            // Always release what convert() created, even if the request fails.
            await cleanup();
        }
        if (!options?.raw) {
            resp = {
                // `options.params` may switch response_format away from 'text',
                // in which case the SDK returns an object carrying `text`.
                text: String(
                    typeof resp === 'string' ? resp : resp?.text ?? ''
                ).trim(),
                model: packModelLabel([provider, M.source, M.name]),
            };
        }
    } else {
        throwError('Unsupported model.');
    }
    // await streamResp(
    //     { ...resp, processing: true }, { ...options, noPack: true }
    // );
    return { ...resp, processing: false };
};
|
|
1114
1240
|
|
|
@@ -1223,7 +1349,6 @@ const distillFile = async (attachments, o) => {
|
|
|
1223
1349
|
const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
|
|
1224
1350
|
return {
|
|
1225
1351
|
url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
|
|
1226
|
-
data: base64Encode(buf, true),
|
|
1227
1352
|
mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
|
|
1228
1353
|
};
|
|
1229
1354
|
})();
|
|
@@ -1304,7 +1429,9 @@ export {
|
|
|
1304
1429
|
DEFAULT_MODELS,
|
|
1305
1430
|
FEATURE_ICONS,
|
|
1306
1431
|
FUNCTION,
|
|
1432
|
+
GEMINI_25_FLASH_TTS,
|
|
1307
1433
|
GEMINI_25_FLASH,
|
|
1434
|
+
GEMINI_25_PRO_TTS,
|
|
1308
1435
|
GEMINI_30_PRO_IMAGE,
|
|
1309
1436
|
GPT_5_IMAGE,
|
|
1310
1437
|
GPT_51,
|
|
@@ -1326,7 +1453,7 @@ export {
|
|
|
1326
1453
|
k,
|
|
1327
1454
|
listOpenAIModels,
|
|
1328
1455
|
prompt,
|
|
1329
|
-
|
|
1456
|
+
promptOpenRouter,
|
|
1330
1457
|
resetSession,
|
|
1331
1458
|
talk,
|
|
1332
1459
|
trimPrompt,
|
package/lib/manifest.mjs
CHANGED