utilitas 2000.3.26 → 2000.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -23
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/index.mjs +2 -3
- package/lib/alan.mjs +349 -74
- package/lib/manifest.mjs +1 -1
- package/lib/speech.mjs +15 -170
- package/lib/storage.mjs +6 -4
- package/package.json +1 -1
- package/lib/gen.mjs +0 -209
package/lib/speech.mjs
CHANGED
|
@@ -1,39 +1,20 @@
|
|
|
1
|
-
import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
|
|
2
|
-
import { getFfmpeg, packPcmToWav } from './media.mjs';
|
|
3
1
|
import { get } from './web.mjs';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
2
|
+
import { getFfmpeg } from './media.mjs';
|
|
3
|
+
import { getTempPath } from './storage.mjs';
|
|
4
|
+
import { hash } from './encryption.mjs';
|
|
6
5
|
|
|
7
6
|
import {
|
|
8
|
-
call,
|
|
9
|
-
need, throwError
|
|
7
|
+
call, ignoreErrFunc, inBrowser, need, throwError,
|
|
10
8
|
} from './utilitas.mjs';
|
|
11
9
|
|
|
12
10
|
import {
|
|
13
|
-
convertAudioTo16kNanoOpusOgg,
|
|
14
|
-
convertAudioTo16kNanoPcmWave,
|
|
11
|
+
convertAudioTo16kNanoOpusOgg, convertAudioTo16kNanoPcmWave,
|
|
15
12
|
} from './media.mjs';
|
|
16
13
|
|
|
17
|
-
const _NEED = ['
|
|
14
|
+
const _NEED = ['whisper-node'];
|
|
18
15
|
|
|
19
|
-
const [
|
|
20
|
-
|
|
21
|
-
GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH,
|
|
22
|
-
OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
|
|
23
|
-
] = [
|
|
24
|
-
'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
|
|
25
|
-
'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
|
|
26
|
-
'gemini-flash-latest', 4096, 'base', 'Invalid audio data.',
|
|
27
|
-
];
|
|
28
|
-
|
|
29
|
-
const [
|
|
30
|
-
defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
|
|
31
|
-
defaultGeminiSttModel,
|
|
32
|
-
] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
|
|
33
|
-
|
|
34
|
-
const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
|
|
35
|
-
|
|
36
|
-
const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
|
|
16
|
+
const [FILE, suffix, SPEAKER, cleanup, WHISPER_DEFAULT_MODEL, errorMessage]
|
|
17
|
+
= ['FILE', 'ogg', 'SPEAKER', true, 'base', 'Invalid audio data.'];
|
|
37
18
|
|
|
38
19
|
const WHISPER_MODELS = [
|
|
39
20
|
// npx whisper-node download tiny.en
|
|
@@ -86,108 +67,22 @@ const getWhisperModelReady = async (model, options) => {
|
|
|
86
67
|
return (await get(getWhisperModelUrl(model), { fuzzy: true }))?.cache?.content;
|
|
87
68
|
};
|
|
88
69
|
|
|
89
|
-
const
|
|
90
|
-
if (options) {
|
|
91
|
-
assert(
|
|
92
|
-
options?.tts || options?.stt,
|
|
93
|
-
'At least one of TTS or STT is selected.', 500
|
|
94
|
-
);
|
|
95
|
-
const provider = ensureString(options?.provider, { case: 'UP' });
|
|
96
|
-
switch (provider) {
|
|
97
|
-
case 'OPENAI':
|
|
98
|
-
clients._provider = provider;
|
|
99
|
-
const OpenAI = await need('openai');
|
|
100
|
-
const openai = new OpenAI(options);
|
|
101
|
-
if (options?.tts) {
|
|
102
|
-
clients.tts = openai.audio.speech;
|
|
103
|
-
}
|
|
104
|
-
if (options?.stt) {
|
|
105
|
-
clients.stt = openai.audio.transcriptions;
|
|
106
|
-
clients.toFile = OpenAI.toFile;
|
|
107
|
-
}
|
|
108
|
-
break;
|
|
109
|
-
case 'GOOGLE':
|
|
110
|
-
clients._provider = provider;
|
|
111
|
-
const { GoogleGenAI } = await need('@google/genai');
|
|
112
|
-
const client = new GoogleGenAI(options);
|
|
113
|
-
if (options?.tts) {
|
|
114
|
-
clients.tts = client.models.generateContent;
|
|
115
|
-
}
|
|
116
|
-
if (options?.stt) {
|
|
117
|
-
clients.stt = client.models.generateContent;
|
|
118
|
-
}
|
|
119
|
-
break;
|
|
120
|
-
case '':
|
|
121
|
-
clients._provider = 'LOCAL';
|
|
122
|
-
options?.tts && await checkSay({ assert: true });
|
|
123
|
-
options?.stt && await checkWhisper({ assert: true });
|
|
124
|
-
break;
|
|
125
|
-
default:
|
|
126
|
-
throwError('Invalid speech provider.', 500);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
assert(
|
|
130
|
-
countKeys(clients), 'Speech API client has not been initialized.', 501
|
|
131
|
-
);
|
|
132
|
-
return clients;
|
|
133
|
-
};
|
|
134
|
-
|
|
135
|
-
const checkSay = async (options) => {
|
|
70
|
+
const checkSay = async () => {
|
|
136
71
|
const result = !!(await ignoreErrFunc(async () => (
|
|
137
72
|
await Promise.all([need('node:os'), need('say'), getFfmpeg()])
|
|
138
73
|
)[0].platform() === 'darwin'));
|
|
139
|
-
|
|
74
|
+
assert(result, 'Say API is not available.', 500);
|
|
140
75
|
return result;
|
|
141
76
|
};
|
|
142
77
|
|
|
143
|
-
const checkWhisper = async (
|
|
78
|
+
const checkWhisper = async () => {
|
|
144
79
|
const result = !!(await ignoreErrFunc(() => Promise.all([
|
|
145
80
|
need('whisper-node'), getFfmpeg()
|
|
146
81
|
])));
|
|
147
|
-
|
|
82
|
+
assert(result, 'Whisper API is not available.', 500);
|
|
148
83
|
return result;
|
|
149
84
|
};
|
|
150
85
|
|
|
151
|
-
const ttsOpenAI = async (input, options) => {
|
|
152
|
-
assert(clients.tts, 'OpenAI TTS API has not been initialized.', 500);
|
|
153
|
-
assert(input, 'Text is required.', 400);
|
|
154
|
-
assert(input.length <= OPENAI_TTS_MAX_LENGTH, 'Text is too long.', 400);
|
|
155
|
-
// https://platform.openai.com/docs/api-reference/audio/createSpeech
|
|
156
|
-
const content = await clients.tts.create({
|
|
157
|
-
model: defaultOpenAITtsModel, voice: DEFAULT_MODELS[OPENAI_VOICE],
|
|
158
|
-
instructions: 'Speak in a friendly and sweet tone.',
|
|
159
|
-
response_format: 'opus', input, ...options?.params || {},
|
|
160
|
-
});
|
|
161
|
-
const buffer = Buffer.from(await content.arrayBuffer());
|
|
162
|
-
return await convert(buffer, { suffix, ...options || {} });
|
|
163
|
-
};
|
|
164
|
-
|
|
165
|
-
// https://ai.google.dev/gemini-api/docs/speech-generation#voices
|
|
166
|
-
const ttsGoogle = async (contents, options) => {
|
|
167
|
-
assert(clients.tts, 'Google TTS API has not been initialized.', 500);
|
|
168
|
-
assert(contents, 'Text is required.', 400);
|
|
169
|
-
assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
|
|
170
|
-
const resp = await clients.tts({
|
|
171
|
-
model: options?.model || defaultGeminiTtsModel,
|
|
172
|
-
contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
|
|
173
|
-
config: mergeAtoB(options?.config, {
|
|
174
|
-
responseModalities: ['AUDIO'],
|
|
175
|
-
speechConfig: {
|
|
176
|
-
voiceConfig: {
|
|
177
|
-
prebuiltVoiceConfig: {
|
|
178
|
-
voiceName: options?.voice || 'Zephyr',
|
|
179
|
-
},
|
|
180
|
-
},
|
|
181
|
-
},
|
|
182
|
-
}),
|
|
183
|
-
});
|
|
184
|
-
const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
|
|
185
|
-
assert(rawAudio, 'Failed to generate audio.', 500);
|
|
186
|
-
return options?.raw ? rawAudio : await packPcmToWav(rawAudio?.data, {
|
|
187
|
-
input: BASE64, expected: 'FILE', suffix: wav, ...options || {},
|
|
188
|
-
});
|
|
189
|
-
};
|
|
190
|
-
|
|
191
86
|
const ttsSay = async (text, options) => {
|
|
192
87
|
const say = await need('say');
|
|
193
88
|
assert(text, 'Text is required.', 400);
|
|
@@ -214,45 +109,6 @@ const ttsBrowser = async (text) => {
|
|
|
214
109
|
return speechSynthesis.speak(new SpeechSynthesisUtterance(text));
|
|
215
110
|
};
|
|
216
111
|
|
|
217
|
-
const sttOpenAI = async (audio, options) => {
|
|
218
|
-
assert(clients.stt, 'OpenAI STT API has not been initialized.', 500);
|
|
219
|
-
const input = ensureString(options?.input, { case: 'UP' });
|
|
220
|
-
const { content, cleanup } = await convert(audio, {
|
|
221
|
-
input: options?.input, ...options || {}, expected: STREAM, errorMessage,
|
|
222
|
-
suffix: ['', BUFFER].includes(input) ? suffix : null,
|
|
223
|
-
withCleanupFunc: true,
|
|
224
|
-
});
|
|
225
|
-
const result = await clients.stt.create({
|
|
226
|
-
file: await clients.toFile(content), model: defaultOpenAISttModel,
|
|
227
|
-
response_format: 'text', ...options?.params || {},
|
|
228
|
-
});
|
|
229
|
-
await cleanup();
|
|
230
|
-
return result;
|
|
231
|
-
};
|
|
232
|
-
|
|
233
|
-
const sttGoogle = async (audio, options) => {
|
|
234
|
-
assert(clients.stt, 'Google STT API has not been initialized.', 500);
|
|
235
|
-
const data = await convert(audio, {
|
|
236
|
-
input: options?.input, expected: BASE64, errorMessage,
|
|
237
|
-
});
|
|
238
|
-
const resp = await clients.stt({
|
|
239
|
-
model: options?.model || defaultGeminiSttModel, contents: {
|
|
240
|
-
parts: [{
|
|
241
|
-
inlineData: {
|
|
242
|
-
mimeType: options?.mimeType || MIME_WAV, data,
|
|
243
|
-
},
|
|
244
|
-
}, { text: STT_PROMPT }],
|
|
245
|
-
},
|
|
246
|
-
config: { ...options?.config || {} },
|
|
247
|
-
});
|
|
248
|
-
assert(
|
|
249
|
-
resp?.candidates?.[0]?.content?.parts?.[0],
|
|
250
|
-
'Failed to transcribe audio.', 500
|
|
251
|
-
);
|
|
252
|
-
return options?.raw ? resp.candidates
|
|
253
|
-
: (resp.candidates[0].content.parts[0].text?.trim?.() || '');
|
|
254
|
-
};
|
|
255
|
-
|
|
256
112
|
// This function is not working properly, a pull request is filed:
|
|
257
113
|
// https://github.com/ariym/whisper-node/pull/58
|
|
258
114
|
const sttWhisper = async (audio, options) => {
|
|
@@ -282,35 +138,24 @@ const sttWhisper = async (audio, options) => {
|
|
|
282
138
|
const tts = async (text, options) => {
|
|
283
139
|
let engine;
|
|
284
140
|
if (inBrowser()) { engine = ttsBrowser }
|
|
285
|
-
else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
|
|
286
|
-
else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
|
|
287
141
|
else if (await checkSay()) { engine = ttsSay; }
|
|
288
|
-
else { throwError('Text-to-Speech engine
|
|
142
|
+
else { throwError('Text-to-Speech engine is not available.', 500); }
|
|
289
143
|
return await engine(text, options);
|
|
290
144
|
};
|
|
291
145
|
|
|
292
146
|
const stt = async (audio, options) => {
|
|
293
147
|
let engine;
|
|
294
|
-
if (
|
|
295
|
-
else
|
|
296
|
-
else if (await checkWhisper()) { engine = sttWhisper; }
|
|
297
|
-
else { throwError('Speech-to-Text engine has not been initialized.', 500); }
|
|
148
|
+
if (await checkWhisper()) { engine = sttWhisper; }
|
|
149
|
+
else { throwError('Speech-to-Text engine is not available.', 500); }
|
|
298
150
|
return await engine(audio, options);
|
|
299
151
|
};
|
|
300
152
|
|
|
301
|
-
export default init;
|
|
302
153
|
export {
|
|
303
154
|
_NEED,
|
|
304
|
-
OPENAI_TTS_MAX_LENGTH,
|
|
305
155
|
checkSay,
|
|
306
156
|
checkWhisper,
|
|
307
|
-
init,
|
|
308
157
|
stt,
|
|
309
|
-
sttGoogle,
|
|
310
|
-
sttOpenAI,
|
|
311
158
|
sttWhisper,
|
|
312
159
|
tts,
|
|
313
|
-
ttsGoogle,
|
|
314
|
-
ttsOpenAI,
|
|
315
160
|
ttsSay,
|
|
316
161
|
};
|
package/lib/storage.mjs
CHANGED
|
@@ -240,7 +240,7 @@ const blobToBuffer = async blob => {
|
|
|
240
240
|
|
|
241
241
|
const convert = async (any, options) => {
|
|
242
242
|
assert(any, options?.errorMessage || 'Invalid input.', 400);
|
|
243
|
-
|
|
243
|
+
let result = {};
|
|
244
244
|
let [input, expected] = [(
|
|
245
245
|
Buffer.isBuffer(any)
|
|
246
246
|
|| ArrayBuffer.isArrayBuffer(any)
|
|
@@ -248,7 +248,7 @@ const convert = async (any, options) => {
|
|
|
248
248
|
) ? BUFFER : options?.input, options?.expected || BUFFER].map(
|
|
249
249
|
x => ensureString(x, { case: 'UP' })
|
|
250
250
|
);
|
|
251
|
-
let [oriFile, meta, mime, subExp] = [null, null,
|
|
251
|
+
let [oriFile, meta, mime, subExp] = [null, null, null, expected];
|
|
252
252
|
switch (input) {
|
|
253
253
|
case FILE:
|
|
254
254
|
oriFile = any;
|
|
@@ -269,6 +269,7 @@ const convert = async (any, options) => {
|
|
|
269
269
|
input = BUFFER;
|
|
270
270
|
break;
|
|
271
271
|
}
|
|
272
|
+
mime || (mime = (await getMime(any, any))?.mime || MIME_BINARY);
|
|
272
273
|
switch (expected) {
|
|
273
274
|
case STREAM: subExp = FILE; break;
|
|
274
275
|
case DATAURL: subExp = BUFFER; break;
|
|
@@ -313,8 +314,9 @@ const convert = async (any, options) => {
|
|
|
313
314
|
|
|
314
315
|
const getMime = async (buf, filename) => {
|
|
315
316
|
const mimeType = await ignoreErrFunc(() => need('mime-types'));
|
|
316
|
-
const mime = extract(await fileTypeFromBuffer(buf), 'mime')
|
|
317
|
-
|| (filename && mimeType?.lookup?.(filename))
|
|
317
|
+
const mime = (buf && Buffer.isBuffer(buf) && extract(await fileTypeFromBuffer(buf), 'mime'))
|
|
318
|
+
|| (filename && String.isString(filename) && mimeType?.lookup?.(filename))
|
|
319
|
+
|| MIME_BINARY;
|
|
318
320
|
return { mime, extension: mimeType?.extension?.(mime) || 'bin' };
|
|
319
321
|
};
|
|
320
322
|
|
package/package.json
CHANGED
package/lib/gen.mjs
DELETED
|
@@ -1,209 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
ensureArray, ensureString, log as _log, need, throwError,
|
|
3
|
-
tryUntil, timeout,
|
|
4
|
-
} from './utilitas.mjs';
|
|
5
|
-
|
|
6
|
-
import { convert, MIME_PNG, MIME_MP4, getTempPath } from './storage.mjs';
|
|
7
|
-
import { createReadStream } from 'fs';
|
|
8
|
-
|
|
9
|
-
const _NEED = ['OpenAI', '@google/genai'];
|
|
10
|
-
const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
|
|
11
|
-
const [
|
|
12
|
-
clients, OPENAI, GOOGLE, BASE64, FILE, BUFFER, ERROR_GENERATING,
|
|
13
|
-
IMAGEN_MODEL, OPENAI_MODEL, VEO_MODEL, IMAGEN_UPSCALE_MODEL,
|
|
14
|
-
] = [
|
|
15
|
-
{}, 'OPENAI', 'GOOGLE', 'BASE64', 'FILE', 'BUFFER',
|
|
16
|
-
'Error generating media.', 'imagen-4.0-ultra-generate-001',
|
|
17
|
-
'gpt-image-1', 'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
|
|
18
|
-
];
|
|
19
|
-
|
|
20
|
-
const init = async (options) => {
|
|
21
|
-
assert(options?.apiKey, 'API key is required.');
|
|
22
|
-
const provider = ensureString(options?.provider, { case: 'UP' });
|
|
23
|
-
switch (provider) {
|
|
24
|
-
case OPENAI:
|
|
25
|
-
const OpenAI = await need('openai');
|
|
26
|
-
var client = new OpenAI(options);
|
|
27
|
-
clients[provider] = {
|
|
28
|
-
image: client.images,
|
|
29
|
-
toFile: OpenAI.toFile,
|
|
30
|
-
};
|
|
31
|
-
break;
|
|
32
|
-
case GOOGLE:
|
|
33
|
-
const { GoogleGenAI } = await need('@google/genai');
|
|
34
|
-
var client = new GoogleGenAI({ vertexai: false, ...options });
|
|
35
|
-
clients[provider] = {
|
|
36
|
-
gen: client,
|
|
37
|
-
};
|
|
38
|
-
break;
|
|
39
|
-
default:
|
|
40
|
-
throw new Error('Invalid provider.');
|
|
41
|
-
}
|
|
42
|
-
return clients;
|
|
43
|
-
};
|
|
44
|
-
|
|
45
|
-
const extractImage = async (data, options) => await convert(
|
|
46
|
-
data, { input: BASE64, suffix: 'png', ...options || {} }
|
|
47
|
-
);
|
|
48
|
-
|
|
49
|
-
const extractVideo = async (data, options) => await convert(
|
|
50
|
-
data, { input: FILE, suffix: 'mp4', ...options || {} }
|
|
51
|
-
);
|
|
52
|
-
|
|
53
|
-
const prepareImage = async (files, repack, options) => {
|
|
54
|
-
if (!files) { return }
|
|
55
|
-
const multiple = Array.isArray(files);
|
|
56
|
-
files = ensureArray(files);
|
|
57
|
-
const resp = await Promise.all(files.map(async x => await repack(
|
|
58
|
-
createReadStream(await convert(
|
|
59
|
-
x, { expected: 'FILE', ...options || {} }
|
|
60
|
-
)), null, { type: MIME_PNG } // don't need to be right MIME type
|
|
61
|
-
)));
|
|
62
|
-
return multiple ? resp : resp[0];
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
const image = async (prompt, options) => {
|
|
67
|
-
let provider = ensureString(options?.provider, { case: 'UP' });
|
|
68
|
-
if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
|
|
69
|
-
else if (!provider && clients?.[OPENAI]) { provider = OPENAI; }
|
|
70
|
-
const client = clients?.[provider];
|
|
71
|
-
const n = options?.n || 4;
|
|
72
|
-
assert(client, 'No available image generation provider.');
|
|
73
|
-
prompt = ensureString(prompt);
|
|
74
|
-
assert(prompt.length <= 4000,
|
|
75
|
-
'Prompt must be less than 4000 characters.', 400);
|
|
76
|
-
options = {
|
|
77
|
-
...options || {},
|
|
78
|
-
expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
|
|
79
|
-
};
|
|
80
|
-
switch (provider) {
|
|
81
|
-
case OPENAI:
|
|
82
|
-
let [func, extraOptions] = ['generate', {}];
|
|
83
|
-
if (options?.reference || options?.mask) {
|
|
84
|
-
func = 'edit';
|
|
85
|
-
extraOptions = {
|
|
86
|
-
image: await prepareImage(options?.reference, client.toFile, options),
|
|
87
|
-
mask: await prepareImage(options?.mask, client.toFile, options),
|
|
88
|
-
};
|
|
89
|
-
}
|
|
90
|
-
try { // https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1
|
|
91
|
-
var resp = await client.image[func]({
|
|
92
|
-
prompt, model: OPENAI_MODEL, n, quality: 'high',
|
|
93
|
-
size: '1536x1024', moderation: 'low',
|
|
94
|
-
// 1024x1024 (square), 1536x1024 (landscape), 1024x1536 (portrait), auto (default)
|
|
95
|
-
// background: 'transparent',
|
|
96
|
-
...extraOptions, ...options?.params || {},
|
|
97
|
-
});
|
|
98
|
-
} catch (err) { throwError(err?.message || ERROR_GENERATING); }
|
|
99
|
-
if (!options?.raw) {
|
|
100
|
-
resp.data = await Promise.all(resp.data.map(async x => ({
|
|
101
|
-
caption: `🎨 by ${OPENAI_MODEL}`,
|
|
102
|
-
data: await extractImage(x.b64_json, {
|
|
103
|
-
...options || {}, input: BASE64,
|
|
104
|
-
}),
|
|
105
|
-
mimeType: MIME_PNG,
|
|
106
|
-
})));
|
|
107
|
-
}
|
|
108
|
-
return resp?.data;
|
|
109
|
-
case GOOGLE:
|
|
110
|
-
var resp = await client.gen.models.generateImages({
|
|
111
|
-
model: IMAGEN_MODEL, prompt, config: {
|
|
112
|
-
numberOfImages: n, sampleImageSize: '2K',
|
|
113
|
-
includeRaiReason: true,
|
|
114
|
-
// "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
|
|
115
|
-
aspectRatio: '16:9', personGeneration: 'allow_adult',
|
|
116
|
-
...options?.config || {},
|
|
117
|
-
},
|
|
118
|
-
});
|
|
119
|
-
const generated = resp?.generatedImages;
|
|
120
|
-
assert(!resp?.error && generated?.filter(
|
|
121
|
-
x => !x.raiFilteredReason
|
|
122
|
-
).length, resp?.error?.message || generated?.find(
|
|
123
|
-
x => x.raiFilteredReason
|
|
124
|
-
)?.raiFilteredReason || ERROR_GENERATING);
|
|
125
|
-
if (!options?.raw) {
|
|
126
|
-
resp = await Promise.all((resp?.generatedImages || []).map(
|
|
127
|
-
async x => ({
|
|
128
|
-
caption: `🎨 by ${IMAGEN_MODEL}`,
|
|
129
|
-
data: await extractImage(x.image.imageBytes, options),
|
|
130
|
-
mimeType: x.mimeType,
|
|
131
|
-
})
|
|
132
|
-
));
|
|
133
|
-
}
|
|
134
|
-
return resp;
|
|
135
|
-
default:
|
|
136
|
-
throw new Error('Invalid provider.');
|
|
137
|
-
}
|
|
138
|
-
};
|
|
139
|
-
|
|
140
|
-
const video = async (prompt, options) => {
|
|
141
|
-
let provider = ensureString(options?.provider, { case: 'UP' });
|
|
142
|
-
if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
|
|
143
|
-
const client = clients?.[provider];
|
|
144
|
-
assert(client, 'No available video generation provider.');
|
|
145
|
-
prompt = ensureString(prompt);
|
|
146
|
-
assert(prompt.length <= 4000,
|
|
147
|
-
'Prompt must be less than 4000 characters.', 400);
|
|
148
|
-
options = {
|
|
149
|
-
...options || {},
|
|
150
|
-
expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
|
|
151
|
-
};
|
|
152
|
-
switch (provider) {
|
|
153
|
-
case GOOGLE:
|
|
154
|
-
var resp = await client.gen.models.generateVideos({
|
|
155
|
-
model: VEO_MODEL, prompt, config: {
|
|
156
|
-
aspectRatio: '16:9', numberOfVideos: 1,
|
|
157
|
-
// personGeneration: 'allow_adult',
|
|
158
|
-
enablePromptRewriting: true, addWatermark: false,
|
|
159
|
-
includeRaiReason: true, ...options?.config || {},
|
|
160
|
-
},
|
|
161
|
-
});
|
|
162
|
-
assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
|
|
163
|
-
if (options?.generateRaw) { return resp; }
|
|
164
|
-
await tryUntil(async () => {
|
|
165
|
-
resp = await client.gen.operations.getVideosOperation({
|
|
166
|
-
operation: resp,
|
|
167
|
-
});
|
|
168
|
-
assert(
|
|
169
|
-
resp?.done,
|
|
170
|
-
`Waiting for Google video generation: ${resp.name}`,
|
|
171
|
-
);
|
|
172
|
-
}, { maxTry: 60 * 10, log });
|
|
173
|
-
let generated = resp?.response?.generatedVideos;
|
|
174
|
-
assert(!resp?.error && generated?.filter(
|
|
175
|
-
x => !x.raiFilteredReason
|
|
176
|
-
).length, resp?.error?.message || generated?.find(
|
|
177
|
-
x => x.raiFilteredReason
|
|
178
|
-
)?.raiFilteredReason || ERROR_GENERATING);
|
|
179
|
-
if (!options?.videoRaw) {
|
|
180
|
-
generated = await Promise.all(generated?.filter(
|
|
181
|
-
x => x?.video?.uri
|
|
182
|
-
).map(async (x, i) => {
|
|
183
|
-
const downloadPath = `${getTempPath({
|
|
184
|
-
seed: x?.video?.uri
|
|
185
|
-
})}.mp4`;
|
|
186
|
-
// @todo: fix this
|
|
187
|
-
// https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
|
|
188
|
-
await client.gen.files.download({ file: x, downloadPath });
|
|
189
|
-
await timeout(1000 * 10); // hack to wait for file to be downloaded
|
|
190
|
-
return {
|
|
191
|
-
caption: `🎥 by ${VEO_MODEL}`,
|
|
192
|
-
data: await extractVideo(downloadPath, options),
|
|
193
|
-
mimeType: MIME_MP4, jobId: resp.name,
|
|
194
|
-
};
|
|
195
|
-
}));
|
|
196
|
-
}
|
|
197
|
-
return generated;
|
|
198
|
-
default:
|
|
199
|
-
throw new Error('Invalid provider.');
|
|
200
|
-
}
|
|
201
|
-
};
|
|
202
|
-
|
|
203
|
-
export default init;
|
|
204
|
-
export {
|
|
205
|
-
_NEED,
|
|
206
|
-
image,
|
|
207
|
-
init,
|
|
208
|
-
video,
|
|
209
|
-
};
|