utilitas 2000.3.26 → 2000.3.28

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/lib/speech.mjs CHANGED
@@ -1,39 +1,20 @@
1
- import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
2
- import { getFfmpeg, packPcmToWav } from './media.mjs';
3
1
  import { get } from './web.mjs';
4
- import { convert, getTempPath, MIME_WAV } from './storage.mjs';
5
- import { ensureString, mergeAtoB } from './utilitas.mjs';
2
+ import { getFfmpeg } from './media.mjs';
3
+ import { getTempPath } from './storage.mjs';
4
+ import { hash } from './encryption.mjs';
6
5
 
7
6
  import {
8
- call, countKeys, ignoreErrFunc, inBrowser,
9
- need, throwError
7
+ call, ignoreErrFunc, inBrowser, need, throwError,
10
8
  } from './utilitas.mjs';
11
9
 
12
10
  import {
13
- convertAudioTo16kNanoOpusOgg,
14
- convertAudioTo16kNanoPcmWave,
11
+ convertAudioTo16kNanoOpusOgg, convertAudioTo16kNanoPcmWave,
15
12
  } from './media.mjs';
16
13
 
17
- const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];
14
+ const _NEED = ['whisper-node'];
18
15
 
19
- const [
20
- BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
21
- GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH,
22
- OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
23
- ] = [
24
- 'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
25
- 'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
26
- 'gemini-flash-latest', 4096, 'base', 'Invalid audio data.',
27
- ];
28
-
29
- const [
30
- defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
31
- defaultGeminiSttModel,
32
- ] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
33
-
34
- const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
35
-
36
- const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
16
+ const [FILE, suffix, SPEAKER, cleanup, WHISPER_DEFAULT_MODEL, errorMessage]
17
+ = ['FILE', 'ogg', 'SPEAKER', true, 'base', 'Invalid audio data.'];
37
18
 
38
19
  const WHISPER_MODELS = [
39
20
  // npx whisper-node download tiny.en
@@ -86,108 +67,22 @@ const getWhisperModelReady = async (model, options) => {
86
67
  return (await get(getWhisperModelUrl(model), { fuzzy: true }))?.cache?.content;
87
68
  };
88
69
 
89
- const init = async (options) => {
90
- if (options) {
91
- assert(
92
- options?.tts || options?.stt,
93
- 'At least one of TTS or STT is selected.', 500
94
- );
95
- const provider = ensureString(options?.provider, { case: 'UP' });
96
- switch (provider) {
97
- case 'OPENAI':
98
- clients._provider = provider;
99
- const OpenAI = await need('openai');
100
- const openai = new OpenAI(options);
101
- if (options?.tts) {
102
- clients.tts = openai.audio.speech;
103
- }
104
- if (options?.stt) {
105
- clients.stt = openai.audio.transcriptions;
106
- clients.toFile = OpenAI.toFile;
107
- }
108
- break;
109
- case 'GOOGLE':
110
- clients._provider = provider;
111
- const { GoogleGenAI } = await need('@google/genai');
112
- const client = new GoogleGenAI(options);
113
- if (options?.tts) {
114
- clients.tts = client.models.generateContent;
115
- }
116
- if (options?.stt) {
117
- clients.stt = client.models.generateContent;
118
- }
119
- break;
120
- case '':
121
- clients._provider = 'LOCAL';
122
- options?.tts && await checkSay({ assert: true });
123
- options?.stt && await checkWhisper({ assert: true });
124
- break;
125
- default:
126
- throwError('Invalid speech provider.', 500);
127
- }
128
- }
129
- assert(
130
- countKeys(clients), 'Speech API client has not been initialized.', 501
131
- );
132
- return clients;
133
- };
134
-
135
- const checkSay = async (options) => {
70
+ const checkSay = async () => {
136
71
  const result = !!(await ignoreErrFunc(async () => (
137
72
  await Promise.all([need('node:os'), need('say'), getFfmpeg()])
138
73
  )[0].platform() === 'darwin'));
139
- options?.assert && assert(result, 'Say API is not available.', 500);
74
+ assert(result, 'Say API is not available.', 500);
140
75
  return result;
141
76
  };
142
77
 
143
- const checkWhisper = async (options) => {
78
+ const checkWhisper = async () => {
144
79
  const result = !!(await ignoreErrFunc(() => Promise.all([
145
80
  need('whisper-node'), getFfmpeg()
146
81
  ])));
147
- options?.assert && assert(result, 'Whisper API is not available.', 500);
82
+ assert(result, 'Whisper API is not available.', 500);
148
83
  return result;
149
84
  };
150
85
 
151
- const ttsOpenAI = async (input, options) => {
152
- assert(clients.tts, 'OpenAI TTS API has not been initialized.', 500);
153
- assert(input, 'Text is required.', 400);
154
- assert(input.length <= OPENAI_TTS_MAX_LENGTH, 'Text is too long.', 400);
155
- // https://platform.openai.com/docs/api-reference/audio/createSpeech
156
- const content = await clients.tts.create({
157
- model: defaultOpenAITtsModel, voice: DEFAULT_MODELS[OPENAI_VOICE],
158
- instructions: 'Speak in a friendly and sweet tone.',
159
- response_format: 'opus', input, ...options?.params || {},
160
- });
161
- const buffer = Buffer.from(await content.arrayBuffer());
162
- return await convert(buffer, { suffix, ...options || {} });
163
- };
164
-
165
- // https://ai.google.dev/gemini-api/docs/speech-generation#voices
166
- const ttsGoogle = async (contents, options) => {
167
- assert(clients.tts, 'Google TTS API has not been initialized.', 500);
168
- assert(contents, 'Text is required.', 400);
169
- assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
170
- const resp = await clients.tts({
171
- model: options?.model || defaultGeminiTtsModel,
172
- contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
173
- config: mergeAtoB(options?.config, {
174
- responseModalities: ['AUDIO'],
175
- speechConfig: {
176
- voiceConfig: {
177
- prebuiltVoiceConfig: {
178
- voiceName: options?.voice || 'Zephyr',
179
- },
180
- },
181
- },
182
- }),
183
- });
184
- const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
185
- assert(rawAudio, 'Failed to generate audio.', 500);
186
- return options?.raw ? rawAudio : await packPcmToWav(rawAudio?.data, {
187
- input: BASE64, expected: 'FILE', suffix: wav, ...options || {},
188
- });
189
- };
190
-
191
86
  const ttsSay = async (text, options) => {
192
87
  const say = await need('say');
193
88
  assert(text, 'Text is required.', 400);
@@ -214,45 +109,6 @@ const ttsBrowser = async (text) => {
214
109
  return speechSynthesis.speak(new SpeechSynthesisUtterance(text));
215
110
  };
216
111
 
217
- const sttOpenAI = async (audio, options) => {
218
- assert(clients.stt, 'OpenAI STT API has not been initialized.', 500);
219
- const input = ensureString(options?.input, { case: 'UP' });
220
- const { content, cleanup } = await convert(audio, {
221
- input: options?.input, ...options || {}, expected: STREAM, errorMessage,
222
- suffix: ['', BUFFER].includes(input) ? suffix : null,
223
- withCleanupFunc: true,
224
- });
225
- const result = await clients.stt.create({
226
- file: await clients.toFile(content), model: defaultOpenAISttModel,
227
- response_format: 'text', ...options?.params || {},
228
- });
229
- await cleanup();
230
- return result;
231
- };
232
-
233
- const sttGoogle = async (audio, options) => {
234
- assert(clients.stt, 'Google STT API has not been initialized.', 500);
235
- const data = await convert(audio, {
236
- input: options?.input, expected: BASE64, errorMessage,
237
- });
238
- const resp = await clients.stt({
239
- model: options?.model || defaultGeminiSttModel, contents: {
240
- parts: [{
241
- inlineData: {
242
- mimeType: options?.mimeType || MIME_WAV, data,
243
- },
244
- }, { text: STT_PROMPT }],
245
- },
246
- config: { ...options?.config || {} },
247
- });
248
- assert(
249
- resp?.candidates?.[0]?.content?.parts?.[0],
250
- 'Failed to transcribe audio.', 500
251
- );
252
- return options?.raw ? resp.candidates
253
- : (resp.candidates[0].content.parts[0].text?.trim?.() || '');
254
- };
255
-
256
112
  // This function is not working properly, a pull request is filed:
257
113
  // https://github.com/ariym/whisper-node/pull/58
258
114
  const sttWhisper = async (audio, options) => {
@@ -282,35 +138,24 @@ const sttWhisper = async (audio, options) => {
282
138
  const tts = async (text, options) => {
283
139
  let engine;
284
140
  if (inBrowser()) { engine = ttsBrowser }
285
- else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
286
- else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
287
141
  else if (await checkSay()) { engine = ttsSay; }
288
- else { throwError('Text-to-Speech engine has not been initialized.', 500); }
142
+ else { throwError('Text-to-Speech engine is not available.', 500); }
289
143
  return await engine(text, options);
290
144
  };
291
145
 
292
146
  const stt = async (audio, options) => {
293
147
  let engine;
294
- if (clients?.stt && clients._provider === 'GOOGLE') { engine = sttGoogle; }
295
- else if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
296
- else if (await checkWhisper()) { engine = sttWhisper; }
297
- else { throwError('Speech-to-Text engine has not been initialized.', 500); }
148
+ if (await checkWhisper()) { engine = sttWhisper; }
149
+ else { throwError('Speech-to-Text engine is not available.', 500); }
298
150
  return await engine(audio, options);
299
151
  };
300
152
 
301
- export default init;
302
153
  export {
303
154
  _NEED,
304
- OPENAI_TTS_MAX_LENGTH,
305
155
  checkSay,
306
156
  checkWhisper,
307
- init,
308
157
  stt,
309
- sttGoogle,
310
- sttOpenAI,
311
158
  sttWhisper,
312
159
  tts,
313
- ttsGoogle,
314
- ttsOpenAI,
315
160
  ttsSay,
316
161
  };
package/lib/storage.mjs CHANGED
@@ -240,7 +240,7 @@ const blobToBuffer = async blob => {
240
240
 
241
241
  const convert = async (any, options) => {
242
242
  assert(any, options?.errorMessage || 'Invalid input.', 400);
243
- const result = {}
243
+ let result = {};
244
244
  let [input, expected] = [(
245
245
  Buffer.isBuffer(any)
246
246
  || ArrayBuffer.isArrayBuffer(any)
@@ -248,7 +248,7 @@ const convert = async (any, options) => {
248
248
  ) ? BUFFER : options?.input, options?.expected || BUFFER].map(
249
249
  x => ensureString(x, { case: 'UP' })
250
250
  );
251
- let [oriFile, meta, mime, subExp] = [null, null, MIME_BINARY, expected];
251
+ let [oriFile, meta, mime, subExp] = [null, null, null, expected];
252
252
  switch (input) {
253
253
  case FILE:
254
254
  oriFile = any;
@@ -269,6 +269,7 @@ const convert = async (any, options) => {
269
269
  input = BUFFER;
270
270
  break;
271
271
  }
272
+ mime || (mime = (await getMime(any, any))?.mime || MIME_BINARY);
272
273
  switch (expected) {
273
274
  case STREAM: subExp = FILE; break;
274
275
  case DATAURL: subExp = BUFFER; break;
@@ -313,8 +314,9 @@ const convert = async (any, options) => {
313
314
 
314
315
  const getMime = async (buf, filename) => {
315
316
  const mimeType = await ignoreErrFunc(() => need('mime-types'));
316
- const mime = extract(await fileTypeFromBuffer(buf), 'mime')
317
- || (filename && mimeType?.lookup?.(filename)) || MIME_BINARY;
317
+ const mime = (buf && Buffer.isBuffer(buf) && extract(await fileTypeFromBuffer(buf), 'mime'))
318
+ || (filename && String.isString(filename) && mimeType?.lookup?.(filename))
319
+ || MIME_BINARY;
318
320
  return { mime, extension: mimeType?.extension?.(mime) || 'bin' };
319
321
  };
320
322
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.26",
4
+ "version": "2000.3.28",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
package/lib/gen.mjs DELETED
@@ -1,209 +0,0 @@
1
- import {
2
- ensureArray, ensureString, log as _log, need, throwError,
3
- tryUntil, timeout,
4
- } from './utilitas.mjs';
5
-
6
- import { convert, MIME_PNG, MIME_MP4, getTempPath } from './storage.mjs';
7
- import { createReadStream } from 'fs';
8
-
9
- const _NEED = ['OpenAI', '@google/genai'];
10
- const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
11
- const [
12
- clients, OPENAI, GOOGLE, BASE64, FILE, BUFFER, ERROR_GENERATING,
13
- IMAGEN_MODEL, OPENAI_MODEL, VEO_MODEL, IMAGEN_UPSCALE_MODEL,
14
- ] = [
15
- {}, 'OPENAI', 'GOOGLE', 'BASE64', 'FILE', 'BUFFER',
16
- 'Error generating media.', 'imagen-4.0-ultra-generate-001',
17
- 'gpt-image-1', 'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
18
- ];
19
-
20
- const init = async (options) => {
21
- assert(options?.apiKey, 'API key is required.');
22
- const provider = ensureString(options?.provider, { case: 'UP' });
23
- switch (provider) {
24
- case OPENAI:
25
- const OpenAI = await need('openai');
26
- var client = new OpenAI(options);
27
- clients[provider] = {
28
- image: client.images,
29
- toFile: OpenAI.toFile,
30
- };
31
- break;
32
- case GOOGLE:
33
- const { GoogleGenAI } = await need('@google/genai');
34
- var client = new GoogleGenAI({ vertexai: false, ...options });
35
- clients[provider] = {
36
- gen: client,
37
- };
38
- break;
39
- default:
40
- throw new Error('Invalid provider.');
41
- }
42
- return clients;
43
- };
44
-
45
- const extractImage = async (data, options) => await convert(
46
- data, { input: BASE64, suffix: 'png', ...options || {} }
47
- );
48
-
49
- const extractVideo = async (data, options) => await convert(
50
- data, { input: FILE, suffix: 'mp4', ...options || {} }
51
- );
52
-
53
- const prepareImage = async (files, repack, options) => {
54
- if (!files) { return }
55
- const multiple = Array.isArray(files);
56
- files = ensureArray(files);
57
- const resp = await Promise.all(files.map(async x => await repack(
58
- createReadStream(await convert(
59
- x, { expected: 'FILE', ...options || {} }
60
- )), null, { type: MIME_PNG } // don't need to be right MIME type
61
- )));
62
- return multiple ? resp : resp[0];
63
- };
64
-
65
-
66
- const image = async (prompt, options) => {
67
- let provider = ensureString(options?.provider, { case: 'UP' });
68
- if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
69
- else if (!provider && clients?.[OPENAI]) { provider = OPENAI; }
70
- const client = clients?.[provider];
71
- const n = options?.n || 4;
72
- assert(client, 'No available image generation provider.');
73
- prompt = ensureString(prompt);
74
- assert(prompt.length <= 4000,
75
- 'Prompt must be less than 4000 characters.', 400);
76
- options = {
77
- ...options || {},
78
- expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
79
- };
80
- switch (provider) {
81
- case OPENAI:
82
- let [func, extraOptions] = ['generate', {}];
83
- if (options?.reference || options?.mask) {
84
- func = 'edit';
85
- extraOptions = {
86
- image: await prepareImage(options?.reference, client.toFile, options),
87
- mask: await prepareImage(options?.mask, client.toFile, options),
88
- };
89
- }
90
- try { // https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1
91
- var resp = await client.image[func]({
92
- prompt, model: OPENAI_MODEL, n, quality: 'high',
93
- size: '1536x1024', moderation: 'low',
94
- // 1024x1024 (square), 1536x1024 (landscape), 1024x1536 (portrait), auto (default)
95
- // background: 'transparent',
96
- ...extraOptions, ...options?.params || {},
97
- });
98
- } catch (err) { throwError(err?.message || ERROR_GENERATING); }
99
- if (!options?.raw) {
100
- resp.data = await Promise.all(resp.data.map(async x => ({
101
- caption: `🎨 by ${OPENAI_MODEL}`,
102
- data: await extractImage(x.b64_json, {
103
- ...options || {}, input: BASE64,
104
- }),
105
- mimeType: MIME_PNG,
106
- })));
107
- }
108
- return resp?.data;
109
- case GOOGLE:
110
- var resp = await client.gen.models.generateImages({
111
- model: IMAGEN_MODEL, prompt, config: {
112
- numberOfImages: n, sampleImageSize: '2K',
113
- includeRaiReason: true,
114
- // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
115
- aspectRatio: '16:9', personGeneration: 'allow_adult',
116
- ...options?.config || {},
117
- },
118
- });
119
- const generated = resp?.generatedImages;
120
- assert(!resp?.error && generated?.filter(
121
- x => !x.raiFilteredReason
122
- ).length, resp?.error?.message || generated?.find(
123
- x => x.raiFilteredReason
124
- )?.raiFilteredReason || ERROR_GENERATING);
125
- if (!options?.raw) {
126
- resp = await Promise.all((resp?.generatedImages || []).map(
127
- async x => ({
128
- caption: `🎨 by ${IMAGEN_MODEL}`,
129
- data: await extractImage(x.image.imageBytes, options),
130
- mimeType: x.mimeType,
131
- })
132
- ));
133
- }
134
- return resp;
135
- default:
136
- throw new Error('Invalid provider.');
137
- }
138
- };
139
-
140
- const video = async (prompt, options) => {
141
- let provider = ensureString(options?.provider, { case: 'UP' });
142
- if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
143
- const client = clients?.[provider];
144
- assert(client, 'No available video generation provider.');
145
- prompt = ensureString(prompt);
146
- assert(prompt.length <= 4000,
147
- 'Prompt must be less than 4000 characters.', 400);
148
- options = {
149
- ...options || {},
150
- expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
151
- };
152
- switch (provider) {
153
- case GOOGLE:
154
- var resp = await client.gen.models.generateVideos({
155
- model: VEO_MODEL, prompt, config: {
156
- aspectRatio: '16:9', numberOfVideos: 1,
157
- // personGeneration: 'allow_adult',
158
- enablePromptRewriting: true, addWatermark: false,
159
- includeRaiReason: true, ...options?.config || {},
160
- },
161
- });
162
- assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
163
- if (options?.generateRaw) { return resp; }
164
- await tryUntil(async () => {
165
- resp = await client.gen.operations.getVideosOperation({
166
- operation: resp,
167
- });
168
- assert(
169
- resp?.done,
170
- `Waiting for Google video generation: ${resp.name}`,
171
- );
172
- }, { maxTry: 60 * 10, log });
173
- let generated = resp?.response?.generatedVideos;
174
- assert(!resp?.error && generated?.filter(
175
- x => !x.raiFilteredReason
176
- ).length, resp?.error?.message || generated?.find(
177
- x => x.raiFilteredReason
178
- )?.raiFilteredReason || ERROR_GENERATING);
179
- if (!options?.videoRaw) {
180
- generated = await Promise.all(generated?.filter(
181
- x => x?.video?.uri
182
- ).map(async (x, i) => {
183
- const downloadPath = `${getTempPath({
184
- seed: x?.video?.uri
185
- })}.mp4`;
186
- // @todo: fix this
187
- // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
188
- await client.gen.files.download({ file: x, downloadPath });
189
- await timeout(1000 * 10); // hack to wait for file to be downloaded
190
- return {
191
- caption: `🎥 by ${VEO_MODEL}`,
192
- data: await extractVideo(downloadPath, options),
193
- mimeType: MIME_MP4, jobId: resp.name,
194
- };
195
- }));
196
- }
197
- return generated;
198
- default:
199
- throw new Error('Invalid provider.');
200
- }
201
- };
202
-
203
- export default init;
204
- export {
205
- _NEED,
206
- image,
207
- init,
208
- video,
209
- };