utilitas 2000.3.27 → 2000.3.28

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/lib/speech.mjs CHANGED
@@ -1,36 +1,20 @@
1
- import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
2
- import { getFfmpeg, packPcmToWav } from './media.mjs';
3
1
  import { get } from './web.mjs';
4
- import { convert, getTempPath } from './storage.mjs';
5
- import { ensureString, mergeAtoB } from './utilitas.mjs';
2
+ import { getFfmpeg } from './media.mjs';
3
+ import { getTempPath } from './storage.mjs';
4
+ import { hash } from './encryption.mjs';
6
5
 
7
6
  import {
8
- call, countKeys, ignoreErrFunc, inBrowser,
9
- need, throwError
7
+ call, ignoreErrFunc, inBrowser, need, throwError,
10
8
  } from './utilitas.mjs';
11
9
 
12
10
  import {
13
- convertAudioTo16kNanoOpusOgg,
14
- convertAudioTo16kNanoPcmWave,
11
+ convertAudioTo16kNanoOpusOgg, convertAudioTo16kNanoPcmWave,
15
12
  } from './media.mjs';
16
13
 
17
- const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];
14
+ const _NEED = ['whisper-node'];
18
15
 
19
- const [
20
- BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
21
- GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS,
22
- OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
23
- ] = [
24
- 'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
25
- 'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
26
- 4096, 'base', 'Invalid audio data.',
27
- ];
28
-
29
- const [
30
- defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
31
- ] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS];
32
-
33
- const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
16
+ const [FILE, suffix, SPEAKER, cleanup, WHISPER_DEFAULT_MODEL, errorMessage]
17
+ = ['FILE', 'ogg', 'SPEAKER', true, 'base', 'Invalid audio data.'];
34
18
 
35
19
  const WHISPER_MODELS = [
36
20
  // npx whisper-node download tiny.en
@@ -83,105 +67,22 @@ const getWhisperModelReady = async (model, options) => {
83
67
  return (await get(getWhisperModelUrl(model), { fuzzy: true }))?.cache?.content;
84
68
  };
85
69
 
86
- const init = async (options) => {
87
- if (options) {
88
- assert(
89
- options?.tts || options?.stt,
90
- 'At least one of TTS or STT is selected.', 500
91
- );
92
- const provider = ensureString(options?.provider, { case: 'UP' });
93
- switch (provider) {
94
- case 'OPENAI':
95
- clients._provider = provider;
96
- const OpenAI = await need('openai');
97
- const openai = new OpenAI(options);
98
- if (options?.tts) {
99
- clients.tts = openai.audio.speech;
100
- }
101
- if (options?.stt) {
102
- clients.stt = openai.audio.transcriptions;
103
- clients.toFile = OpenAI.toFile;
104
- }
105
- break;
106
- case 'GOOGLE':
107
- clients._provider = provider;
108
- const { GoogleGenAI } = await need('@google/genai');
109
- const client = new GoogleGenAI(options);
110
- if (options?.tts) {
111
- clients.tts = client.models.generateContent;
112
- }
113
- break;
114
- case '':
115
- clients._provider = 'LOCAL';
116
- options?.tts && await checkSay({ assert: true });
117
- options?.stt && await checkWhisper({ assert: true });
118
- break;
119
- default:
120
- throwError('Invalid speech provider.', 500);
121
- }
122
- }
123
- assert(
124
- countKeys(clients), 'Speech API client has not been initialized.', 501
125
- );
126
- return clients;
127
- };
128
-
129
- const checkSay = async (options) => {
70
+ const checkSay = async () => {
130
71
  const result = !!(await ignoreErrFunc(async () => (
131
72
  await Promise.all([need('node:os'), need('say'), getFfmpeg()])
132
73
  )[0].platform() === 'darwin'));
133
- options?.assert && assert(result, 'Say API is not available.', 500);
74
+ assert(result, 'Say API is not available.', 500);
134
75
  return result;
135
76
  };
136
77
 
137
- const checkWhisper = async (options) => {
78
+ const checkWhisper = async () => {
138
79
  const result = !!(await ignoreErrFunc(() => Promise.all([
139
80
  need('whisper-node'), getFfmpeg()
140
81
  ])));
141
- options?.assert && assert(result, 'Whisper API is not available.', 500);
82
+ assert(result, 'Whisper API is not available.', 500);
142
83
  return result;
143
84
  };
144
85
 
145
- const ttsOpenAI = async (input, options) => {
146
- assert(clients.tts, 'OpenAI TTS API has not been initialized.', 500);
147
- assert(input, 'Text is required.', 400);
148
- assert(input.length <= OPENAI_TTS_MAX_LENGTH, 'Text is too long.', 400);
149
- // https://platform.openai.com/docs/api-reference/audio/createSpeech
150
- const content = await clients.tts.create({
151
- model: defaultOpenAITtsModel, voice: DEFAULT_MODELS[OPENAI_VOICE],
152
- instructions: 'Speak in a friendly and sweet tone.',
153
- response_format: 'opus', input, ...options?.params || {},
154
- });
155
- const buffer = Buffer.from(await content.arrayBuffer());
156
- return await convert(buffer, { suffix, ...options || {} });
157
- };
158
-
159
- // https://ai.google.dev/gemini-api/docs/speech-generation#voices
160
- const ttsGoogle = async (contents, options) => {
161
- assert(clients.tts, 'Google TTS API has not been initialized.', 500);
162
- assert(contents, 'Text is required.', 400);
163
- assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
164
- const resp = await clients.tts({
165
- model: options?.model || defaultGeminiTtsModel,
166
- contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
167
- config: mergeAtoB(options?.config, {
168
- responseModalities: ['AUDIO'],
169
- speechConfig: {
170
- voiceConfig: {
171
- prebuiltVoiceConfig: {
172
- voiceName: options?.voice || 'Zephyr',
173
- },
174
- },
175
- },
176
- }),
177
- });
178
- const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
179
- assert(rawAudio, 'Failed to generate audio.', 500);
180
- return options?.raw ? rawAudio : await packPcmToWav(rawAudio?.data, {
181
- input: BASE64, expected: 'FILE', suffix: wav, ...options || {},
182
- });
183
- };
184
-
185
86
  const ttsSay = async (text, options) => {
186
87
  const say = await need('say');
187
88
  assert(text, 'Text is required.', 400);
@@ -208,22 +109,6 @@ const ttsBrowser = async (text) => {
208
109
  return speechSynthesis.speak(new SpeechSynthesisUtterance(text));
209
110
  };
210
111
 
211
- const sttOpenAI = async (audio, options) => {
212
- assert(clients.stt, 'OpenAI STT API has not been initialized.', 500);
213
- const input = ensureString(options?.input, { case: 'UP' });
214
- const { content, cleanup } = await convert(audio, {
215
- input: options?.input, ...options || {}, expected: STREAM, errorMessage,
216
- suffix: ['', BUFFER].includes(input) ? suffix : null,
217
- withCleanupFunc: true,
218
- });
219
- const result = await clients.stt.create({
220
- file: await clients.toFile(content), model: defaultOpenAISttModel,
221
- response_format: 'text', ...options?.params || {},
222
- });
223
- await cleanup();
224
- return result;
225
- };
226
-
227
112
  // This function is not working properly, a pull request is filed:
228
113
  // https://github.com/ariym/whisper-node/pull/58
229
114
  const sttWhisper = async (audio, options) => {
@@ -253,33 +138,24 @@ const sttWhisper = async (audio, options) => {
253
138
  const tts = async (text, options) => {
254
139
  let engine;
255
140
  if (inBrowser()) { engine = ttsBrowser }
256
- else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
257
- else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
258
141
  else if (await checkSay()) { engine = ttsSay; }
259
- else { throwError('Text-to-Speech engine has not been initialized.', 500); }
142
+ else { throwError('Text-to-Speech engine is not available.', 500); }
260
143
  return await engine(text, options);
261
144
  };
262
145
 
263
146
  const stt = async (audio, options) => {
264
147
  let engine;
265
- if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
266
- else if (await checkWhisper()) { engine = sttWhisper; }
267
- else { throwError('Speech-to-Text engine has not been initialized.', 500); }
148
+ if (await checkWhisper()) { engine = sttWhisper; }
149
+ else { throwError('Speech-to-Text engine is not available.', 500); }
268
150
  return await engine(audio, options);
269
151
  };
270
152
 
271
- export default init;
272
153
  export {
273
154
  _NEED,
274
- OPENAI_TTS_MAX_LENGTH,
275
155
  checkSay,
276
156
  checkWhisper,
277
- init,
278
157
  stt,
279
- sttOpenAI,
280
158
  sttWhisper,
281
159
  tts,
282
- ttsGoogle,
283
- ttsOpenAI,
284
160
  ttsSay,
285
161
  };
package/lib/storage.mjs CHANGED
@@ -240,7 +240,7 @@ const blobToBuffer = async blob => {
240
240
 
241
241
  const convert = async (any, options) => {
242
242
  assert(any, options?.errorMessage || 'Invalid input.', 400);
243
- const result = {}
243
+ let result = {};
244
244
  let [input, expected] = [(
245
245
  Buffer.isBuffer(any)
246
246
  || ArrayBuffer.isArrayBuffer(any)
@@ -248,7 +248,7 @@ const convert = async (any, options) => {
248
248
  ) ? BUFFER : options?.input, options?.expected || BUFFER].map(
249
249
  x => ensureString(x, { case: 'UP' })
250
250
  );
251
- let [oriFile, meta, mime, subExp] = [null, null, MIME_BINARY, expected];
251
+ let [oriFile, meta, mime, subExp] = [null, null, null, expected];
252
252
  switch (input) {
253
253
  case FILE:
254
254
  oriFile = any;
@@ -269,6 +269,7 @@ const convert = async (any, options) => {
269
269
  input = BUFFER;
270
270
  break;
271
271
  }
272
+ mime || (mime = (await getMime(any, any))?.mime || MIME_BINARY);
272
273
  switch (expected) {
273
274
  case STREAM: subExp = FILE; break;
274
275
  case DATAURL: subExp = BUFFER; break;
@@ -313,8 +314,9 @@ const convert = async (any, options) => {
313
314
 
314
315
  const getMime = async (buf, filename) => {
315
316
  const mimeType = await ignoreErrFunc(() => need('mime-types'));
316
- const mime = extract(await fileTypeFromBuffer(buf), 'mime')
317
- || (filename && mimeType?.lookup?.(filename)) || MIME_BINARY;
317
+ const mime = (buf && Buffer.isBuffer(buf) && extract(await fileTypeFromBuffer(buf), 'mime'))
318
+ || (filename && String.isString(filename) && mimeType?.lookup?.(filename))
319
+ || MIME_BINARY;
318
320
  return { mime, extension: mimeType?.extension?.(mime) || 'bin' };
319
321
  };
320
322
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.27",
4
+ "version": "2000.3.28",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",