utilitas 1999.1.70 → 1999.1.72
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/alan.mjs +4 -7
- package/lib/manifest.mjs +1 -2
- package/lib/media.mjs +12 -0
- package/lib/speech.mjs +53 -22
- package/package.json +1 -2
package/lib/alan.mjs
CHANGED

```diff
@@ -1,8 +1,8 @@
 import { checkSearch, distill, search } from './web.mjs';
 import { create as createUoid } from './uoid.mjs';
-import { createWavHeader } from './media.mjs';
 import { end, loop } from './event.mjs';
 import { fileTypeFromBuffer } from 'file-type';
+import { packPcmToWav } from './media.mjs';
 import { v4 as uuidv4 } from 'uuid';

 import {
@@ -707,12 +707,8 @@ const packResp = async (resp, options) => {
         const str = simpleText.indexOf(x);
         str >= 0 && (simpleText = simpleText.slice(0, str).trim());
     });
-    audio
-        input: BASE64, expected: BUFFER,
-    })) && audio.length && (audio = Buffer.concat([
-        createWavHeader(audio.length), audio
-    ])) && (audio = await convert(audio, {
-        input: BUFFER, expected: BUFFER, ...options || {},
+    audio = await ignoreErrFunc(async () => await packPcmToWav(audio, {
+        input: Buffer.isBuffer(audio) ? BUFFER : BASE64, expected: BUFFER,
     }));
     if (images?.length) {
         for (let i in images) {
@@ -1592,6 +1588,7 @@ export {
     getSession,
     init,
     initChat,
+    k,
     listFiles,
     listGptFineTuningEvents,
     listGptFineTuningJobs,
```
package/lib/manifest.mjs
CHANGED

```diff
@@ -1,7 +1,7 @@
 const manifest = {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "1999.1.70",
+    "version": "1999.1.72",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -30,7 +30,6 @@ const manifest = {
     "@ffprobe-installer/ffprobe": "^2.1.2",
     "@google-cloud/speech": "^7.1.0",
     "@google-cloud/storage": "^7.16.0",
-    "@google-cloud/text-to-speech": "^6.1.0",
     "@google-cloud/vision": "^5.1.0",
     "@google/genai": "^1.0.0",
     "@mozilla/readability": "github:mozilla/readability",
```
package/lib/media.mjs
CHANGED

```diff
@@ -38,6 +38,17 @@ const createWavHeader = (
     return header;
 };

+const packPcmToWav = async (audio, options) => {
+    (audio = await convert(audio, { ...options || {}, expected: BUFFER })) // DON'T override expected
+        && audio.length
+        && (audio = Buffer.concat([createWavHeader(audio.length), audio]))
+        && (audio = await convert(audio, {
+            expected: BUFFER, ...options || {}, input: BUFFER, // DON'T override input
+        }));
+    assert(audio, 'Failed to pack PCM to WAV.', 500);
+    return audio;
+};
+
 // https://codex.so/ffmpeg-node-js
 const getFfmpeg = async (options) => {
     const ffmpeg = await need('fluent-ffmpeg');
@@ -92,4 +103,5 @@ export {
     convertAudioTo16kNanoPcmWave,
     createWavHeader,
     getFfmpeg,
+    packPcmToWav,
 };
```
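The new `packPcmToWav` helper centralizes the raw-PCM-to-WAV packing that was previously inlined in `alan.mjs`. A minimal usage sketch, assuming the `BUFFER`/`BASE64` option values are the plain strings `'BUFFER'`/`'BASE64'` (as in the constants destructured in `speech.mjs` below); the base64 input and the relative import path are illustrative, not taken from the package docs:

```js
// Illustrative sketch only: pack raw PCM samples into a playable WAV.
import { packPcmToWav } from './lib/media.mjs';

const pcmBase64 = '...';                        // hypothetical raw PCM, base64-encoded
const wavBuffer = await packPcmToWav(pcmBase64, {
    input: 'BASE64',    // how the incoming audio is encoded ('BUFFER' for a Buffer)
    expected: 'BUFFER', // what to get back; 'FILE' should instead yield a temp-file path
});
```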
package/lib/speech.mjs
CHANGED

```diff
@@ -1,9 +1,9 @@
-import { DEFAULT_MODELS, OPENAI_VOICE } from './alan.mjs';
+import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
 import { getApiKeyCredentials, hash } from './encryption.mjs';
-import { getFfmpeg } from './media.mjs';
+import { getFfmpeg, packPcmToWav } from './media.mjs';
 import { get } from './web.mjs';
 import { convert, getTempPath } from './storage.mjs';
-import { ensureString } from './utilitas.mjs';
+import { ensureString, mergeAtoB } from './utilitas.mjs';

 import {
     call, countKeys, ignoreErrFunc, inBrowser,
@@ -17,19 +17,32 @@ import {

 const _NEED = [
     '@google-cloud/speech',
-    '@google
+    '@google/genai',
     'OpenAI',
     'whisper-node',
 ];

 const WHISPER_DEFAULT_MODEL = 'base';
 const errorMessage = 'Invalid audio data.';
-
-
-
-
-
-
+
+const [
+    BUFFER, STREAM, BASE64, FILE, clients, languageCode, audioEncoding, suffix,
+    SPEAKER, cleanup, wav,
+] = [
+    'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'en-US', 'OGG_OPUS', 'ogg',
+    'SPEAKER', true, 'wav'
+];
+
+const [
+    GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_PRO_TTS, GEMINI_25_FLASH_TTS,
+    OPENAI_TTS_MAX_LENGTH,
+] = [
+    'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-pro-preview-tts',
+    'gemini-2.5-flash-preview-tts', 4096
+];
+
+const [defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel]
+    = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_PRO_TTS];

 const WHISPER_MODELS = [
     // npx whisper-node download tiny.en
@@ -104,13 +117,14 @@ const init = async (options) => {
             break;
         case 'GOOGLE':
             clients._provider = provider;
-            const sslCreds = await getApiKeyCredentials(options);
             if (options?.tts) {
-
-
+                let { GoogleGenAI } = await need('@google/genai');
+                let client = new GoogleGenAI(options);
+                clients.tts = client.models.generateContent;
             }
             if (options?.stt) {
                 const stt = (await need('@google-cloud/speech')).default;
+                const sslCreds = await getApiKeyCredentials(options);
                 clients.stt = new stt.SpeechClient({ sslCreds });
             }
             break;
@@ -159,15 +173,29 @@ const ttsOpenAI = async (input, options) => {
     return await convert(buffer, { suffix, ...options || {} });
 };

-
+// https://ai.google.dev/gemini-api/docs/speech-generation#voices
+const ttsGoogle = async (contents, options) => {
     assert(clients.tts, 'Google TTS API has not been initialized.', 500);
-    assert(
-
-
-
-
+    assert(contents, 'Text is required.', 400);
+    assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
+    const resp = await clients.tts({
+        model: options?.model || defaultGeminiTtsModel, contents,
+        config: mergeAtoB(options?.config, {
+            responseModalities: ['AUDIO'],
+            speechConfig: {
+                voiceConfig: {
+                    prebuiltVoiceConfig: {
+                        voiceName: options?.voice || 'Leda',
+                    },
+                },
+            },
+        }),
+    });
+    const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
+    assert(rawAudio, 'Failed to generate audio.', 500);
+    return options?.raw ? rawAudio : await packPcmToWav(rawAudio?.data, {
+        input: BASE64, expected: 'FILE', suffix: wav, ...options || {},
     });
-    return await convert(response.audioContent, { suffix, ...options || {} });
 };

 const ttsSay = async (text, options) => {
@@ -275,13 +303,16 @@ const stt = async (audio, options) => {
 export default init;
 export {
     _NEED,
+    OPENAI_TTS_MAX_LENGTH,
     checkSay,
     checkWhisper,
-    init,
+    init,
+    stt,
+    sttGoogle,
     sttOpenAI,
     sttWhisper,
     tts,
     ttsGoogle,
     ttsOpenAI,
-    ttsSay
+    ttsSay,
 };
```
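The rewritten `ttsGoogle` drives Gemini's native speech generation through `@google/genai` and packs the returned PCM into a WAV file with `packPcmToWav`. A minimal calling sketch, assuming `init` switches on an `options.provider` string and forwards the remaining options (including `apiKey`) to the `GoogleGenAI` constructor; these option names are inferred from the diff above, not from package documentation:

```js
import init, { ttsGoogle } from './lib/speech.mjs';

// Assumed init options: provider selects the 'GOOGLE' branch shown above,
// tts: true enables the @google/genai client, apiKey is passed through to it.
await init({ provider: 'GOOGLE', tts: true, apiKey: process.env.GEMINI_API_KEY });

// Default output is a WAV temp file (expected: 'FILE', suffix: 'wav');
// pass { raw: true } to get Gemini's inlineData part (base64 PCM) instead.
const wavFile = await ttsGoogle('Hello from utilitas!', { voice: 'Leda' });
console.log(wavFile);
```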
package/package.json
CHANGED

```diff
@@ -1,7 +1,7 @@
 {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "1999.1.70",
+    "version": "1999.1.72",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -41,7 +41,6 @@
     "@ffprobe-installer/ffprobe": "^2.1.2",
     "@google-cloud/speech": "^7.1.0",
     "@google-cloud/storage": "^7.16.0",
-    "@google-cloud/text-to-speech": "^6.1.0",
     "@google-cloud/vision": "^5.1.0",
     "@google/genai": "^1.0.0",
     "@mozilla/readability": "github:mozilla/readability",
```