utilitas 2000.3.22 → 2000.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -3
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/embedding.mjs +4 -2
- package/lib/encryption.mjs +1 -15
- package/lib/manifest.mjs +12 -12
- package/lib/speech.mjs +33 -38
- package/package.json +12 -12
package/lib/embedding.mjs
CHANGED
|
@@ -22,7 +22,7 @@ const [
|
|
|
22
22
|
'JINA',
|
|
23
23
|
'text-embedding-3-small', // dim: 1536
|
|
24
24
|
'text-embedding-3-large', // dim: 3072
|
|
25
|
-
'gemini-embedding-001', // dim: 768, 1536, or 3072(default)
|
|
25
|
+
'gemini-embedding-001', // dim: 768(default), 1536, or 3072(google default)
|
|
26
26
|
'jina-clip-v2', // dim: 1024
|
|
27
27
|
'jina-embeddings-v3', // dim: 256‑1024
|
|
28
28
|
];
|
|
@@ -41,7 +41,9 @@ const DEFAULT_MODELS = {
|
|
|
41
41
|
const MODEL_CONFIG = {
|
|
42
42
|
[OPENAI_MODEL_EMBED_SMALL]: { source: 'openai', maxTokens: 8192 },
|
|
43
43
|
[OPENAI_MODEL_EMBED_LARGE]: { source: 'openai', maxTokens: 8192 },
|
|
44
|
-
[GOOGLE_MODEL_GEMINI_EMBED]: {
|
|
44
|
+
[GOOGLE_MODEL_GEMINI_EMBED]: {
|
|
45
|
+
source: 'google', maxTokens: 2048, options: { dimensions: 768 },
|
|
46
|
+
},
|
|
45
47
|
[JINA_MODEL_CLIP_2]: {
|
|
46
48
|
maxTokens: 8192,
|
|
47
49
|
image: true,
|
package/lib/encryption.mjs
CHANGED
|
@@ -9,10 +9,7 @@ import { createReadStream } from 'fs';
|
|
|
9
9
|
import { base64Decode, base64Encode, ensureString, hexEncode, need } from './utilitas.mjs';
|
|
10
10
|
import { networkInterfaces } from 'os';
|
|
11
11
|
|
|
12
|
-
const _NEED = [
|
|
13
|
-
'@google-cloud/speech', '@google-cloud/text-to-speech', 'google-gax',
|
|
14
|
-
];
|
|
15
|
-
|
|
12
|
+
const _NEED = ['google-gax'];
|
|
16
13
|
const defaultAlgorithm = 'sha256';
|
|
17
14
|
const defaultEncryption = 'aes-256-gcm';
|
|
18
15
|
const uniqueString = (any) => hash(ensureString(any || networkInterfaces()));
|
|
@@ -111,16 +108,6 @@ const aesDecrypt = (any, options) => {
|
|
|
111
108
|
return decrypted;
|
|
112
109
|
};
|
|
113
110
|
|
|
114
|
-
const getGoogleApiKeyCredentials = async (options) => {
|
|
115
|
-
// Included in @google-cloud/speech and @google-cloud/text-to-speech
|
|
116
|
-
const { GoogleAuth, grpc } = await need('google-gax');
|
|
117
|
-
const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
|
|
118
|
-
return grpc.credentials.combineChannelCredentials(
|
|
119
|
-
grpc.credentials.createSsl(),
|
|
120
|
-
grpc.credentials.createFromGoogleCredential(authClient)
|
|
121
|
-
);
|
|
122
|
-
};
|
|
123
|
-
|
|
124
111
|
const getGoogleAuthByCredentials = async (keyFilename) => {
|
|
125
112
|
const { GoogleAuth } = await need('google-gax');
|
|
126
113
|
return (new GoogleAuth({
|
|
@@ -144,7 +131,6 @@ export {
|
|
|
144
131
|
defaultAlgorithm,
|
|
145
132
|
defaultEncryption,
|
|
146
133
|
digestObject,
|
|
147
|
-
getGoogleApiKeyCredentials,
|
|
148
134
|
getGoogleAuthByCredentials,
|
|
149
135
|
getGoogleAuthTokenByAuth,
|
|
150
136
|
getSortedQueryString,
|
package/lib/manifest.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const manifest = {
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "2000.3.22",
|
|
4
|
+
"version": "2000.3.24",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -26,46 +26,46 @@ const manifest = {
|
|
|
26
26
|
"devDependencies": {
|
|
27
27
|
"@ffmpeg-installer/ffmpeg": "^1.1.0",
|
|
28
28
|
"@ffprobe-installer/ffprobe": "^2.1.2",
|
|
29
|
-
"@google-cloud/
|
|
30
|
-
"@google
|
|
31
|
-
"@google/genai": "^1.30.0",
|
|
29
|
+
"@google-cloud/storage": "^7.18.0",
|
|
30
|
+
"@google/genai": "^1.31.0",
|
|
32
31
|
"@mozilla/readability": "github:mozilla/readability",
|
|
33
|
-
"@sentry/node": "^10.
|
|
34
|
-
"@sentry/profiling-node": "^10.
|
|
32
|
+
"@sentry/node": "^10.29.0",
|
|
33
|
+
"@sentry/profiling-node": "^10.29.0",
|
|
35
34
|
"acme-client": "^5.4.0",
|
|
36
35
|
"browserify-fs": "^1.0.0",
|
|
37
36
|
"buffer": "^6.0.3",
|
|
38
37
|
"fast-geoip": "^1.1.88",
|
|
39
38
|
"fluent-ffmpeg": "^2.1.3",
|
|
40
39
|
"form-data": "^4.0.5",
|
|
40
|
+
"google-gax": "^5.0.6",
|
|
41
41
|
"ioredis": "^5.8.2",
|
|
42
42
|
"js-tiktoken": "^1.0.21",
|
|
43
43
|
"jsdom": "^27.2.0",
|
|
44
44
|
"lorem-ipsum": "^2.0.8",
|
|
45
|
-
"mailgun.js": "^12.
|
|
45
|
+
"mailgun.js": "^12.4.0",
|
|
46
46
|
"mailparser": "^3.9.0",
|
|
47
47
|
"mime": "^4.1.0",
|
|
48
48
|
"mysql2": "^3.15.3",
|
|
49
49
|
"node-mailjet": "^6.0.11",
|
|
50
50
|
"node-polyfill-webpack-plugin": "^4.1.0",
|
|
51
51
|
"office-text-extractor": "^3.0.3",
|
|
52
|
-
"openai": "^6.
|
|
53
|
-
"pdfjs-dist": "^5.4.394",
|
|
52
|
+
"openai": "^6.10.0",
|
|
54
53
|
"pdf-lib": "^1.17.1",
|
|
54
|
+
"pdfjs-dist": "^5.4.449",
|
|
55
55
|
"pg": "^8.16.3",
|
|
56
56
|
"pgvector": "^0.2.1",
|
|
57
57
|
"ping": "^1.0.0",
|
|
58
58
|
"process": "^0.11.10",
|
|
59
|
-
"puppeteer": "^24.
|
|
59
|
+
"puppeteer": "^24.32.0",
|
|
60
60
|
"say": "^0.16.0",
|
|
61
61
|
"telegraf": "^4.16.3",
|
|
62
62
|
"telesignsdk": "^3.0.4",
|
|
63
63
|
"tesseract.js": "^6.0.1",
|
|
64
|
-
"twilio": "^5.10.
|
|
64
|
+
"twilio": "^5.10.7",
|
|
65
65
|
"url": "github:Leask/node-url",
|
|
66
66
|
"webpack-cli": "^6.0.1",
|
|
67
67
|
"whisper-node": "^1.1.1",
|
|
68
|
-
"wrangler": "^4.
|
|
68
|
+
"wrangler": "^4.53.0",
|
|
69
69
|
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz",
|
|
70
70
|
"youtube-transcript": "^1.2.1"
|
|
71
71
|
}
|
package/lib/speech.mjs
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
|
|
2
|
-
import { getGoogleApiKeyCredentials, hash } from './encryption.mjs';
|
|
3
2
|
import { getFfmpeg, packPcmToWav } from './media.mjs';
|
|
4
3
|
import { get } from './web.mjs';
|
|
5
|
-
import { convert, getTempPath } from './storage.mjs';
|
|
4
|
+
import { convert, getTempPath, MIME_WAV } from './storage.mjs';
|
|
6
5
|
import { ensureString, mergeAtoB } from './utilitas.mjs';
|
|
7
6
|
|
|
8
7
|
import {
|
|
@@ -15,34 +14,24 @@ import {
|
|
|
15
14
|
convertAudioTo16kNanoPcmWave,
|
|
16
15
|
} from './media.mjs';
|
|
17
16
|
|
|
18
|
-
const _NEED = [
|
|
19
|
-
'@google-cloud/speech',
|
|
20
|
-
'@google/genai',
|
|
21
|
-
'OpenAI',
|
|
22
|
-
'whisper-node',
|
|
23
|
-
];
|
|
24
|
-
|
|
25
|
-
const WHISPER_DEFAULT_MODEL = 'base';
|
|
26
|
-
const errorMessage = 'Invalid audio data.';
|
|
17
|
+
const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];
|
|
27
18
|
|
|
28
19
|
const [
|
|
29
|
-
BUFFER, STREAM, BASE64, FILE, clients,
|
|
30
|
-
|
|
20
|
+
BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
|
|
21
|
+
GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH,
|
|
22
|
+
OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
|
|
31
23
|
] = [
|
|
32
|
-
'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, '
|
|
33
|
-
'
|
|
24
|
+
'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
|
|
25
|
+
'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
|
|
26
|
+
'gemini-flash-latest', 4096, 'base', 'Invalid audio data.',
|
|
34
27
|
];
|
|
35
28
|
|
|
36
29
|
const [
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
] = [
|
|
40
|
-
'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-pro-preview-tts',
|
|
41
|
-
'gemini-2.5-flash-preview-tts', 4096
|
|
42
|
-
];
|
|
30
|
+
defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
|
|
31
|
+
defaultGeminiSttModel,
|
|
32
|
+
] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
|
|
43
33
|
|
|
44
|
-
const
|
|
45
|
-
= [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_PRO_TTS];
|
|
34
|
+
const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
|
|
46
35
|
|
|
47
36
|
const WHISPER_MODELS = [
|
|
48
37
|
// npx whisper-node download tiny.en
|
|
@@ -117,15 +106,13 @@ const init = async (options) => {
|
|
|
117
106
|
break;
|
|
118
107
|
case 'GOOGLE':
|
|
119
108
|
clients._provider = provider;
|
|
109
|
+
const { GoogleGenAI } = await need('@google/genai');
|
|
110
|
+
const client = new GoogleGenAI(options);
|
|
120
111
|
if (options?.tts) {
|
|
121
|
-
let { GoogleGenAI } = await need('@google/genai');
|
|
122
|
-
let client = new GoogleGenAI(options);
|
|
123
112
|
clients.tts = client.models.generateContent;
|
|
124
113
|
}
|
|
125
114
|
if (options?.stt) {
|
|
126
|
-
|
|
127
|
-
const sslCreds = await getGoogleApiKeyCredentials(options);
|
|
128
|
-
clients.stt = new stt.SpeechClient({ sslCreds });
|
|
115
|
+
clients.stt = client.models.generateContent;
|
|
129
116
|
}
|
|
130
117
|
break;
|
|
131
118
|
case '':
|
|
@@ -242,17 +229,25 @@ const sttOpenAI = async (audio, options) => {
|
|
|
242
229
|
|
|
243
230
|
const sttGoogle = async (audio, options) => {
|
|
244
231
|
assert(clients.stt, 'Google STT API has not been initialized.', 500);
|
|
245
|
-
const
|
|
232
|
+
const data = await convert(audio, {
|
|
246
233
|
input: options?.input, expected: BASE64, errorMessage,
|
|
247
234
|
});
|
|
248
|
-
const
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
235
|
+
const resp = await clients.stt({
|
|
236
|
+
model: options?.model || defaultGeminiSttModel, contents: {
|
|
237
|
+
parts: [{
|
|
238
|
+
inlineData: {
|
|
239
|
+
mimeType: options?.mimeType || MIME_WAV, data,
|
|
240
|
+
},
|
|
241
|
+
}, { text: STT_PROMPT }],
|
|
252
242
|
},
|
|
243
|
+
config: { ...options?.config || {} },
|
|
253
244
|
});
|
|
254
|
-
|
|
255
|
-
|
|
245
|
+
assert(
|
|
246
|
+
resp?.candidates?.[0]?.content?.parts?.[0],
|
|
247
|
+
'Failed to transcribe audio.', 500
|
|
248
|
+
);
|
|
249
|
+
return options?.raw ? resp.candidates
|
|
250
|
+
: (resp.candidates[0].content.parts[0].text?.trim?.() || '');
|
|
256
251
|
};
|
|
257
252
|
|
|
258
253
|
// This function is not working properly, a pull request is filed:
|
|
@@ -284,8 +279,8 @@ const sttWhisper = async (audio, options) => {
|
|
|
284
279
|
const tts = async (text, options) => {
|
|
285
280
|
let engine;
|
|
286
281
|
if (inBrowser()) { engine = ttsBrowser }
|
|
287
|
-
else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
|
|
288
282
|
else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
|
|
283
|
+
else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
|
|
289
284
|
else if (await checkSay()) { engine = ttsSay; }
|
|
290
285
|
else { throwError('Text-to-Speech engine has not been initialized.', 500); }
|
|
291
286
|
return await engine(text, options);
|
|
@@ -293,8 +288,8 @@ const tts = async (text, options) => {
|
|
|
293
288
|
|
|
294
289
|
const stt = async (audio, options) => {
|
|
295
290
|
let engine;
|
|
296
|
-
if (clients?.stt && clients._provider === '
|
|
297
|
-
else if (clients?.stt && clients._provider === '
|
|
291
|
+
if (clients?.stt && clients._provider === 'GOOGLE') { engine = sttGoogle; }
|
|
292
|
+
else if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
|
|
298
293
|
else if (await checkWhisper()) { engine = sttWhisper; }
|
|
299
294
|
else { throwError('Speech-to-Text engine has not been initialized.', 500); }
|
|
300
295
|
return await engine(audio, options);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "2000.3.22",
|
|
4
|
+
"version": "2000.3.24",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -37,46 +37,46 @@
|
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@ffmpeg-installer/ffmpeg": "^1.1.0",
|
|
39
39
|
"@ffprobe-installer/ffprobe": "^2.1.2",
|
|
40
|
-
"@google-cloud/
|
|
41
|
-
"@google
|
|
42
|
-
"@google/genai": "^1.30.0",
|
|
40
|
+
"@google-cloud/storage": "^7.18.0",
|
|
41
|
+
"@google/genai": "^1.31.0",
|
|
43
42
|
"@mozilla/readability": "github:mozilla/readability",
|
|
44
|
-
"@sentry/node": "^10.
|
|
45
|
-
"@sentry/profiling-node": "^10.
|
|
43
|
+
"@sentry/node": "^10.29.0",
|
|
44
|
+
"@sentry/profiling-node": "^10.29.0",
|
|
46
45
|
"acme-client": "^5.4.0",
|
|
47
46
|
"browserify-fs": "^1.0.0",
|
|
48
47
|
"buffer": "^6.0.3",
|
|
49
48
|
"fast-geoip": "^1.1.88",
|
|
50
49
|
"fluent-ffmpeg": "^2.1.3",
|
|
51
50
|
"form-data": "^4.0.5",
|
|
51
|
+
"google-gax": "^5.0.6",
|
|
52
52
|
"ioredis": "^5.8.2",
|
|
53
53
|
"js-tiktoken": "^1.0.21",
|
|
54
54
|
"jsdom": "^27.2.0",
|
|
55
55
|
"lorem-ipsum": "^2.0.8",
|
|
56
|
-
"mailgun.js": "^12.
|
|
56
|
+
"mailgun.js": "^12.4.0",
|
|
57
57
|
"mailparser": "^3.9.0",
|
|
58
58
|
"mime": "^4.1.0",
|
|
59
59
|
"mysql2": "^3.15.3",
|
|
60
60
|
"node-mailjet": "^6.0.11",
|
|
61
61
|
"node-polyfill-webpack-plugin": "^4.1.0",
|
|
62
62
|
"office-text-extractor": "^3.0.3",
|
|
63
|
-
"openai": "^6.
|
|
64
|
-
"pdfjs-dist": "^5.4.394",
|
|
63
|
+
"openai": "^6.10.0",
|
|
65
64
|
"pdf-lib": "^1.17.1",
|
|
65
|
+
"pdfjs-dist": "^5.4.449",
|
|
66
66
|
"pg": "^8.16.3",
|
|
67
67
|
"pgvector": "^0.2.1",
|
|
68
68
|
"ping": "^1.0.0",
|
|
69
69
|
"process": "^0.11.10",
|
|
70
|
-
"puppeteer": "^24.
|
|
70
|
+
"puppeteer": "^24.32.0",
|
|
71
71
|
"say": "^0.16.0",
|
|
72
72
|
"telegraf": "^4.16.3",
|
|
73
73
|
"telesignsdk": "^3.0.4",
|
|
74
74
|
"tesseract.js": "^6.0.1",
|
|
75
|
-
"twilio": "^5.10.
|
|
75
|
+
"twilio": "^5.10.7",
|
|
76
76
|
"url": "github:Leask/node-url",
|
|
77
77
|
"webpack-cli": "^6.0.1",
|
|
78
78
|
"whisper-node": "^1.1.1",
|
|
79
|
-
"wrangler": "^4.
|
|
79
|
+
"wrangler": "^4.53.0",
|
|
80
80
|
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz",
|
|
81
81
|
"youtube-transcript": "^1.2.1"
|
|
82
82
|
}
|