utilitas 2000.3.22 → 2000.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/embedding.mjs CHANGED
@@ -22,7 +22,7 @@ const [
22
22
  'JINA',
23
23
  'text-embedding-3-small', // dim: 1536
24
24
  'text-embedding-3-large', // dim: 3072
25
- 'gemini-embedding-001', // dim: 768, 1536, or 3072(default)
25
+ 'gemini-embedding-001', // dim: 768(default), 1536, or 3072(google default)
26
26
  'jina-clip-v2', // dim: 1024
27
27
  'jina-embeddings-v3', // dim: 256‑1024
28
28
  ];
@@ -41,7 +41,9 @@ const DEFAULT_MODELS = {
41
41
  const MODEL_CONFIG = {
42
42
  [OPENAI_MODEL_EMBED_SMALL]: { source: 'openai', maxTokens: 8192 },
43
43
  [OPENAI_MODEL_EMBED_LARGE]: { source: 'openai', maxTokens: 8192 },
44
- [GOOGLE_MODEL_GEMINI_EMBED]: { source: 'google', maxTokens: 20000 },
44
+ [GOOGLE_MODEL_GEMINI_EMBED]: {
45
+ source: 'google', maxTokens: 2048, options: { dimensions: 768 },
46
+ },
45
47
  [JINA_MODEL_CLIP_2]: {
46
48
  maxTokens: 8192,
47
49
  image: true,
@@ -9,10 +9,7 @@ import { createReadStream } from 'fs';
9
9
  import { base64Decode, base64Encode, ensureString, hexEncode, need } from './utilitas.mjs';
10
10
  import { networkInterfaces } from 'os';
11
11
 
12
- const _NEED = [
13
- '@google-cloud/speech', '@google-cloud/text-to-speech', 'google-gax',
14
- ];
15
-
12
+ const _NEED = ['google-gax'];
16
13
  const defaultAlgorithm = 'sha256';
17
14
  const defaultEncryption = 'aes-256-gcm';
18
15
  const uniqueString = (any) => hash(ensureString(any || networkInterfaces()));
@@ -111,16 +108,6 @@ const aesDecrypt = (any, options) => {
111
108
  return decrypted;
112
109
  };
113
110
 
114
- const getGoogleApiKeyCredentials = async (options) => {
115
- // Included in @google-cloud/speech and @google-cloud/text-to-speech
116
- const { GoogleAuth, grpc } = await need('google-gax');
117
- const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
118
- return grpc.credentials.combineChannelCredentials(
119
- grpc.credentials.createSsl(),
120
- grpc.credentials.createFromGoogleCredential(authClient)
121
- );
122
- };
123
-
124
111
  const getGoogleAuthByCredentials = async (keyFilename) => {
125
112
  const { GoogleAuth } = await need('google-gax');
126
113
  return (new GoogleAuth({
@@ -144,7 +131,6 @@ export {
144
131
  defaultAlgorithm,
145
132
  defaultEncryption,
146
133
  digestObject,
147
- getGoogleApiKeyCredentials,
148
134
  getGoogleAuthByCredentials,
149
135
  getGoogleAuthTokenByAuth,
150
136
  getSortedQueryString,
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  const manifest = {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.22",
4
+ "version": "2000.3.24",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
@@ -26,46 +26,46 @@ const manifest = {
26
26
  "devDependencies": {
27
27
  "@ffmpeg-installer/ffmpeg": "^1.1.0",
28
28
  "@ffprobe-installer/ffprobe": "^2.1.2",
29
- "@google-cloud/speech": "^7.2.1",
30
- "@google-cloud/storage": "^7.17.3",
31
- "@google/genai": "^1.30.0",
29
+ "@google-cloud/storage": "^7.18.0",
30
+ "@google/genai": "^1.31.0",
32
31
  "@mozilla/readability": "github:mozilla/readability",
33
- "@sentry/node": "^10.26.0",
34
- "@sentry/profiling-node": "^10.26.0",
32
+ "@sentry/node": "^10.29.0",
33
+ "@sentry/profiling-node": "^10.29.0",
35
34
  "acme-client": "^5.4.0",
36
35
  "browserify-fs": "^1.0.0",
37
36
  "buffer": "^6.0.3",
38
37
  "fast-geoip": "^1.1.88",
39
38
  "fluent-ffmpeg": "^2.1.3",
40
39
  "form-data": "^4.0.5",
40
+ "google-gax": "^5.0.6",
41
41
  "ioredis": "^5.8.2",
42
42
  "js-tiktoken": "^1.0.21",
43
43
  "jsdom": "^27.2.0",
44
44
  "lorem-ipsum": "^2.0.8",
45
- "mailgun.js": "^12.1.1",
45
+ "mailgun.js": "^12.4.0",
46
46
  "mailparser": "^3.9.0",
47
47
  "mime": "^4.1.0",
48
48
  "mysql2": "^3.15.3",
49
49
  "node-mailjet": "^6.0.11",
50
50
  "node-polyfill-webpack-plugin": "^4.1.0",
51
51
  "office-text-extractor": "^3.0.3",
52
- "openai": "^6.9.1",
53
- "pdfjs-dist": "^5.4.394",
52
+ "openai": "^6.10.0",
54
53
  "pdf-lib": "^1.17.1",
54
+ "pdfjs-dist": "^5.4.449",
55
55
  "pg": "^8.16.3",
56
56
  "pgvector": "^0.2.1",
57
57
  "ping": "^1.0.0",
58
58
  "process": "^0.11.10",
59
- "puppeteer": "^24.31.0",
59
+ "puppeteer": "^24.32.0",
60
60
  "say": "^0.16.0",
61
61
  "telegraf": "^4.16.3",
62
62
  "telesignsdk": "^3.0.4",
63
63
  "tesseract.js": "^6.0.1",
64
- "twilio": "^5.10.6",
64
+ "twilio": "^5.10.7",
65
65
  "url": "github:Leask/node-url",
66
66
  "webpack-cli": "^6.0.1",
67
67
  "whisper-node": "^1.1.1",
68
- "wrangler": "^4.50.0",
68
+ "wrangler": "^4.53.0",
69
69
  "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz",
70
70
  "youtube-transcript": "^1.2.1"
71
71
  }
package/lib/speech.mjs CHANGED
@@ -1,8 +1,7 @@
1
1
  import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
2
- import { getGoogleApiKeyCredentials, hash } from './encryption.mjs';
3
2
  import { getFfmpeg, packPcmToWav } from './media.mjs';
4
3
  import { get } from './web.mjs';
5
- import { convert, getTempPath } from './storage.mjs';
4
+ import { convert, getTempPath, MIME_WAV } from './storage.mjs';
6
5
  import { ensureString, mergeAtoB } from './utilitas.mjs';
7
6
 
8
7
  import {
@@ -15,34 +14,24 @@ import {
15
14
  convertAudioTo16kNanoPcmWave,
16
15
  } from './media.mjs';
17
16
 
18
- const _NEED = [
19
- '@google-cloud/speech',
20
- '@google/genai',
21
- 'OpenAI',
22
- 'whisper-node',
23
- ];
24
-
25
- const WHISPER_DEFAULT_MODEL = 'base';
26
- const errorMessage = 'Invalid audio data.';
17
+ const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];
27
18
 
28
19
  const [
29
- BUFFER, STREAM, BASE64, FILE, clients, languageCode, audioEncoding, suffix,
30
- SPEAKER, cleanup, wav,
20
+ BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
21
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH,
22
+ OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
31
23
  ] = [
32
- 'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'en-US', 'OGG_OPUS', 'ogg',
33
- 'SPEAKER', true, 'wav'
24
+ 'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
25
+ 'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
26
+ 'gemini-flash-latest', 4096, 'base', 'Invalid audio data.',
34
27
  ];
35
28
 
36
29
  const [
37
- GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_PRO_TTS, GEMINI_25_FLASH_TTS,
38
- OPENAI_TTS_MAX_LENGTH,
39
- ] = [
40
- 'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-pro-preview-tts',
41
- 'gemini-2.5-flash-preview-tts', 4096
42
- ];
30
+ defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
31
+ defaultGeminiSttModel,
32
+ ] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
43
33
 
44
- const [defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel]
45
- = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_PRO_TTS];
34
+ const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
46
35
 
47
36
  const WHISPER_MODELS = [
48
37
  // npx whisper-node download tiny.en
@@ -117,15 +106,13 @@ const init = async (options) => {
117
106
  break;
118
107
  case 'GOOGLE':
119
108
  clients._provider = provider;
109
+ const { GoogleGenAI } = await need('@google/genai');
110
+ const client = new GoogleGenAI(options);
120
111
  if (options?.tts) {
121
- let { GoogleGenAI } = await need('@google/genai');
122
- let client = new GoogleGenAI(options);
123
112
  clients.tts = client.models.generateContent;
124
113
  }
125
114
  if (options?.stt) {
126
- const stt = (await need('@google-cloud/speech')).default;
127
- const sslCreds = await getGoogleApiKeyCredentials(options);
128
- clients.stt = new stt.SpeechClient({ sslCreds });
115
+ clients.stt = client.models.generateContent;
129
116
  }
130
117
  break;
131
118
  case '':
@@ -242,17 +229,25 @@ const sttOpenAI = async (audio, options) => {
242
229
 
243
230
  const sttGoogle = async (audio, options) => {
244
231
  assert(clients.stt, 'Google STT API has not been initialized.', 500);
245
- const content = await convert(audio, {
232
+ const data = await convert(audio, {
246
233
  input: options?.input, expected: BASE64, errorMessage,
247
234
  });
248
- const [response] = await clients.stt.recognize({
249
- audio: { content, ...options?.audio || {} }, config: {
250
- encoding: audioEncoding, sampleRateHertz: 48000,
251
- languageCode, ...options?.config || {}
235
+ const resp = await clients.stt({
236
+ model: options?.model || defaultGeminiSttModel, contents: {
237
+ parts: [{
238
+ inlineData: {
239
+ mimeType: options?.mimeType || MIME_WAV, data,
240
+ },
241
+ }, { text: STT_PROMPT }],
252
242
  },
243
+ config: { ...options?.config || {} },
253
244
  });
254
- return options?.raw ? response : response.results
255
- .map(result => result.alternatives[0].transcript).join('\n');
245
+ assert(
246
+ resp?.candidates?.[0]?.content?.parts?.[0],
247
+ 'Failed to transcribe audio.', 500
248
+ );
249
+ return options?.raw ? resp.candidates
250
+ : (resp.candidates[0].content.parts[0].text?.trim?.() || '');
256
251
  };
257
252
 
258
253
  // This function is not working properly, a pull request is filed:
@@ -284,8 +279,8 @@ const sttWhisper = async (audio, options) => {
284
279
  const tts = async (text, options) => {
285
280
  let engine;
286
281
  if (inBrowser()) { engine = ttsBrowser }
287
- else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
288
282
  else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
283
+ else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
289
284
  else if (await checkSay()) { engine = ttsSay; }
290
285
  else { throwError('Text-to-Speech engine has not been initialized.', 500); }
291
286
  return await engine(text, options);
@@ -293,8 +288,8 @@ const tts = async (text, options) => {
293
288
 
294
289
  const stt = async (audio, options) => {
295
290
  let engine;
296
- if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
297
- else if (clients?.stt && clients._provider === 'GOOGLE') { engine = sttGoogle; }
291
+ if (clients?.stt && clients._provider === 'GOOGLE') { engine = sttGoogle; }
292
+ else if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
298
293
  else if (await checkWhisper()) { engine = sttWhisper; }
299
294
  else { throwError('Speech-to-Text engine has not been initialized.', 500); }
300
295
  return await engine(audio, options);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "utilitas",
3
3
  "description": "Just another common utility for JavaScript.",
4
- "version": "2000.3.22",
4
+ "version": "2000.3.24",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/utilitas",
7
7
  "main": "index.mjs",
@@ -37,46 +37,46 @@
37
37
  "devDependencies": {
38
38
  "@ffmpeg-installer/ffmpeg": "^1.1.0",
39
39
  "@ffprobe-installer/ffprobe": "^2.1.2",
40
- "@google-cloud/speech": "^7.2.1",
41
- "@google-cloud/storage": "^7.17.3",
42
- "@google/genai": "^1.30.0",
40
+ "@google-cloud/storage": "^7.18.0",
41
+ "@google/genai": "^1.31.0",
43
42
  "@mozilla/readability": "github:mozilla/readability",
44
- "@sentry/node": "^10.26.0",
45
- "@sentry/profiling-node": "^10.26.0",
43
+ "@sentry/node": "^10.29.0",
44
+ "@sentry/profiling-node": "^10.29.0",
46
45
  "acme-client": "^5.4.0",
47
46
  "browserify-fs": "^1.0.0",
48
47
  "buffer": "^6.0.3",
49
48
  "fast-geoip": "^1.1.88",
50
49
  "fluent-ffmpeg": "^2.1.3",
51
50
  "form-data": "^4.0.5",
51
+ "google-gax": "^5.0.6",
52
52
  "ioredis": "^5.8.2",
53
53
  "js-tiktoken": "^1.0.21",
54
54
  "jsdom": "^27.2.0",
55
55
  "lorem-ipsum": "^2.0.8",
56
- "mailgun.js": "^12.1.1",
56
+ "mailgun.js": "^12.4.0",
57
57
  "mailparser": "^3.9.0",
58
58
  "mime": "^4.1.0",
59
59
  "mysql2": "^3.15.3",
60
60
  "node-mailjet": "^6.0.11",
61
61
  "node-polyfill-webpack-plugin": "^4.1.0",
62
62
  "office-text-extractor": "^3.0.3",
63
- "openai": "^6.9.1",
64
- "pdfjs-dist": "^5.4.394",
63
+ "openai": "^6.10.0",
65
64
  "pdf-lib": "^1.17.1",
65
+ "pdfjs-dist": "^5.4.449",
66
66
  "pg": "^8.16.3",
67
67
  "pgvector": "^0.2.1",
68
68
  "ping": "^1.0.0",
69
69
  "process": "^0.11.10",
70
- "puppeteer": "^24.31.0",
70
+ "puppeteer": "^24.32.0",
71
71
  "say": "^0.16.0",
72
72
  "telegraf": "^4.16.3",
73
73
  "telesignsdk": "^3.0.4",
74
74
  "tesseract.js": "^6.0.1",
75
- "twilio": "^5.10.6",
75
+ "twilio": "^5.10.7",
76
76
  "url": "github:Leask/node-url",
77
77
  "webpack-cli": "^6.0.1",
78
78
  "whisper-node": "^1.1.1",
79
- "wrangler": "^4.50.0",
79
+ "wrangler": "^4.53.0",
80
80
  "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.1/xlsx-0.20.1.tgz",
81
81
  "youtube-transcript": "^1.2.1"
82
82
  }