@loonylabs/tts-middleware 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -16
- package/dist/middleware/services/tts/index.d.ts +4 -3
- package/dist/middleware/services/tts/index.d.ts.map +1 -1
- package/dist/middleware/services/tts/index.js +3 -3
- package/dist/middleware/services/tts/index.js.map +1 -1
- package/dist/middleware/services/tts/providers/index.d.ts +2 -1
- package/dist/middleware/services/tts/providers/index.d.ts.map +1 -1
- package/dist/middleware/services/tts/providers/index.js +3 -3
- package/dist/middleware/services/tts/providers/index.js.map +1 -1
- package/dist/middleware/services/tts/providers/vertex-ai-tts-provider.d.ts +168 -0
- package/dist/middleware/services/tts/providers/vertex-ai-tts-provider.d.ts.map +1 -0
- package/dist/middleware/services/tts/providers/vertex-ai-tts-provider.js +416 -0
- package/dist/middleware/services/tts/providers/vertex-ai-tts-provider.js.map +1 -0
- package/dist/middleware/services/tts/tts.service.js +7 -7
- package/dist/middleware/services/tts/tts.service.js.map +1 -1
- package/dist/middleware/services/tts/types/common.types.d.ts +11 -1
- package/dist/middleware/services/tts/types/common.types.d.ts.map +1 -1
- package/dist/middleware/services/tts/types/common.types.js +1 -1
- package/dist/middleware/services/tts/types/common.types.js.map +1 -1
- package/dist/middleware/services/tts/types/index.d.ts +2 -2
- package/dist/middleware/services/tts/types/index.d.ts.map +1 -1
- package/dist/middleware/services/tts/types/index.js +2 -2
- package/dist/middleware/services/tts/types/index.js.map +1 -1
- package/dist/middleware/services/tts/types/provider-options.types.d.ts +55 -8
- package/dist/middleware/services/tts/types/provider-options.types.d.ts.map +1 -1
- package/dist/middleware/services/tts/types/provider-options.types.js +4 -3
- package/dist/middleware/services/tts/types/provider-options.types.js.map +1 -1
- package/dist/middleware/services/tts/utils/retry.utils.d.ts +11 -0
- package/dist/middleware/services/tts/utils/retry.utils.d.ts.map +1 -1
- package/dist/middleware/services/tts/utils/retry.utils.js +23 -0
- package/dist/middleware/services/tts/utils/retry.utils.js.map +1 -1
- package/dist/middleware/shared/config/tts.config.d.ts +6 -6
- package/dist/middleware/shared/config/tts.config.d.ts.map +1 -1
- package/dist/middleware/shared/config/tts.config.js +8 -8
- package/dist/middleware/shared/config/tts.config.js.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# TTS Middleware
|
|
4
4
|
|
|
5
|
-
*Provider-agnostic Text-to-Speech middleware with **GDPR compliance** support. Currently supports Azure Speech Services, EdenAI, Google Cloud TTS, Fish Audio, Inworld AI, and
|
|
5
|
+
*Provider-agnostic Text-to-Speech middleware with **GDPR compliance** support. Currently supports Azure Speech Services, EdenAI, Google Cloud TTS, Fish Audio, Inworld AI, and Vertex AI TTS. Features EU data residency via Azure and Google Cloud, pluggable logging, character-based billing, and comprehensive error handling.*
|
|
6
6
|
|
|
7
7
|
<!-- Horizontal Badge Navigation Bar -->
|
|
8
8
|
[](https://www.npmjs.com/package/@loonylabs/tts-middleware)
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
- **Google Cloud TTS**: Neural2, WaveNet, Studio voices with EU data residency
|
|
44
44
|
- **Fish Audio**: S1 model with 13 languages & 64+ emotions (test/admin only)
|
|
45
45
|
- **Inworld AI**: TTS 1.5 Max/Mini with 15 languages & voice cloning (test/admin only)
|
|
46
|
-
- **
|
|
46
|
+
- **Vertex AI TTS**: Gemini Flash/Pro models with 30 voices, 90+ languages & style prompts (test/admin only)
|
|
47
47
|
- **Ready for:** OpenAI, ElevenLabs, Deepgram (interfaces prepared)
|
|
48
48
|
- **GDPR/DSGVO Compliance**: Built-in EU region support for Azure and Google Cloud
|
|
49
49
|
- **SSML Abstraction**: Auto-generates provider-specific SSML from simple JSON options
|
|
@@ -139,10 +139,10 @@ const inworld = await ttsService.synthesize({
|
|
|
139
139
|
providerOptions: { modelId: 'inworld-tts-1.5-max', temperature: 1.1 },
|
|
140
140
|
});
|
|
141
141
|
|
|
142
|
-
//
|
|
143
|
-
const
|
|
142
|
+
// Vertex AI TTS (test/admin only)
|
|
143
|
+
const vertexAI = await ttsService.synthesize({
|
|
144
144
|
text: 'Have a wonderful day!',
|
|
145
|
-
provider: TTSProvider.
|
|
145
|
+
provider: TTSProvider.VERTEX_AI,
|
|
146
146
|
voice: { id: 'Kore' },
|
|
147
147
|
providerOptions: { model: 'gemini-2.5-flash-preview-tts', stylePrompt: 'Say cheerfully:' },
|
|
148
148
|
});
|
|
@@ -249,9 +249,9 @@ FISH_AUDIO_API_KEY=your-fish-audio-api-key
|
|
|
249
249
|
# Inworld AI (test/admin only – no EU data residency)
|
|
250
250
|
INWORLD_API_KEY=your-inworld-api-key
|
|
251
251
|
|
|
252
|
-
#
|
|
252
|
+
# Vertex AI TTS (test/admin only – no EU data residency)
|
|
253
253
|
# Reuses GOOGLE_APPLICATION_CREDENTIALS and GOOGLE_CLOUD_PROJECT from above
|
|
254
|
-
|
|
254
|
+
VERTEX_AI_TTS_REGION=us-central1
|
|
255
255
|
|
|
256
256
|
# Logging
|
|
257
257
|
TTS_DEBUG=false
|
|
@@ -317,18 +317,19 @@ LOG_LEVEL=info
|
|
|
317
317
|
| **Pricing** | $10/1M chars (Max), $5/1M chars (Mini) |
|
|
318
318
|
| **EU Compliance** | No data residency guarantees |
|
|
319
319
|
|
|
320
|
-
###
|
|
320
|
+
### Vertex AI TTS (Test/Admin Only)
|
|
321
321
|
|
|
322
322
|
| Feature | Details |
|
|
323
323
|
|---------|---------|
|
|
324
|
-
| **Models** |
|
|
324
|
+
| **Models** | `gemini-2.5-flash-preview-tts` (budget, fast), `gemini-2.5-pro-preview-tts` (premium, natural) |
|
|
325
325
|
| **Languages** | 90+ with auto-detection |
|
|
326
326
|
| **Voices** | 30 multilingual: Kore, Puck, Charon, Zephyr, Fenrir, Sulafat, etc. |
|
|
327
327
|
| **Style Control** | Natural language prompts: "Say cheerfully:", "Read in a spooky whisper:" |
|
|
328
328
|
| **Audio** | MP3 (via ffmpeg), WAV (fallback) |
|
|
329
|
-
| **Auth** |
|
|
329
|
+
| **Auth** | Service Account OAuth2 (reuses `GOOGLE_APPLICATION_CREDENTIALS`) |
|
|
330
|
+
| **Region** | `VERTEX_AI_TTS_REGION` env var (default: `us-central1`) |
|
|
330
331
|
| **Pricing** | $0.50-1.00/M input tokens + $10-20/M audio output tokens |
|
|
331
|
-
| **EU Compliance** |
|
|
332
|
+
| **EU Compliance** | Preview models currently `us-central1` only — no EU data residency yet |
|
|
332
333
|
|
|
333
334
|
## GDPR / Compliance
|
|
334
335
|
|
|
@@ -341,10 +342,12 @@ LOG_LEVEL=info
|
|
|
341
342
|
| **EdenAI** | Yes | Depends* | Depends* | Depends on underlying provider |
|
|
342
343
|
| **Fish Audio** | No | No | No | Test/admin only |
|
|
343
344
|
| **Inworld AI** | No | No | No | Test/admin only |
|
|
344
|
-
| **
|
|
345
|
+
| **Vertex AI TTS** | Yes (Vertex DPA) | Partial | No* | Test/admin only |
|
|
345
346
|
|
|
346
347
|
*EdenAI is an aggregator - compliance depends on the underlying provider.
|
|
347
348
|
|
|
349
|
+
\*Vertex AI TTS: DPA available, no model training on customer data — but preview models are currently `us-central1` only (no EU data residency until GA with EU region support).
|
|
350
|
+
|
|
348
351
|
## API Reference
|
|
349
352
|
|
|
350
353
|
### TTSService
|
|
@@ -530,14 +533,14 @@ graph TD
|
|
|
530
533
|
Registry -->|Select| Eden[EdenAIProvider]
|
|
531
534
|
Registry -->|Select| Fish[FishAudioProvider]
|
|
532
535
|
Registry -->|Select| Inworld[InworldProvider]
|
|
533
|
-
Registry -->|Select|
|
|
536
|
+
Registry -->|Select| VertexAI[VertexAITTSProvider]
|
|
534
537
|
|
|
535
538
|
Azure -->|SSML/SDK| AzureAPI[Azure Speech API]
|
|
536
539
|
GCloud -->|gRPC/SDK| GoogleAPI[Google Cloud TTS API]
|
|
537
540
|
Eden -->|REST| EdenAPI[EdenAI API]
|
|
538
541
|
Fish -->|REST| FishAPI[Fish Audio API]
|
|
539
542
|
Inworld -->|REST| InworldAPI[Inworld AI API]
|
|
540
|
-
|
|
543
|
+
VertexAI -->|REST/OAuth2| VertexAPI[Vertex AI API]
|
|
541
544
|
|
|
542
545
|
GoogleAPI -->|EU Endpoint| EU[eu-texttospeech.googleapis.com]
|
|
543
546
|
EdenAPI -.-> OpenAI[OpenAI TTS]
|
|
@@ -547,7 +550,7 @@ graph TD
|
|
|
547
550
|
## Testing
|
|
548
551
|
|
|
549
552
|
```bash
|
|
550
|
-
# Run all tests (
|
|
553
|
+
# Run all tests (600+ tests, >90% coverage)
|
|
551
554
|
npm test
|
|
552
555
|
|
|
553
556
|
# Unit tests only
|
|
@@ -564,7 +567,7 @@ npx ts-node scripts/manual-test-edenai.ts
|
|
|
564
567
|
npx ts-node scripts/manual-test-google-cloud-tts.ts
|
|
565
568
|
npx ts-node scripts/manual-test-fish-audio.ts [en] [de]
|
|
566
569
|
npx ts-node scripts/manual-test-inworld.ts [en] [de] [mini]
|
|
567
|
-
npx ts-node scripts/manual-test-
|
|
570
|
+
npx ts-node scripts/manual-test-vertex-ai.ts [en] [de] [pro] [style]
|
|
568
571
|
|
|
569
572
|
# List available Google Cloud voices
|
|
570
573
|
npx ts-node scripts/list-google-voices.ts de-DE
|
|
@@ -20,9 +20,10 @@
|
|
|
20
20
|
*/
|
|
21
21
|
export { TTSService, ttsService } from './tts.service';
|
|
22
22
|
export { TTSProvider, TTSErrorCode, AudioFormat, } from './types';
|
|
23
|
-
export type { AudioOptions, VoiceConfig, TTSSynthesizeRequest, TTSResponse, TTSResponseMetadata, TTSBillingInfo, TTSVoice, TTSVoiceMetadata, AzureProviderOptions, OpenAIProviderOptions, ElevenLabsProviderOptions, GoogleCloudProviderOptions, GoogleCloudTTSProviderOptions, DeepgramProviderOptions, EdenAIProviderOptions, FishAudioProviderOptions, InworldProviderOptions,
|
|
24
|
-
export { isAzureOptions, isOpenAIOptions, isElevenLabsOptions, isGoogleCloudOptions, isGoogleCloudTTSOptions, isDeepgramOptions, isEdenAIOptions, isFishAudioOptions, isInworldOptions,
|
|
25
|
-
export { BaseTTSProvider, AzureProvider, EdenAIProvider, FishAudioProvider, GoogleCloudTTSProvider, InworldProvider,
|
|
23
|
+
export type { AudioOptions, VoiceConfig, TTSSynthesizeRequest, TTSResponse, TTSResponseMetadata, TTSBillingInfo, TTSVoice, TTSVoiceMetadata, AzureProviderOptions, OpenAIProviderOptions, ElevenLabsProviderOptions, GoogleCloudProviderOptions, GoogleCloudTTSProviderOptions, DeepgramProviderOptions, EdenAIProviderOptions, FishAudioProviderOptions, InworldProviderOptions, VertexAITTSProviderOptions, ProviderOptions, } from './types';
|
|
24
|
+
export { isAzureOptions, isOpenAIOptions, isElevenLabsOptions, isGoogleCloudOptions, isGoogleCloudTTSOptions, isDeepgramOptions, isEdenAIOptions, isFishAudioOptions, isInworldOptions, isVertexAITTSOptions, } from './types';
|
|
25
|
+
export { BaseTTSProvider, AzureProvider, EdenAIProvider, FishAudioProvider, GoogleCloudTTSProvider, InworldProvider, VertexAITTSProvider, } from './providers';
|
|
26
|
+
export type { VertexAITTSConfig } from './providers';
|
|
26
27
|
export type { GoogleCloudTTSRegion, GoogleCloudTTSConfig, } from './providers';
|
|
27
28
|
export { TTSError, InvalidConfigError, InvalidVoiceError, QuotaExceededError, ProviderUnavailableError, SynthesisFailedError, NetworkError, } from './providers';
|
|
28
29
|
export { countCharacters, countCharactersWithoutSSML, validateCharacterCount, countBillableCharacters, estimateAudioDuration, formatCharacterCount, } from './utils';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/tts/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAGvD,OAAO,EACL,WAAW,EACX,YAAY,EACZ,WAAW,GACZ,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,YAAY,EACZ,WAAW,EACX,oBAAoB,EACpB,WAAW,EACX,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,gBAAgB,EAChB,oBAAoB,EACpB,qBAAqB,EACrB,yBAAyB,EACzB,0BAA0B,EAC1B,6BAA6B,EAC7B,uBAAuB,EACvB,qBAAqB,EACrB,wBAAwB,EACxB,sBAAsB,EACtB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/tts/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAGvD,OAAO,EACL,WAAW,EACX,YAAY,EACZ,WAAW,GACZ,MAAM,SAAS,CAAC;AAEjB,YAAY,EACV,YAAY,EACZ,WAAW,EACX,oBAAoB,EACpB,WAAW,EACX,mBAAmB,EACnB,cAAc,EACd,QAAQ,EACR,gBAAgB,EAChB,oBAAoB,EACpB,qBAAqB,EACrB,yBAAyB,EACzB,0BAA0B,EAC1B,6BAA6B,EAC7B,uBAAuB,EACvB,qBAAqB,EACrB,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,EAC1B,eAAe,GAChB,MAAM,SAAS,CAAC;AAEjB,OAAO,EACL,cAAc,EACd,eAAe,EACf,mBAAmB,EACnB,oBAAoB,EACpB,uBAAuB,EACvB,iBAAiB,EACjB,eAAe,EACf,kBAAkB,EAClB,gBAAgB,EAChB,oBAAoB,GACrB,MAAM,SAAS,CAAC;AAGjB,OAAO,EACL,eAAe,EACf,aAAa,EACb,cAAc,EACd,iBAAiB,EACjB,sBAAsB,EACtB,eAAe,EACf,mBAAmB,GACpB,MAAM,aAAa,CAAC;AAErB,YAAY,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAErD,YAAY,EACV,oBAAoB,EACpB,oBAAoB,GACrB,MAAM,aAAa,CAAC;AAGrB,OAAO,EACL,QAAQ,EACR,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EACxB,oBAAoB,EACpB,YAAY,GACb,MAAM,aAAa,CAAC;AAGrB,OAAO,EACL,eAAe,EACf,0BAA0B,EAC1B,sBAAsB,EACtB,uBAAuB,EACvB,qBAAqB,EACrB,oBAAoB,GACrB,MAAM,SAAS,CAAC;AAGjB,OAAO,EACL,SAAS,EACT,SAAS,EACT,WAAW,EACX,WAAW,EACX,WAAW,EACX,YAAY,GACb,MAAM,SAAS,CAAC;AAEjB,YAAY,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAGnD,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,oBAAoB,GACrB,MAAM,SAAS,CAAC;AAEjB,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC"}
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
* @module @loonylabs/tts-middleware
|
|
21
21
|
*/
|
|
22
22
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
23
|
-
exports.DEFAULT_RETRY_CONFIG = exports.isRetryableError = exports.executeWithRetry = exports.silentLogger = exports.getLogLevel = exports.setLogLevel = exports.resetLogger = exports.getLogger = exports.setLogger = exports.formatCharacterCount = exports.estimateAudioDuration = exports.countBillableCharacters = exports.validateCharacterCount = exports.countCharactersWithoutSSML = exports.countCharacters = exports.NetworkError = exports.SynthesisFailedError = exports.ProviderUnavailableError = exports.QuotaExceededError = exports.InvalidVoiceError = exports.InvalidConfigError = exports.TTSError = exports.
|
|
23
|
+
exports.DEFAULT_RETRY_CONFIG = exports.isRetryableError = exports.executeWithRetry = exports.silentLogger = exports.getLogLevel = exports.setLogLevel = exports.resetLogger = exports.getLogger = exports.setLogger = exports.formatCharacterCount = exports.estimateAudioDuration = exports.countBillableCharacters = exports.validateCharacterCount = exports.countCharactersWithoutSSML = exports.countCharacters = exports.NetworkError = exports.SynthesisFailedError = exports.ProviderUnavailableError = exports.QuotaExceededError = exports.InvalidVoiceError = exports.InvalidConfigError = exports.TTSError = exports.VertexAITTSProvider = exports.InworldProvider = exports.GoogleCloudTTSProvider = exports.FishAudioProvider = exports.EdenAIProvider = exports.AzureProvider = exports.BaseTTSProvider = exports.isVertexAITTSOptions = exports.isInworldOptions = exports.isFishAudioOptions = exports.isEdenAIOptions = exports.isDeepgramOptions = exports.isGoogleCloudTTSOptions = exports.isGoogleCloudOptions = exports.isElevenLabsOptions = exports.isOpenAIOptions = exports.isAzureOptions = exports.TTSErrorCode = exports.TTSProvider = exports.ttsService = exports.TTSService = void 0;
|
|
24
24
|
// ===== Main Service =====
|
|
25
25
|
var tts_service_1 = require("./tts.service");
|
|
26
26
|
Object.defineProperty(exports, "TTSService", { enumerable: true, get: function () { return tts_service_1.TTSService; } });
|
|
@@ -39,7 +39,7 @@ Object.defineProperty(exports, "isDeepgramOptions", { enumerable: true, get: fun
|
|
|
39
39
|
Object.defineProperty(exports, "isEdenAIOptions", { enumerable: true, get: function () { return types_2.isEdenAIOptions; } });
|
|
40
40
|
Object.defineProperty(exports, "isFishAudioOptions", { enumerable: true, get: function () { return types_2.isFishAudioOptions; } });
|
|
41
41
|
Object.defineProperty(exports, "isInworldOptions", { enumerable: true, get: function () { return types_2.isInworldOptions; } });
|
|
42
|
-
Object.defineProperty(exports, "
|
|
42
|
+
Object.defineProperty(exports, "isVertexAITTSOptions", { enumerable: true, get: function () { return types_2.isVertexAITTSOptions; } });
|
|
43
43
|
// ===== Providers =====
|
|
44
44
|
var providers_1 = require("./providers");
|
|
45
45
|
Object.defineProperty(exports, "BaseTTSProvider", { enumerable: true, get: function () { return providers_1.BaseTTSProvider; } });
|
|
@@ -48,7 +48,7 @@ Object.defineProperty(exports, "EdenAIProvider", { enumerable: true, get: functi
|
|
|
48
48
|
Object.defineProperty(exports, "FishAudioProvider", { enumerable: true, get: function () { return providers_1.FishAudioProvider; } });
|
|
49
49
|
Object.defineProperty(exports, "GoogleCloudTTSProvider", { enumerable: true, get: function () { return providers_1.GoogleCloudTTSProvider; } });
|
|
50
50
|
Object.defineProperty(exports, "InworldProvider", { enumerable: true, get: function () { return providers_1.InworldProvider; } });
|
|
51
|
-
Object.defineProperty(exports, "
|
|
51
|
+
Object.defineProperty(exports, "VertexAITTSProvider", { enumerable: true, get: function () { return providers_1.VertexAITTSProvider; } });
|
|
52
52
|
// ===== Errors =====
|
|
53
53
|
var providers_2 = require("./providers");
|
|
54
54
|
Object.defineProperty(exports, "TTSError", { enumerable: true, get: function () { return providers_2.TTSError; } });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/middleware/services/tts/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AAEH,2BAA2B;AAC3B,6CAAuD;AAA9C,yGAAA,UAAU,OAAA;AAAE,yGAAA,UAAU,OAAA;AAE/B,oBAAoB;AACpB,iCAIiB;AAHf,oGAAA,WAAW,OAAA;AACX,qGAAA,YAAY,OAAA;AA0Bd,iCAWiB;AAVf,uGAAA,cAAc,OAAA;AACd,wGAAA,eAAe,OAAA;AACf,4GAAA,mBAAmB,OAAA;AACnB,6GAAA,oBAAoB,OAAA;AACpB,gHAAA,uBAAuB,OAAA;AACvB,0GAAA,iBAAiB,OAAA;AACjB,wGAAA,eAAe,OAAA;AACf,2GAAA,kBAAkB,OAAA;AAClB,yGAAA,gBAAgB,OAAA;AAChB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/middleware/services/tts/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;;AAEH,2BAA2B;AAC3B,6CAAuD;AAA9C,yGAAA,UAAU,OAAA;AAAE,yGAAA,UAAU,OAAA;AAE/B,oBAAoB;AACpB,iCAIiB;AAHf,oGAAA,WAAW,OAAA;AACX,qGAAA,YAAY,OAAA;AA0Bd,iCAWiB;AAVf,uGAAA,cAAc,OAAA;AACd,wGAAA,eAAe,OAAA;AACf,4GAAA,mBAAmB,OAAA;AACnB,6GAAA,oBAAoB,OAAA;AACpB,gHAAA,uBAAuB,OAAA;AACvB,0GAAA,iBAAiB,OAAA;AACjB,wGAAA,eAAe,OAAA;AACf,2GAAA,kBAAkB,OAAA;AAClB,yGAAA,gBAAgB,OAAA;AAChB,6GAAA,oBAAoB,OAAA;AAGtB,wBAAwB;AACxB,yCAQqB;AAPnB,4GAAA,eAAe,OAAA;AACf,0GAAA,aAAa,OAAA;AACb,2GAAA,cAAc,OAAA;AACd,8GAAA,iBAAiB,OAAA;AACjB,mHAAA,sBAAsB,OAAA;AACtB,4GAAA,eAAe,OAAA;AACf,gHAAA,mBAAmB,OAAA;AAUrB,qBAAqB;AACrB,yCAQqB;AAPnB,qGAAA,QAAQ,OAAA;AACR,+GAAA,kBAAkB,OAAA;AAClB,8GAAA,iBAAiB,OAAA;AACjB,+GAAA,kBAAkB,OAAA;AAClB,qHAAA,wBAAwB,OAAA;AACxB,iHAAA,oBAAoB,OAAA;AACpB,yGAAA,YAAY,OAAA;AAGd,wBAAwB;AACxB,iCAOiB;AANf,wGAAA,eAAe,OAAA;AACf,mHAAA,0BAA0B,OAAA;AAC1B,+GAAA,sBAAsB,OAAA;AACtB,gHAAA,uBAAuB,OAAA;AACvB,8GAAA,qBAAqB,OAAA;AACrB,6GAAA,oBAAoB,OAAA;AAGtB,qBAAqB;AACrB,iCAOiB;AANf,kGAAA,SAAS,OAAA;AACT,kGAAA,SAAS,OAAA;AACT,oGAAA,WAAW,OAAA;AACX,oGAAA,WAAW,OAAA;AACX,oGAAA,WAAW,OAAA;AACX,qGAAA,YAAY,OAAA;AAKd,oBAAoB;AACpB,iCAIiB;AAHf,yGAAA,gBAAgB,OAAA;AAChB,yGAAA,gBAAgB,OAAA;AAChB,6GAAA,oBAAoB,OAAA"}
|
|
@@ -10,5 +10,6 @@ export { GoogleCloudTTSProvider } from './google-cloud-tts-provider';
|
|
|
10
10
|
export type { GoogleCloudTTSRegion, GoogleCloudTTSConfig } from './google-cloud-tts-provider';
|
|
11
11
|
export { FishAudioProvider } from './fish-audio-provider';
|
|
12
12
|
export { InworldProvider } from './inworld-provider';
|
|
13
|
-
export {
|
|
13
|
+
export { VertexAITTSProvider } from './vertex-ai-tts-provider';
|
|
14
|
+
export type { VertexAITTSConfig } from './vertex-ai-tts-provider';
|
|
14
15
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/middleware/services/tts/providers/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EACL,eAAe,EACf,QAAQ,EACR,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EACxB,oBAAoB,EACpB,YAAY,GACb,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AACrE,YAAY,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAC9F,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../../src/middleware/services/tts/providers/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EACL,eAAe,EACf,QAAQ,EACR,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,wBAAwB,EACxB,oBAAoB,EACpB,YAAY,GACb,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AACrE,YAAY,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AAC9F,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAC/D,YAAY,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC"}
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Export all provider classes and error types
|
|
6
6
|
*/
|
|
7
7
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
-
exports.
|
|
8
|
+
exports.VertexAITTSProvider = exports.InworldProvider = exports.FishAudioProvider = exports.GoogleCloudTTSProvider = exports.EdenAIProvider = exports.AzureProvider = exports.NetworkError = exports.SynthesisFailedError = exports.ProviderUnavailableError = exports.QuotaExceededError = exports.InvalidVoiceError = exports.InvalidConfigError = exports.TTSError = exports.BaseTTSProvider = void 0;
|
|
9
9
|
// Base provider and errors
|
|
10
10
|
var base_tts_provider_1 = require("./base-tts-provider");
|
|
11
11
|
Object.defineProperty(exports, "BaseTTSProvider", { enumerable: true, get: function () { return base_tts_provider_1.BaseTTSProvider; } });
|
|
@@ -27,8 +27,8 @@ var fish_audio_provider_1 = require("./fish-audio-provider");
|
|
|
27
27
|
Object.defineProperty(exports, "FishAudioProvider", { enumerable: true, get: function () { return fish_audio_provider_1.FishAudioProvider; } });
|
|
28
28
|
var inworld_provider_1 = require("./inworld-provider");
|
|
29
29
|
Object.defineProperty(exports, "InworldProvider", { enumerable: true, get: function () { return inworld_provider_1.InworldProvider; } });
|
|
30
|
-
var
|
|
31
|
-
Object.defineProperty(exports, "
|
|
30
|
+
var vertex_ai_tts_provider_1 = require("./vertex-ai-tts-provider");
|
|
31
|
+
Object.defineProperty(exports, "VertexAITTSProvider", { enumerable: true, get: function () { return vertex_ai_tts_provider_1.VertexAITTSProvider; } });
|
|
32
32
|
// Future provider implementations will be exported here:
|
|
33
33
|
// export { OpenAIProvider } from './openai-provider';
|
|
34
34
|
// export { ElevenLabsProvider } from './elevenlabs-provider';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/middleware/services/tts/providers/index.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAEH,2BAA2B;AAC3B,yDAS6B;AAR3B,oHAAA,eAAe,OAAA;AACf,6GAAA,QAAQ,OAAA;AACR,uHAAA,kBAAkB,OAAA;AAClB,sHAAA,iBAAiB,OAAA;AACjB,uHAAA,kBAAkB,OAAA;AAClB,6HAAA,wBAAwB,OAAA;AACxB,yHAAA,oBAAoB,OAAA;AACpB,iHAAA,YAAY,OAAA;AAGd,2BAA2B;AAC3B,mDAAiD;AAAxC,+GAAA,aAAa,OAAA;AACtB,qDAAmD;AAA1C,iHAAA,cAAc,OAAA;AACvB,yEAAqE;AAA5D,mIAAA,sBAAsB,OAAA;AAE/B,6DAA0D;AAAjD,wHAAA,iBAAiB,OAAA;AAC1B,uDAAqD;AAA5C,mHAAA,eAAe,OAAA;AACxB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../src/middleware/services/tts/providers/index.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAEH,2BAA2B;AAC3B,yDAS6B;AAR3B,oHAAA,eAAe,OAAA;AACf,6GAAA,QAAQ,OAAA;AACR,uHAAA,kBAAkB,OAAA;AAClB,sHAAA,iBAAiB,OAAA;AACjB,uHAAA,kBAAkB,OAAA;AAClB,6HAAA,wBAAwB,OAAA;AACxB,yHAAA,oBAAoB,OAAA;AACpB,iHAAA,YAAY,OAAA;AAGd,2BAA2B;AAC3B,mDAAiD;AAAxC,+GAAA,aAAa,OAAA;AACtB,qDAAmD;AAA1C,iHAAA,cAAc,OAAA;AACvB,yEAAqE;AAA5D,mIAAA,sBAAsB,OAAA;AAE/B,6DAA0D;AAAjD,wHAAA,iBAAiB,OAAA;AAC1B,uDAAqD;AAA5C,mHAAA,eAAe,OAAA;AACxB,mEAA+D;AAAtD,6HAAA,mBAAmB,OAAA;AAG5B,yDAAyD;AACzD,sDAAsD;AACtD,8DAA8D;AAC9D,0DAA0D"}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vertex AI TTS Provider
|
|
3
|
+
*
|
|
4
|
+
* @description Provider for Google Vertex AI TTS via the generateContent
|
|
5
|
+
* endpoint with responseModalities: ['AUDIO']. Authenticates via Service Account
|
|
6
|
+
* (same as Google Cloud TTS — reuses GOOGLE_APPLICATION_CREDENTIALS).
|
|
7
|
+
*
|
|
8
|
+
* Supports 30 multilingual voices with auto-detect language and natural language
|
|
9
|
+
* style control. Output is raw PCM (24kHz, 16-bit, mono) which is converted to
|
|
10
|
+
* MP3 via ffmpeg or WAV as fallback.
|
|
11
|
+
*
|
|
12
|
+
* Test/Admin only -- no EU data residency guarantees.
|
|
13
|
+
*
|
|
14
|
+
* @see https://cloud.google.com/vertex-ai/generative-ai/docs/text-to-speech
|
|
15
|
+
*/
|
|
16
|
+
import type { TTSSynthesizeRequest, TTSResponse } from '../types';
|
|
17
|
+
import { BaseTTSProvider } from './base-tts-provider';
|
|
18
|
+
import type { RegionRotationConfig } from '../types/provider-options.types';
|
|
19
|
+
/**
|
|
20
|
+
* Vertex AI TTS configuration
|
|
21
|
+
*/
|
|
22
|
+
export interface VertexAITTSConfig {
|
|
23
|
+
/**
|
|
24
|
+
* Path to Service Account JSON file
|
|
25
|
+
* @env GOOGLE_APPLICATION_CREDENTIALS
|
|
26
|
+
*/
|
|
27
|
+
keyFilename?: string;
|
|
28
|
+
/**
|
|
29
|
+
* Google Cloud Project ID
|
|
30
|
+
* @env GOOGLE_CLOUD_PROJECT
|
|
31
|
+
*/
|
|
32
|
+
projectId?: string;
|
|
33
|
+
/**
|
|
34
|
+
* Vertex AI region
|
|
35
|
+
* @env VERTEX_AI_TTS_REGION
|
|
36
|
+
* @default 'us-central1'
|
|
37
|
+
*/
|
|
38
|
+
region?: string;
|
|
39
|
+
/**
|
|
40
|
+
* Optional region rotation for quota management (429 / Resource Exhausted)
|
|
41
|
+
*
|
|
42
|
+
* @description When configured, the provider automatically rotates through the
|
|
43
|
+
* specified regions on quota errors. Same pattern as llm-middleware and tti-middleware.
|
|
44
|
+
*
|
|
45
|
+
* @example
|
|
46
|
+
* ```typescript
|
|
47
|
+
* {
|
|
48
|
+
* regions: ['europe-west4', 'europe-west1'],
|
|
49
|
+
* fallback: 'us-central1',
|
|
50
|
+
* }
|
|
51
|
+
* ```
|
|
52
|
+
*/
|
|
53
|
+
regionRotation?: RegionRotationConfig;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Vertex AI TTS provider implementation
|
|
57
|
+
*
|
|
58
|
+
* @description Provides TTS synthesis using Google's Vertex AI generateContent API.
|
|
59
|
+
* Authenticates with Service Account OAuth2 (same credentials as Google Cloud TTS).
|
|
60
|
+
* Outputs raw PCM which is converted to MP3 (via ffmpeg) or WAV (pure Node.js fallback).
|
|
61
|
+
*
|
|
62
|
+
* Billing: Token-based ($0.50-1.00/M input + $10-20/M audio output tokens).
|
|
63
|
+
* For billing compatibility, reports character count like all other providers.
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```typescript
|
|
67
|
+
* const provider = new VertexAITTSProvider();
|
|
68
|
+
* const response = await provider.synthesize(
|
|
69
|
+
* "Hello World",
|
|
70
|
+
* "Kore",
|
|
71
|
+
* {
|
|
72
|
+
* text: "Hello World",
|
|
73
|
+
* voice: { id: "Kore" },
|
|
74
|
+
* audio: { format: "mp3" },
|
|
75
|
+
* providerOptions: {
|
|
76
|
+
* model: "gemini-2.5-flash-preview-tts",
|
|
77
|
+
* stylePrompt: "Say cheerfully:"
|
|
78
|
+
* }
|
|
79
|
+
* }
|
|
80
|
+
* );
|
|
81
|
+
* ```
|
|
82
|
+
*/
|
|
83
|
+
export declare class VertexAITTSProvider extends BaseTTSProvider {
|
|
84
|
+
private config;
|
|
85
|
+
private authClient;
|
|
86
|
+
/**
|
|
87
|
+
* Creates a new Vertex AI TTS provider
|
|
88
|
+
*
|
|
89
|
+
* @param config - Optional configuration (uses env vars if not provided)
|
|
90
|
+
* @throws {InvalidConfigError} If credentials are missing
|
|
91
|
+
*/
|
|
92
|
+
constructor(config?: Partial<VertexAITTSConfig>);
|
|
93
|
+
/**
|
|
94
|
+
* Validate Vertex AI configuration
|
|
95
|
+
*
|
|
96
|
+
* @private
|
|
97
|
+
* @throws {InvalidConfigError} If configuration is invalid
|
|
98
|
+
*/
|
|
99
|
+
private validateVertexAIConfig;
|
|
100
|
+
/**
|
|
101
|
+
* Get an authenticated access token via Service Account
|
|
102
|
+
*
|
|
103
|
+
* @private
|
|
104
|
+
* @returns OAuth2 access token
|
|
105
|
+
*/
|
|
106
|
+
private getAccessToken;
|
|
107
|
+
/**
|
|
108
|
+
* Synthesize text to speech using Vertex AI TTS
|
|
109
|
+
*
|
|
110
|
+
* @param text - The input text to synthesize
|
|
111
|
+
* @param voiceId - The voice name (e.g. "Kore", "Puck", "Charon")
|
|
112
|
+
* @param request - The full synthesis request with options
|
|
113
|
+
* @returns Promise resolving to the synthesis response
|
|
114
|
+
*/
|
|
115
|
+
synthesize(text: string, voiceId: string, request: TTSSynthesizeRequest): Promise<TTSResponse>;
|
|
116
|
+
/**
|
|
117
|
+
* Build Vertex AI generateContent request payload
|
|
118
|
+
*
|
|
119
|
+
* @private
|
|
120
|
+
*/
|
|
121
|
+
private buildRequest;
|
|
122
|
+
/**
|
|
123
|
+
* Call the Vertex AI API with optional region rotation on quota errors
|
|
124
|
+
*
|
|
125
|
+
* @private
|
|
126
|
+
* @param requestBody - The request payload
|
|
127
|
+
* @param model - The model to use
|
|
128
|
+
* @param regionOverride - Optional per-request region override (skips rotation)
|
|
129
|
+
* @returns The PCM audio buffer and the region that processed the request
|
|
130
|
+
*/
|
|
131
|
+
private callAPIWithRegionRotation;
|
|
132
|
+
/**
|
|
133
|
+
* Call Vertex AI generateContent API
|
|
134
|
+
*
|
|
135
|
+
* @private
|
|
136
|
+
* @param requestBody - The request payload
|
|
137
|
+
* @param model - The model to use
|
|
138
|
+
* @param region - The Vertex AI region to use
|
|
139
|
+
* @returns Promise resolving to raw PCM audio buffer
|
|
140
|
+
*/
|
|
141
|
+
private callAPI;
|
|
142
|
+
/**
|
|
143
|
+
* Convert raw PCM audio to the requested format
|
|
144
|
+
*
|
|
145
|
+
* @private
|
|
146
|
+
* @param pcmBuffer - Raw PCM buffer (24kHz, 16-bit, mono, little-endian)
|
|
147
|
+
* @param requestedFormat - The desired output format ('mp3', 'wav', etc.)
|
|
148
|
+
* @returns The converted audio buffer and actual format used
|
|
149
|
+
*/
|
|
150
|
+
private convertPcmAudio;
|
|
151
|
+
/**
|
|
152
|
+
* Convert raw PCM to MP3 using ffmpeg via child_process
|
|
153
|
+
*
|
|
154
|
+
* @private
|
|
155
|
+
* @param pcmBuffer - Raw PCM buffer (24kHz, 16-bit, mono, little-endian)
|
|
156
|
+
* @returns Promise resolving to MP3 buffer
|
|
157
|
+
*/
|
|
158
|
+
private pcmToMp3;
|
|
159
|
+
/**
|
|
160
|
+
* Convert raw PCM to WAV by prepending a 44-byte WAV header
|
|
161
|
+
*
|
|
162
|
+
* @private
|
|
163
|
+
* @param pcmBuffer - Raw PCM buffer (24kHz, 16-bit, mono, little-endian)
|
|
164
|
+
* @returns WAV buffer
|
|
165
|
+
*/
|
|
166
|
+
private pcmToWav;
|
|
167
|
+
}
|
|
168
|
+
//# sourceMappingURL=vertex-ai-tts-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vertex-ai-tts-provider.d.ts","sourceRoot":"","sources":["../../../../../src/middleware/services/tts/providers/vertex-ai-tts-provider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,KAAK,EAAE,oBAAoB,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAGlE,OAAO,EACL,eAAe,EAEhB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,KAAK,EAA8B,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AAGxG;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAEhB;;;;;;;;;;;;;OAaG;IACH,cAAc,CAAC,EAAE,oBAAoB,CAAC;CACvC;AAMD;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,OAAO,CAAC,MAAM,CAAoB;IAClC,OAAO,CAAC,UAAU,CAA6E;IAE/F;;;;;OAKG;gBACS,MAAM,CAAC,EAAE,OAAO,CAAC,iBAAiB,CAAC;IAmB/C;;;;;OAKG;IACH,OAAO,CAAC,sBAAsB;IAgB9B;;;;;OAKG;YACW,cAAc;IAqB5B;;;;;;;OAOG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,WAAW,CAAC;IA4DvB;;;;OAIG;IACH,OAAO,CAAC,YAAY;IA6BpB;;;;;;;;OAQG;YACW,yBAAyB;IAsDvC;;;;;;;;OAQG;YACW,OAAO;IA6CrB;;;;;;;OAOG;YACW,eAAe;IA0B7B;;;;;;OAMG;IACH,OAAO,CAAC,QAAQ;IAkChB;;;;;;OAMG;IACH,OAAO,CAAC,QAAQ;CAwBjB"}
|