dvgateway-adapters 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/anthropic.d.ts +62 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +83 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/index.d.ts +5 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai-llm.d.ts +55 -0
- package/dist/llm/openai-llm.d.ts.map +1 -0
- package/dist/llm/openai-llm.js +68 -0
- package/dist/llm/openai-llm.js.map +1 -0
- package/dist/realtime/index.d.ts +3 -0
- package/dist/realtime/index.d.ts.map +1 -0
- package/dist/realtime/index.js +3 -0
- package/dist/realtime/index.js.map +1 -0
- package/dist/realtime/openai-realtime.d.ts +132 -0
- package/dist/realtime/openai-realtime.d.ts.map +1 -0
- package/dist/realtime/openai-realtime.js +261 -0
- package/dist/realtime/openai-realtime.js.map +1 -0
- package/dist/stt/deepgram.d.ts +105 -0
- package/dist/stt/deepgram.d.ts.map +1 -0
- package/dist/stt/deepgram.js +180 -0
- package/dist/stt/deepgram.js.map +1 -0
- package/dist/stt/index.d.ts +3 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +3 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/tts/cached-tts.d.ts +131 -0
- package/dist/tts/cached-tts.d.ts.map +1 -0
- package/dist/tts/cached-tts.js +231 -0
- package/dist/tts/cached-tts.js.map +1 -0
- package/dist/tts/elevenlabs.d.ts +95 -0
- package/dist/tts/elevenlabs.d.ts.map +1 -0
- package/dist/tts/elevenlabs.js +195 -0
- package/dist/tts/elevenlabs.js.map +1 -0
- package/dist/tts/index.d.ts +7 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +5 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/openai-tts.d.ts +64 -0
- package/dist/tts/openai-tts.d.ts.map +1 -0
- package/dist/tts/openai-tts.js +148 -0
- package/dist/tts/openai-tts.js.map +1 -0
- package/package.json +89 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cached TTS Adapter
|
|
3
|
+
*
|
|
4
|
+
* Wraps any TtsAdapter with a disk-based audio cache keyed by
|
|
5
|
+
* (provider + model + voiceId + speed + text). Cached audio persists across
|
|
6
|
+
* process restarts, eliminating redundant TTS API calls for repeated
|
|
7
|
+
* announcements and broadcasts.
|
|
8
|
+
*
|
|
9
|
+
* Features:
|
|
10
|
+
* - Disk-based PCM cache (survives restarts/updates)
|
|
11
|
+
* - SHA-256 cache key = hash(provider + voiceId + model + speed + text)
|
|
12
|
+
* - Automatic cache hit/miss/eviction counters, exposed via getStats()
|
|
13
|
+
* - Warmup / preload API for pre-generating announcement audio pools
|
|
14
|
+
* - TTL-based expiration (optional)
|
|
15
|
+
* - Max cache size enforcement (files with the oldest modification time are evicted first)
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
|
|
20
|
+
* import { CachedTtsAdapter } from 'dvgateway-adapters/tts';
|
|
21
|
+
*
|
|
22
|
+
* const tts = new CachedTtsAdapter(
|
|
23
|
+
* new ElevenLabsAdapter({ apiKey: '...' }),
|
|
24
|
+
* {
|
|
25
|
+
* provider: 'elevenlabs',
|
|
26
|
+
* cacheDir: './tts-cache',
|
|
27
|
+
* ttlMs: 7 * 24 * 60 * 60 * 1000, // 7 days
|
|
28
|
+
* },
|
|
29
|
+
* );
|
|
30
|
+
*
|
|
31
|
+
* // Pre-warm common announcements (done once, reused after restart)
|
|
32
|
+
* await tts.warmup([
|
|
33
|
+
* { text: '잠시만 기다려 주세요.', voiceId: 'abc123' },
|
|
34
|
+
* { text: '상담사에게 연결하겠습니다.' },
|
|
35
|
+
* ]);
|
|
36
|
+
*
|
|
37
|
+
* // synthesize() returns cached audio if available — no API call
|
|
38
|
+
* const audio = tts.synthesize('잠시만 기다려 주세요.');
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import type { TtsAdapter, TtsOptions } from 'dvgateway-sdk';
|
|
42
|
+
/**
 * Construction options for {@link CachedTtsAdapter}.
 *
 * Only `provider` is required; every other field has a fallback that is
 * applied in the adapter's constructor.
 */
export interface CachedTtsAdapterOptions {
    /** Provider label mixed into every cache key, e.g. "elevenlabs" or "openai". */
    provider: string;

    /**
     * Directory that holds the cached `.pcm` files.
     * Falls back to "./tts-cache" and is created on first use when missing.
     */
    cacheDir?: string;

    /**
     * Voice ID assumed for the cache key when a call does not name one.
     * Falls back to the literal "default".
     */
    defaultVoiceId?: string;

    /**
     * Model name mixed into the cache key (e.g. "eleven_flash_v2_5") so that
     * audio from different models never shares an entry.
     * Falls back to the literal "default".
     */
    defaultModel?: string;

    /**
     * Maximum age of a cached file in milliseconds, measured from the file's
     * modification time. 0 (the fallback) disables expiration.
     */
    ttlMs?: number;

    /**
     * Upper bound on the number of cached files. Once the bound is reached,
     * the files with the oldest modification time are deleted before a new
     * one is written. 0 (the fallback) means unlimited.
     */
    maxEntries?: number;
}
|
|
74
|
+
/** One announcement to pre-generate through `CachedTtsAdapter.warmup()`. */
export interface WarmupEntry {
    /** The text that should be synthesized ahead of time. */
    text: string;

    /** Voice ID for this entry only; the adapter's default voice ID is used for the cache key when omitted. */
    voiceId?: string;

    /** Speaking speed for this entry only; 1.0 is assumed for the cache key when omitted. */
    speed?: number;
}
|
|
82
|
+
/**
 * Counters reported by `CachedTtsAdapter.getStats()`.
 *
 * Exported so that callers can name the return type of `getStats()`
 * (for example when forwarding the numbers to a metrics sink).
 */
export interface CacheStats {
    /** Number of `synthesize()` calls answered from the disk cache. */
    hits: number;
    /** Number of `synthesize()` calls that had to go to the inner adapter. */
    misses: number;
    /** Number of cache files removed to stay within `maxEntries`. */
    evictions: number;
    /** Running count of cache entries tracked by the adapter. */
    totalEntries: number;
}
|
|
88
|
+
/**
 * TTS adapter decorator that keeps synthesized audio on disk and replays it
 * on later requests for the same provider/model/voice/speed/text combination.
 */
export declare class CachedTtsAdapter implements TtsAdapter {
    private readonly inner;
    private readonly provider;
    private readonly cacheDir;
    private readonly defaultVoiceId;
    private readonly defaultModel;
    private readonly ttlMs;
    private readonly maxEntries;
    private readonly stats;
    private initialized;

    /**
     * @param inner - Adapter that performs the real synthesis on a cache miss
     * @param opts - Cache configuration (provider label, directory, TTL, size bound)
     */
    constructor(inner: TtsAdapter, opts: CachedTtsAdapterOptions);

    /**
     * Stream audio for `text`.
     *
     * A cached file is replayed in fixed-size slices; otherwise the inner
     * adapter's chunks are passed through as they arrive and the complete
     * result is stored afterwards.
     */
    synthesize(text: string, options?: TtsOptions): AsyncIterable<Buffer>;

    /**
     * Synthesize and store every entry that is not already cached (and not
     * expired). Intended to be run at startup so the audio pool is available
     * across restarts.
     *
     * @returns How many entries were newly synthesized
     */
    warmup(entries: readonly WarmupEntry[]): Promise<number>;

    /**
     * Report whether audio for the given text/voice/speed is currently cached.
     * Omitted `voiceId` / `speed` use the same fallbacks as `synthesize()`.
     */
    isCached(text: string, voiceId?: string, speed?: number): Promise<boolean>;

    /**
     * Delete the cached `.pcm` files in the cache directory.
     *
     * @returns Number of cache files cleared
     */
    clearCache(): Promise<number>;

    /** Return a snapshot of the hit / miss / eviction / entry counters. */
    getStats(): Readonly<CacheStats>;

    private buildCacheKey;
    private ensureDir;
    private readCache;
    private writeCache;
    private evictIfNeeded;
}
|
|
130
|
+
export {};
|
|
131
|
+
//# sourceMappingURL=cached-tts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cached-tts.d.ts","sourceRoot":"","sources":["../../src/tts/cached-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAKH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAI5D,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,UAAU,UAAU;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AASD,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAa;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqE;IAC3F,OAAO,CAAC,WAAW,CAAS;gBAEhB,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,uBAAuB;IAU5D;;;OAGG;IACI,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;IAsC5E;;;;;;OAMG;IACG,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IA+BlE;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAQhF;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;IAanC;;OAEG;IACH,QAAQ,IAAI,QAAQ,CAAC,UAAU,CAAC;IAMhC,OAAO,CAAC,aAAa;YAKP,SAAS;YAMT,SAAS;YAkBT,UAAU;YAUV,aAAa;CAgC5B"}
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cached TTS Adapter
|
|
3
|
+
*
|
|
4
|
+
* Wraps any TtsAdapter with a disk-based audio cache keyed by
|
|
5
|
+
* (provider + model + voiceId + speed + text). Cached audio persists across
|
|
6
|
+
* process restarts, eliminating redundant TTS API calls for repeated
|
|
7
|
+
* announcements and broadcasts.
|
|
8
|
+
*
|
|
9
|
+
* Features:
|
|
10
|
+
* - Disk-based PCM cache (survives restarts/updates)
|
|
11
|
+
* - SHA-256 cache key = hash(provider + voiceId + model + speed + text)
|
|
12
|
+
* - Automatic cache hit/miss/eviction counters, exposed via getStats()
|
|
13
|
+
* - Warmup / preload API for pre-generating announcement audio pools
|
|
14
|
+
* - TTL-based expiration (optional)
|
|
15
|
+
* - Max cache size enforcement (files with the oldest modification time are evicted first)
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
|
|
20
|
+
* import { CachedTtsAdapter } from 'dvgateway-adapters/tts';
|
|
21
|
+
*
|
|
22
|
+
* const tts = new CachedTtsAdapter(
|
|
23
|
+
* new ElevenLabsAdapter({ apiKey: '...' }),
|
|
24
|
+
* {
|
|
25
|
+
* provider: 'elevenlabs',
|
|
26
|
+
* cacheDir: './tts-cache',
|
|
27
|
+
* ttlMs: 7 * 24 * 60 * 60 * 1000, // 7 days
|
|
28
|
+
* },
|
|
29
|
+
* );
|
|
30
|
+
*
|
|
31
|
+
* // Pre-warm common announcements (done once, reused after restart)
|
|
32
|
+
* await tts.warmup([
|
|
33
|
+
* { text: '잠시만 기다려 주세요.', voiceId: 'abc123' },
|
|
34
|
+
* { text: '상담사에게 연결하겠습니다.' },
|
|
35
|
+
* ]);
|
|
36
|
+
*
|
|
37
|
+
* // synthesize() returns cached audio if available — no API call
|
|
38
|
+
* const audio = tts.synthesize('잠시만 기다려 주세요.');
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
import { createHash } from 'node:crypto';
|
|
42
|
+
import { mkdir, readFile, writeFile, readdir, stat, unlink } from 'node:fs/promises';
|
|
43
|
+
import { join } from 'node:path';
|
|
44
|
+
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
45
|
+
// Fallback cache location used when the caller does not pass `cacheDir`.
// It is a relative path, so it resolves against the process working directory.
const DEFAULT_CACHE_DIR = './tts-cache';
|
|
46
|
+
// Size in bytes of each slice yielded when cached audio is replayed:
// 16,000 samples/s x 0.020 s x 2 bytes per sample = 640.
const PCM_CHUNK_SIZE = 640; // 20ms at 16kHz, 16-bit = 640 bytes
|
|
47
|
+
// ─── Implementation ──────────────────────────────────────────────────────────
|
|
48
|
+
export class CachedTtsAdapter {
|
|
49
|
+
inner;
|
|
50
|
+
provider;
|
|
51
|
+
cacheDir;
|
|
52
|
+
defaultVoiceId;
|
|
53
|
+
defaultModel;
|
|
54
|
+
ttlMs;
|
|
55
|
+
maxEntries;
|
|
56
|
+
stats = { hits: 0, misses: 0, evictions: 0, totalEntries: 0 };
|
|
57
|
+
initialized = false;
|
|
58
|
+
constructor(inner, opts) {
|
|
59
|
+
this.inner = inner;
|
|
60
|
+
this.provider = opts.provider;
|
|
61
|
+
this.cacheDir = opts.cacheDir ?? DEFAULT_CACHE_DIR;
|
|
62
|
+
this.defaultVoiceId = opts.defaultVoiceId ?? 'default';
|
|
63
|
+
this.defaultModel = opts.defaultModel ?? 'default';
|
|
64
|
+
this.ttlMs = opts.ttlMs ?? 0;
|
|
65
|
+
this.maxEntries = opts.maxEntries ?? 0;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Synthesize text to speech, returning cached audio if available.
|
|
69
|
+
* On cache miss, delegates to the inner adapter and stores the result.
|
|
70
|
+
*/
|
|
71
|
+
async *synthesize(text, options) {
|
|
72
|
+
await this.ensureDir();
|
|
73
|
+
const voiceId = options?.voiceId ?? this.defaultVoiceId;
|
|
74
|
+
const speed = options?.speed ?? 1.0;
|
|
75
|
+
const key = this.buildCacheKey(text, voiceId, speed);
|
|
76
|
+
const filePath = join(this.cacheDir, `${key}.pcm`);
|
|
77
|
+
// Try cache hit
|
|
78
|
+
const cached = await this.readCache(filePath);
|
|
79
|
+
if (cached) {
|
|
80
|
+
this.stats.hits++;
|
|
81
|
+
// Yield in 20ms chunks to match real-time streaming cadence
|
|
82
|
+
let offset = 0;
|
|
83
|
+
while (offset < cached.length) {
|
|
84
|
+
const end = Math.min(offset + PCM_CHUNK_SIZE, cached.length);
|
|
85
|
+
yield cached.subarray(offset, end);
|
|
86
|
+
offset = end;
|
|
87
|
+
}
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
// Cache miss — synthesize via inner adapter
|
|
91
|
+
this.stats.misses++;
|
|
92
|
+
const chunks = [];
|
|
93
|
+
for await (const chunk of this.inner.synthesize(text, options)) {
|
|
94
|
+
chunks.push(chunk);
|
|
95
|
+
yield chunk;
|
|
96
|
+
}
|
|
97
|
+
// Store in cache asynchronously (don't block the caller)
|
|
98
|
+
const full = Buffer.concat(chunks);
|
|
99
|
+
this.writeCache(filePath, full).catch(() => {
|
|
100
|
+
// Swallow write errors — cache is best-effort
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Pre-generate and cache audio for a list of announcements.
|
|
105
|
+
* Skips entries that are already cached. Useful at startup to
|
|
106
|
+
* build an audio pool that survives restarts.
|
|
107
|
+
*
|
|
108
|
+
* @returns Number of entries that were newly synthesized
|
|
109
|
+
*/
|
|
110
|
+
async warmup(entries) {
|
|
111
|
+
await this.ensureDir();
|
|
112
|
+
let synthesized = 0;
|
|
113
|
+
for (const entry of entries) {
|
|
114
|
+
const voiceId = entry.voiceId ?? this.defaultVoiceId;
|
|
115
|
+
const speed = entry.speed ?? 1.0;
|
|
116
|
+
const key = this.buildCacheKey(entry.text, voiceId, speed);
|
|
117
|
+
const filePath = join(this.cacheDir, `${key}.pcm`);
|
|
118
|
+
// Skip if already cached and not expired
|
|
119
|
+
const existing = await this.readCache(filePath);
|
|
120
|
+
if (existing) {
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
// Synthesize and store
|
|
124
|
+
const chunks = [];
|
|
125
|
+
const options = { voiceId, speed };
|
|
126
|
+
for await (const chunk of this.inner.synthesize(entry.text, options)) {
|
|
127
|
+
chunks.push(chunk);
|
|
128
|
+
}
|
|
129
|
+
const full = Buffer.concat(chunks);
|
|
130
|
+
await this.writeCache(filePath, full);
|
|
131
|
+
synthesized++;
|
|
132
|
+
}
|
|
133
|
+
return synthesized;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Check if a specific text is already cached.
|
|
137
|
+
*/
|
|
138
|
+
async isCached(text, voiceId, speed) {
|
|
139
|
+
await this.ensureDir();
|
|
140
|
+
const key = this.buildCacheKey(text, voiceId ?? this.defaultVoiceId, speed ?? 1.0);
|
|
141
|
+
const filePath = join(this.cacheDir, `${key}.pcm`);
|
|
142
|
+
const cached = await this.readCache(filePath);
|
|
143
|
+
return cached !== undefined;
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Clear all cached audio files.
|
|
147
|
+
*/
|
|
148
|
+
async clearCache() {
|
|
149
|
+
await this.ensureDir();
|
|
150
|
+
const files = await readdir(this.cacheDir);
|
|
151
|
+
const pcmFiles = files.filter(f => f.endsWith('.pcm'));
|
|
152
|
+
let removed = 0;
|
|
153
|
+
for (const f of pcmFiles) {
|
|
154
|
+
await unlink(join(this.cacheDir, f)).catch(() => undefined);
|
|
155
|
+
removed++;
|
|
156
|
+
}
|
|
157
|
+
this.stats.totalEntries = 0;
|
|
158
|
+
return removed;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Get cache statistics.
|
|
162
|
+
*/
|
|
163
|
+
getStats() {
|
|
164
|
+
return { ...this.stats };
|
|
165
|
+
}
|
|
166
|
+
// ─── Private ─────────────────────────────────────────────────────────────
|
|
167
|
+
buildCacheKey(text, voiceId, speed) {
|
|
168
|
+
const raw = `${this.provider}|${this.defaultModel}|${voiceId}|${speed}|${text}`;
|
|
169
|
+
return createHash('sha256').update(raw).digest('hex');
|
|
170
|
+
}
|
|
171
|
+
async ensureDir() {
|
|
172
|
+
if (this.initialized)
|
|
173
|
+
return;
|
|
174
|
+
await mkdir(this.cacheDir, { recursive: true });
|
|
175
|
+
this.initialized = true;
|
|
176
|
+
}
|
|
177
|
+
async readCache(filePath) {
|
|
178
|
+
try {
|
|
179
|
+
// Check TTL
|
|
180
|
+
if (this.ttlMs > 0) {
|
|
181
|
+
const info = await stat(filePath);
|
|
182
|
+
const age = Date.now() - info.mtimeMs;
|
|
183
|
+
if (age > this.ttlMs) {
|
|
184
|
+
await unlink(filePath).catch(() => undefined);
|
|
185
|
+
return undefined;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return await readFile(filePath);
|
|
189
|
+
}
|
|
190
|
+
catch {
|
|
191
|
+
return undefined;
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
async writeCache(filePath, data) {
|
|
195
|
+
// Enforce max entries via LRU eviction before writing
|
|
196
|
+
if (this.maxEntries > 0) {
|
|
197
|
+
await this.evictIfNeeded();
|
|
198
|
+
}
|
|
199
|
+
await writeFile(filePath, data);
|
|
200
|
+
this.stats.totalEntries++;
|
|
201
|
+
}
|
|
202
|
+
async evictIfNeeded() {
|
|
203
|
+
try {
|
|
204
|
+
const files = await readdir(this.cacheDir);
|
|
205
|
+
const pcmFiles = files.filter(f => f.endsWith('.pcm'));
|
|
206
|
+
if (pcmFiles.length < this.maxEntries)
|
|
207
|
+
return;
|
|
208
|
+
// Sort by modification time (oldest first) for LRU eviction
|
|
209
|
+
const withStats = await Promise.all(pcmFiles.map(async (f) => {
|
|
210
|
+
const fullPath = join(this.cacheDir, f);
|
|
211
|
+
const info = await stat(fullPath);
|
|
212
|
+
return { path: fullPath, mtimeMs: info.mtimeMs };
|
|
213
|
+
}));
|
|
214
|
+
withStats.sort((a, b) => a.mtimeMs - b.mtimeMs);
|
|
215
|
+
// Remove oldest entries until we're under the limit
|
|
216
|
+
const toRemove = withStats.length - this.maxEntries + 1;
|
|
217
|
+
for (let i = 0; i < toRemove; i++) {
|
|
218
|
+
const entry = withStats[i];
|
|
219
|
+
if (entry) {
|
|
220
|
+
await unlink(entry.path).catch(() => undefined);
|
|
221
|
+
this.stats.evictions++;
|
|
222
|
+
this.stats.totalEntries--;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
catch {
|
|
227
|
+
// Best-effort eviction
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
//# sourceMappingURL=cached-tts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cached-tts.js","sourceRoot":"","sources":["../../src/tts/cached-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACrF,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AA2DjC,gFAAgF;AAEhF,MAAM,iBAAiB,GAAG,aAAa,CAAC;AACxC,MAAM,cAAc,GAAG,GAAG,CAAC,CAAC,oCAAoC;AAEhE,gFAAgF;AAEhF,MAAM,OAAO,gBAAgB;IACV,KAAK,CAAa;IAClB,QAAQ,CAAS;IACjB,QAAQ,CAAS;IACjB,cAAc,CAAS;IACvB,YAAY,CAAS;IACrB,KAAK,CAAS;IACd,UAAU,CAAS;IACnB,KAAK,GAAe,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IACnF,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,KAAiB,EAAE,IAA6B;QAC1D,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC;QACnD,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,SAAS,CAAC;QACvD,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAC;QACnD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,CAAC;IACzC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,CAAC,UAAU,CAAC,IAAY,EAAE,OAAoB;QAClD,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC;QACxD,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,GAAG,CAAC;QACpC,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;QAEnD,gBAAgB;QAChB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAC9C,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAClB,4DAA4D;YAC5D,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,OAAO,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBACnC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;YACD,OAAO;QACT,CAAC;QAED,4CAA4C;QAC5C,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,IAAI,KAAK,E
AAE,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;YAC/D,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,MAAM,KAAK,CAAC;QACd,CAAC;QAED,yDAAyD;QACzD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACzC,8CAA8C;QAChD,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,MAAM,CAAC,OAAmC;QAC9C,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC;YACrD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;YAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;YAEnD,yCAAyC;YACzC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;YAChD,IAAI,QAAQ,EAAE,CAAC;gBACb,SAAS;YACX,CAAC;YAED,uBAAuB;YACvB,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAe,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YAC/C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;gBACrE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YACtC,WAAW,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,OAAgB,EAAE,KAAc;QAC3D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,GAAG,CAAC,CAAC;QACnF,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAC9C,OAAO,MAAM,KAAK,SAAS,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,M
AAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC;QAC5B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,4EAA4E;IAEpE,aAAa,CAAC,IAAY,EAAE,OAAe,EAAE,KAAa;QAChE,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,YAAY,IAAI,OAAO,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAChF,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACxD,CAAC;IAEO,KAAK,CAAC,SAAS;QACrB,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAC7B,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,QAAgB;QACtC,IAAI,CAAC;YACH,YAAY;YACZ,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC;gBACtC,IAAI,GAAG,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;oBACrB,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;oBAC9C,OAAO,SAAS,CAAC;gBACnB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,IAAY;QACrD,sDAAsD;QACtD,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAC7B,CAAC;QAED,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,aAAa;QACzB,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;YAEvD,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU;gBAAE,OAAO;YAE9C,4DAA4D;YAC5D,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CACjC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAC,CAAC,EAAC,EAAE;gBACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBACxC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,E
AAE,CAAC;YACnD,CAAC,CAAC,CACH,CAAC;YAEF,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;YAEhD,oDAAoD;YACpD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;YACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBAC3B,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;oBAChD,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;oBACvB,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;gBAC5B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,uBAAuB;QACzB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ElevenLabs TTS Adapter
|
|
3
|
+
*
|
|
4
|
+
* Synthesizes text to speech using ElevenLabs' streaming API.
|
|
5
|
+
* Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Streaming output (first audio chunk ≤75ms latency with Flash models)
|
|
9
|
+
* - Automatic 24kHz → 16kHz resampling (ElevenLabs outputs 24kHz PCM)
|
|
10
|
+
* - Turbo / Flash / Multilingual model support
|
|
11
|
+
* - Korean + multilingual support
|
|
12
|
+
* - Voice settings: stability, similarity boost, style, speed
|
|
13
|
+
*
|
|
14
|
+
* ElevenLabs Model Reference (2026-03):
|
|
15
|
+
* eleven_flash_v2_5 — Fastest (~75ms TTFA), best for real-time voice (default)
|
|
16
|
+
* eleven_turbo_v2_5 — Balanced quality/speed (~200ms TTFA)
|
|
17
|
+
* eleven_multilingual_v2 — Highest multilingual quality (higher latency)
|
|
18
|
+
* eleven_multilingual_v3 — Next-gen multilingual, improved prosody (2026)
|
|
19
|
+
* eleven_english_sts_v2 — English speech-to-speech transformation
|
|
20
|
+
*
|
|
21
|
+
* Voice IDs (popular stock voices; all English — see the note below for Korean):
|
|
22
|
+
* 21m00Tcm4TlvDq8ikWAM — Rachel (English, warm)
|
|
23
|
+
* 29vD33N1CtxCmqQRPOHJ — Drew (English, conversational)
|
|
24
|
+
* pNInz6obpgDQGcFmaJgB — Adam (English, deep)
|
|
25
|
+
* Use ElevenLabs voice library or clone a custom voice for Korean.
|
|
26
|
+
*
|
|
27
|
+
* API Endpoint: POST https://api.elevenlabs.io/v1/text-to-speech/{voiceId}/stream
|
|
28
|
+
* Docs: https://elevenlabs.io/docs/api-reference/text-to-speech
|
|
29
|
+
*/
|
|
30
|
+
import type { TtsAdapter, TtsOptions, HumanVoiceOptions, VoiceInfo } from 'dvgateway-sdk';
|
|
31
|
+
/**
 * Construction options for {@link ElevenLabsAdapter}.
 *
 * Only `apiKey` is required. Several defaults depend on whether `humanVoice`
 * is enabled; the values quoted below are the documented ones.
 */
export interface ElevenLabsAdapterOptions {
    /** ElevenLabs API key. */
    apiKey: string;

    /** Voice ID. Defaults to the "Rachel" voice; pick a Korean-capable voice for Korean output. */
    voiceId?: string;

    /**
     * Model ID. Defaults to "eleven_multilingual_v2" when humanVoice is
     * enabled and to "eleven_flash_v2_5" otherwise.
     *
     * Choices:
     *   eleven_flash_v2_5       fastest (~75ms), best for real-time voice
     *   eleven_turbo_v2_5       balanced quality/speed
     *   eleven_multilingual_v2  best multilingual quality (higher latency)
     *   eleven_multilingual_v3  next-gen multilingual (2026, improved prosody)
     */
    model?: string;

    /**
     * Voice stability, 0.0–1.0; higher values give a more consistent voice.
     * Defaults to 0.3 with humanVoice and 0.5 otherwise.
     */
    stability?: number;

    /** Similarity boost, 0.0–1.0 (default 0.75); higher values stay closer to the original voice. */
    similarityBoost?: number;

    /**
     * Style exaggeration, 0.0–1.0; adds expressiveness at the cost of latency.
     * Defaults to 0.6 with humanVoice and 0.0 otherwise.
     */
    style?: number;

    /** Whether to boost speaker clarity and target-speaker similarity (default true). */
    useSpeakerBoost?: boolean;

    /**
     * Audio format requested from ElevenLabs (default "pcm_24000").
     * Other values include pcm_16000, pcm_22050, pcm_44100 and mp3_44100_128.
     * pcm_24000 gives the best streaming quality; the output is resampled to
     * 16kHz for DVGateway.
     */
    outputFormat?: string;

    /**
     * Streaming latency optimization level, 0–4: 0 turns it off (best
     * quality), 4 is the maximum. Defaults to 4, or to 3 when humanVoice is
     * enabled for better prosody.
     */
    optimizeStreamingLatency?: number;

    /**
     * Human-like voice tuning. Enabled by default with a Korean-optimized
     * preset, which adjusts stability, style and model for natural speech.
     * Pass `false` to turn it off, or custom HumanVoiceOptions to override
     * the preset.
     */
    humanVoice?: HumanVoiceOptions | false;
}
|
|
71
|
+
/**
 * Korean native voices from the ElevenLabs Voice Library that ship with this
 * package as a ready-made, read-only list.
 */
export declare const ELEVENLABS_KOREAN_VOICES: readonly VoiceInfo[];
|
|
73
|
+
/** TTS adapter backed by the ElevenLabs streaming text-to-speech API. */
export declare class ElevenLabsAdapter implements TtsAdapter {
    private readonly opts;

    /**
     * @param opts - API key plus optional voice, model and tuning settings
     */
    constructor(opts: ElevenLabsAdapterOptions);

    /**
     * Stream synthesized audio for `text`.
     *
     * @param text - Text to speak
     * @param opts - Optional per-call settings
     */
    synthesize(text: string, opts?: TtsOptions): AsyncIterable<Buffer>;

    /**
     * List every voice the account can use, as reported by the ElevenLabs
     * API: default voices, cloned voices and shared library voices.
     * Cloned voices carry the label suffix "(클론)" and generated ones "(생성됨)".
     *
     * @param apiKey - ElevenLabs API key
     */
    static fetchVoices(apiKey: string): Promise<Array<VoiceInfo>>;

    /**
     * Create a cloned voice from an audio sample through the ElevenLabs API.
     *
     * @param apiKey - ElevenLabs API key
     * @param name - Name to give the cloned voice
     * @param audioData - Raw bytes of the audio file (Buffer/Uint8Array)
     * @param fileName - Original file name, used to detect the MIME type
     * @param description - Optional description of the voice
     * @returns VoiceInfo holding the new voice's id and name
     */
    static cloneVoice(apiKey: string, name: string, audioData: Uint8Array, fileName?: string, description?: string): Promise<VoiceInfo>;
}
|
|
95
|
+
//# sourceMappingURL=elevenlabs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"elevenlabs.d.ts","sourceRoot":"","sources":["../../src/tts/elevenlabs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAG1F,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,iFAAiF;IACjF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;OAOG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,wGAAwG;IACxG,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oFAAoF;IACpF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yHAAyH;IACzH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0EAA0E;IAC1E,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC;;;;OAIG;IACH,UAAU,CAAC,EAAE,iBAAiB,GAAG,KAAK,CAAC;CACxC;AAKD,kEAAkE;AAClE,eAAO,MAAM,wBAAwB,EAAE,aAAa,CAAC,SAAS,CAUpD,CAAC;AAEX,qBAAa,iBAAkB,YAAW,UAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAA+G;gBAExH,IAAI,EAAE,wBAAwB;IA4BnC,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;IAwEzE;;;;OAIG;WACU,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IA6B9D;;;;;;;;;OASG;WACU,UAAU,CACrB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,UAAU,EACrB,QAAQ,GAAE,MAAoB,EAC9B,WAAW,GAAE,MAAW,GACvB,OAAO,CAAC,SAAS,CAAC;CAoBtB"}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ElevenLabs TTS Adapter
|
|
3
|
+
*
|
|
4
|
+
* Synthesizes text to speech using ElevenLabs' streaming API.
|
|
5
|
+
* Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Streaming output (first audio chunk ≤75ms latency with Flash models)
|
|
9
|
+
* - Automatic 24kHz → 16kHz resampling (ElevenLabs outputs 24kHz PCM)
|
|
10
|
+
* - Turbo / Flash / Multilingual model support
|
|
11
|
+
* - Korean + multilingual support
|
|
12
|
+
* - Voice settings: stability, similarity boost, style, speed
|
|
13
|
+
*
|
|
14
|
+
* ElevenLabs Model Reference (2026-03):
|
|
15
|
+
* eleven_flash_v2_5 — Fastest (~75ms TTFA), best for real-time voice (default)
|
|
16
|
+
* eleven_turbo_v2_5 — Balanced quality/speed (~200ms TTFA)
|
|
17
|
+
* eleven_multilingual_v2 — Highest multilingual quality (higher latency)
|
|
18
|
+
* eleven_multilingual_v3 — Next-gen multilingual, improved prosody (2026)
|
|
19
|
+
* eleven_english_sts_v2 — English speech-to-speech transformation
|
|
20
|
+
*
|
|
21
|
+
* Voice IDs (popular default voices; the ones listed here are English):
|
|
22
|
+
* 21m00Tcm4TlvDq8ikWAM — Rachel (English, warm)
|
|
23
|
+
* 29vD33N1CtxCmqQRPOHJ — Drew (English, conversational)
|
|
24
|
+
* pNInz6obpgDQGcFmaJgB — Adam (English, deep)
|
|
25
|
+
* Use ElevenLabs voice library or clone a custom voice for Korean.
|
|
26
|
+
*
|
|
27
|
+
* API Endpoint: POST https://api.elevenlabs.io/v1/text-to-speech/{voiceId}/stream
|
|
28
|
+
* Docs: https://elevenlabs.io/docs/api-reference/text-to-speech
|
|
29
|
+
*/
|
|
30
|
+
import { resample, float32ToSlin16, HUMAN_VOICE_DEFAULTS_KO } from 'dvgateway-sdk';
|
|
31
|
+
/** Sample rate (Hz) of the PCM audio ElevenLabs returns for the default `pcm_24000` format. */
const ELEVENLABS_SAMPLE_RATE = 24000;
/** Sample rate (Hz) that audio is resampled to before it is yielded to the caller. */
const DV_SAMPLE_RATE = 16000;
/**
 * Built-in Korean native voices from the ElevenLabs Voice Library.
 *
 * Each entry is `{ id, label }`: `id` is the ElevenLabs voice id and `label`
 * is a display string (name, gender, language). The table below is written as
 * `[id, label]` pairs and expanded into objects.
 */
export const ELEVENLABS_KOREAN_VOICES = [
    ['pjJMvFj0JGWi3mogOkHH', 'Hyun Bin (남성, 한국어)'],
    ['t0jbNlBVZ17f02VDIeMI', '지영 / JiYoung (여성, 한국어)'],
    ['zrHiDhphv9ZnVXBqCLjz', 'Jennie (여성, 한국어)'],
    ['ZJCNdOEhQGMOIbMuhBME', 'Han Aim (남성, 한국어)'],
    ['ova4yY2jqnnUdGOmTGbx', 'KKC HQ (남성, 한국어)'],
    ['Xb7hH8MSUJpSbSDYk0k2', 'Anna Kim (여성, 한국어)'],
    ['XrExE9yKIg1WjnnlVkGX', 'Yuna (여성, 한국어)'],
    ['ThT5KcBeYPX3keUQqHPh', 'Jina (여성, 한국어)'],
    ['Sita5M0jWFxPiECPABjR', 'jjeong (여성, 한국어)'],
].map(([id, label]) => ({ id, label }));
|
|
45
|
+
export class ElevenLabsAdapter {
    /** Resolved adapter options; every field has a concrete value after construction. */
    opts;
    /**
     * Build an adapter, filling in defaults for every option the caller omitted.
     *
     * `opts.humanVoice` selects the "human voice" preset: pass `false` to
     * disable it, or an object (or nothing) to merge over
     * `HUMAN_VOICE_DEFAULTS_KO`. While the preset is enabled the fallbacks are:
     *   - model: `eleven_multilingual_v2` (otherwise `eleven_flash_v2_5`)
     *   - stability: `1 - humanVoice.speechVariation` (otherwise 0.5)
     *   - style: `humanVoice.emotionalRange` (otherwise 0)
     *   - optimizeStreamingLatency: 3 (otherwise 4)
     * Explicit values in `opts` always win over these fallbacks.
     *
     * @param opts - Adapter options; `apiKey` is used as-is, all others are optional
     */
    constructor(opts) {
        // Resolve human voice options: default is the Korean-optimized preset
        const hv = opts.humanVoice === false
            ? false
            : { ...HUMAN_VOICE_DEFAULTS_KO, ...(opts.humanVoice ?? {}) };
        const hvEnabled = hv !== false;
        this.opts = {
            voiceId: opts.voiceId ?? '21m00Tcm4TlvDq8ikWAM', // Rachel
            model: opts.model ?? (hvEnabled ? 'eleven_multilingual_v2' : 'eleven_flash_v2_5'),
            stability: opts.stability ?? (hvEnabled ? (1.0 - hv.speechVariation) : 0.5),
            similarityBoost: opts.similarityBoost ?? 0.75,
            style: opts.style ?? (hvEnabled ? hv.emotionalRange : 0.0),
            useSpeakerBoost: opts.useSpeakerBoost ?? true,
            outputFormat: opts.outputFormat ?? 'pcm_24000',
            optimizeStreamingLatency: opts.optimizeStreamingLatency ?? (hvEnabled ? 3 : 4),
            apiKey: opts.apiKey,
            humanVoice: hv,
        };
    }
    /**
     * Stream synthesized speech for `text` as a sequence of audio chunks.
     *
     * The response is treated as 16-bit little-endian PCM, cut into 20 ms
     * frames, converted to the DVGateway sample rate and yielded frame by
     * frame. The source sample rate is taken from the configured
     * `outputFormat` when it has the form `pcm_<rate>`; any other format falls
     * back to `ELEVENLABS_SAMPLE_RATE`.
     *
     * @param text - Text to synthesize
     * @param opts - Optional per-call overrides (`voiceId`, `speed`)
     * @returns Async generator of `float32ToSlin16` outputs, one per frame
     * @throws Error if the HTTP request fails or the response has no body
     */
    async *synthesize(text, opts) {
        const voiceId = opts?.voiceId ?? this.opts.voiceId;
        // Encode every interpolated value so an unusual id/format cannot alter the URL shape.
        const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream`
            + `?output_format=${encodeURIComponent(this.opts.outputFormat)}`
            + `&optimize_streaming_latency=${encodeURIComponent(this.opts.optimizeStreamingLatency)}`;
        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'xi-api-key': this.opts.apiKey,
                'Content-Type': 'application/json',
                Accept: 'audio/pcm',
            },
            body: JSON.stringify({
                text,
                model_id: this.opts.model,
                voice_settings: {
                    stability: this.opts.stability,
                    similarity_boost: this.opts.similarityBoost,
                    style: this.opts.style,
                    use_speaker_boost: this.opts.useSpeakerBoost,
                    speed: opts?.speed ?? 1.0,
                },
            }),
        });
        if (!response.ok) {
            const errBody = await response.text().catch(() => '');
            throw new Error(`ElevenLabs TTS failed (HTTP ${response.status}): ${errBody}`);
        }
        if (!response.body) {
            throw new Error('ElevenLabs TTS: empty response body');
        }
        // The PCM rate follows the requested output format (e.g. pcm_16000, pcm_22050),
        // so resampling must start from that rate rather than a fixed 24 kHz.
        const rateMatch = /^pcm_(\d+)$/.exec(String(this.opts.outputFormat));
        const parsedRate = rateMatch ? Number(rateMatch[1]) : 0;
        const sourceRate = parsedRate > 0 ? parsedRate : ELEVENLABS_SAMPLE_RATE;
        // 20 ms of 16-bit mono audio at the source rate (960 bytes at 24 kHz).
        const frameBytes = Math.max(2, Math.round(sourceRate / 50) * 2);
        const toDvChunk = (pcm) => {
            const samples = pcmBytesToFloat32(pcm);
            // Skip the resampler entirely when the source already matches the target rate.
            const converted = sourceRate === DV_SAMPLE_RATE
                ? samples
                : resample(samples, sourceRate, DV_SAMPLE_RATE);
            return float32ToSlin16(converted);
        };
        const reader = response.body.getReader();
        let remainder = Buffer.alloc(0);
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) {
                    // Flush whatever is left (shorter than one full frame)
                    if (remainder.length > 0) {
                        yield toDvChunk(remainder);
                    }
                    break;
                }
                // Network chunks do not align with frame boundaries, so accumulate first
                remainder = Buffer.concat([remainder, Buffer.from(value)]);
                while (remainder.length >= frameBytes) {
                    const frame = remainder.subarray(0, frameBytes);
                    remainder = remainder.subarray(frameBytes);
                    yield toDvChunk(frame);
                }
            }
        }
        finally {
            // Also reached when the consumer stops iterating early or a conversion throws:
            // cancel the stream so the underlying HTTP response is not left open.
            await reader.cancel().catch(() => { });
        }
    }
    // ── Voice Management APIs ──────────────────────────────────────────────
    /**
     * Fetch the voices available to the account behind `apiKey`.
     *
     * Each voice is mapped to `{ id, label }`. The label is the voice name,
     * followed by a category tag for `cloned` (클론), `generated` (생성됨) and
     * `professional` (프로) voices, and by `[language]` when the voice carries
     * a language label. A response without a `voices` array yields `[]`.
     *
     * @param apiKey - ElevenLabs API key
     * @returns List of `{ id, label }` entries
     * @throws Error if the HTTP request fails
     */
    static async fetchVoices(apiKey) {
        const response = await fetch('https://api.elevenlabs.io/v1/voices', {
            headers: { 'xi-api-key': apiKey },
        });
        if (!response.ok) {
            const errBody = await response.text().catch(() => '');
            throw new Error(`ElevenLabs fetch voices failed (HTTP ${response.status}): ${errBody}`);
        }
        const data = await response.json();
        // Guard against a payload without `voices` instead of failing with a TypeError.
        const voices = Array.isArray(data?.voices) ? data.voices : [];
        return voices.map((v) => {
            let tag = '';
            if (v.category === 'cloned')
                tag = ' (클론)';
            else if (v.category === 'generated')
                tag = ' (생성됨)';
            else if (v.category === 'professional')
                tag = ' (프로)';
            const lang = v.labels?.language ? ` [${v.labels.language}]` : '';
            return { id: v.voice_id, label: `${v.name}${tag}${lang}` };
        });
    }
    /**
     * Clone a voice from an audio file using the ElevenLabs API.
     *
     * @param apiKey - ElevenLabs API key
     * @param name - Name for the cloned voice
     * @param audioData - Raw audio file (Buffer/Uint8Array)
     * @param fileName - File name sent with the upload (default `voice.wav`)
     * @param description - Optional voice description; omitted from the request when empty
     * @returns `{ id, label }` for the new voice; `label` is the name echoed by
     *          the API when present, otherwise the `name` passed in
     * @throws Error if the HTTP request fails
     */
    static async cloneVoice(apiKey, name, audioData, fileName = 'voice.wav', description = '') {
        const formData = new FormData();
        formData.append('name', name);
        if (description)
            formData.append('description', description);
        formData.append('files', new Blob([audioData]), fileName);
        const response = await fetch('https://api.elevenlabs.io/v1/voices/add', {
            method: 'POST',
            headers: { 'xi-api-key': apiKey },
            body: formData,
        });
        if (!response.ok) {
            const errBody = await response.text().catch(() => '');
            throw new Error(`ElevenLabs voice clone failed (HTTP ${response.status}): ${errBody}`);
        }
        const data = await response.json();
        // The add-voice response carries the new voice_id; fall back to the requested
        // name so the label is never undefined when no name is echoed back.
        return { id: data.voice_id, label: data.name ?? name };
    }
}
|
|
186
|
+
/**
 * Decode signed 16-bit little-endian PCM bytes into float samples.
 *
 * Each sample is divided by 32768, so values fall in [-1, 1). A trailing odd
 * byte, if any, is ignored. `buf` must provide `readInt16LE` (a Node Buffer).
 */
function pcmBytesToFloat32(buf) {
    const decoded = new Float32Array(buf.byteLength >> 1);
    // Walk the sample index and the byte offset in lock-step (2 bytes per sample).
    for (let idx = 0, offset = 0; idx < decoded.length; idx += 1, offset += 2) {
        decoded[idx] = buf.readInt16LE(offset) / 32768.0;
    }
    return decoded;
}
|
|
195
|
+
//# sourceMappingURL=elevenlabs.js.map
|