dvgateway-adapters 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +45 -0
  2. package/dist/index.d.ts +41 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +37 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/llm/anthropic.d.ts +62 -0
  7. package/dist/llm/anthropic.d.ts.map +1 -0
  8. package/dist/llm/anthropic.js +83 -0
  9. package/dist/llm/anthropic.js.map +1 -0
  10. package/dist/llm/index.d.ts +5 -0
  11. package/dist/llm/index.d.ts.map +1 -0
  12. package/dist/llm/index.js +4 -0
  13. package/dist/llm/index.js.map +1 -0
  14. package/dist/llm/openai-llm.d.ts +55 -0
  15. package/dist/llm/openai-llm.d.ts.map +1 -0
  16. package/dist/llm/openai-llm.js +68 -0
  17. package/dist/llm/openai-llm.js.map +1 -0
  18. package/dist/realtime/index.d.ts +3 -0
  19. package/dist/realtime/index.d.ts.map +1 -0
  20. package/dist/realtime/index.js +3 -0
  21. package/dist/realtime/index.js.map +1 -0
  22. package/dist/realtime/openai-realtime.d.ts +132 -0
  23. package/dist/realtime/openai-realtime.d.ts.map +1 -0
  24. package/dist/realtime/openai-realtime.js +261 -0
  25. package/dist/realtime/openai-realtime.js.map +1 -0
  26. package/dist/stt/deepgram.d.ts +105 -0
  27. package/dist/stt/deepgram.d.ts.map +1 -0
  28. package/dist/stt/deepgram.js +180 -0
  29. package/dist/stt/deepgram.js.map +1 -0
  30. package/dist/stt/index.d.ts +3 -0
  31. package/dist/stt/index.d.ts.map +1 -0
  32. package/dist/stt/index.js +3 -0
  33. package/dist/stt/index.js.map +1 -0
  34. package/dist/tts/cached-tts.d.ts +131 -0
  35. package/dist/tts/cached-tts.d.ts.map +1 -0
  36. package/dist/tts/cached-tts.js +231 -0
  37. package/dist/tts/cached-tts.js.map +1 -0
  38. package/dist/tts/elevenlabs.d.ts +95 -0
  39. package/dist/tts/elevenlabs.d.ts.map +1 -0
  40. package/dist/tts/elevenlabs.js +195 -0
  41. package/dist/tts/elevenlabs.js.map +1 -0
  42. package/dist/tts/index.d.ts +7 -0
  43. package/dist/tts/index.d.ts.map +1 -0
  44. package/dist/tts/index.js +5 -0
  45. package/dist/tts/index.js.map +1 -0
  46. package/dist/tts/openai-tts.d.ts +64 -0
  47. package/dist/tts/openai-tts.d.ts.map +1 -0
  48. package/dist/tts/openai-tts.js +148 -0
  49. package/dist/tts/openai-tts.js.map +1 -0
  50. package/package.json +89 -0
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Cached TTS Adapter
3
+ *
4
+ * Wraps any TtsAdapter with a disk-based audio cache keyed by
5
+ * (provider + model + voiceId + speed + text). Cached audio persists across
6
+ * process restarts, eliminating redundant TTS API calls for repeated
7
+ * announcements and broadcasts.
8
+ *
9
+ * Features:
10
+ * - Disk-based PCM cache (survives restarts/updates)
11
+ * - SHA-256 cache key = hash(provider + voiceId + model + speed + text)
12
+ * - Cache hit / miss / eviction counters exposed via getStats()
13
+ * - Warmup / preload API for pre-generating announcement audio pools
14
+ * - TTL-based expiration (optional)
15
+ * - Max cache size enforcement (files with the oldest modification time are evicted first)
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
20
+ * import { CachedTtsAdapter } from 'dvgateway-adapters/tts';
21
+ *
22
+ * const tts = new CachedTtsAdapter(
23
+ * new ElevenLabsAdapter({ apiKey: '...' }),
24
+ * {
25
+ * provider: 'elevenlabs',
26
+ * cacheDir: './tts-cache',
27
+ * ttlMs: 7 * 24 * 60 * 60 * 1000, // 7 days
28
+ * },
29
+ * );
30
+ *
31
+ * // Pre-warm common announcements (done once, reused after restart)
32
+ * await tts.warmup([
33
+ * { text: '잠시만 기다려 주세요.', voiceId: 'abc123' },
34
+ * { text: '상담사에게 연결하겠습니다.' },
35
+ * ]);
36
+ *
37
+ * // synthesize() returns cached audio if available — no API call
38
+ * const audio = tts.synthesize('잠시만 기다려 주세요.');
39
+ * ```
40
+ */
41
+ import type { TtsAdapter, TtsOptions } from 'dvgateway-sdk';
42
+ export interface CachedTtsAdapterOptions { // Construction options for CachedTtsAdapter.
43
+ /**
44
+ * Provider name used as part of the cache key.
45
+ * E.g. "elevenlabs", "openai"
46
+ */
47
+ provider: string;
48
+ /**
49
+ * Directory for storing cached PCM files (default: "./tts-cache").
50
+ * Created automatically (recursively) if it does not exist.
51
+ */
52
+ cacheDir?: string;
53
+ /**
54
+ * Voice ID used for the cache key when a call does not specify one (default: "default").
55
+ * Becomes part of the cache key.
56
+ */
57
+ defaultVoiceId?: string;
58
+ /**
59
+ * Model name mixed into the cache key (e.g. "eleven_flash_v2_5"; default: "default"). The key always uses this value; it is not read from per-call options.
60
+ */
61
+ defaultModel?: string;
62
+ /**
63
+ * Time-to-live for cached files in ms, measured from the file's modification time.
64
+ * 0 = no expiration (default: 0)
65
+ */
66
+ ttlMs?: number;
67
+ /**
68
+ * Maximum number of cached files.
69
+ * When the limit is reached, the files with the oldest modification time are evicted before a new entry is written.
70
+ * 0 = unlimited (default: 0)
71
+ */
72
+ maxEntries?: number;
73
+ }
74
+ export interface WarmupEntry { // One announcement to pre-generate via CachedTtsAdapter.warmup().
75
+ /** Text to pre-synthesize */
76
+ text: string;
77
+ /** Voice ID for this entry; the adapter's default voice ID is used for the cache key when omitted */
78
+ voiceId?: string;
79
+ /** Speed for this entry; 1.0 is used for the cache key when omitted */
80
+ speed?: number;
81
+ }
82
+ interface CacheStats { // Counters reported by CachedTtsAdapter.getStats().
83
+ hits: number; // synthesize() calls served from the disk cache
84
+ misses: number; // synthesize() calls delegated to the inner adapter
85
+ evictions: number; // entries removed by maxEntries enforcement
86
+ totalEntries: number; // running count of cache entries tracked by the adapter
87
+ }
88
+ export declare class CachedTtsAdapter implements TtsAdapter { // Disk-caching wrapper around another TtsAdapter.
89
+ private readonly inner;
90
+ private readonly provider;
91
+ private readonly cacheDir;
92
+ private readonly defaultVoiceId;
93
+ private readonly defaultModel;
94
+ private readonly ttlMs;
95
+ private readonly maxEntries;
96
+ private readonly stats;
97
+ private initialized;
98
+ constructor(inner: TtsAdapter, opts: CachedTtsAdapterOptions);
99
+ /**
100
+ * Synthesize text to speech, returning cached audio if available (yielded in chunks of at most 640 bytes).
101
+ * On cache miss, passes the inner adapter's chunks through as they arrive and stores the result once the stream has completed.
102
+ */
103
+ synthesize(text: string, options?: TtsOptions): AsyncIterable<Buffer>;
104
+ /**
105
+ * Pre-generate and cache audio for a list of announcements, one entry at a time.
106
+ * Skips entries that are already cached. Useful at startup to
107
+ * build an audio pool that survives restarts.
108
+ *
109
+ * @returns Number of entries that were newly synthesized
110
+ */
111
+ warmup(entries: ReadonlyArray<WarmupEntry>): Promise<number>;
112
+ /**
113
+ * Check whether audio for the given text is cached and not expired. voiceId falls back to the configured default voice and speed to 1.0, matching the key used by synthesize().
114
+ */
115
+ isCached(text: string, voiceId?: string, speed?: number): Promise<boolean>;
116
+ /**
117
+ * Clear the cached audio files (files ending in ".pcm" directly inside the cache directory).
118
+ */
119
+ clearCache(): Promise<number>;
120
+ /**
121
+ * Get a snapshot copy of the cache statistics; the returned object is not updated afterwards.
122
+ */
123
+ getStats(): Readonly<CacheStats>;
124
+ private buildCacheKey;
125
+ private ensureDir;
126
+ private readCache;
127
+ private writeCache;
128
+ private evictIfNeeded;
129
+ }
130
+ export {};
131
+ //# sourceMappingURL=cached-tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cached-tts.d.ts","sourceRoot":"","sources":["../../src/tts/cached-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAKH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAI5D,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,WAAW;IAC1B,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,UAAU,UAAU;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;CACtB;AASD,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAa;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqE;IAC3F,OAAO,CAAC,WAAW,CAAS;gBAEhB,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,uBAAuB;IAU5D;;;OAGG;IACI,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;IAsC5E;;;;;;OAMG;IACG,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,WAAW,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IA+BlE;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAQhF;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,MAAM,CAAC;IAanC;;OAEG;IACH,QAAQ,IAAI,QAAQ,CAAC,UAAU,CAAC;IAMhC,OAAO,CAAC,aAAa;YAKP,SAAS;YAMT,SAAS;YAkBT,UAAU;YAUV,aAAa;CAgC5B"}
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Cached TTS Adapter
3
+ *
4
+ * Wraps any TtsAdapter with a disk-based audio cache keyed by
5
+ * (provider + model + voiceId + speed + text). Cached audio persists across
6
+ * process restarts, eliminating redundant TTS API calls for repeated
7
+ * announcements and broadcasts.
8
+ *
9
+ * Features:
10
+ * - Disk-based PCM cache (survives restarts/updates)
11
+ * - SHA-256 cache key = hash(provider + voiceId + model + speed + text)
12
+ * - Cache hit / miss / eviction counters exposed via getStats()
13
+ * - Warmup / preload API for pre-generating announcement audio pools
14
+ * - TTL-based expiration (optional)
15
+ * - Max cache size enforcement (files with the oldest modification time are evicted first)
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
20
+ * import { CachedTtsAdapter } from 'dvgateway-adapters/tts';
21
+ *
22
+ * const tts = new CachedTtsAdapter(
23
+ * new ElevenLabsAdapter({ apiKey: '...' }),
24
+ * {
25
+ * provider: 'elevenlabs',
26
+ * cacheDir: './tts-cache',
27
+ * ttlMs: 7 * 24 * 60 * 60 * 1000, // 7 days
28
+ * },
29
+ * );
30
+ *
31
+ * // Pre-warm common announcements (done once, reused after restart)
32
+ * await tts.warmup([
33
+ * { text: '잠시만 기다려 주세요.', voiceId: 'abc123' },
34
+ * { text: '상담사에게 연결하겠습니다.' },
35
+ * ]);
36
+ *
37
+ * // synthesize() returns cached audio if available — no API call
38
+ * const audio = tts.synthesize('잠시만 기다려 주세요.');
39
+ * ```
40
+ */
41
+ import { createHash } from 'node:crypto';
42
+ import { mkdir, readFile, writeFile, readdir, stat, unlink } from 'node:fs/promises';
43
+ import { join } from 'node:path';
44
+ // ─── Constants ───────────────────────────────────────────────────────────────
45
const DEFAULT_CACHE_DIR = './tts-cache';
const PCM_CHUNK_SIZE = 640; // 20ms at 16kHz, 16-bit = 640 bytes
// ─── Implementation ──────────────────────────────────────────────────────────
/**
 * Disk-backed caching wrapper around another TTS adapter.
 *
 * Audio is stored as `<sha256>.pcm` files inside `cacheDir`. The key is derived
 * from provider, the configured default model, voice ID, speed and text. The
 * model is always taken from the constructor options, never from per-call
 * options.
 */
export class CachedTtsAdapter {
    inner;
    provider;
    cacheDir;
    defaultVoiceId;
    defaultModel;
    ttlMs;
    maxEntries;
    stats = { hits: 0, misses: 0, evictions: 0, totalEntries: 0 };
    initialized = false;
    /**
     * @param inner - Adapter that performs the real synthesis on a cache miss.
     * @param opts - Cache configuration; see CachedTtsAdapterOptions.
     */
    constructor(inner, opts) {
        this.inner = inner;
        this.provider = opts.provider;
        this.cacheDir = opts.cacheDir ?? DEFAULT_CACHE_DIR;
        this.defaultVoiceId = opts.defaultVoiceId ?? 'default';
        this.defaultModel = opts.defaultModel ?? 'default';
        this.ttlMs = opts.ttlMs ?? 0;
        this.maxEntries = opts.maxEntries ?? 0;
    }
    /**
     * Synthesize text to speech, returning cached audio if available.
     *
     * On a hit the cached bytes are yielded in chunks of at most
     * PCM_CHUNK_SIZE bytes. On a miss the inner adapter's chunks are passed
     * through unchanged and, once the stream has completed, the concatenated
     * audio is written to the cache in the background. If the consumer stops
     * iterating early, or the inner adapter produced no audio, nothing is
     * cached.
     *
     * @param text - Text to synthesize.
     * @param options - Per-call options, forwarded untouched to the inner adapter.
     */
    async *synthesize(text, options) {
        await this.ensureDir();
        const voiceId = options?.voiceId ?? this.defaultVoiceId;
        const speed = options?.speed ?? 1.0;
        const filePath = this.cachePathFor(text, voiceId, speed);
        // Try cache hit
        const cached = await this.readCache(filePath);
        if (cached) {
            this.stats.hits++;
            // Yield in 20ms chunks to match real-time streaming cadence
            for (let offset = 0; offset < cached.length; offset += PCM_CHUNK_SIZE) {
                yield cached.subarray(offset, Math.min(offset + PCM_CHUNK_SIZE, cached.length));
            }
            return;
        }
        // Cache miss — synthesize via inner adapter
        this.stats.misses++;
        const chunks = [];
        for await (const chunk of this.inner.synthesize(text, options)) {
            chunks.push(chunk);
            yield chunk;
        }
        const full = Buffer.concat(chunks);
        // An empty result is almost certainly a failed synthesis; persisting it
        // would turn every later call into a silent "hit" with no audio.
        if (full.length === 0) {
            return;
        }
        // Store in cache asynchronously (don't block the caller)
        void this.writeCache(filePath, full).catch(() => {
            // Swallow write errors — cache is best-effort
        });
    }
    /**
     * Pre-generate and cache audio for a list of announcements.
     * Skips entries that are already cached. Entries are processed one at a
     * time. Useful at startup to build an audio pool that survives restarts.
     *
     * Only the voice ID / speed explicitly present on an entry are forwarded to
     * the inner adapter, exactly as synthesize() forwards caller options, so the
     * audio stored under a key is the audio synthesize() would have produced
     * for that key.
     *
     * @param entries - Announcements to pre-synthesize.
     * @returns Number of entries that were newly synthesized and stored
     */
    async warmup(entries) {
        await this.ensureDir();
        let synthesized = 0;
        for (const entry of entries) {
            const voiceId = entry.voiceId ?? this.defaultVoiceId;
            const speed = entry.speed ?? 1.0;
            const filePath = this.cachePathFor(entry.text, voiceId, speed);
            // Skip if already cached and not expired
            if (await this.readCache(filePath)) {
                continue;
            }
            // Forward only what the entry actually specified; the fallbacks above
            // exist for key derivation, not as real provider settings.
            const options = {
                ...(entry.voiceId !== undefined ? { voiceId: entry.voiceId } : {}),
                ...(entry.speed !== undefined ? { speed: entry.speed } : {}),
            };
            const chunks = [];
            for await (const chunk of this.inner.synthesize(entry.text, options)) {
                chunks.push(chunk);
            }
            const full = Buffer.concat(chunks);
            if (full.length === 0) {
                // Nothing usable was produced — do not cache or count it.
                continue;
            }
            await this.writeCache(filePath, full);
            synthesized++;
        }
        return synthesized;
    }
    /**
     * Check if a specific text is already cached (and not expired).
     *
     * @param text - Text to look up.
     * @param voiceId - Voice ID; falls back to the configured default voice ID.
     * @param speed - Speed; falls back to 1.0.
     */
    async isCached(text, voiceId, speed) {
        await this.ensureDir();
        const filePath = this.cachePathFor(text, voiceId ?? this.defaultVoiceId, speed ?? 1.0);
        const cached = await this.readCache(filePath);
        return cached !== undefined;
    }
    /**
     * Clear all cached audio files (`*.pcm` directly inside the cache directory).
     *
     * @returns Number of files that were actually deleted
     */
    async clearCache() {
        await this.ensureDir();
        const names = await readdir(this.cacheDir);
        const pcmFiles = names.filter((name) => name.endsWith('.pcm'));
        let removed = 0;
        for (const name of pcmFiles) {
            if (await this.removeEntry(join(this.cacheDir, name))) {
                removed++;
            }
        }
        // Re-sync with what is really left on disk (0 unless a delete failed).
        this.stats.totalEntries = pcmFiles.length - removed;
        return removed;
    }
    /**
     * Get cache statistics.
     *
     * @returns A copy of the counters; it is not updated by later activity.
     */
    getStats() {
        return { ...this.stats };
    }
    // ─── Private ─────────────────────────────────────────────────────────────
    /** SHA-256 hex digest of provider | model | voiceId | speed | text. */
    buildCacheKey(text, voiceId, speed) {
        const raw = `${this.provider}|${this.defaultModel}|${voiceId}|${speed}|${text}`;
        return createHash('sha256').update(raw).digest('hex');
    }
    /** Absolute-or-relative path of the cache file for the given key parts. */
    cachePathFor(text, voiceId, speed) {
        return join(this.cacheDir, `${this.buildCacheKey(text, voiceId, speed)}.pcm`);
    }
    /**
     * Create the cache directory on first use and seed `totalEntries` from the
     * files already present, so the counter is meaningful after a restart.
     */
    async ensureDir() {
        if (this.initialized)
            return;
        await mkdir(this.cacheDir, { recursive: true });
        this.initialized = true;
        try {
            const names = await readdir(this.cacheDir);
            this.stats.totalEntries = names.filter((name) => name.endsWith('.pcm')).length;
        }
        catch {
            // Best-effort: keep the current counter if the directory cannot be listed
        }
    }
    /** Report whether a file exists at `filePath` (any stat failure counts as "no"). */
    async fileExists(filePath) {
        try {
            await stat(filePath);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Delete one cache file and keep `totalEntries` in step.
     *
     * @returns true only when the file was actually removed
     */
    async removeEntry(filePath) {
        try {
            await unlink(filePath);
            this.stats.totalEntries = Math.max(0, this.stats.totalEntries - 1);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Load a cache file.
     *
     * @returns The file contents, or undefined when the file is missing,
     *          unreadable, empty, or older than `ttlMs` (expired files are deleted).
     */
    async readCache(filePath) {
        try {
            // Check TTL
            if (this.ttlMs > 0) {
                const info = await stat(filePath);
                if (Date.now() - info.mtimeMs > this.ttlMs) {
                    await this.removeEntry(filePath);
                    return undefined;
                }
            }
            const data = await readFile(filePath);
            // A zero-length file carries no audio; treat it as a miss so it is regenerated.
            return data.length > 0 ? data : undefined;
        }
        catch {
            return undefined;
        }
    }
    /**
     * Persist audio under `filePath`, making room first when a limit is set.
     * Overwriting an existing file neither evicts nor bumps the entry counter.
     */
    async writeCache(filePath, data) {
        const alreadyPresent = await this.fileExists(filePath);
        // Enforce max entries before adding a new file
        if (this.maxEntries > 0 && !alreadyPresent) {
            await this.evictIfNeeded();
        }
        await writeFile(filePath, data);
        if (!alreadyPresent) {
            this.stats.totalEntries++;
        }
    }
    /**
     * When the directory already holds `maxEntries` or more cache files, delete
     * the ones with the oldest modification time until one slot is free.
     * Failures are tolerated: eviction is best-effort.
     */
    async evictIfNeeded() {
        try {
            const names = await readdir(this.cacheDir);
            const pcmFiles = names.filter((name) => name.endsWith('.pcm'));
            if (pcmFiles.length < this.maxEntries)
                return;
            // A file may vanish between readdir and stat; skip it instead of
            // abandoning the whole eviction pass.
            const inspected = await Promise.all(pcmFiles.map(async (name) => {
                const fullPath = join(this.cacheDir, name);
                try {
                    const info = await stat(fullPath);
                    return { path: fullPath, mtimeMs: info.mtimeMs };
                }
                catch {
                    return undefined;
                }
            }));
            const candidates = [];
            for (const item of inspected) {
                if (item) {
                    candidates.push(item);
                }
            }
            // Oldest modification time first
            candidates.sort((a, b) => a.mtimeMs - b.mtimeMs);
            // Remove oldest entries until one slot is free for the pending write
            const excess = Math.max(0, candidates.length - this.maxEntries + 1);
            for (const victim of candidates.slice(0, excess)) {
                if (await this.removeEntry(victim.path)) {
                    this.stats.evictions++;
                }
            }
        }
        catch {
            // Best-effort eviction
        }
    }
}
231
+ //# sourceMappingURL=cached-tts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cached-tts.js","sourceRoot":"","sources":["../../src/tts/cached-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AACrF,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AA2DjC,gFAAgF;AAEhF,MAAM,iBAAiB,GAAG,aAAa,CAAC;AACxC,MAAM,cAAc,GAAG,GAAG,CAAC,CAAC,oCAAoC;AAEhE,gFAAgF;AAEhF,MAAM,OAAO,gBAAgB;IACV,KAAK,CAAa;IAClB,QAAQ,CAAS;IACjB,QAAQ,CAAS;IACjB,cAAc,CAAS;IACvB,YAAY,CAAS;IACrB,KAAK,CAAS;IACd,UAAU,CAAS;IACnB,KAAK,GAAe,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IACnF,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,KAAiB,EAAE,IAA6B;QAC1D,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC9B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC;QACnD,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,SAAS,CAAC;QACvD,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,SAAS,CAAC;QACnD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC;QAC7B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,CAAC;IACzC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,CAAC,UAAU,CAAC,IAAY,EAAE,OAAoB;QAClD,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC;QACxD,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAI,GAAG,CAAC;QACpC,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;QAEnD,gBAAgB;QAChB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAC9C,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAClB,4DAA4D;YAC5D,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,OAAO,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,cAAc,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBAC7D,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBACnC,MAAM,GAAG,GAAG,CAAC;YACf,CAAC;YACD,OAAO;QACT,CAAC;QAED,4CAA4C;QAC5C,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,IAAI,KAAK
,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;YAC/D,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACnB,MAAM,KAAK,CAAC;QACd,CAAC;QAED,yDAAyD;QACzD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;YACzC,8CAA8C;QAChD,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,MAAM,CAAC,OAAmC;QAC9C,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc,CAAC;YACrD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,IAAI,GAAG,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;YAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;YAEnD,yCAAyC;YACzC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;YAChD,IAAI,QAAQ,EAAE,CAAC;gBACb,SAAS;YACX,CAAC;YAED,uBAAuB;YACvB,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAe,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YAC/C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;gBACrE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YACtC,WAAW,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,OAAgB,EAAE,KAAc;QAC3D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,GAAG,CAAC,CAAC;QACnF,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAC9C,OAAO,MAAM,KAAK,SAAS,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;QACvD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB
,MAAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC;QAC5B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED,4EAA4E;IAEpE,aAAa,CAAC,IAAY,EAAE,OAAe,EAAE,KAAa;QAChE,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,YAAY,IAAI,OAAO,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAChF,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACxD,CAAC;IAEO,KAAK,CAAC,SAAS;QACrB,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAC7B,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,QAAgB;QACtC,IAAI,CAAC;YACH,YAAY;YACZ,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACnB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC;gBACtC,IAAI,GAAG,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;oBACrB,MAAM,MAAM,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;oBAC9C,OAAO,SAAS,CAAC;gBACnB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,IAAY;QACrD,sDAAsD;QACtD,IAAI,IAAI,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAC7B,CAAC;QAED,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,aAAa;QACzB,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC3C,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;YAEvD,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU;gBAAE,OAAO;YAE9C,4DAA4D;YAC5D,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CACjC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAC,CAAC,EAAC,EAAE;gBACrB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;gBACxC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO
,EAAE,CAAC;YACnD,CAAC,CAAC,CACH,CAAC;YAEF,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;YAEhD,oDAAoD;YACpD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;YACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBAC3B,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;oBAChD,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;oBACvB,IAAI,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;gBAC5B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,uBAAuB;QACzB,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * ElevenLabs TTS Adapter
3
+ *
4
+ * Synthesizes text to speech using ElevenLabs' streaming API.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * Features:
8
+ * - Streaming output (first audio chunk ≤75ms latency with Flash models)
9
+ * - Automatic 24kHz → 16kHz resampling (ElevenLabs outputs 24kHz PCM)
10
+ * - Turbo / Flash / Multilingual model support
11
+ * - Korean + multilingual support
12
+ * - Voice settings: stability, similarity boost, style, speed
13
+ *
14
+ * ElevenLabs Model Reference (2026-03):
15
+ * eleven_flash_v2_5 — Fastest (~75ms TTFA), best for real-time voice (default)
16
+ * eleven_turbo_v2_5 — Balanced quality/speed (~200ms TTFA)
17
+ * eleven_multilingual_v2 — Highest multilingual quality (higher latency)
18
+ * eleven_multilingual_v3 — Next-gen multilingual, improved prosody (2026)
19
+ * eleven_english_sts_v2 — English speech-to-speech transformation
20
+ *
21
+ * Voice IDs (popular Korean/multilingual voices):
22
+ * 21m00Tcm4TlvDq8ikWAM — Rachel (English, warm)
23
+ * 29vD33N1CtxCmqQRPOHJ — Drew (English, conversational)
24
+ * pNInz6obpgDQGcFmaJgB — Adam (English, deep)
25
+ * Use ElevenLabs voice library or clone a custom voice for Korean.
26
+ *
27
+ * API Endpoint: POST https://api.elevenlabs.io/v1/text-to-speech/{voiceId}/stream
28
+ * Docs: https://elevenlabs.io/docs/api-reference/text-to-speech
29
+ */
30
+ import type { TtsAdapter, TtsOptions, HumanVoiceOptions, VoiceInfo } from 'dvgateway-sdk';
31
+ export interface ElevenLabsAdapterOptions { // Construction options for ElevenLabsAdapter.
32
+ apiKey: string; // ElevenLabs API key (required)
33
+ /** Voice ID (default: "Rachel" voice — specify a Korean-capable voice for KO) */
34
+ voiceId?: string;
35
+ /**
36
+ * Model ID (default: "eleven_multilingual_v2" when humanVoice enabled, "eleven_flash_v2_5" otherwise)
37
+ * Options:
38
+ * eleven_flash_v2_5 — Fastest (~75ms), best for real-time voice
39
+ * eleven_turbo_v2_5 — Balanced quality/speed
40
+ * eleven_multilingual_v2 — Best multilingual quality (higher latency)
41
+ * eleven_multilingual_v3 — Next-gen multilingual (2026, improved prosody)
42
+ * NOTE(review): the file header marks eleven_flash_v2_5 as "(default)" while humanVoice is documented as on by default — confirm which model is effectively used when nothing is configured.
+ */
43
+ model?: string;
44
+ /** Stability (0.0–1.0, default: 0.3 with humanVoice, 0.5 otherwise) — higher = more consistent voice */
45
+ stability?: number;
46
+ /** Similarity boost (0.0–1.0, default: 0.75) — higher = closer to original voice */
47
+ similarityBoost?: number;
48
+ /** Style exaggeration (0.0–1.0, default: 0.6 with humanVoice, 0.0 otherwise) — adds expressiveness, increases latency */
49
+ style?: number;
50
+ /** Boost speaker clarity and target speaker similarity (default: true) */
51
+ useSpeakerBoost?: boolean;
52
+ /**
53
+ * Output format (default: "pcm_24000")
54
+ * Options: pcm_16000, pcm_22050, pcm_24000, pcm_44100, mp3_44100_128, etc.
55
+ * Use pcm_24000 for best quality streaming; we resample to 16kHz for DVGateway.
56
+ * NOTE(review): the implementation declares a fixed 24000 Hz source-rate constant — confirm that other PCM rates or MP3 formats are actually handled before relying on them.
+ */
57
+ outputFormat?: string;
58
+ /**
59
+ * Optimize streaming latency (0–4, default: 4 — maximum optimization)
60
+ * 0 = off (best quality), 4 = maximum latency optimization
61
+ * When humanVoice is enabled, defaults to 3 for better prosody.
62
+ */
63
+ optimizeStreamingLatency?: number;
64
+ /**
65
+ * Human-like voice optimization (default: Korean-optimized preset).
66
+ * When enabled, adjusts stability, style, and model for natural speech.
67
+ * Set to false to disable, or provide custom HumanVoiceOptions.
68
+ * Presumably the "with humanVoice" defaults documented on the other fields apply unless this is set to false — TODO confirm.
+ */
69
+ humanVoice?: HumanVoiceOptions | false;
70
+ }
71
+ /** Built-in list of Korean native voices from the ElevenLabs Voice Library; each entry carries a voice `id` and a display `label`. */
72
+ export declare const ELEVENLABS_KOREAN_VOICES: ReadonlyArray<VoiceInfo>;
73
+ export declare class ElevenLabsAdapter implements TtsAdapter { // TtsAdapter backed by the ElevenLabs text-to-speech API.
74
+ private readonly opts;
75
+ constructor(opts: ElevenLabsAdapterOptions);
76
+ synthesize(text: string, opts?: TtsOptions): AsyncIterable<Buffer>; // streams audio chunks for `text`; per the file header these are 16kHz slin16 PCM
77
+ /**
78
+ * Fetch all voices available to the user from ElevenLabs API.
79
+ * Returns default voices, cloned voices, and shared library voices.
80
+ * Cloned voices are labeled with "(클론)" (Korean for "clone"), generated voices with "(생성됨)" (Korean for "generated").
81
+ *
+ * @param apiKey - ElevenLabs API key
+ * @returns The voices visible to the account that owns the key
+ */
82
+ static fetchVoices(apiKey: string): Promise<VoiceInfo[]>;
83
+ /**
84
+ * Clone a voice from an audio file using ElevenLabs API.
85
+ *
86
+ * @param apiKey - ElevenLabs API key
87
+ * @param name - Name for the cloned voice
88
+ * @param audioData - Raw audio file (Buffer/Uint8Array)
89
+ * @param fileName - Original file name for MIME type detection (optional)
90
+ * @param description - Optional voice description
91
+ * @returns VoiceInfo with the new voice's id and name
92
+ */
93
+ static cloneVoice(apiKey: string, name: string, audioData: Uint8Array, fileName?: string, description?: string): Promise<VoiceInfo>;
94
+ }
95
+ //# sourceMappingURL=elevenlabs.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"elevenlabs.d.ts","sourceRoot":"","sources":["../../src/tts/elevenlabs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAG1F,MAAM,WAAW,wBAAwB;IACvC,MAAM,EAAE,MAAM,CAAC;IACf,iFAAiF;IACjF,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;OAOG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,wGAAwG;IACxG,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oFAAoF;IACpF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yHAAyH;IACzH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0EAA0E;IAC1E,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,wBAAwB,CAAC,EAAE,MAAM,CAAC;IAClC;;;;OAIG;IACH,UAAU,CAAC,EAAE,iBAAiB,GAAG,KAAK,CAAC;CACxC;AAKD,kEAAkE;AAClE,eAAO,MAAM,wBAAwB,EAAE,aAAa,CAAC,SAAS,CAUpD,CAAC;AAEX,qBAAa,iBAAkB,YAAW,UAAU;IAClD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAA+G;gBAExH,IAAI,EAAE,wBAAwB;IA4BnC,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;IAwEzE;;;;OAIG;WACU,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IA6B9D;;;;;;;;;OASG;WACU,UAAU,CACrB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,UAAU,EACrB,QAAQ,GAAE,MAAoB,EAC9B,WAAW,GAAE,MAAW,GACvB,OAAO,CAAC,SAAS,CAAC;CAoBtB"}
@@ -0,0 +1,195 @@
1
+ /**
2
+ * ElevenLabs TTS Adapter
3
+ *
4
+ * Synthesizes text to speech using ElevenLabs' streaming API.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * Features:
8
+ * - Streaming output (first audio chunk ≤75ms latency with Flash models)
9
+ * - Automatic 24kHz → 16kHz resampling (ElevenLabs outputs 24kHz PCM)
10
+ * - Turbo / Flash / Multilingual model support
11
+ * - Korean + multilingual support
12
+ * - Voice settings: stability, similarity boost, style, speed
13
+ *
14
+ * ElevenLabs Model Reference (2026-03):
15
+ * eleven_flash_v2_5 — Fastest (~75ms TTFA), best for real-time voice (default when humanVoice is false)
16
+ * eleven_turbo_v2_5 — Balanced quality/speed (~200ms TTFA)
17
+ * eleven_multilingual_v2 — Highest multilingual quality (higher latency; default when humanVoice is enabled)
18
+ * eleven_multilingual_v3 — Next-gen multilingual, improved prosody (2026)
19
+ * eleven_english_sts_v2 — English speech-to-speech transformation
20
+ *
21
+ * Voice IDs (popular Korean/multilingual voices):
22
+ * 21m00Tcm4TlvDq8ikWAM — Rachel (English, warm)
23
+ * 29vD33N1CtxCmqQRPOHJ — Drew (English, conversational)
24
+ * pNInz6obpgDQGcFmaJgB — Adam (English, deep)
25
+ * Use ElevenLabs voice library or clone a custom voice for Korean.
26
+ *
27
+ * API Endpoint: POST https://api.elevenlabs.io/v1/text-to-speech/{voiceId}/stream
28
+ * Docs: https://elevenlabs.io/docs/api-reference/text-to-speech
29
+ */
30
+ import { resample, float32ToSlin16, HUMAN_VOICE_DEFAULTS_KO } from 'dvgateway-sdk';
31
+ const ELEVENLABS_SAMPLE_RATE = 24000; // Hz; source rate handed to resample() for PCM read from the ElevenLabs stream
32
+ const DV_SAMPLE_RATE = 16000; // Hz; target rate handed to resample() before chunks are yielded
33
/**
 * Built-in Korean native voices from the ElevenLabs Voice Library.
 *
 * Each entry is `{ id, label }`: `id` is the ElevenLabs voice id and `label`
 * is a display string carrying the voice name plus gender/language hints.
 */
export const ELEVENLABS_KOREAN_VOICES = [
    ['pjJMvFj0JGWi3mogOkHH', 'Hyun Bin (남성, 한국어)'],
    ['t0jbNlBVZ17f02VDIeMI', '지영 / JiYoung (여성, 한국어)'],
    ['zrHiDhphv9ZnVXBqCLjz', 'Jennie (여성, 한국어)'],
    ['ZJCNdOEhQGMOIbMuhBME', 'Han Aim (남성, 한국어)'],
    ['ova4yY2jqnnUdGOmTGbx', 'KKC HQ (남성, 한국어)'],
    ['Xb7hH8MSUJpSbSDYk0k2', 'Anna Kim (여성, 한국어)'],
    ['XrExE9yKIg1WjnnlVkGX', 'Yuna (여성, 한국어)'],
    ['ThT5KcBeYPX3keUQqHPh', 'Jina (여성, 한국어)'],
    ['Sita5M0jWFxPiECPABjR', 'jjeong (여성, 한국어)'],
].map(([id, label]) => ({ id, label }));
45
+ export class ElevenLabsAdapter {
46
+ opts;
47
+ constructor(opts) {
48
+ // Resolve human voice options: default is Korean-optimized preset
49
+ const hv = opts.humanVoice === false
50
+ ? false
51
+ : { ...HUMAN_VOICE_DEFAULTS_KO, ...(opts.humanVoice ?? {}) };
52
+ // When humanVoice is enabled, adjust defaults for natural speech:
53
+ // - model: eleven_multilingual_v2 (best Korean prosody)
54
+ // - stability: lower (0.3) for more natural variation
55
+ // - style: higher (emotionalRange) for expressiveness
56
+ // - optimizeStreamingLatency: 3 (balance quality & speed)
57
+ const hvEnabled = hv !== false;
58
+ this.opts = {
59
+ voiceId: opts.voiceId ?? '21m00Tcm4TlvDq8ikWAM', // Rachel
60
+ model: opts.model ?? (hvEnabled ? 'eleven_multilingual_v2' : 'eleven_flash_v2_5'),
61
+ stability: opts.stability ?? (hvEnabled ? (1.0 - hv.speechVariation) : 0.5),
62
+ similarityBoost: opts.similarityBoost ?? 0.75,
63
+ style: opts.style ?? (hvEnabled ? hv.emotionalRange : 0.0),
64
+ useSpeakerBoost: opts.useSpeakerBoost ?? true,
65
+ outputFormat: opts.outputFormat ?? 'pcm_24000',
66
+ optimizeStreamingLatency: opts.optimizeStreamingLatency ?? (hvEnabled ? 3 : 4),
67
+ apiKey: opts.apiKey,
68
+ humanVoice: hv,
69
+ };
70
+ }
71
+ async *synthesize(text, opts) {
72
+ const voiceId = opts?.voiceId ?? this.opts.voiceId;
73
+ const url = `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/stream`
74
+ + `?output_format=${this.opts.outputFormat}`
75
+ + `&optimize_streaming_latency=${this.opts.optimizeStreamingLatency}`;
76
+ const response = await fetch(url, {
77
+ method: 'POST',
78
+ headers: {
79
+ 'xi-api-key': this.opts.apiKey,
80
+ 'Content-Type': 'application/json',
81
+ Accept: 'audio/pcm',
82
+ },
83
+ body: JSON.stringify({
84
+ text,
85
+ model_id: this.opts.model,
86
+ voice_settings: {
87
+ stability: this.opts.stability,
88
+ similarity_boost: this.opts.similarityBoost,
89
+ style: this.opts.style,
90
+ use_speaker_boost: this.opts.useSpeakerBoost,
91
+ speed: opts?.speed ?? 1.0,
92
+ },
93
+ }),
94
+ });
95
+ if (!response.ok) {
96
+ const errBody = await response.text().catch(() => '');
97
+ throw new Error(`ElevenLabs TTS failed (HTTP ${response.status}): ${errBody}`);
98
+ }
99
+ if (!response.body) {
100
+ throw new Error('ElevenLabs TTS: empty response body');
101
+ }
102
+ // Stream PCM audio chunks through resampler
103
+ const reader = response.body.getReader();
104
+ let remainder = Buffer.alloc(0);
105
+ // PCM 24kHz frames: process in 960-byte chunks (20ms at 24kHz, 16-bit = 960 bytes)
106
+ const CHUNK_BYTES = 960; // 480 samples × 2 bytes
107
+ while (true) {
108
+ const { done, value } = await reader.read();
109
+ if (done) {
110
+ // Flush any remaining bytes
111
+ if (remainder.length > 0) {
112
+ const samples24k = pcmBytesToFloat32(remainder);
113
+ const samples16k = resample(samples24k, ELEVENLABS_SAMPLE_RATE, DV_SAMPLE_RATE);
114
+ yield float32ToSlin16(samples16k);
115
+ }
116
+ break;
117
+ }
118
+ // Accumulate into buffer
119
+ remainder = Buffer.concat([remainder, Buffer.from(value)]);
120
+ // Emit complete 20ms frames
121
+ while (remainder.length >= CHUNK_BYTES) {
122
+ const frame = remainder.subarray(0, CHUNK_BYTES);
123
+ remainder = remainder.subarray(CHUNK_BYTES);
124
+ const samples24k = pcmBytesToFloat32(frame);
125
+ const samples16k = resample(samples24k, ELEVENLABS_SAMPLE_RATE, DV_SAMPLE_RATE);
126
+ yield float32ToSlin16(samples16k);
127
+ }
128
+ }
129
+ }
130
+ // ── Voice Management APIs ──────────────────────────────────────────────
131
+ /**
132
+ * Fetch all voices available to the user from ElevenLabs API.
133
+ * Returns default voices, cloned voices, and shared library voices.
134
+ * Cloned voices are labeled with (클론), generated with (생성됨).
135
+ */
136
+ static async fetchVoices(apiKey) {
137
+ const response = await fetch('https://api.elevenlabs.io/v1/voices', {
138
+ headers: { 'xi-api-key': apiKey },
139
+ });
140
+ if (!response.ok) {
141
+ const errBody = await response.text().catch(() => '');
142
+ throw new Error(`ElevenLabs fetch voices failed (HTTP ${response.status}): ${errBody}`);
143
+ }
144
+ const data = await response.json();
145
+ return data.voices.map((v) => {
146
+ let tag = '';
147
+ if (v.category === 'cloned')
148
+ tag = ' (클론)';
149
+ else if (v.category === 'generated')
150
+ tag = ' (생성됨)';
151
+ else if (v.category === 'professional')
152
+ tag = ' (프로)';
153
+ const lang = v.labels?.language ? ` [${v.labels.language}]` : '';
154
+ return { id: v.voice_id, label: `${v.name}${tag}${lang}` };
155
+ });
156
+ }
157
+ /**
158
+ * Clone a voice from an audio file using ElevenLabs API.
159
+ *
160
+ * @param apiKey - ElevenLabs API key
161
+ * @param name - Name for the cloned voice
162
+ * @param audioData - Raw audio file (Buffer/Uint8Array)
163
+ * @param fileName - Original file name for MIME type detection
164
+ * @param description - Optional voice description
165
+ * @returns VoiceInfo with the new voice's id and name
166
+ */
167
+ static async cloneVoice(apiKey, name, audioData, fileName = 'voice.wav', description = '') {
168
+ const formData = new FormData();
169
+ formData.append('name', name);
170
+ if (description)
171
+ formData.append('description', description);
172
+ formData.append('files', new Blob([audioData]), fileName);
173
+ const response = await fetch('https://api.elevenlabs.io/v1/voices/add', {
174
+ method: 'POST',
175
+ headers: { 'xi-api-key': apiKey },
176
+ body: formData,
177
+ });
178
+ if (!response.ok) {
179
+ const errBody = await response.text().catch(() => '');
180
+ throw new Error(`ElevenLabs voice clone failed (HTTP ${response.status}): ${errBody}`);
181
+ }
182
+ const data = await response.json();
183
+ return { id: data.voice_id, label: data.name };
184
+ }
185
+ }
186
/**
 * Decode signed 16-bit little-endian PCM into float samples.
 *
 * Every complete 2-byte pair is read with `readInt16LE` and divided by
 * 32768, giving values in [-1, 1). A trailing odd byte is ignored.
 *
 * @param buf - Buffer holding the raw PCM bytes.
 * @returns Float32Array with one element per decoded sample.
 */
function pcmBytesToFloat32(buf) {
    const sampleCount = buf.byteLength >> 1;
    return Float32Array.from({ length: sampleCount }, (_unused, idx) => buf.readInt16LE(idx * 2) / 32768.0);
}
195
+ //# sourceMappingURL=elevenlabs.js.map