dvgateway-adapters 1.1.4 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,8 +13,11 @@ npm install dvgateway-sdk dvgateway-adapters
13
13
  | 분류 | 어댑터 | 서비스 |
14
14
  |------|--------|--------|
15
15
  | STT | `DeepgramAdapter` | Deepgram Nova-3 |
16
+ | STT | `GoogleChirp3Adapter` | Google Cloud STT Chirp 3 |
16
17
  | TTS | `ElevenLabsAdapter` | ElevenLabs Flash v2.5 |
17
18
  | TTS | `OpenAITtsAdapter` | OpenAI gpt-4o-mini-tts |
19
+ | TTS | `GeminiTtsAdapter` | Google Gemini TTS |
20
+ | TTS | `CosyVoiceAdapter` | Alibaba CosyVoice |
18
21
  | TTS | `CachedTtsAdapter` | 디스크 기반 TTS 캐시 래퍼 |
19
22
  | LLM | `AnthropicAdapter` | Anthropic Claude |
20
23
  | LLM | `OpenAILlmAdapter` | OpenAI GPT |
@@ -24,9 +27,9 @@ npm install dvgateway-sdk dvgateway-adapters
24
27
 
25
28
  ```typescript
26
29
  import { DVGatewayClient } from 'dvgateway-sdk';
27
- import { DeepgramAdapter } from 'dvgateway-adapters/stt';
30
+ import { DeepgramAdapter, GoogleChirp3Adapter } from 'dvgateway-adapters/stt';
28
31
  import { AnthropicAdapter } from 'dvgateway-adapters/llm';
29
- import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
32
+ import { ElevenLabsAdapter, GeminiTtsAdapter, CosyVoiceAdapter } from 'dvgateway-adapters/tts';
30
33
 
31
34
  const gw = new DVGatewayClient({
32
35
  baseUrl: 'http://localhost:8080',
package/dist/index.d.ts CHANGED
@@ -5,8 +5,8 @@
5
5
  * Import only what you need — each adapter is tree-shakeable.
6
6
  *
7
7
  * Adapter overview:
8
- * STT (Speech-to-Text) — Deepgram Nova-3
9
- * TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS
8
+ * STT (Speech-to-Text) — Deepgram Nova-3, Google Chirp3
9
+ * TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS, Gemini TTS, CosyVoice
10
10
  * LLM (Language Model) — Anthropic Claude, OpenAI GPT
11
11
  * Realtime (Speech-to-Speech) — OpenAI Realtime API (audio 1.5)
12
12
  *
@@ -24,6 +24,8 @@
24
24
  */
25
25
  export { DeepgramAdapter } from './stt/deepgram.js';
26
26
  export type { DeepgramAdapterOptions } from './stt/deepgram.js';
27
+ export { GoogleChirp3Adapter } from './stt/google-chirp3.js';
28
+ export type { GoogleChirp3AdapterOptions } from './stt/google-chirp3.js';
27
29
  export type { HumanVoiceOptions, SttOptions, TtsOptions, } from 'dvgateway-sdk';
28
30
  export { HUMAN_VOICE_DEFAULTS_KO, HUMAN_VOICE_DEFAULTS_EN, } from 'dvgateway-sdk';
29
31
  export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './tts/elevenlabs.js';
@@ -32,6 +34,10 @@ export { OpenAITtsAdapter } from './tts/openai-tts.js';
32
34
  export type { OpenAITtsAdapterOptions, OpenAITtsVoice, OpenAITtsModel, } from './tts/openai-tts.js';
33
35
  export { CachedTtsAdapter } from './tts/cached-tts.js';
34
36
  export type { CachedTtsAdapterOptions, WarmupEntry, } from './tts/cached-tts.js';
37
+ export { GeminiTtsAdapter, GEMINI_TTS_VOICES } from './tts/gemini-tts.js';
38
+ export type { GeminiTtsAdapterOptions, GeminiTtsVoice, GeminiTtsModel, } from './tts/gemini-tts.js';
39
+ export { CosyVoiceAdapter, COSYVOICE_VOICES } from './tts/cosyvoice-tts.js';
40
+ export type { CosyVoiceAdapterOptions, CosyVoiceVoice, CosyVoiceModel, } from './tts/cosyvoice-tts.js';
35
41
  export { AnthropicAdapter } from './llm/anthropic.js';
36
42
  export type { AnthropicAdapterOptions } from './llm/anthropic.js';
37
43
  export { OpenAILlmAdapter } from './llm/openai-llm.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AACjE,YAAY,EAAE,sBAAsB,EAAE,MAAO,mBAAmB,CAAC;AAGjE,YAAY,EACV,iBAAiB,EACjB,UAAU,EACV,UAAU,GACX,MAA4C,eAAe,CAAC;AAC7D,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAG7D,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAClF,YAAY,EAAE,wBAAwB,EAAE,MAAoB,qBAAqB,CAAC;AAElF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,cAAc,EACd,cAAc,GACf,MAAgD,qBAAqB,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,WAAW,GACZ,MAAgD,qBAAqB,CAAC;AAGvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AACtE,YAAY,EAAE,uBAAuB,EAAE,MAAU,oBAAoB,CAAC;AAEtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,MAAU,qBAAqB,CAAC;AAGvE,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC;AACjF,YAAY,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,+BAA+B,EAC/B,kCAAkC,EAClC,qBAAqB,GACtB,MAAgD,+BAA+B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AACjE,YAAY,EAAE,sBAAsB,EAAE,MAAO,mBAAmB,CAAC;AAEjE,OAAO,EAAE,mBAAmB,EAAE,MAAe,wBAAwB,CAAC;AACtE,YAAY,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AAGzE,YAAY,EACV,iBAAiB,EACjB,UAAU,EACV,UAAU,GACX,MAA4C,eAAe,CAAC;AAC7D,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAG7D,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAClF,YAAY,EAAE,wBAAwB,EAAE,MAAoB,qBAAqB,CAAC;AAElF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,cAAc,EACd,cAAc,GACf,MAAgD,qBAAqB,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,WAAW,GACZ,MAAgD,qBAAqB,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAC1E,YAAY,EACV,uBAAuB,EACvB,cAAc,EACd,cAAc,GACf,MAAgD,qBAAqB,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC5E,YAAY,EACV,uBAAuB,EACvB,cAAc,EACd,cAAc,GACf,MAAgD,wBAAwB,CAAC;AAG1E,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AACtE,YAAY,EAAE,uBAAuB,EAAE,MAAU,oBAAoB,CAAC;AAEtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,MAAU,qBAAqB,CAAC;AAGvE,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC;AACjF,YAAY,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,+BAA+B,EAC/B,kCAAkC,EAClC,qBAAqB,GACtB,MAAgD,+BAA+B,CAAC"}
package/dist/index.js CHANGED
@@ -5,8 +5,8 @@
5
5
  * Import only what you need — each adapter is tree-shakeable.
6
6
  *
7
7
  * Adapter overview:
8
- * STT (Speech-to-Text) — Deepgram Nova-3
9
- * TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS
8
+ * STT (Speech-to-Text) — Deepgram Nova-3, Google Chirp3
9
+ * TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS, Gemini TTS, CosyVoice
10
10
  * LLM (Language Model) — Anthropic Claude, OpenAI GPT
11
11
  * Realtime (Speech-to-Speech) — OpenAI Realtime API (audio 1.5)
12
12
  *
@@ -24,11 +24,14 @@
24
24
  */
25
25
  // ── STT (Speech-to-Text) ──────────────────────────────────────────────────────
26
26
  export { DeepgramAdapter } from './stt/deepgram.js';
27
+ export { GoogleChirp3Adapter } from './stt/google-chirp3.js';
27
28
  export { HUMAN_VOICE_DEFAULTS_KO, HUMAN_VOICE_DEFAULTS_EN, } from 'dvgateway-sdk';
28
29
  // ── TTS (Text-to-Speech) ─────────────────────────────────────────────────────
29
30
  export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './tts/elevenlabs.js';
30
31
  export { OpenAITtsAdapter } from './tts/openai-tts.js';
31
32
  export { CachedTtsAdapter } from './tts/cached-tts.js';
33
+ export { GeminiTtsAdapter, GEMINI_TTS_VOICES } from './tts/gemini-tts.js';
34
+ export { CosyVoiceAdapter, COSYVOICE_VOICES } from './tts/cosyvoice-tts.js';
32
35
  // ── LLM (Language Model) ─────────────────────────────────────────────────────
33
36
  export { AnthropicAdapter } from './llm/anthropic.js';
34
37
  export { OpenAILlmAdapter } from './llm/openai-llm.js';
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,iFAAiF;AACjF,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AASjE,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAE7D,gFAAgF;AAChF,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAGlF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAOvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAMvE,gFAAgF;AAChF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AAGtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAGvE,iFAAiF;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,iFAAiF;AACjF,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AAGjE,OAAO,EAAE,mBAAmB,EAAE,MAAe,wBAAwB,CAAC;AAStE,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAE7D,gFAAgF;AAChF,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAGlF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAOvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAMvE,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAO1E,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAO5E,gFAAgF;AAChF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AAGtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAGvE,iFAAiF;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC"}
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Google Cloud Speech-to-Text V2 Adapter (Chirp 3 model)
3
+ *
4
+ * Sends 16kHz slin16 PCM audio to Google Cloud Speech-to-Text REST API
5
+ * in 2-second chunks and fires onTranscript callbacks for final results.
6
+ *
7
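+ * Features (capabilities of the Chirp 3 model; the adapter's own requests only set language, model, audio decoding and automatic punctuation):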
8
+ * - Chirp 3 model (best multilingual accuracy, GA — 100+ languages)
9
+ * - Speaker diarization support
10
+ * - Automatic language detection
11
+ * - Speech adaptation / denoiser
12
+ * - Automatic punctuation
13
+ *
14
+ * API versions:
15
+ * V2 (default): POST https://speech.googleapis.com/v2/projects/{projectId}/locations/global/recognizers/_:recognize
16
+ * V1 (fallback): POST https://speech.googleapis.com/v1/speech:recognize
17
+ *
18
+ * Audio format:
19
+ * Input: raw 16-bit PCM, 16kHz, mono (matches DVGateway slin16)
20
+ * Output: JSON transcript results with confidence scores
21
+ *
22
+ * API key format:
23
+ * V2: "project_id:api_key" — splits into project ID and API key
24
+ * V1: plain "api_key" — used when no colon separator is present
25
+ *
26
+ * Docs: https://cloud.google.com/speech-to-text/v2/docs
27
+ */
28
+ import type { SttAdapter, AudioChunk, TranscriptResult } from 'dvgateway-sdk';
29
+ export interface GoogleChirp3AdapterOptions { // Constructor options for GoogleChirp3Adapter; only apiKey is required.
30
+ /**
31
+ * API key for Google Cloud Speech-to-Text.
32
+ * Format: "project_id:api_key" for V2 API, or plain "api_key" for V1 fallback.
33
+ * The string is split at the first ':' — the part before it becomes the project ID used in the V2 URL and the rest is the key. With no ':' present only the V1 endpoint is called.
34
+ */
34
+ apiKey: string;
35
+ /** Language code, e.g. "ko-KR", "en-US" (default: "ko-KR"). Sent as the single entry of `languageCodes` on V2 and as `languageCode` on V1. */
36
+ language?: string;
37
+ /**
38
+ * Google Cloud STT model (default: "chirp_3")
39
+ * Options: chirp_3, chirp_2, long, short, telephony, medical_dictation, medical_conversation
40
+ * The value is passed unchanged to both the V2 request and the V1 fallback request.
41
+ */
41
+ model?: string;
42
+ /** Enable automatic punctuation (default: true). Maps to `enableAutomaticPunctuation` in both request bodies. */
43
+ punctuate?: boolean;
44
+ }
45
+ export declare class GoogleChirp3Adapter implements SttAdapter { // STT adapter that buffers call audio and posts it to Google Cloud Speech-to-Text in fixed-size chunks.
46
+ private readonly opts; // options with defaults applied (language, model, punctuate) plus the raw apiKey string
47
+ private readonly projectId; // project ID parsed from apiKey, or null when apiKey has no "project_id:" prefix
48
+ private readonly apiKey; // key portion of the apiKey option, as sent to the API
49
+ private transcriptHandler; // callback registered via onTranscript(); null until one is registered
50
+ private stopped; // set by stop(); startStream() checks it between chunks and exits its loop
51
+ private audioBuffer; // converted PCM bytes that have not been sent for recognition yet
52
+ constructor(opts: GoogleChirp3AdapterOptions);
53
+ onTranscript(handler: (result: TranscriptResult) => void): void; // stores the handler, replacing any previously registered one
54
+ startStream(linkedId: string, audioStream: AsyncIterable<AudioChunk>): Promise<void>; // consumes audioStream until it ends or stop() is observed; recognition requests are awaited inline
55
+ stop(): Promise<void>; // sets the stopped flag and discards buffered audio
56
+ /**
57
+ * Send a PCM audio chunk to Google Cloud Speech-to-Text for recognition.
58
+ * Tries V2 API first (if project ID is available), falls back to V1.
59
+ * API errors and thrown exceptions are written to stderr; nothing is rethrown to the caller.
60
+ */
60
+ private recognizeChunk;
61
+ /**
62
+ * Call Google Cloud Speech-to-Text V2 API.
63
+ * Endpoint: POST https://speech.googleapis.com/v2/projects/{projectId}/locations/global/recognizers/_:recognize
64
+ */
65
+ private callV2Api;
66
+ /**
67
+ * Call Google Cloud Speech-to-Text V1 API.
68
+ * Endpoint: POST https://speech.googleapis.com/v1/speech:recognize
69
+ */
70
+ private callV1Api;
71
+ /**
72
+ * Parse Google STT response and invoke the transcript handler.
73
+ * One TranscriptResult is emitted per entry of `results`, built from that entry's first alternative; entries without a transcript are skipped.
74
+ */
74
+ private handleResponse;
75
+ }
76
+ //# sourceMappingURL=google-chirp3.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"google-chirp3.d.ts","sourceRoot":"","sources":["../../src/stt/google-chirp3.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAG9E,MAAM,WAAW,0BAA0B;IACzC;;;OAGG;IACH,MAAM,EAAE,MAAM,CAAC;IACf,8DAA8D;IAC9D,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,mDAAmD;IACnD,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAyBD,qBAAa,mBAAoB,YAAW,UAAU;IACpD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAuC;IAC5D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,iBAAiB,CAAqD;IAC9E,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,WAAW,CAA2B;gBAElC,IAAI,EAAE,0BAA0B;IAmB5C,YAAY,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAIzD,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA0BpF,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAK3B;;;OAGG;YACW,cAAc;IAmC5B;;;OAGG;YACW,SAAS;IAyBvB;;;OAGG;YACW,SAAS;IAyBvB;;OAEG;IACH,OAAO,CAAC,cAAc;CAmBvB"}
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Google Cloud Speech-to-Text V2 Adapter (Chirp 3 model)
3
+ *
4
+ * Sends 16kHz slin16 PCM audio to Google Cloud Speech-to-Text REST API
5
+ * in 2-second chunks and fires onTranscript callbacks for final results.
6
+ *
7
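+ * Features (capabilities of the Chirp 3 model; the adapter's own requests only set language, model, audio decoding and automatic punctuation):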
8
+ * - Chirp 3 model (best multilingual accuracy, GA — 100+ languages)
9
+ * - Speaker diarization support
10
+ * - Automatic language detection
11
+ * - Speech adaptation / denoiser
12
+ * - Automatic punctuation
13
+ *
14
+ * API versions:
15
+ * V2 (default): POST https://speech.googleapis.com/v2/projects/{projectId}/locations/global/recognizers/_:recognize
16
+ * V1 (fallback): POST https://speech.googleapis.com/v1/speech:recognize
17
+ *
18
+ * Audio format:
19
+ * Input: raw 16-bit PCM, 16kHz, mono (matches DVGateway slin16)
20
+ * Output: JSON transcript results with confidence scores
21
+ *
22
+ * API key format:
23
+ * V2: "project_id:api_key" — splits into project ID and API key
24
+ * V1: plain "api_key" — used when no colon separator is present
25
+ *
26
+ * Docs: https://cloud.google.com/speech-to-text/v2/docs
27
+ */
28
+ import { float32ToSlin16 } from 'dvgateway-sdk';
29
/** Audio chunk accumulation settings */
const CHUNK_DURATION_MS = 2000;
const SAMPLE_RATE = 16000;
const BYTES_PER_SAMPLE = 2; // 16-bit PCM
/** Bytes of mono 16-bit PCM sent per recognition request (2 s at 16 kHz = 64 000 bytes). */
const CHUNK_BYTE_SIZE = SAMPLE_RATE * BYTES_PER_SAMPLE * (CHUNK_DURATION_MS / 1000);

/**
 * POST a JSON recognize request and normalise the reply into a plain object.
 *
 * The API key travels in the `X-Goog-Api-Key` header rather than in the URL,
 * so it cannot break the query string and does not end up in URL logs.
 *
 * The reply is always an object: either the parsed JSON body, or a synthetic
 * `{ error: { code, message } }` when the HTTP status is not OK and the body
 * carries no `error` of its own, or when the body is not a JSON object at all.
 * Returning an error object instead of throwing lets the caller's
 * V2 -> V1 fallback run for proxy/gateway failures with non-JSON bodies.
 *
 * @param {string} url - Fully built endpoint URL (without credentials).
 * @param {string} apiKey - Google API key.
 * @param {object} body - Request payload; serialised with JSON.stringify.
 * @returns {Promise<object>} Parsed response or a synthetic error object.
 * @throws Whatever `fetch` throws on transport failure.
 */
async function postRecognizeRequest(url, apiKey, body) {
    const res = await fetch(url, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-Goog-Api-Key': apiKey,
        },
        body: JSON.stringify(body),
    });
    const rawBody = await res.text();
    let parsed;
    try {
        // An empty body is treated as "no results" rather than a parse failure.
        parsed = rawBody.length > 0 ? JSON.parse(rawBody) : {};
    }
    catch {
        parsed = undefined;
    }
    const isJsonObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
    if (isJsonObject && (res.ok || parsed.error)) {
        return parsed;
    }
    const reason = res.ok
        ? 'response body is not a JSON object'
        : (res.statusText || 'request failed');
    return { error: { code: res.status, message: reason } };
}

/**
 * Speech-to-text adapter backed by the Google Cloud Speech-to-Text REST API.
 *
 * Audio from `startStream` is converted to 16-bit PCM, accumulated, and sent
 * in CHUNK_BYTE_SIZE pieces. Each recognised result is passed to the handler
 * registered with `onTranscript`. Requests are made one at a time, inline with
 * stream consumption.
 */
export class GoogleChirp3Adapter {
    /** Options with defaults applied, plus the raw apiKey string. */
    opts;
    /** Project ID parsed from apiKey, or null when only the V1 API can be used. */
    projectId;
    /** Key portion of the apiKey option. */
    apiKey;
    /** Callback registered via onTranscript(), or null. */
    transcriptHandler = null;
    /** Set by stop(); checked between chunks by startStream(). */
    stopped = false;
    /** Converted PCM bytes not yet sent for recognition. */
    audioBuffer = Buffer.alloc(0);

    /**
     * @param {object} opts - Adapter options.
     * @param {string} opts.apiKey - "project_id:api_key" (V2 with V1 fallback) or plain "api_key" (V1 only).
     * @param {string} [opts.language] - Language code, default "ko-KR".
     * @param {string} [opts.model] - Model name, default "chirp_3".
     * @param {boolean} [opts.punctuate] - Automatic punctuation, default true.
     * @throws {TypeError} When opts.apiKey is not a string.
     */
    constructor(opts) {
        if (typeof opts?.apiKey !== 'string') {
            // Previously surfaced as an opaque "indexOf" TypeError; same type, clearer message.
            throw new TypeError('GoogleChirp3Adapter: opts.apiKey must be a string');
        }
        this.opts = {
            language: opts.language ?? 'ko-KR',
            model: opts.model ?? 'chirp_3',
            punctuate: opts.punctuate ?? true,
            apiKey: opts.apiKey,
        };
        // Parse API key: "project_id:api_key" for V2, plain "api_key" for V1
        const colonIdx = opts.apiKey.indexOf(':');
        if (colonIdx > 0) {
            this.projectId = opts.apiKey.substring(0, colonIdx);
            this.apiKey = opts.apiKey.substring(colonIdx + 1);
        }
        else {
            this.projectId = null;
            // ":key" means an empty project ID; drop the separator so it is not sent as part of the key.
            this.apiKey = colonIdx === 0 ? opts.apiKey.substring(1) : opts.apiKey;
        }
    }

    /**
     * Register the transcript callback, replacing any previous one.
     * @param {Function} handler - Called with each transcript result.
     */
    onTranscript(handler) {
        this.transcriptHandler = handler;
    }

    /**
     * Consume an audio stream and run recognition on it chunk by chunk.
     *
     * Resolves when the stream ends or when stop() has been observed. Audio
     * left in the buffer at end of stream is sent as a final, shorter chunk.
     *
     * @param {string} linkedId - Identifier copied into every emitted result.
     * @param {object} audioStream - Async iterable of chunks exposing `samples`.
     * @returns {Promise} Resolves once the stream has been fully processed.
     */
    async startStream(linkedId, audioStream) {
        this.stopped = false;
        this.audioBuffer = Buffer.alloc(0);
        for await (const chunk of audioStream) {
            if (this.stopped)
                break;
            // Convert Float32 samples to slin16 PCM
            const pcm = float32ToSlin16(chunk.samples);
            const pcmBytes = Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
            this.audioBuffer = Buffer.concat([this.audioBuffer, pcmBytes]);
            // When we have accumulated enough audio, send a chunk for recognition
            while (this.audioBuffer.length >= CHUNK_BYTE_SIZE && !this.stopped) {
                const chunkData = this.audioBuffer.subarray(0, CHUNK_BYTE_SIZE);
                this.audioBuffer = this.audioBuffer.subarray(CHUNK_BYTE_SIZE);
                await this.recognizeChunk(linkedId, chunkData);
            }
        }
        // Process any remaining audio in the buffer
        if (!this.stopped && this.audioBuffer.length > 0) {
            await this.recognizeChunk(linkedId, this.audioBuffer);
            this.audioBuffer = Buffer.alloc(0);
        }
    }

    /**
     * Ask the running stream loop to exit and discard buffered audio.
     * @returns {Promise} Resolves immediately after the flag is set.
     */
    async stop() {
        this.stopped = true;
        this.audioBuffer = Buffer.alloc(0);
    }

    /**
     * Send a PCM audio chunk to Google Cloud Speech-to-Text for recognition.
     * Tries V2 API first (if project ID is available), falls back to V1.
     *
     * Never throws: API errors and exceptions are written to stderr so that a
     * failed chunk does not abort the surrounding stream loop.
     *
     * @param {string} linkedId - Identifier copied into emitted results.
     * @param {Buffer} pcmData - Raw 16-bit PCM bytes.
     */
    async recognizeChunk(linkedId, pcmData) {
        if (pcmData.length === 0) {
            // Nothing to recognise; avoid a pointless (and billable) request.
            return;
        }
        const base64Audio = pcmData.toString('base64');
        try {
            let response;
            if (this.projectId) {
                // Try V2 API first
                response = await this.callV2Api(base64Audio);
                if (response.error) {
                    // Fallback to V1 on error
                    process.stderr.write(`[GoogleChirp3Adapter] V2 API error (${response.error.code}): ${response.error.message}, falling back to V1\n`);
                    response = await this.callV1Api(base64Audio);
                }
            }
            else {
                // No project ID — use V1 directly
                response = await this.callV1Api(base64Audio);
            }
            if (response.error) {
                process.stderr.write(`[GoogleChirp3Adapter] API error (${response.error.code}): ${response.error.message}\n`);
                return;
            }
            this.handleResponse(linkedId, response);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            process.stderr.write(`[GoogleChirp3Adapter] recognition error: ${message}\n`);
        }
    }

    /**
     * Call Google Cloud Speech-to-Text V2 API.
     * Endpoint: POST https://speech.googleapis.com/v2/projects/{projectId}/locations/global/recognizers/_:recognize
     *
     * The audio is headerless PCM, so the decoding parameters are stated
     * explicitly; automatic detection relies on a container header.
     *
     * @param {string} base64Audio - Base64-encoded PCM audio.
     * @returns {Promise} Parsed response object (may contain `error`).
     */
    async callV2Api(base64Audio) {
        const project = encodeURIComponent(this.projectId);
        const url = `https://speech.googleapis.com/v2/projects/${project}/locations/global/recognizers/_:recognize`;
        const body = {
            config: {
                languageCodes: [this.opts.language],
                model: this.opts.model,
                explicitDecodingConfig: {
                    encoding: 'LINEAR16',
                    sampleRateHertz: SAMPLE_RATE,
                    audioChannelCount: 1,
                },
                features: {
                    enableAutomaticPunctuation: this.opts.punctuate,
                },
            },
            content: base64Audio,
        };
        return postRecognizeRequest(url, this.apiKey, body);
    }

    /**
     * Call Google Cloud Speech-to-Text V1 API.
     * Endpoint: POST https://speech.googleapis.com/v1/speech:recognize
     *
     * @param {string} base64Audio - Base64-encoded PCM audio.
     * @returns {Promise} Parsed response object (may contain `error`).
     */
    async callV1Api(base64Audio) {
        const url = 'https://speech.googleapis.com/v1/speech:recognize';
        const body = {
            config: {
                encoding: 'LINEAR16',
                sampleRateHertz: SAMPLE_RATE,
                languageCode: this.opts.language,
                enableAutomaticPunctuation: this.opts.punctuate,
                model: this.opts.model,
            },
            audio: {
                content: base64Audio,
            },
        };
        return postRecognizeRequest(url, this.apiKey, body);
    }

    /**
     * Parse Google STT response and invoke the transcript handler.
     *
     * One result is emitted per entry of `response.results`, using that
     * entry's first alternative; entries without a transcript are skipped.
     *
     * @param {string} linkedId - Identifier copied into emitted results.
     * @param {object} response - Parsed API response.
     */
    handleResponse(linkedId, response) {
        if (!response.results || response.results.length === 0)
            return;
        for (const result of response.results) {
            const alt = result.alternatives?.[0];
            if (!alt || !alt.transcript)
                continue;
            const transcriptResult = {
                linkedId,
                text: alt.transcript,
                isFinal: result.isFinal !== false, // REST API results are final by default
                confidence: alt.confidence,
                language: result.languageCode ?? this.opts.language,
                timestampMs: Date.now(),
            };
            this.transcriptHandler?.(transcriptResult);
        }
    }
}
191
+ //# sourceMappingURL=google-chirp3.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"google-chirp3.js","sourceRoot":"","sources":["../../src/stt/google-chirp3.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAoChD,wCAAwC;AACxC,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAC/B,MAAM,WAAW,GAAG,KAAK,CAAC;AAC1B,MAAM,gBAAgB,GAAG,CAAC,CAAC,CAAC,aAAa;AACzC,MAAM,eAAe,GAAG,WAAW,GAAG,gBAAgB,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAC;AAEpF,MAAM,OAAO,mBAAmB;IACb,IAAI,CAAuC;IAC3C,SAAS,CAAgB;IACzB,MAAM,CAAS;IACxB,iBAAiB,GAAgD,IAAI,CAAC;IACtE,OAAO,GAAG,KAAK,CAAC;IAChB,WAAW,GAAW,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE9C,YAAY,IAAgC;QAC1C,IAAI,CAAC,IAAI,GAAG;YACV,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,OAAO;YAClC,KAAK,EAAK,IAAI,CAAC,KAAK,IAAO,SAAS;YACpC,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;YACjC,MAAM,EAAI,IAAI,CAAC,MAAM;SACtB,CAAC;QAEF,qEAAqE;QACrE,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,QAAQ,GAAG,CAAC,EAAE,CAAC;YACjB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;YACpD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,YAAY,CAAC,OAA2C;QACtD,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,WAAsC;QACxE,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QACrB,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEnC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,OAAO;gBAAE,MAAM;YAExB,wCAAwC;YACxC,MAAM,GAAG,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC3C,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAE9G,sEAAsE;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,IAAI,eAAe,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;gBACnE,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;gBAChE,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;gBAC9D,MAAM,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,SA
AS,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;QAED,4CAA4C;QAC5C,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,MAAM,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACtD,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,cAAc,CAAC,QAAgB,EAAE,OAAe;QAC5D,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAE/C,IAAI,CAAC;YACH,IAAI,QAA2B,CAAC;YAEhC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,mBAAmB;gBACnB,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;gBAC7C,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;oBACnB,0BAA0B;oBAC1B,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,uCAAuC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,QAAQ,CAAC,KAAK,CAAC,OAAO,wBAAwB,CAC/G,CAAC;oBACF,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;gBAC/C,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,kCAAkC;gBAClC,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;YAC/C,CAAC;YAED,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACnB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oCAAoC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,QAAQ,CAAC,KAAK,CAAC,OAAO,IAAI,CACxF,CAAC;gBACF,OAAO;YACT,CAAC;YAED,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAC1C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACjE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,4CAA4C,OAAO,IAAI,CAAC,CAAC;QAChF,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,SAAS,CAAC,WAAmB;QACzC,MAAM,GAAG,GACP,6CAA6C,IAAI,CAAC,SAAS,iDAAiD,IAAI,CAAC,MAAM,EAAE,CAAC;QAE5H,MAAM,IAAI,GAAG;YACX,MAAM,EAAE;gBACN,aAAa,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK;gBACtB,kBAAkB,EAAE,EAAE;gBACtB,QAAQ,EAAE;oBACR,0BAA0B,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS;iBAChD;aACF;YACD,OAAO,EAAE,WAAW;SACrB,CAAC;QAEF,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC3B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,OAAO,CAAC,
MAAM,GAAG,CAAC,IAAI,EAAE,CAAsB,CAAC;IACjD,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,SAAS,CAAC,WAAmB;QACzC,MAAM,GAAG,GAAG,yDAAyD,IAAI,CAAC,MAAM,EAAE,CAAC;QAEnF,MAAM,IAAI,GAAG;YACX,MAAM,EAAE;gBACN,QAAQ,EAAE,UAAmB;gBAC7B,eAAe,EAAE,WAAW;gBAC5B,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;gBAChC,0BAA0B,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS;gBAC/C,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK;aACvB;YACD,KAAK,EAAE;gBACL,OAAO,EAAE,WAAW;aACrB;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC3B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAsB,CAAC;IACjD,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAgB,EAAE,QAA2B;QAClE,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAE/D,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YACrC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU;gBAAE,SAAS;YAEtC,MAAM,gBAAgB,GAAqB;gBACzC,QAAQ;gBACR,IAAI,EAAE,GAAG,CAAC,UAAU;gBACpB,OAAO,EAAE,MAAM,CAAC,OAAO,KAAK,KAAK,EAAE,wCAAwC;gBAC3E,UAAU,EAAE,GAAG,CAAC,UAAU;gBAC1B,QAAQ,EAAE,MAAM,CAAC,YAAY,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ;gBACnD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;aACxB,CAAC;YAEF,IAAI,CAAC,iBAAiB,EAAE,CAAC,gBAAgB,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;CACF"}
@@ -1,3 +1,5 @@
1
1
  export { DeepgramAdapter } from './deepgram.js';
2
2
  export type { DeepgramAdapterOptions } from './deepgram.js';
3
+ export { GoogleChirp3Adapter } from './google-chirp3.js';
4
+ export type { GoogleChirp3AdapterOptions } from './google-chirp3.js';
3
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,YAAY,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,YAAY,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,YAAY,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC"}
package/dist/stt/index.js CHANGED
@@ -1,3 +1,4 @@
1
1
  // STT (Speech-to-Text) adapters
2
2
  export { DeepgramAdapter } from './deepgram.js';
3
+ export { GoogleChirp3Adapter } from './google-chirp3.js';
3
4
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAEhD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Alibaba CosyVoice TTS Adapter (DashScope API)
3
+ *
4
+ * Synthesizes text to speech using Alibaba's CosyVoice models via the DashScope API.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * Features:
8
+ * - High-quality Chinese and multilingual TTS via CosyVoice
9
+ * - MP3 → 16kHz PCM conversion via ffmpeg subprocess
10
+ * - Supports 11 languages including Korean via language_hints parameter
11
+ * - Multiple voice presets optimized for different use cases
12
+ *
13
+ * Model Reference (2026-03):
14
+ * cosyvoice-v3.5-plus — Highest quality, best prosody and naturalness (default)
15
+ * cosyvoice-v3.5-flash — Low latency, optimized for real-time applications
16
+ *
17
+ * Voice options:
18
+ * longxiaochun — Recommended, versatile female voice (default)
19
+ * longxiaochun_v2 — Updated version of longxiaochun
20
+ * longyue — Female, gentle tone
21
+ * longwan — Female, warm tone
22
+ * longjing — Female, clear and professional
23
+ * longshuo — Male, steady and authoritative
24
+ * longhua — Male, warm and friendly
25
+ * longfei — Male, energetic
26
+ * longshu — Male, calm and measured
27
+ *
28
+ * Supported languages via language_hints:
29
+ * zh (Chinese), en (English), ja (Japanese), ko (Korean),
30
+ * yue (Cantonese), and more.
31
+ *
32
+ * Note: CosyVoice outputs MP3; we decode to PCM and resample to 16kHz for DVGateway.
33
+ *
34
+ * API Endpoint: POST https://dashscope.aliyuncs.com/api/v1/services/aigc/text2audio/generation
35
+ * Docs: https://help.aliyun.com/document_detail/2712195.html
36
+ */
37
+ import type { TtsAdapter, TtsOptions, VoiceInfo } from 'dvgateway-sdk';
38
+ export type CosyVoiceVoice = 'longxiaochun' | 'longxiaochun_v2' | 'longyue' | 'longwan' | 'longjing' | 'longshuo' | 'longhua' | 'longfei' | 'longshu'; // voice preset ids; the same nine names are described under "Voice options" in the file header
39
+ export type CosyVoiceModel = 'cosyvoice-v3.5-plus' | 'cosyvoice-v3.5-flash'; // model ids; described under "Model Reference" in the file header
40
+ export interface CosyVoiceAdapterOptions { // Constructor options for CosyVoiceAdapter; only apiKey is required.
41
+ apiKey: string; // API key for the DashScope API named in the file header — NOTE(review): how the key is transmitted is not visible in this declaration file; confirm against the implementation
42
+ /** Voice preset (default: "longxiaochun") — see voice options above */
43
+ voice?: CosyVoiceVoice;
44
+ /**
45
+ * Model (default: "cosyvoice-v3.5-plus")
46
+ * cosyvoice-v3.5-plus — Highest quality, best prosody
47
+ * cosyvoice-v3.5-flash — Low latency, real-time optimized
48
+ */
49
+ model?: CosyVoiceModel;
50
+ /**
51
+ * Language hints for multilingual synthesis.
52
+ * Array of language codes, e.g. ["ko"] for Korean, ["zh"] for Chinese.
53
+ * When not specified, the model auto-detects the language.
54
+ * The file header refers to the corresponding request parameter as `language_hints`.
55
+ */
55
+ language?: string[];
56
+ /**
57
+ * Audio sample rate in Hz (default: 16000).
58
+ * Supported: 8000, 16000, 22050, 24000, 44100, 48000
59
+ * NOTE(review): the file header says audio is resampled to 16kHz for DVGateway; whether a non-default value changes the adapter's output rate is not visible here — confirm.
60
+ */
60
+ sampleRate?: number;
61
+ }
62
+ /** Available CosyVoice voice presets with descriptions; each entry has an `id` (a CosyVoiceVoice name) and a human-readable `label` written in Korean */
63
+ export declare const COSYVOICE_VOICES: ReadonlyArray<VoiceInfo>;
64
+ export declare class CosyVoiceAdapter implements TtsAdapter { // TTS adapter for Alibaba CosyVoice via the DashScope API (per the file header)
65
+ private readonly opts; // NOTE(review): presumably the constructor options with defaults applied — implementation not visible here; confirm
66
+ constructor(opts: CosyVoiceAdapterOptions);
67
+ synthesize(text: string, options?: TtsOptions): AsyncIterable<Buffer>; // yields audio as Buffers; the file header describes them as 16kHz slin16 PCM chunks — which TtsOptions fields are honoured is not visible here, TODO confirm
68
+ }
69
+ //# sourceMappingURL=cosyvoice-tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosyvoice-tts.d.ts","sourceRoot":"","sources":["../../src/tts/cosyvoice-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAEvE,MAAM,MAAM,cAAc,GACtB,cAAc,GACd,iBAAiB,GACjB,SAAS,GACT,SAAS,GACT,UAAU,GACV,UAAU,GACV,SAAS,GACT,SAAS,GACT,SAAS,CAAC;AAEd,MAAM,MAAM,cAAc,GAAG,qBAAqB,GAAG,sBAAsB,CAAC;AAE5E,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,CAAC;IACf,uEAAuE;IACvE,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB;;;;OAIG;IACH,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,0DAA0D;AAC1D,eAAO,MAAM,gBAAgB,EAAE,aAAa,CAAC,SAAS,CAU5C,CAAC;AAMX,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoC;gBAE7C,IAAI,EAAE,uBAAuB;IAUlC,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;CA+D7E"}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Alibaba CosyVoice TTS Adapter (DashScope API)
3
+ *
4
+ * Synthesizes text to speech using Alibaba's CosyVoice models via the DashScope API.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * Features:
8
+ * - High-quality Chinese and multilingual TTS via CosyVoice
9
+ * - MP3 → 16kHz PCM conversion via ffmpeg subprocess
10
+ * - Supports 11 languages including Korean via language_hints parameter
11
+ * - Multiple voice presets optimized for different use cases
12
+ *
13
+ * Model Reference (2026-03):
14
+ * cosyvoice-v3.5-plus — Highest quality, best prosody and naturalness (default)
15
+ * cosyvoice-v3.5-flash — Low latency, optimized for real-time applications
16
+ *
17
+ * Voice options:
18
+ * longxiaochun — Recommended, versatile female voice (default)
19
+ * longxiaochun_v2 — Updated version of longxiaochun
20
+ * longyue — Female, gentle tone
21
+ * longwan — Female, warm tone
22
+ * longjing — Female, clear and professional
23
+ * longshuo — Male, steady and authoritative
24
+ * longhua — Male, warm and friendly
25
+ * longfei — Male, energetic
26
+ * longshu — Male, calm and measured
27
+ *
28
+ * Supported languages via language_hints:
29
+ * zh (Chinese), en (English), ja (Japanese), ko (Korean),
30
+ * yue (Cantonese), and more.
31
+ *
32
+ * Note: CosyVoice outputs MP3; we decode to PCM and resample to 16kHz for DVGateway.
33
+ *
34
+ * API Endpoint: POST https://dashscope.aliyuncs.com/api/v1/services/aigc/text2audio/generation
35
+ * Docs: https://help.aliyun.com/document_detail/2712195.html
36
+ */
37
+ import { spawn } from 'child_process';
38
+ /** Available CosyVoice voice presets with descriptions */
39
+ export const COSYVOICE_VOICES = [
40
+ { id: 'longxiaochun', label: 'longxiaochun (여성, 추천)' },
41
+ { id: 'longxiaochun_v2', label: 'longxiaochun_v2 (여성, 업데이트)' },
42
+ { id: 'longyue', label: 'longyue (여성, 부드러운 톤)' },
43
+ { id: 'longwan', label: 'longwan (여성, 따뜻한 톤)' },
44
+ { id: 'longjing', label: 'longjing (여성, 전문적)' },
45
+ { id: 'longshuo', label: 'longshuo (남성, 안정적)' },
46
+ { id: 'longhua', label: 'longhua (남성, 친근한)' },
47
+ { id: 'longfei', label: 'longfei (남성, 에너지)' },
48
+ { id: 'longshu', label: 'longshu (남성, 차분한)' },
49
+ ];
50
/** DashScope text-to-audio endpoint that every synthesis request is POSTed to. */
const DASHSCOPE_TTS_URL = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text2audio/generation';
/** Target sample rate (Hz) passed to the MP3 → PCM conversion step. */
const DV_SAMPLE_RATE = 16000;
/** Size in bytes of each yielded PCM frame. */
const PCM_CHUNK_BYTES = 640; // 20ms at 16kHz, 16-bit PCM
/**
 * Text-to-speech adapter for Alibaba CosyVoice (DashScope API).
 *
 * `synthesize()` requests MP3 audio from DashScope, converts it to
 * 16-bit signed little-endian PCM via `mp3ToPcm`, and yields the result in
 * fixed-size frames.
 */
export class CosyVoiceAdapter {
    /** Constructor options with every optional field resolved to its default. */
    opts;
    /**
     * @param {object} opts Adapter options.
     * @param {string} opts.apiKey DashScope API key, sent as a Bearer token.
     * @param {string} [opts.voice] Voice preset (default: "longxiaochun").
     * @param {string} [opts.model] Model name (default: "cosyvoice-v3.5-plus").
     * @param {string[]} [opts.language] Language hints (default: none).
     * @param {number} [opts.sampleRate] Sample rate requested from the API
     *   (default: 16000).
     */
    constructor(opts) {
        this.opts = {
            apiKey: opts.apiKey,
            voice: opts.voice ?? 'longxiaochun',
            model: opts.model ?? 'cosyvoice-v3.5-plus',
            language: opts.language ?? [],
            sampleRate: opts.sampleRate ?? DV_SAMPLE_RATE,
        };
    }
    /**
     * Synthesize `text` and yield PCM frames of at most PCM_CHUNK_BYTES bytes.
     *
     * @param {string} text Text to speak. An empty or whitespace-only string
     *   yields nothing and performs no network request.
     * @param {object} [options] Per-call overrides.
     * @param {string} [options.voiceId] Voice to use instead of the configured one.
     * @param {string} [options.language] Language tag such as "ko-KR"; only the
     *   part before the first "-" is forwarded as a language hint.
     * @returns {AsyncGenerator<Buffer>} PCM frames; the last one may be shorter.
     * @throws {Error} When the API answers with a non-2xx status, when a JSON
     *   response carries no audio, or when the PCM conversion fails.
     */
    async *synthesize(text, options) {
        // Nothing to speak: skip the API round-trip and the ffmpeg subprocess.
        if (typeof text === 'string' && text.trim() === '') {
            return;
        }
        const voice = options?.voiceId ?? this.opts.voice;
        const language = options?.language
            ? [options.language.split('-')[0]] // "ko-KR" → ["ko"]
            : this.opts.language;
        // Build DashScope request body; language_hints is omitted when empty.
        const requestBody = {
            model: this.opts.model,
            input: {
                text,
                voice,
                ...(language.length > 0 ? { language_hints: language } : {}),
            },
            parameters: {
                format: 'mp3',
                sample_rate: this.opts.sampleRate,
            },
        };
        const response = await fetch(DASHSCOPE_TTS_URL, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${this.opts.apiKey}`,
                'Content-Type': 'application/json',
                'Accept': '*/*',
            },
            body: JSON.stringify(requestBody),
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`CosyVoice API error ${response.status}: ${errorText}`);
        }
        // The response is either JSON (base64 audio) or a raw binary MP3 body.
        const contentType = response.headers.get('content-type') ?? '';
        let mp3Buffer;
        if (contentType.includes('application/json')) {
            const json = await response.json();
            if (!json.output?.audio) {
                throw new Error(`CosyVoice API returned no audio data: ${json.message ?? 'unknown error'}`);
            }
            mp3Buffer = Buffer.from(json.output.audio, 'base64');
        }
        else {
            mp3Buffer = Buffer.from(await response.arrayBuffer());
        }
        // Convert MP3 → 16kHz 16-bit signed LE PCM via ffmpeg
        const pcmBuffer = await mp3ToPcm(mp3Buffer, DV_SAMPLE_RATE);
        // Yield PCM chunks (20ms frames); subarray() shares memory with pcmBuffer.
        for (let offset = 0; offset < pcmBuffer.length; offset += PCM_CHUNK_BYTES) {
            yield pcmBuffer.subarray(offset, Math.min(offset + PCM_CHUNK_BYTES, pcmBuffer.length));
        }
    }
}
122
/**
 * Convert MP3 audio buffer to raw 16-bit signed LE PCM at the target sample rate
 * using ffmpeg as a subprocess.
 *
 * The MP3 bytes are written to ffmpeg's stdin and the mono PCM is collected
 * from its stdout. Anything ffmpeg prints on stderr is logged but does not by
 * itself fail the conversion; only a non-zero exit or a spawn failure does.
 *
 * @param {Buffer} mp3Data Encoded MP3 audio.
 * @param {number} sampleRate Output sample rate in Hz.
 * @returns {Promise<Buffer>} Raw s16le mono PCM.
 * @throws {Error} (as a rejection) when ffmpeg cannot be spawned or exits
 *   with a non-zero code.
 */
function mp3ToPcm(mp3Data, sampleRate) {
    return new Promise((resolve, reject) => {
        const ffmpeg = spawn('ffmpeg', [
            '-i', 'pipe:0', // Read from stdin
            '-f', 's16le', // Output format: signed 16-bit little-endian
            '-acodec', 'pcm_s16le',
            '-ar', String(sampleRate),
            '-ac', '1', // Mono
            '-loglevel', 'error',
            'pipe:1', // Write to stdout
        ]);
        const chunks = [];
        // Both 'error' and 'close' can fire for one child; settle only once.
        let settled = false;
        const settle = (fn, value) => {
            if (settled) {
                return;
            }
            settled = true;
            fn(value);
        };
        // Kept so that a failed stdin write is reported as the cause of the
        // eventual rejection instead of being lost.
        let stdinError;
        ffmpeg.stdout.on('data', (chunk) => {
            chunks.push(chunk);
        });
        ffmpeg.stderr.on('data', (data) => {
            // Log ffmpeg errors but don't reject — some warnings are non-fatal
            const msg = data.toString().trim();
            if (msg) {
                console.error(`[TTS][CosyVoice] ffmpeg: ${msg}`);
            }
        });
        // Without this listener a write to a closed pipe (ffmpeg missing, or it
        // exited before consuming all input) is an unhandled 'error' event that
        // takes down the whole process. The failure itself is surfaced through
        // the 'error' / 'close' handlers below.
        ffmpeg.stdin.on('error', (err) => {
            stdinError = err;
        });
        ffmpeg.on('close', (code) => {
            if (code !== 0) {
                settle(reject, new Error(`ffmpeg exited with code ${code}`, stdinError ? { cause: stdinError } : undefined));
                return;
            }
            settle(resolve, Buffer.concat(chunks));
        });
        ffmpeg.on('error', (err) => {
            settle(reject, new Error(`ffmpeg spawn error: ${err.message}`, { cause: err }));
        });
        // Write MP3 data to ffmpeg stdin and close
        ffmpeg.stdin.end(mp3Data);
    });
}
163
+ //# sourceMappingURL=cosyvoice-tts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosyvoice-tts.js","sourceRoot":"","sources":["../../src/tts/cosyvoice-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAuCtC,0DAA0D;AAC1D,MAAM,CAAC,MAAM,gBAAgB,GAA6B;IACxD,EAAE,EAAE,EAAE,cAAc,EAAK,KAAK,EAAE,uBAAuB,EAAE;IACzD,EAAE,EAAE,EAAE,iBAAiB,EAAE,KAAK,EAAE,4BAA4B,EAAE;IAC9D,EAAE,EAAE,EAAE,SAAS,EAAU,KAAK,EAAE,sBAAsB,EAAE;IACxD,EAAE,EAAE,EAAE,SAAS,EAAU,KAAK,EAAE,qBAAqB,EAAE;IACvD,EAAE,EAAE,EAAE,UAAU,EAAS,KAAK,EAAE,oBAAoB,EAAE;IACtD,EAAE,EAAE,EAAE,UAAU,EAAS,KAAK,EAAE,oBAAoB,EAAE;IACtD,EAAE,EAAE,EAAE,SAAS,EAAU,KAAK,EAAE,mBAAmB,EAAE;IACrD,EAAE,EAAE,EAAE,SAAS,EAAU,KAAK,EAAE,mBAAmB,EAAE;IACrD,EAAE,EAAE,EAAE,SAAS,EAAU,KAAK,EAAE,mBAAmB,EAAE;CAC7C,CAAC;AAEX,MAAM,iBAAiB,GAAG,2EAA2E,CAAC;AACtG,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,eAAe,GAAG,GAAG,CAAC,CAAC,4BAA4B;AAEzD,MAAM,OAAO,gBAAgB;IACV,IAAI,CAAoC;IAEzD,YAAY,IAA6B;QACvC,IAAI,CAAC,IAAI,GAAG;YACV,MAAM,EAAM,IAAI,CAAC,MAAM;YACvB,KAAK,EAAO,IAAI,CAAC,KAAK,IAAS,cAAc;YAC7C,KAAK,EAAO,IAAI,CAAC,KAAK,IAAS,qBAAqB;YACpD,QAAQ,EAAI,IAAI,CAAC,QAAQ,IAAM,EAAE;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,cAAc;SAC9C,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,UAAU,CAAC,IAAY,EAAE,OAAoB;QAClD,MAAM,KAAK,GAAI,OAAO,EAAE,OAAsC,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;QAClF,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ;YAChC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,mBAAmB;YACvD,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;QAEvB,+BAA+B;QAC/B,MAAM,WAAW,GAA4B;YAC3C,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK;YACtB,KAAK,EAAE;gBACL,IAAI;gBACJ,KAAK;gBACL,GAAG,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC7D;YACD,UAAU,EAAE;gBACV,MAAM,EAAE,KAAK;gBACb,WAAW,EAAE,IAAI,CAAC,IAAI,CAAC,UAAU;aAClC;SACF,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,iBAAiB,EAAE;YAC9C,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,eAAe,EAAE,UAAU,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;gBAC7C,cAAc,EAAE,kBAAkB;gBAClC,QAAQ,EAAE,KAAK;aAChB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;SA
ClC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC,CAAC;QAC1E,CAAC;QAED,6DAA6D;QAC7D,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;QAC/D,IAAI,SAAiB,CAAC;QAEtB,IAAI,WAAW,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAC7C,0CAA0C;YAC1C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAuD,CAAC;YACxF,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC;gBACxB,MAAM,IAAI,KAAK,CAAC,yCAAyC,IAAI,CAAC,OAAO,IAAI,eAAe,EAAE,CAAC,CAAC;YAC9F,CAAC;YACD,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;aAAM,CAAC;YACN,sBAAsB;YACtB,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YACjD,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvC,CAAC;QAED,sDAAsD;QACtD,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QAE5D,iCAAiC;QACjC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,OAAO,MAAM,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,eAAe,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;YACjE,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YACtC,MAAM,GAAG,GAAG,CAAC;QACf,CAAC;IACH,CAAC;CACF;AAED;;;GAGG;AACH,SAAS,QAAQ,CAAC,OAAe,EAAE,UAAkB;IACnD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE;YAC7B,IAAI,EAAE,QAAQ,EAAS,kBAAkB;YACzC,IAAI,EAAE,OAAO,EAAU,6CAA6C;YACpE,SAAS,EAAE,WAAW;YACtB,KAAK,EAAE,MAAM,CAAC,UAAU,CAAC;YACzB,KAAK,EAAE,GAAG,EAAa,OAAO;YAC9B,WAAW,EAAE,OAAO;YACpB,QAAQ,EAAe,kBAAkB;SAC1C,CAAC,CAAC;QAEH,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAY,EAAE,EAAE;YACxC,mEAAmE;YACnE,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;YACnC,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,CAAC,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;YACnD,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YAC1B,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,MAA
M,CAAC,IAAI,KAAK,CAAC,2BAA2B,IAAI,EAAE,CAAC,CAAC,CAAC;gBACrD,OAAO;YACT,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACzB,MAAM,CAAC,IAAI,KAAK,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,2CAA2C;QAC3C,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Google Gemini TTS Adapter
3
+ *
4
+ * Uses Google Cloud Text-to-Speech API with Gemini models for speech synthesis.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * The API returns base64-encoded MP3 audio which is decoded and converted
8
+ * to 16kHz 16-bit PCM via ffmpeg subprocess for DVGateway compatibility.
9
+ *
10
+ * Model Reference (2026-03):
11
+ * gemini-2.5-flash-tts — Low-latency, optimized for real-time voice (default)
12
+ * gemini-2.5-pro-tts — Highest quality, richer prosody and expressiveness
13
+ *
14
+ * Voice options (30 voices):
15
+ * Kore — Recommended female voice (natural, warm)
16
+ * Puck — Recommended male voice (clear, friendly)
17
+ * Aoede — Melodic, expressive female
18
+ * Charon — Deep, authoritative male
19
+ * Fenrir — Strong, commanding male
20
+ * Leda — Soft, gentle female
21
+ * Orus — Calm, measured male
22
+ * Zephyr — Light, airy, gender-neutral
23
+ * Achernar — Crisp, professional
24
+ * Achird — Warm, conversational
25
+ * Algenib — Bright, energetic
26
+ * Algieba — Smooth, refined
27
+ * Alnilam — Clear, precise
28
+ * Autonoe — Expressive, dynamic
29
+ * Callirhoe — Graceful, flowing
30
+ * Despina — Cheerful, lively
31
+ * Enceladus — Rich, resonant
32
+ * Erinome — Gentle, soothing
33
+ * Gacrux — Steady, reliable
34
+ * Iapetus — Bold, confident
35
+ * Laomedeia — Elegant, poised
36
+ * Pulcherrima — Beautiful, melodic
37
+ * Rasalgethi — Warm, inviting
38
+ * Sadachbia — Calm, reassuring
39
+ * Sadaltager — Neutral, versatile
40
+ * Schedar — Crisp, articulate
41
+ * Sulafar — Deep, thoughtful
42
+ * Umbriel — Soft, subtle
43
+ * Vindemiatrix — Bright, clear
44
+ * Zubenelgenubi — Unique, distinctive
45
+ *
46
+ * Supports 24+ languages including Korean (ko-KR).
47
+ * Natural language prompts can control style, tone, pace, and emotion
48
+ * via the `prompt` field in the input object.
49
+ *
50
+ * API Endpoint: POST https://texttospeech.googleapis.com/v1/text:synthesize?key={apiKey}
51
+ * Docs: https://cloud.google.com/text-to-speech/docs/reference/rest
52
+ */
53
+ import type { TtsAdapter, TtsOptions, VoiceInfo } from 'dvgateway-sdk';
54
+ export type GeminiTtsVoice = 'Kore' | 'Puck' | 'Aoede' | 'Charon' | 'Fenrir' | 'Leda' | 'Orus' | 'Zephyr' | 'Achernar' | 'Achird' | 'Algenib' | 'Algieba' | 'Alnilam' | 'Autonoe' | 'Callirhoe' | 'Despina' | 'Enceladus' | 'Erinome' | 'Gacrux' | 'Iapetus' | 'Laomedeia' | 'Pulcherrima' | 'Rasalgethi' | 'Sadachbia' | 'Sadaltager' | 'Schedar' | 'Sulafar' | 'Umbriel' | 'Vindemiatrix' | 'Zubenelgenubi';
55
+ export type GeminiTtsModel = 'gemini-2.5-flash-tts' | 'gemini-2.5-pro-tts';
56
+ export interface GeminiTtsAdapterOptions {
57
+ /** Google Cloud API key */
58
+ apiKey: string;
59
+ /** Voice name (default: "Kore") — see voice options above */
60
+ voice?: GeminiTtsVoice;
61
+ /**
62
+ * Model (default: "gemini-2.5-flash-tts")
63
+ * gemini-2.5-flash-tts — Low-latency, real-time optimized
64
+ * gemini-2.5-pro-tts — Highest quality, richer prosody
65
+ */
66
+ model?: GeminiTtsModel;
67
+ /**
68
+ * BCP-47 language code (default: "ko-KR")
69
+ * Examples: "en-US", "ja-JP", "zh-CN", "ko-KR"
70
+ */
71
+ languageCode?: string;
72
+ /**
73
+ * Natural language prompt for style control.
74
+ * Controls tone, pace, emotion, and speaking style.
75
+ * E.g. "Speak warmly and calmly, with gentle pauses between sentences."
76
+ */
77
+ prompt?: string;
78
+ }
79
+ /** All available Gemini TTS voices with descriptive labels */
80
+ export declare const GEMINI_TTS_VOICES: ReadonlyArray<VoiceInfo>;
81
+ export declare class GeminiTtsAdapter implements TtsAdapter {
82
+ private readonly opts;
83
+ constructor(opts: GeminiTtsAdapterOptions);
84
+ synthesize(text: string, options?: TtsOptions): AsyncIterable<Buffer>;
85
+ }
86
+ //# sourceMappingURL=gemini-tts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini-tts.d.ts","sourceRoot":"","sources":["../../src/tts/gemini-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmDG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAEvE,MAAM,MAAM,cAAc,GACtB,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAC5E,UAAU,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,GACrE,WAAW,GAAG,SAAS,GAAG,WAAW,GAAG,SAAS,GAAG,QAAQ,GAAG,SAAS,GACxE,WAAW,GAAG,aAAa,GAAG,YAAY,GAAG,WAAW,GAAG,YAAY,GACvE,SAAS,GAAG,SAAS,GAAG,SAAS,GAAG,cAAc,GAAG,eAAe,CAAC;AAEzE,MAAM,MAAM,cAAc,GAAG,sBAAsB,GAAG,oBAAoB,CAAC;AAE3E,MAAM,WAAW,uBAAuB;IACtC,2BAA2B;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,6DAA6D;IAC7D,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB;;;;OAIG;IACH,KAAK,CAAC,EAAE,cAAc,CAAC;IACvB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,8DAA8D;AAC9D,eAAO,MAAM,iBAAiB,EAAE,aAAa,CAAC,SAAS,CA+B7C,CAAC;AAMX,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAA0E;gBAEnF,IAAI,EAAE,uBAAuB;IAUlC,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;CAuD7E"}
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Google Gemini TTS Adapter
3
+ *
4
+ * Uses Google Cloud Text-to-Speech API with Gemini models for speech synthesis.
5
+ * Returns 16kHz slin16 PCM chunks for direct injection into DVGateway.
6
+ *
7
+ * The API returns base64-encoded MP3 audio which is decoded and converted
8
+ * to 16kHz 16-bit PCM via ffmpeg subprocess for DVGateway compatibility.
9
+ *
10
+ * Model Reference (2026-03):
11
+ * gemini-2.5-flash-tts — Low-latency, optimized for real-time voice (default)
12
+ * gemini-2.5-pro-tts — Highest quality, richer prosody and expressiveness
13
+ *
14
+ * Voice options (30 voices):
15
+ * Kore — Recommended female voice (natural, warm)
16
+ * Puck — Recommended male voice (clear, friendly)
17
+ * Aoede — Melodic, expressive female
18
+ * Charon — Deep, authoritative male
19
+ * Fenrir — Strong, commanding male
20
+ * Leda — Soft, gentle female
21
+ * Orus — Calm, measured male
22
+ * Zephyr — Light, airy, gender-neutral
23
+ * Achernar — Crisp, professional
24
+ * Achird — Warm, conversational
25
+ * Algenib — Bright, energetic
26
+ * Algieba — Smooth, refined
27
+ * Alnilam — Clear, precise
28
+ * Autonoe — Expressive, dynamic
29
+ * Callirhoe — Graceful, flowing
30
+ * Despina — Cheerful, lively
31
+ * Enceladus — Rich, resonant
32
+ * Erinome — Gentle, soothing
33
+ * Gacrux — Steady, reliable
34
+ * Iapetus — Bold, confident
35
+ * Laomedeia — Elegant, poised
36
+ * Pulcherrima — Beautiful, melodic
37
+ * Rasalgethi — Warm, inviting
38
+ * Sadachbia — Calm, reassuring
39
+ * Sadaltager — Neutral, versatile
40
+ * Schedar — Crisp, articulate
41
+ * Sulafar — Deep, thoughtful
42
+ * Umbriel — Soft, subtle
43
+ * Vindemiatrix — Bright, clear
44
+ * Zubenelgenubi — Unique, distinctive
45
+ *
46
+ * Supports 24+ languages including Korean (ko-KR).
47
+ * Natural language prompts can control style, tone, pace, and emotion
48
+ * via the `prompt` field in the input object.
49
+ *
50
+ * API Endpoint: POST https://texttospeech.googleapis.com/v1/text:synthesize?key={apiKey}
51
+ * Docs: https://cloud.google.com/text-to-speech/docs/reference/rest
52
+ */
53
+ import { spawn } from 'node:child_process';
54
+ /** All available Gemini TTS voices with descriptive labels */
55
+ export const GEMINI_TTS_VOICES = [
56
+ { id: 'Kore', label: 'Kore (recommended female, natural)' },
57
+ { id: 'Puck', label: 'Puck (recommended male, clear)' },
58
+ { id: 'Aoede', label: 'Aoede (melodic, expressive)' },
59
+ { id: 'Charon', label: 'Charon (deep, authoritative)' },
60
+ { id: 'Fenrir', label: 'Fenrir (strong, commanding)' },
61
+ { id: 'Leda', label: 'Leda (soft, gentle)' },
62
+ { id: 'Orus', label: 'Orus (calm, measured)' },
63
+ { id: 'Zephyr', label: 'Zephyr (light, airy)' },
64
+ { id: 'Achernar', label: 'Achernar (crisp, professional)' },
65
+ { id: 'Achird', label: 'Achird (warm, conversational)' },
66
+ { id: 'Algenib', label: 'Algenib (bright, energetic)' },
67
+ { id: 'Algieba', label: 'Algieba (smooth, refined)' },
68
+ { id: 'Alnilam', label: 'Alnilam (clear, precise)' },
69
+ { id: 'Autonoe', label: 'Autonoe (expressive, dynamic)' },
70
+ { id: 'Callirhoe', label: 'Callirhoe (graceful, flowing)' },
71
+ { id: 'Despina', label: 'Despina (cheerful, lively)' },
72
+ { id: 'Enceladus', label: 'Enceladus (rich, resonant)' },
73
+ { id: 'Erinome', label: 'Erinome (gentle, soothing)' },
74
+ { id: 'Gacrux', label: 'Gacrux (steady, reliable)' },
75
+ { id: 'Iapetus', label: 'Iapetus (bold, confident)' },
76
+ { id: 'Laomedeia', label: 'Laomedeia (elegant, poised)' },
77
+ { id: 'Pulcherrima', label: 'Pulcherrima (beautiful, melodic)' },
78
+ { id: 'Rasalgethi', label: 'Rasalgethi (warm, inviting)' },
79
+ { id: 'Sadachbia', label: 'Sadachbia (calm, reassuring)' },
80
+ { id: 'Sadaltager', label: 'Sadaltager (neutral, versatile)' },
81
+ { id: 'Schedar', label: 'Schedar (crisp, articulate)' },
82
+ { id: 'Sulafar', label: 'Sulafar (deep, thoughtful)' },
83
+ { id: 'Umbriel', label: 'Umbriel (soft, subtle)' },
84
+ { id: 'Vindemiatrix', label: 'Vindemiatrix (bright, clear)' },
85
+ { id: 'Zubenelgenubi', label: 'Zubenelgenubi (unique, distinctive)' },
86
+ ];
87
/** Google Cloud Text-to-Speech synthesize endpoint (API key goes in `?key=`). */
const API_BASE_URL = 'https://texttospeech.googleapis.com/v1/text:synthesize';
/** Target sample rate (Hz) of the PCM produced by the conversion step. */
const DV_SAMPLE_RATE = 16000;
/** Size in bytes of each yielded PCM frame. */
const PCM_CHUNK_BYTES = 640; // 20ms at 16kHz, 16-bit PCM (16000 * 2 * 0.02)
/**
 * Text-to-speech adapter for Gemini voices on the Google Cloud
 * Text-to-Speech API.
 *
 * `synthesize()` requests base64-encoded MP3, converts it to PCM via
 * `mp3ToPcm16k`, and yields the result in fixed-size frames.
 */
export class GeminiTtsAdapter {
    /** Constructor options with defaults applied (`prompt` may be undefined). */
    opts;
    /**
     * @param {object} opts Adapter options.
     * @param {string} opts.apiKey Google Cloud API key.
     * @param {string} [opts.voice] Voice name (default: "Kore").
     * @param {string} [opts.model] Model name (default: "gemini-2.5-flash-tts").
     * @param {string} [opts.languageCode] BCP-47 language code (default: "ko-KR").
     * @param {string} [opts.prompt] Natural-language style prompt, sent only
     *   when non-empty.
     */
    constructor(opts) {
        this.opts = {
            apiKey: opts.apiKey,
            voice: opts.voice ?? 'Kore',
            model: opts.model ?? 'gemini-2.5-flash-tts',
            languageCode: opts.languageCode ?? 'ko-KR',
            prompt: opts.prompt,
        };
    }
    /**
     * Synthesize `text` and yield PCM frames of at most PCM_CHUNK_BYTES bytes.
     *
     * @param {string} text Text to speak. An empty or whitespace-only string
     *   yields nothing and performs no network request.
     * @param {object} [options] Per-call overrides.
     * @param {string} [options.voiceId] Voice to use instead of the configured one.
     * @returns {AsyncGenerator<Buffer>} PCM frames; the last one may be shorter.
     * @throws {Error} When the API answers with a non-2xx status, returns no
     *   `audioContent`, or when the PCM conversion fails.
     */
    async *synthesize(text, options) {
        // Nothing to speak: skip the API round-trip and the ffmpeg subprocess.
        if (typeof text === 'string' && text.trim() === '') {
            return;
        }
        const voice = options?.voiceId ?? this.opts.voice;
        // Build request body for Google Cloud Text-to-Speech API
        const input = { text };
        if (this.opts.prompt) {
            input.prompt = this.opts.prompt;
        }
        const requestBody = {
            input,
            voice: {
                languageCode: this.opts.languageCode,
                name: voice,
                modelName: this.opts.model,
            },
            audioConfig: {
                audioEncoding: 'MP3',
            },
        };
        // Let URLSearchParams percent-encode the key: raw interpolation breaks
        // the query string when the key contains characters such as & # + or =.
        const url = new URL(API_BASE_URL);
        url.searchParams.set('key', this.opts.apiKey);
        const response = await fetch(url.href, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(requestBody),
        });
        if (!response.ok) {
            const errorBody = await response.text();
            throw new Error(`Gemini TTS API error (${response.status}): ${errorBody}`);
        }
        const data = (await response.json());
        if (!data.audioContent) {
            throw new Error('Gemini TTS API returned empty audioContent');
        }
        // Decode base64 MP3 audio
        const mp3Buffer = Buffer.from(data.audioContent, 'base64');
        // Convert MP3 to 16kHz 16-bit signed little-endian PCM via ffmpeg
        const pcmBuffer = await mp3ToPcm16k(mp3Buffer);
        // Yield PCM chunks (20ms frames); subarray() shares memory with pcmBuffer.
        for (let offset = 0; offset < pcmBuffer.length; offset += PCM_CHUNK_BYTES) {
            yield pcmBuffer.subarray(offset, Math.min(offset + PCM_CHUNK_BYTES, pcmBuffer.length));
        }
    }
}
146
/**
 * Convert MP3 audio buffer to 16kHz 16-bit signed little-endian PCM
 * using ffmpeg subprocess.
 *
 * The MP3 bytes are written to ffmpeg's stdin and the mono PCM is collected
 * from its stdout. ffmpeg runs with `-loglevel error`, so the stderr text
 * embedded in a rejection contains only real error lines rather than the
 * startup banner and progress output.
 *
 * @param {Buffer} mp3Data Encoded MP3 audio.
 * @param {number} [sampleRate] Output sample rate in Hz; defaults to
 *   DV_SAMPLE_RATE, which is what every existing caller gets.
 * @returns {Promise<Buffer>} Raw s16le mono PCM.
 * @throws {Error} (as a rejection) when ffmpeg cannot be spawned or exits
 *   with a non-zero code.
 */
function mp3ToPcm16k(mp3Data, sampleRate = DV_SAMPLE_RATE) {
    return new Promise((resolve, reject) => {
        const ffmpeg = spawn('ffmpeg', [
            '-i', 'pipe:0', // Read from stdin
            '-f', 's16le', // Output format: signed 16-bit little-endian
            '-ar', String(sampleRate), // Sample rate: 16kHz by default
            '-ac', '1', // Mono channel
            '-acodec', 'pcm_s16le', // PCM codec
            '-loglevel', 'error', // Keep stderr limited to actual errors
            'pipe:1', // Write to stdout
        ], {
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        const chunks = [];
        let stderrOutput = '';
        // Both 'error' and 'close' can fire for one child; settle only once.
        let settled = false;
        const settle = (fn, value) => {
            if (settled) {
                return;
            }
            settled = true;
            fn(value);
        };
        // Kept so that a failed stdin write is reported as the cause of the
        // eventual rejection instead of being lost.
        let stdinError;
        ffmpeg.stdout.on('data', (chunk) => {
            chunks.push(chunk);
        });
        ffmpeg.stderr.on('data', (data) => {
            stderrOutput += data.toString();
        });
        // Without this listener a write to a closed pipe (ffmpeg missing, or it
        // exited before consuming all input) is an unhandled 'error' event that
        // takes down the whole process. The failure itself is surfaced through
        // the 'error' / 'close' handlers below.
        ffmpeg.stdin.on('error', (err) => {
            stdinError = err;
        });
        ffmpeg.on('close', (code) => {
            if (code !== 0) {
                settle(reject, new Error(`ffmpeg MP3→PCM conversion failed (exit code ${code}): ${stderrOutput}`, stdinError ? { cause: stdinError } : undefined));
                return;
            }
            settle(resolve, Buffer.concat(chunks));
        });
        ffmpeg.on('error', (err) => {
            settle(reject, new Error(`Failed to spawn ffmpeg: ${err.message}`, { cause: err }));
        });
        // Write MP3 data to ffmpeg stdin and close
        ffmpeg.stdin.end(mp3Data);
    });
}
186
+ //# sourceMappingURL=gemini-tts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini-tts.js","sourceRoot":"","sources":["../../src/tts/gemini-tts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmDG;AAEH,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAoC3C,8DAA8D;AAC9D,MAAM,CAAC,MAAM,iBAAiB,GAA6B;IACzD,EAAE,EAAE,EAAE,MAAM,EAAW,KAAK,EAAE,oCAAoC,EAAE;IACpE,EAAE,EAAE,EAAE,MAAM,EAAW,KAAK,EAAE,gCAAgC,EAAE;IAChE,EAAE,EAAE,EAAE,OAAO,EAAU,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,QAAQ,EAAS,KAAK,EAAE,8BAA8B,EAAE;IAC9D,EAAE,EAAE,EAAE,QAAQ,EAAS,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,MAAM,EAAW,KAAK,EAAE,qBAAqB,EAAE;IACrD,EAAE,EAAE,EAAE,MAAM,EAAW,KAAK,EAAE,uBAAuB,EAAE;IACvD,EAAE,EAAE,EAAE,QAAQ,EAAS,KAAK,EAAE,sBAAsB,EAAE;IACtD,EAAE,EAAE,EAAE,UAAU,EAAO,KAAK,EAAE,gCAAgC,EAAE;IAChE,EAAE,EAAE,EAAE,QAAQ,EAAS,KAAK,EAAE,+BAA+B,EAAE;IAC/D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,2BAA2B,EAAE;IAC3D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,0BAA0B,EAAE;IAC1D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,+BAA+B,EAAE;IAC/D,EAAE,EAAE,EAAE,WAAW,EAAM,KAAK,EAAE,+BAA+B,EAAE;IAC/D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,4BAA4B,EAAE;IAC5D,EAAE,EAAE,EAAE,WAAW,EAAM,KAAK,EAAE,4BAA4B,EAAE;IAC5D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,4BAA4B,EAAE;IAC5D,EAAE,EAAE,EAAE,QAAQ,EAAS,KAAK,EAAE,2BAA2B,EAAE;IAC3D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,2BAA2B,EAAE;IAC3D,EAAE,EAAE,EAAE,WAAW,EAAM,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,aAAa,EAAI,KAAK,EAAE,kCAAkC,EAAE;IAClE,EAAE,EAAE,EAAE,YAAY,EAAK,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,WAAW,EAAM,KAAK,EAAE,8BAA8B,EAAE;IAC9D,EAAE,EAAE,EAAE,YAAY,EAAK,KAAK,EAAE,iCAAiC,EAAE;IACjE,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,6BAA6B,EAAE;IAC7D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,4BAA4B,EAAE;IAC5D,EAAE,EAAE,EAAE,SAAS,EAAQ,KAAK,EAAE,wBAAwB,EAAE;IACxD,EAAE,EAAE,EAAE,cAAc,EAAG,KAAK,EAAE,8BAA8B,EAAE;IAC9D,EAAE,EAAE,EAAE,eAAe,EAAE,KAAK,EAAE,qCAAqC,EAAE;CAC7D,CAAC;AAEX,MAAM,YAAY,GAAG,wDAAwD,CAAC;AAC9E,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,eAAe,GAAG,GAAG,CAAC,CAAC,+CAA+C;AAE5E,MAAM,OAAO,gBAAgB;
IACV,IAAI,CAA0E;IAE/F,YAAY,IAA6B;QACvC,IAAI,CAAC,IAAI,GAAG;YACV,MAAM,EAAQ,IAAI,CAAC,MAAM;YACzB,KAAK,EAAS,IAAI,CAAC,KAAK,IAAW,MAAM;YACzC,KAAK,EAAS,IAAI,CAAC,KAAK,IAAW,sBAAsB;YACzD,YAAY,EAAE,IAAI,CAAC,YAAY,IAAI,OAAO;YAC1C,MAAM,EAAQ,IAAI,CAAC,MAAM;SAC1B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,UAAU,CAAC,IAAY,EAAE,OAAoB;QAClD,MAAM,KAAK,GAAI,OAAO,EAAE,OAAsC,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;QAElF,yDAAyD;QACzD,MAAM,KAAK,GAA2B,EAAE,IAAI,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACrB,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QAClC,CAAC;QAED,MAAM,WAAW,GAAG;YAClB,KAAK;YACL,KAAK,EAAE;gBACL,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY;gBACpC,IAAI,EAAU,KAAK;gBACnB,SAAS,EAAK,IAAI,CAAC,IAAI,CAAC,KAAK;aAC9B;YACD,WAAW,EAAE;gBACX,aAAa,EAAE,KAAK;aACrB;SACF,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,YAAY,QAAQ,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;QAEtD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAG,MAAM;YACf,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAK,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,yBAAyB,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAC1D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA6B,CAAC;QACjE,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QAED,0BAA0B;QAC1B,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QAE3D,kEAAkE;QAClE,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,SAAS,CAAC,CAAC;QAE/C,iCAAiC;QACjC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,OAAO,MAAM,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,eAAe,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;YACjE,MAAM,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YACtC,MAAM,GAAG,GAAG,CAAC;QACf,CAAC;IACH,CAAC;CACF;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,OAAe;IAClC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,EAAE;YAC7B,IAAI,EAAE,QAAQ,EAAY,kBAAkB;YAC5C,IAAI,EAAE,OAAO,EAAa,6CAA6C;YACvE,KAAK,EAAE,MAAM,CAAC,cAAc,C
AAC,EAAE,qBAAqB;YACpD,KAAK,EAAE,GAAG,EAAgB,eAAe;YACzC,SAAS,EAAE,WAAW,EAAI,YAAY;YACtC,QAAQ,EAAkB,kBAAkB;SAC7C,EAAE;YACD,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;YACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAY,EAAE,EAAE;YACxC,YAAY,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YAC1B,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,KAAK,CACd,+CAA+C,IAAI,MAAM,YAAY,EAAE,CACxE,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACzB,MAAM,CAAC,IAAI,KAAK,CAAC,2BAA2B,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC9D,CAAC,CAAC,CAAC;QAEH,2CAA2C;QAC3C,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC5B,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -2,6 +2,10 @@ export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './elevenlabs.js';
2
2
  export type { ElevenLabsAdapterOptions } from './elevenlabs.js';
3
3
  export { OpenAITtsAdapter } from './openai-tts.js';
4
4
  export type { OpenAITtsAdapterOptions, OpenAITtsVoice, OpenAITtsModel } from './openai-tts.js';
5
+ export { GeminiTtsAdapter, GEMINI_TTS_VOICES } from './gemini-tts.js';
6
+ export type { GeminiTtsAdapterOptions, GeminiTtsVoice, GeminiTtsModel } from './gemini-tts.js';
5
7
  export { CachedTtsAdapter } from './cached-tts.js';
6
8
  export type { CachedTtsAdapterOptions, WarmupEntry } from './cached-tts.js';
9
+ export { CosyVoiceAdapter, COSYVOICE_VOICES } from './cosyvoice-tts.js';
10
+ export type { CosyVoiceAdapterOptions, CosyVoiceVoice, CosyVoiceModel } from './cosyvoice-tts.js';
7
11
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tts/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAEhE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,YAAY,EAAE,uBAAuB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAE/F,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,YAAY,EAAE,uBAAuB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/tts/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAEhE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,YAAY,EAAE,uBAAuB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAE/F,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AACtE,YAAY,EAAE,uBAAuB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAE/F,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,YAAY,EAAE,uBAAuB,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAE5E,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACxE,YAAY,EAAE,uBAAuB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC"}
package/dist/tts/index.js CHANGED
@@ -1,5 +1,7 @@
1
1
  // TTS (Text-to-Speech) adapters
2
2
  export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './elevenlabs.js';
3
3
  export { OpenAITtsAdapter } from './openai-tts.js';
4
+ export { GeminiTtsAdapter, GEMINI_TTS_VOICES } from './gemini-tts.js';
4
5
  export { CachedTtsAdapter } from './cached-tts.js';
6
+ export { CosyVoiceAdapter, COSYVOICE_VOICES } from './cosyvoice-tts.js';
5
7
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tts/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAG9E,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAGnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/tts/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,iBAAiB,CAAC;AAG9E,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAGnD,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAGtE,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AAGnD,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dvgateway-adapters",
3
- "version": "1.1.4",
3
+ "version": "1.1.6",
4
4
  "description": "AI service adapters for DVGateway SDK (Deepgram, ElevenLabs, Anthropic, OpenAI, OpenAI Realtime)",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",