dvgateway-adapters 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/anthropic.d.ts +62 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +83 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/index.d.ts +5 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai-llm.d.ts +55 -0
- package/dist/llm/openai-llm.d.ts.map +1 -0
- package/dist/llm/openai-llm.js +68 -0
- package/dist/llm/openai-llm.js.map +1 -0
- package/dist/realtime/index.d.ts +3 -0
- package/dist/realtime/index.d.ts.map +1 -0
- package/dist/realtime/index.js +3 -0
- package/dist/realtime/index.js.map +1 -0
- package/dist/realtime/openai-realtime.d.ts +132 -0
- package/dist/realtime/openai-realtime.d.ts.map +1 -0
- package/dist/realtime/openai-realtime.js +261 -0
- package/dist/realtime/openai-realtime.js.map +1 -0
- package/dist/stt/deepgram.d.ts +105 -0
- package/dist/stt/deepgram.d.ts.map +1 -0
- package/dist/stt/deepgram.js +180 -0
- package/dist/stt/deepgram.js.map +1 -0
- package/dist/stt/index.d.ts +3 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +3 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/tts/cached-tts.d.ts +131 -0
- package/dist/tts/cached-tts.d.ts.map +1 -0
- package/dist/tts/cached-tts.js +231 -0
- package/dist/tts/cached-tts.js.map +1 -0
- package/dist/tts/elevenlabs.d.ts +95 -0
- package/dist/tts/elevenlabs.d.ts.map +1 -0
- package/dist/tts/elevenlabs.js +195 -0
- package/dist/tts/elevenlabs.js.map +1 -0
- package/dist/tts/index.d.ts +7 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +5 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/openai-tts.d.ts +64 -0
- package/dist/tts/openai-tts.d.ts.map +1 -0
- package/dist/tts/openai-tts.js +148 -0
- package/dist/tts/openai-tts.js.map +1 -0
- package/package.json +89 -0
package/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# dvgateway-adapters
|
|
2
|
+
|
|
3
|
+
DVGateway SDK용 AI 서비스 어댑터 패키지입니다.
|
|
4
|
+
|
|
5
|
+
## 설치
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install dvgateway-sdk dvgateway-adapters
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## 포함된 어댑터
|
|
12
|
+
|
|
13
|
+
| 분류 | 어댑터 | 서비스 |
|
|
14
|
+
|------|--------|--------|
|
|
15
|
+
| STT | `DeepgramAdapter` | Deepgram Nova-3 |
|
|
16
|
+
| TTS | `ElevenLabsAdapter` | ElevenLabs Flash v2.5 |
|
|
17
|
+
| TTS | `OpenAITtsAdapter` | OpenAI gpt-4o-mini-tts |
|
|
18
|
+
| TTS | `CachedTtsAdapter` | 디스크 기반 TTS 캐시 래퍼 |
|
|
19
|
+
| LLM | `AnthropicAdapter` | Anthropic Claude |
|
|
20
|
+
| LLM | `OpenAILlmAdapter` | OpenAI GPT |
|
|
21
|
+
| Realtime | `OpenAIRealtimeAdapter` | OpenAI Realtime (Speech-to-Speech) |
|
|
22
|
+
|
|
23
|
+
## 사용 예시
|
|
24
|
+
|
|
25
|
+
```typescript
|
|
26
|
+
import { DVGatewayClient } from 'dvgateway-sdk';
|
|
27
|
+
import { DeepgramAdapter } from 'dvgateway-adapters/stt';
|
|
28
|
+
import { AnthropicAdapter } from 'dvgateway-adapters/llm';
|
|
29
|
+
import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
|
|
30
|
+
|
|
31
|
+
const gw = new DVGatewayClient({
|
|
32
|
+
baseUrl: 'http://localhost:8080',
|
|
33
|
+
auth: { type: 'apiKey', apiKey: 'your_key' },
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
await gw.pipeline()
|
|
37
|
+
.stt(new DeepgramAdapter({ apiKey: '...', language: 'ko' }))
|
|
38
|
+
.llm(new AnthropicAdapter({ apiKey: '...', model: 'claude-sonnet-4-6' }))
|
|
39
|
+
.tts(new ElevenLabsAdapter({ apiKey: '...' }))
|
|
40
|
+
.start();
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## 라이선스
|
|
44
|
+
|
|
45
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DVGateway AI Adapters
|
|
3
|
+
*
|
|
4
|
+
* Pre-built adapters for popular AI services.
|
|
5
|
+
* Import only what you need — each adapter is tree-shakeable.
|
|
6
|
+
*
|
|
7
|
+
* Adapter overview:
|
|
8
|
+
* STT (Speech-to-Text) — Deepgram Nova-3
|
|
9
|
+
* TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS
|
|
10
|
+
* LLM (Language Model) — Anthropic Claude, OpenAI GPT
|
|
11
|
+
* Realtime (Speech-to-Speech) — OpenAI Realtime API (audio 1.5)
|
|
12
|
+
*
|
|
13
|
+
* @example Standard cascaded pipeline (STT → LLM → TTS)
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { DeepgramAdapter } from 'dvgateway-adapters/stt';
|
|
16
|
+
* import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
|
|
17
|
+
* import { AnthropicAdapter } from 'dvgateway-adapters/llm';
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @example Direct speech-to-speech (lower latency)
|
|
21
|
+
* ```typescript
|
|
22
|
+
* import { OpenAIRealtimeAdapter } from 'dvgateway-adapters/realtime';
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
export { DeepgramAdapter } from './stt/deepgram.js';
|
|
26
|
+
export type { DeepgramAdapterOptions } from './stt/deepgram.js';
|
|
27
|
+
export type { HumanVoiceOptions, SttOptions, TtsOptions, } from 'dvgateway-sdk';
|
|
28
|
+
export { HUMAN_VOICE_DEFAULTS_KO, HUMAN_VOICE_DEFAULTS_EN, } from 'dvgateway-sdk';
|
|
29
|
+
export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './tts/elevenlabs.js';
|
|
30
|
+
export type { ElevenLabsAdapterOptions } from './tts/elevenlabs.js';
|
|
31
|
+
export { OpenAITtsAdapter } from './tts/openai-tts.js';
|
|
32
|
+
export type { OpenAITtsAdapterOptions, OpenAITtsVoice, OpenAITtsModel, } from './tts/openai-tts.js';
|
|
33
|
+
export { CachedTtsAdapter } from './tts/cached-tts.js';
|
|
34
|
+
export type { CachedTtsAdapterOptions, WarmupEntry, } from './tts/cached-tts.js';
|
|
35
|
+
export { AnthropicAdapter } from './llm/anthropic.js';
|
|
36
|
+
export type { AnthropicAdapterOptions } from './llm/anthropic.js';
|
|
37
|
+
export { OpenAILlmAdapter } from './llm/openai-llm.js';
|
|
38
|
+
export type { OpenAILlmAdapterOptions } from './llm/openai-llm.js';
|
|
39
|
+
export { OpenAIRealtimeAdapter } from './realtime/openai-realtime.js';
|
|
40
|
+
export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeVoice, OpenAIRealtimeTurnDetectionMode, OpenAIRealtimeTurnDetectionOptions, RealtimeSpeechAdapter, } from './realtime/openai-realtime.js';
|
|
41
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AACjE,YAAY,EAAE,sBAAsB,EAAE,MAAO,mBAAmB,CAAC;AAGjE,YAAY,EACV,iBAAiB,EACjB,UAAU,EACV,UAAU,GACX,MAA4C,eAAe,CAAC;AAC7D,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAG7D,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAClF,YAAY,EAAE,wBAAwB,EAAE,MAAoB,qBAAqB,CAAC;AAElF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,cAAc,EACd,cAAc,GACf,MAAgD,qBAAqB,CAAC;AAEvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EACV,uBAAuB,EACvB,WAAW,GACZ,MAAgD,qBAAqB,CAAC;AAGvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AACtE,YAAY,EAAE,uBAAuB,EAAE,MAAU,oBAAoB,CAAC;AAEtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AACvE,YAAY,EAAE,uBAAuB,EAAE,MAAU,qBAAqB,CAAC;AAGvE,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC;AACjF,YAAY,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,+BAA+B,EAC/B,kCAAkC,EAClC,qBAAqB,GACtB,MAAgD,+BAA+B,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DVGateway AI Adapters
|
|
3
|
+
*
|
|
4
|
+
* Pre-built adapters for popular AI services.
|
|
5
|
+
* Import only what you need — each adapter is tree-shakeable.
|
|
6
|
+
*
|
|
7
|
+
* Adapter overview:
|
|
8
|
+
* STT (Speech-to-Text) — Deepgram Nova-3
|
|
9
|
+
* TTS (Text-to-Speech) — ElevenLabs Flash v2.5, OpenAI TTS
|
|
10
|
+
* LLM (Language Model) — Anthropic Claude, OpenAI GPT
|
|
11
|
+
* Realtime (Speech-to-Speech) — OpenAI Realtime API (audio 1.5)
|
|
12
|
+
*
|
|
13
|
+
* @example Standard cascaded pipeline (STT → LLM → TTS)
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { DeepgramAdapter } from 'dvgateway-adapters/stt';
|
|
16
|
+
* import { ElevenLabsAdapter } from 'dvgateway-adapters/tts';
|
|
17
|
+
* import { AnthropicAdapter } from 'dvgateway-adapters/llm';
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @example Direct speech-to-speech (lower latency)
|
|
21
|
+
* ```typescript
|
|
22
|
+
* import { OpenAIRealtimeAdapter } from 'dvgateway-adapters/realtime';
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
// ── STT (Speech-to-Text) ──────────────────────────────────────────────────────
|
|
26
|
+
export { DeepgramAdapter } from './stt/deepgram.js';
|
|
27
|
+
export { HUMAN_VOICE_DEFAULTS_KO, HUMAN_VOICE_DEFAULTS_EN, } from 'dvgateway-sdk';
|
|
28
|
+
// ── TTS (Text-to-Speech) ─────────────────────────────────────────────────────
|
|
29
|
+
export { ElevenLabsAdapter, ELEVENLABS_KOREAN_VOICES } from './tts/elevenlabs.js';
|
|
30
|
+
export { OpenAITtsAdapter } from './tts/openai-tts.js';
|
|
31
|
+
export { CachedTtsAdapter } from './tts/cached-tts.js';
|
|
32
|
+
// ── LLM (Language Model) ─────────────────────────────────────────────────────
|
|
33
|
+
export { AnthropicAdapter } from './llm/anthropic.js';
|
|
34
|
+
export { OpenAILlmAdapter } from './llm/openai-llm.js';
|
|
35
|
+
// ── Realtime Speech-to-Speech ─────────────────────────────────────────────────
|
|
36
|
+
export { OpenAIRealtimeAdapter } from './realtime/openai-realtime.js';
|
|
37
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,iFAAiF;AACjF,OAAO,EAAE,eAAe,EAAE,MAAmB,mBAAmB,CAAC;AASjE,OAAO,EACL,uBAAuB,EACvB,uBAAuB,GACxB,MAA4C,eAAe,CAAC;AAE7D,gFAAgF;AAChF,OAAO,EAAE,iBAAiB,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAGlF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAOvE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAMvE,gFAAgF;AAChF,OAAO,EAAE,gBAAgB,EAAE,MAAsB,oBAAoB,CAAC;AAGtE,OAAO,EAAE,gBAAgB,EAAE,MAAsB,qBAAqB,CAAC;AAGvE,iFAAiF;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAiB,+BAA+B,CAAC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Claude LLM Adapter
|
|
3
|
+
*
|
|
4
|
+
* Integrates with the Anthropic Messages API for streaming LLM responses.
|
|
5
|
+
* Implements the LlmAdapter interface — returns streaming tokens as AsyncIterable.
|
|
6
|
+
*
|
|
7
|
+
* Model Reference (2026-03):
|
|
8
|
+
* claude-opus-4-6 — Most capable, best reasoning and instruction following
|
|
9
|
+
* claude-sonnet-4-6 — Best balance of speed and quality (recommended for voice)
|
|
10
|
+
* claude-haiku-4-5-20251001 — Fastest, lowest cost (best latency for voice bots)
|
|
11
|
+
*
|
|
12
|
+
* Latency targets (voice pipeline, TTFT = time to first token):
|
|
13
|
+
* claude-haiku-4-5 — ~80ms TTFT (optimal for real-time voice)
|
|
14
|
+
* claude-sonnet-4-6 — ~120ms TTFT (recommended for complex voice agents)
|
|
15
|
+
* claude-opus-4-6 — ~200ms TTFT (use only for high-complexity tasks)
|
|
16
|
+
*
|
|
17
|
+
* Extended Thinking:
|
|
18
|
+
* claude-sonnet-4-6 and claude-opus-4-6 support extended thinking (budget_tokens).
|
|
19
|
+
* Not recommended for real-time voice (adds significant latency).
|
|
20
|
+
*
|
|
21
|
+
* Uses the official @anthropic-ai/sdk for correct streaming and error handling.
|
|
22
|
+
*
|
|
23
|
+
* API Reference: https://docs.anthropic.com/en/api/messages
|
|
24
|
+
* SDK: https://github.com/anthropics/anthropic-sdk-node
|
|
25
|
+
*/
|
|
26
|
+
import type { LlmAdapter, LlmOptions, Message } from 'dvgateway-sdk';
|
|
27
|
+
export interface AnthropicAdapterOptions {
|
|
28
|
+
apiKey: string;
|
|
29
|
+
/**
|
|
30
|
+
* Model ID (default: "claude-sonnet-4-6" — best voice balance)
|
|
31
|
+
*
|
|
32
|
+
* Available models (2026-03):
|
|
33
|
+
* claude-opus-4-6 — Most capable (complex reasoning)
|
|
34
|
+
* claude-sonnet-4-6 — Balanced speed/quality (recommended for voice)
|
|
35
|
+
* claude-haiku-4-5-20251001 — Fastest (lowest latency, cost-effective)
|
|
36
|
+
*/
|
|
37
|
+
model?: string;
|
|
38
|
+
/** System prompt injected as the first message */
|
|
39
|
+
systemPrompt?: string;
|
|
40
|
+
/** Max output tokens (default: 1024 — keeps voice responses concise) */
|
|
41
|
+
maxTokens?: number;
|
|
42
|
+
/** Temperature 0.0–1.0 (default: 0.7 — balanced creativity) */
|
|
43
|
+
temperature?: number;
|
|
44
|
+
/**
|
|
45
|
+
* Top-P nucleus sampling (default: undefined — uses model default)
|
|
46
|
+
* Cannot be used with temperature simultaneously.
|
|
47
|
+
*/
|
|
48
|
+
topP?: number;
|
|
49
|
+
/**
|
|
50
|
+
* Stop sequences — list of strings that stop generation when encountered.
|
|
51
|
+
* Useful for controlling response boundaries in voice contexts.
|
|
52
|
+
*/
|
|
53
|
+
stopSequences?: string[];
|
|
54
|
+
}
|
|
55
|
+
export declare class AnthropicAdapter implements LlmAdapter {
|
|
56
|
+
private readonly client;
|
|
57
|
+
private readonly opts;
|
|
58
|
+
readonly systemPrompt: string;
|
|
59
|
+
constructor(opts: AnthropicAdapterOptions);
|
|
60
|
+
chat(messages: Message[], opts?: LlmOptions): AsyncIterable<string>;
|
|
61
|
+
}
|
|
62
|
+
//# sourceMappingURL=anthropic.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/llm/anthropic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAErE,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,CAAC;IACf;;;;;;;OAOG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,wEAAwE;IACxE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+DAA+D;IAC/D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;IACnC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoC;IAGzD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;gBAElB,IAAI,EAAE,uBAAuB;IAqBlC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;CAuC3E"}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Claude LLM Adapter
|
|
3
|
+
*
|
|
4
|
+
* Integrates with the Anthropic Messages API for streaming LLM responses.
|
|
5
|
+
* Implements the LlmAdapter interface — returns streaming tokens as AsyncIterable.
|
|
6
|
+
*
|
|
7
|
+
* Model Reference (2026-03):
|
|
8
|
+
* claude-opus-4-6 — Most capable, best reasoning and instruction following
|
|
9
|
+
* claude-sonnet-4-6 — Best balance of speed and quality (recommended for voice)
|
|
10
|
+
* claude-haiku-4-5-20251001 — Fastest, lowest cost (best latency for voice bots)
|
|
11
|
+
*
|
|
12
|
+
* Latency targets (voice pipeline, TTFT = time to first token):
|
|
13
|
+
* claude-haiku-4-5 — ~80ms TTFT (optimal for real-time voice)
|
|
14
|
+
* claude-sonnet-4-6 — ~120ms TTFT (recommended for complex voice agents)
|
|
15
|
+
* claude-opus-4-6 — ~200ms TTFT (use only for high-complexity tasks)
|
|
16
|
+
*
|
|
17
|
+
* Extended Thinking:
|
|
18
|
+
* claude-sonnet-4-6 and claude-opus-4-6 support extended thinking (budget_tokens).
|
|
19
|
+
* Not recommended for real-time voice (adds significant latency).
|
|
20
|
+
*
|
|
21
|
+
* Uses the official @anthropic-ai/sdk for correct streaming and error handling.
|
|
22
|
+
*
|
|
23
|
+
* API Reference: https://docs.anthropic.com/en/api/messages
|
|
24
|
+
* SDK: https://github.com/anthropics/anthropic-sdk-node
|
|
25
|
+
*/
|
|
26
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
27
|
+
export class AnthropicAdapter {
|
|
28
|
+
client;
|
|
29
|
+
opts;
|
|
30
|
+
// Expose systemPrompt for pipeline builder to read
|
|
31
|
+
systemPrompt;
|
|
32
|
+
constructor(opts) {
|
|
33
|
+
this.opts = {
|
|
34
|
+
model: opts.model ?? 'claude-sonnet-4-6',
|
|
35
|
+
systemPrompt: opts.systemPrompt ?? 'You are a helpful voice assistant. Keep answers concise and conversational, suitable for text-to-speech.',
|
|
36
|
+
maxTokens: opts.maxTokens ?? 1024,
|
|
37
|
+
temperature: opts.temperature ?? 0.7,
|
|
38
|
+
topP: opts.topP ?? undefined,
|
|
39
|
+
stopSequences: opts.stopSequences ?? [],
|
|
40
|
+
apiKey: opts.apiKey,
|
|
41
|
+
};
|
|
42
|
+
this.systemPrompt = this.opts.systemPrompt;
|
|
43
|
+
this.client = new Anthropic({
|
|
44
|
+
apiKey: opts.apiKey,
|
|
45
|
+
defaultHeaders: {
|
|
46
|
+
'anthropic-version': '2023-06-01',
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
}
|
|
50
|
+
async *chat(messages, opts) {
|
|
51
|
+
const model = opts?.model ?? this.opts.model;
|
|
52
|
+
const maxTokens = opts?.maxTokens ?? this.opts.maxTokens;
|
|
53
|
+
const temperature = opts?.temperature ?? this.opts.temperature;
|
|
54
|
+
const systemPrompt = opts?.systemPrompt ?? this.opts.systemPrompt;
|
|
55
|
+
// Separate system messages from conversation
|
|
56
|
+
const userMessages = messages.filter((m) => m.role !== 'system');
|
|
57
|
+
const systemContent = messages.find((m) => m.role === 'system')?.content ?? systemPrompt;
|
|
58
|
+
// Convert to Anthropic message format
|
|
59
|
+
const anthropicMessages = userMessages.map((m) => ({
|
|
60
|
+
role: m.role,
|
|
61
|
+
content: m.content,
|
|
62
|
+
}));
|
|
63
|
+
// Build request params
|
|
64
|
+
const params = {
|
|
65
|
+
model,
|
|
66
|
+
max_tokens: maxTokens,
|
|
67
|
+
system: systemContent,
|
|
68
|
+
messages: anthropicMessages,
|
|
69
|
+
...(temperature !== undefined && { temperature }),
|
|
70
|
+
...(this.opts.topP !== undefined && { top_p: this.opts.topP }),
|
|
71
|
+
...(this.opts.stopSequences.length > 0 && { stop_sequences: this.opts.stopSequences }),
|
|
72
|
+
};
|
|
73
|
+
// Use streaming API — yields tokens as they arrive (low TTFT)
|
|
74
|
+
const stream = this.client.messages.stream(params);
|
|
75
|
+
for await (const event of stream) {
|
|
76
|
+
if (event.type === 'content_block_delta' &&
|
|
77
|
+
event.delta.type === 'text_delta') {
|
|
78
|
+
yield event.delta.text;
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
//# sourceMappingURL=anthropic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anthropic.js","sourceRoot":"","sources":["../../src/llm/anthropic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAgC1C,MAAM,OAAO,gBAAgB;IACV,MAAM,CAAY;IAClB,IAAI,CAAoC;IAEzD,mDAAmD;IAC1C,YAAY,CAAS;IAE9B,YAAY,IAA6B;QACvC,IAAI,CAAC,IAAI,GAAG;YACV,KAAK,EAAY,IAAI,CAAC,KAAK,IAAa,mBAAmB;YAC3D,YAAY,EAAK,IAAI,CAAC,YAAY,IAAM,0GAA0G;YAClJ,SAAS,EAAQ,IAAI,CAAC,SAAS,IAAS,IAAI;YAC5C,WAAW,EAAM,IAAI,CAAC,WAAW,IAAO,GAAG;YAC3C,IAAI,EAAa,IAAI,CAAC,IAAI,IAAc,SAA8B;YACtE,aAAa,EAAI,IAAI,CAAC,aAAa,IAAK,EAAE;YAC1C,MAAM,EAAW,IAAI,CAAC,MAAM;SAC7B,CAAC;QAEF,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC;QAE3C,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC;YAC1B,MAAM,EAAG,IAAI,CAAC,MAAM;YACpB,cAAc,EAAE;gBACd,mBAAmB,EAAE,YAAY;aAClC;SACF,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,CAAC,IAAI,CAAC,QAAmB,EAAE,IAAiB;QAChD,MAAM,KAAK,GAAW,IAAI,EAAE,KAAK,IAAW,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;QAC5D,MAAM,SAAS,GAAO,IAAI,EAAE,SAAS,IAAO,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;QAChE,MAAM,WAAW,GAAK,IAAI,EAAE,WAAW,IAAK,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;QAClE,MAAM,YAAY,GAAI,IAAI,EAAE,YAAY,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC;QAEnE,6CAA6C;QAC7C,MAAM,YAAY,GAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAClE,MAAM,aAAa,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,EAAE,OAAO,IAAI,YAAY,CAAC;QAEzF,sCAAsC;QACtC,MAAM,iBAAiB,GAA6B,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3E,IAAI,EAAK,CAAC,CAAC,IAA4B;YACvC,OAAO,EAAE,CAAC,CAAC,OAAO;SACnB,CAAC,CAAC,CAAC;QAEJ,uBAAuB;QACvB,MAAM,MAAM,GAAkC;YAC5C,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,MAAM,EAAM,aAAa;YACzB,QAAQ,EAAI,iBAAiB;YAC7B,GAAG,CAAC,WAAW,KAAK,SAAS,IAAI,EAAE,WAAW,EAAE,CAAC;YACjD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC;SACvF,CAAC;QAEF,8DAA8D;QAC9D,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM
,CAAC,MAAM,CAAC,CAAC;QAEnD,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACjC,IACE,KAAK,CAAC,IAAI,KAAK,qBAAqB;gBACpC,KAAK,CAAC,KAAK,CAAC,IAAI,KAAK,YAAY,EACjC,CAAC;gBACD,MAAM,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { AnthropicAdapter } from './anthropic.js';
|
|
2
|
+
export type { AnthropicAdapterOptions } from './anthropic.js';
|
|
3
|
+
export { OpenAILlmAdapter } from './openai-llm.js';
|
|
4
|
+
export type { OpenAILlmAdapterOptions } from './openai-llm.js';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/llm/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,YAAY,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAE9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC;AACnD,YAAY,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/llm/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAGlD,OAAO,EAAE,gBAAgB,EAAE,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI LLM Adapter
|
|
3
|
+
*
|
|
4
|
+
* Integrates with OpenAI Chat Completions API for streaming responses.
|
|
5
|
+
* Can be used as a primary or fallback LLM.
|
|
6
|
+
*
|
|
7
|
+
* Model Reference (2026-03):
|
|
8
|
+
* gpt-4o-mini — Fastest, cost-effective (default for voice)
|
|
9
|
+
* gpt-4o — Best overall quality, multimodal capable
|
|
10
|
+
* gpt-4o-2024-11-20 — Stable pinned version of gpt-4o
|
|
11
|
+
* o3-mini — Reasoning model; not recommended for real-time voice (high latency)
|
|
12
|
+
* o1-mini — Reasoning model; not recommended for real-time voice (high latency)
|
|
13
|
+
*
|
|
14
|
+
* Latency targets (voice pipeline, TTFT = time to first token):
|
|
15
|
+
* gpt-4o-mini — ~100ms TTFT (recommended for real-time voice)
|
|
16
|
+
* gpt-4o — ~150ms TTFT
|
|
17
|
+
*
|
|
18
|
+
* API Reference: https://platform.openai.com/docs/api-reference/chat
|
|
19
|
+
* SDK: https://github.com/openai/openai-node
|
|
20
|
+
*/
|
|
21
|
+
import type { LlmAdapter, LlmOptions, Message } from 'dvgateway-sdk';
|
|
22
|
+
export interface OpenAILlmAdapterOptions {
|
|
23
|
+
apiKey: string;
|
|
24
|
+
/**
|
|
25
|
+
* Model ID (default: "gpt-4o-mini" — fastest for voice)
|
|
26
|
+
* Options:
|
|
27
|
+
* gpt-4o-mini — Fastest, cost-effective (recommended for voice)
|
|
28
|
+
* gpt-4o — Best quality, multimodal
|
|
29
|
+
*/
|
|
30
|
+
model?: string;
|
|
31
|
+
/** System prompt */
|
|
32
|
+
systemPrompt?: string;
|
|
33
|
+
/** Max output tokens (default: 1024) */
|
|
34
|
+
maxTokens?: number;
|
|
35
|
+
/** Temperature (default: 0.7) */
|
|
36
|
+
temperature?: number;
|
|
37
|
+
/**
|
|
38
|
+
* Presence penalty (-2.0 to 2.0, default: 0.0)
|
|
39
|
+
* Positive values discourage topic repetition.
|
|
40
|
+
*/
|
|
41
|
+
presencePenalty?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Frequency penalty (-2.0 to 2.0, default: 0.0)
|
|
44
|
+
* Positive values reduce verbatim repetition.
|
|
45
|
+
*/
|
|
46
|
+
frequencyPenalty?: number;
|
|
47
|
+
}
|
|
48
|
+
export declare class OpenAILlmAdapter implements LlmAdapter {
|
|
49
|
+
private readonly client;
|
|
50
|
+
private readonly opts;
|
|
51
|
+
readonly systemPrompt: string;
|
|
52
|
+
constructor(opts: OpenAILlmAdapterOptions);
|
|
53
|
+
chat(messages: Message[], opts?: LlmOptions): AsyncIterable<string>;
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=openai-llm.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-llm.d.ts","sourceRoot":"","sources":["../../src/llm/openai-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAErE,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oBAAoB;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,wCAAwC;IACxC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iCAAiC;IACjC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,qBAAa,gBAAiB,YAAW,UAAU;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAoC;IAEzD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;gBAElB,IAAI,EAAE,uBAAuB;IAelC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,IAAI,CAAC,EAAE,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC;CA8B3E"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI LLM Adapter
|
|
3
|
+
*
|
|
4
|
+
* Integrates with OpenAI Chat Completions API for streaming responses.
|
|
5
|
+
* Can be used as a primary or fallback LLM.
|
|
6
|
+
*
|
|
7
|
+
* Model Reference (2026-03):
|
|
8
|
+
* gpt-4o-mini — Fastest, cost-effective (default for voice)
|
|
9
|
+
* gpt-4o — Best overall quality, multimodal capable
|
|
10
|
+
* gpt-4o-2024-11-20 — Stable pinned version of gpt-4o
|
|
11
|
+
* o3-mini — Reasoning model; not recommended for real-time voice (high latency)
|
|
12
|
+
* o1-mini — Reasoning model; not recommended for real-time voice (high latency)
|
|
13
|
+
*
|
|
14
|
+
* Latency targets (voice pipeline, TTFT = time to first token):
|
|
15
|
+
* gpt-4o-mini — ~100ms TTFT (recommended for real-time voice)
|
|
16
|
+
* gpt-4o — ~150ms TTFT
|
|
17
|
+
*
|
|
18
|
+
* API Reference: https://platform.openai.com/docs/api-reference/chat
|
|
19
|
+
* SDK: https://github.com/openai/openai-node
|
|
20
|
+
*/
|
|
21
|
+
import OpenAI from 'openai';
|
|
22
|
+
export class OpenAILlmAdapter {
|
|
23
|
+
client;
|
|
24
|
+
opts;
|
|
25
|
+
systemPrompt;
|
|
26
|
+
constructor(opts) {
|
|
27
|
+
this.opts = {
|
|
28
|
+
model: opts.model ?? 'gpt-4o-mini',
|
|
29
|
+
systemPrompt: opts.systemPrompt ?? 'You are a helpful voice assistant. Keep answers concise and conversational.',
|
|
30
|
+
maxTokens: opts.maxTokens ?? 1024,
|
|
31
|
+
temperature: opts.temperature ?? 0.7,
|
|
32
|
+
presencePenalty: opts.presencePenalty ?? 0.0,
|
|
33
|
+
frequencyPenalty: opts.frequencyPenalty ?? 0.0,
|
|
34
|
+
apiKey: opts.apiKey,
|
|
35
|
+
};
|
|
36
|
+
this.systemPrompt = this.opts.systemPrompt;
|
|
37
|
+
this.client = new OpenAI({ apiKey: opts.apiKey });
|
|
38
|
+
}
|
|
39
|
+
async *chat(messages, opts) {
|
|
40
|
+
const model = opts?.model ?? this.opts.model;
|
|
41
|
+
const maxTokens = opts?.maxTokens ?? this.opts.maxTokens;
|
|
42
|
+
const temperature = opts?.temperature ?? this.opts.temperature;
|
|
43
|
+
const system = messages.find((m) => m.role === 'system')?.content
|
|
44
|
+
?? opts?.systemPrompt
|
|
45
|
+
?? this.opts.systemPrompt;
|
|
46
|
+
const chatMessages = [
|
|
47
|
+
{ role: 'system', content: system },
|
|
48
|
+
...messages
|
|
49
|
+
.filter((m) => m.role !== 'system')
|
|
50
|
+
.map((m) => ({ role: m.role, content: m.content })),
|
|
51
|
+
];
|
|
52
|
+
const stream = await this.client.chat.completions.create({
|
|
53
|
+
model,
|
|
54
|
+
max_tokens: maxTokens,
|
|
55
|
+
temperature,
|
|
56
|
+
presence_penalty: this.opts.presencePenalty,
|
|
57
|
+
frequency_penalty: this.opts.frequencyPenalty,
|
|
58
|
+
messages: chatMessages,
|
|
59
|
+
stream: true,
|
|
60
|
+
});
|
|
61
|
+
for await (const chunk of stream) {
|
|
62
|
+
const delta = chunk.choices[0]?.delta?.content;
|
|
63
|
+
if (delta)
|
|
64
|
+
yield delta;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
//# sourceMappingURL=openai-llm.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-llm.js","sourceRoot":"","sources":["../../src/llm/openai-llm.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,MAAM,MAAM,QAAQ,CAAC;AA8B5B,MAAM,OAAO,gBAAgB;IACV,MAAM,CAAS;IACf,IAAI,CAAoC;IAEhD,YAAY,CAAS;IAE9B,YAAY,IAA6B;QACvC,IAAI,CAAC,IAAI,GAAG;YACV,KAAK,EAAc,IAAI,CAAC,KAAK,IAAgB,aAAa;YAC1D,YAAY,EAAO,IAAI,CAAC,YAAY,IAAS,6EAA6E;YAC1H,SAAS,EAAU,IAAI,CAAC,SAAS,IAAY,IAAI;YACjD,WAAW,EAAQ,IAAI,CAAC,WAAW,IAAU,GAAG;YAChD,eAAe,EAAI,IAAI,CAAC,eAAe,IAAM,GAAG;YAChD,gBAAgB,EAAG,IAAI,CAAC,gBAAgB,IAAK,GAAG;YAChD,MAAM,EAAa,IAAI,CAAC,MAAM;SAC/B,CAAC;QAEF,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC;QAC3C,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IACpD,CAAC;IAED,KAAK,CAAC,CAAC,IAAI,CAAC,QAAmB,EAAE,IAAiB;QAChD,MAAM,KAAK,GAAS,IAAI,EAAE,KAAK,IAAU,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC;QACzD,MAAM,SAAS,GAAK,IAAI,EAAE,SAAS,IAAM,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;QAC7D,MAAM,WAAW,GAAG,IAAI,EAAE,WAAW,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/D,MAAM,MAAM,GAAQ,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,EAAE,OAAO;eACjE,IAAI,EAAE,YAAY;eAClB,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC;QAE5B,MAAM,YAAY,GAA6C;YAC7D,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;YACnC,GAAG,QAAQ;iBACR,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;iBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAA4B,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;SAC9E,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;YACvD,KAAK;YACL,UAAU,EAAS,SAAS;YAC5B,WAAW;YACX,gBAAgB,EAAG,IAAI,CAAC,IAAI,CAAC,eAAe;YAC5C,iBAAiB,EAAE,IAAI,CAAC,IAAI,CAAC,gBAAgB;YAC7C,QAAQ,EAAW,YAAY;YAC/B,MAAM,EAAa,IAAI;SACxB,CAAC,CAAC;QAEH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC;YAC/C,IAAI,KAAK;gBAAE,MAAM,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
export { OpenAIRealtimeAdapter } from './openai-realtime.js';
|
|
2
|
+
export type { OpenAIRealtimeAdapterOptions, OpenAIRealtimeVoice, OpenAIRealtimeTurnDetectionMode, OpenAIRealtimeTurnDetectionOptions, RealtimeSpeechAdapter, } from './openai-realtime.js';
|
|
3
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/realtime/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAC7D,YAAY,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,+BAA+B,EAC/B,kCAAkC,EAClC,qBAAqB,GACtB,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/realtime/index.ts"],"names":[],"mappings":"AAAA,qCAAqC;AACrC,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Realtime Speech-to-Speech Adapter
|
|
3
|
+
*
|
|
4
|
+
* Implements a direct speech-to-speech pipeline using the OpenAI Realtime API.
|
|
5
|
+
* This adapter bypasses the traditional STT → LLM → TTS chain entirely —
|
|
6
|
+
* audio goes in, audio comes out with minimal latency (~300ms glass-to-glass).
|
|
7
|
+
*
|
|
8
|
+
* Architecture:
|
|
9
|
+
*
|
|
10
|
+
* DVGateway audio in (16kHz PCM)
|
|
11
|
+
* ↓ upsample to 24kHz
|
|
12
|
+
* OpenAI Realtime API (WebSocket)
|
|
13
|
+
* ↓ gpt-4o-realtime (audio 1.5) processes speech end-to-end
|
|
14
|
+
* DVGateway audio out (24kHz → 16kHz PCM)
|
|
15
|
+
* ↓
|
|
16
|
+
* TTS injector → call audio
|
|
17
|
+
*
|
|
18
|
+
* Features:
|
|
19
|
+
* - Sub-300ms end-to-end latency (vs ~500ms for cascaded pipeline)
|
|
20
|
+
* - No separate STT/LLM/TTS billing — single API call
|
|
21
|
+
* - Native voice activity detection (server-side VAD)
|
|
22
|
+
* - Automatic conversation history management
|
|
23
|
+
* - Input and output transcription
|
|
24
|
+
* - Function/tool calling support
|
|
25
|
+
* - Configurable voice, instructions, and turn detection
|
|
26
|
+
*
|
|
27
|
+
* Model Reference (2026-03):
|
|
28
|
+
* gpt-4o-realtime-preview — Latest stable realtime model
|
|
29
|
+
* gpt-4o-realtime-preview-2024-12-17 — Pinned stable version
|
|
30
|
+
* gpt-4o-mini-realtime-preview — Cost-efficient realtime (audio 1.5)
|
|
31
|
+
* gpt-4o-mini-realtime-preview-2024-12-17 — Pinned mini version
|
|
32
|
+
*
|
|
33
|
+
* Audio format:
|
|
34
|
+
* Input: PCM16 24kHz mono, base64-encoded (upsample from DVGateway 16kHz)
|
|
35
|
+
* Output: PCM16 24kHz mono, base64-encoded (downsample to DVGateway 16kHz)
|
|
36
|
+
*
|
|
37
|
+
* API Reference: https://platform.openai.com/docs/api-reference/realtime
|
|
38
|
+
* WebSocket: wss://api.openai.com/v1/realtime
|
|
39
|
+
*/
|
|
40
|
+
import type { AudioChunk, TranscriptResult } from 'dvgateway-sdk';
|
|
41
|
+
/** Voices accepted by the Realtime API for assistant audio output (see `OpenAIRealtimeAdapterOptions.voice`). */
export type OpenAIRealtimeVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'ballad' | 'coral' | 'sage' | 'verse';
/**
 * Turn-detection strategy: "server_vad" lets the server detect turn boundaries;
 * "none" leaves turn management to the caller (push-to-talk style).
 */
export type OpenAIRealtimeTurnDetectionMode = 'server_vad' | 'none';
|
|
43
|
+
/**
 * Tuning knobs for server-side voice activity detection (VAD).
 * All fields are optional; defaults noted per field apply when omitted.
 */
export interface OpenAIRealtimeTurnDetectionOptions {
    /** Detection mode (default: "server_vad") */
    mode?: OpenAIRealtimeTurnDetectionMode;
    /** VAD activation threshold 0.0–1.0 (default: 0.5); higher values require louder speech to trigger */
    threshold?: number;
    /** Silence duration in ms before the turn is considered ended (default: 200) */
    silenceDurationMs?: number;
    /** Audio in ms prepended before detected speech start, so onsets are not clipped (default: 300) */
    prefixPaddingMs?: number;
}
|
|
53
|
+
/** Constructor options for {@link OpenAIRealtimeAdapter}. */
export interface OpenAIRealtimeAdapterOptions {
    /** OpenAI API key used to authenticate the realtime WebSocket connection. */
    apiKey: string;
    /**
     * Realtime model (default: "gpt-4o-realtime-preview")
     * Options:
     *   gpt-4o-realtime-preview      — Best quality, full GPT-4o
     *   gpt-4o-mini-realtime-preview — Cost-efficient (audio 1.5)
     */
    model?: string;
    /**
     * Voice for the AI assistant response (default: "alloy")
     * Options: alloy, echo, fable, onyx, nova, shimmer, ash, ballad, coral, sage, verse
     */
    voice?: OpenAIRealtimeVoice;
    /**
     * System instructions for the assistant (default: voice assistant prompt)
     * E.g. "You are a helpful Korean-English bilingual voice assistant. Keep answers short."
     */
    instructions?: string;
    /**
     * Turn detection configuration.
     * Use mode "none" if you want to manage turn detection yourself (push-to-talk style).
     */
    turnDetection?: OpenAIRealtimeTurnDetectionOptions;
    /**
     * Enable input (user) audio transcription (default: true)
     * Uses the whisper-1 model on the server side.
     */
    inputTranscription?: boolean;
    /**
     * Temperature for response generation (default: 0.8)
     * Range: 0.6–1.2 (recommended for realtime)
     */
    temperature?: number;
    /**
     * Max response output tokens (default: "inf" — unlimited)
     * Set a number to cap response length.
     */
    maxResponseTokens?: number | 'inf';
}
|
|
93
|
+
/**
 * RealtimeSpeechAdapter — combined speech-to-speech adapter interface.
 * A single adapter handles audio input, AI processing, and audio output,
 * replacing the separate STT/LLM/TTS adapter chain.
 */
export interface RealtimeSpeechAdapter {
    /**
     * Start a real-time speech session.
     * @param linkedId - DVGateway call session identifier
     * @param audioIn - Async iterable of normalized 16kHz audio chunks from the call
     */
    startSession(linkedId: string, audioIn: AsyncIterable<AudioChunk>): Promise<void>;
    /** Register a handler for audio output chunks (16kHz slin16 PCM). */
    onAudioOutput(handler: (chunk: Buffer, linkedId: string) => void): void;
    /** Register a handler for transcription results (both user and assistant speech). */
    onTranscript(handler: (result: TranscriptResult) => void): void;
    /** Register a handler for errors; linkedId identifies the affected session when available. */
    onError(handler: (err: Error, linkedId?: string) => void): void;
    /** Stop the session and release resources. */
    stop(linkedId?: string): Promise<void>;
}
|
|
113
|
+
/**
 * Speech-to-speech adapter backed by the OpenAI Realtime API over WebSocket.
 * See the module header for the audio pipeline and model reference.
 */
export declare class OpenAIRealtimeAdapter implements RealtimeSpeechAdapter {
    /** Adapter configuration captured from the constructor options. */
    private readonly opts;
    /** Handler registered via onAudioOutput (undefined until registered). */
    private audioOutputHandler;
    /** Handler registered via onTranscript (undefined until registered). */
    private transcriptHandler;
    /** Handler registered via onError (undefined until registered). */
    private errorHandler;
    // NOTE(review): presumably a map of active sessions keyed by linkedId — confirm in implementation.
    private readonly sessions;
    /** Flag toggled by stop() to halt further processing. */
    private stopped;
    constructor(opts: OpenAIRealtimeAdapterOptions);
    /** Register a handler for audio output chunks (16kHz slin16 PCM). */
    onAudioOutput(handler: (chunk: Buffer, linkedId: string) => void): void;
    /** Register a handler for transcription results (user and assistant). */
    onTranscript(handler: (result: TranscriptResult) => void): void;
    /** Register a handler for errors; linkedId identifies the affected session when available. */
    onError(handler: (err: Error, linkedId?: string) => void): void;
    /**
     * Start a real-time speech session for the given call.
     * @param linkedId - DVGateway call session identifier
     * @param audioIn - Async iterable of normalized 16kHz audio chunks from the call
     */
    startSession(linkedId: string, audioIn: AsyncIterable<AudioChunk>): Promise<void>;
    /** Stop a session and release resources; linkedId is optional — see implementation for all-session behavior. */
    stop(linkedId?: string): Promise<void>;
    // Private internals (bodies live in the implementation file):
    /** Opens the Realtime WebSocket connection — name-based inference; confirm in implementation. */
    private connectRealtime;
    /** Streams inbound call audio to the API — name-based inference; confirm in implementation. */
    private pipeAudioIn;
    /** Dispatches server events (audio, transcripts, errors) — name-based inference; confirm in implementation. */
    private handleServerEvent;
    /** Builds the turn_detection session payload from options — name-based inference; confirm in implementation. */
    private buildTurnDetection;
    /** Serializes and sends a client event over the socket — name-based inference; confirm in implementation. */
    private sendEvent;
}
|
|
132
|
+
//# sourceMappingURL=openai-realtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-realtime.d.ts","sourceRoot":"","sources":["../../src/realtime/openai-realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAKlE,MAAM,MAAM,mBAAmB,GAC3B,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,GACxD,KAAK,GAAG,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,CAAC;AAElD,MAAM,MAAM,+BAA+B,GAAG,YAAY,GAAG,MAAM,CAAC;AAEpE,MAAM,WAAW,kCAAkC;IACjD,6CAA6C;IAC7C,IAAI,CAAC,EAAE,+BAA+B,CAAC;IACvC,sDAAsD;IACtD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0DAA0D;IAC1D,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,4DAA4D;IAC5D,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,4BAA4B;IAC3C,MAAM,EAAE,MAAM,CAAC;IACf;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,KAAK,CAAC,EAAE,mBAAmB,CAAC;IAC5B;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;OAGG;IACH,aAAa,CAAC,EAAE,kCAAkC,CAAC;IACnD;;;OAGG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,GAAG,KAAK,CAAC;CACpC;AAiBD;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC;;;;OAIG;IACH,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAElF,oEAAoE;IACpE,aAAa,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI,CAAC;IAExE,wEAAwE;IACxE,YAAY,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI,CAAC;IAEhE,oCAAoC;IACpC,OAAO,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI,CAAC;IAEhE,6CAA6C;IAC7C,IAAI,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACxC;AAUD,qBAAa,qBAAsB,YAAW,qBAAqB;IACjE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAyC;IAE9D,OAAO,CAAC,kBAAkB,CAA4D;IACtF,OAAO,CAAC,iBAAiB,CAAqD;IAC9E,OAAO,CAAC,YAAY,CAA0D;IAG9E,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAgC;IACzD,OAAO,CAAC,OAAO,CAAS;gBAEZ,IAAI,EAAE,4BAA4B;IAa9C,aAAa,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAIvE,YAAY,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAI/D,OAAO,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,EAAE,MAAM,
KAAK,IAAI,GAAG,IAAI;IAIzD,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAuCjF,IAAI,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;YAqB9B,eAAe;YAoCf,WAAW;IAmCzB,OAAO,CAAC,iBAAiB;IA4DzB,OAAO,CAAC,kBAAkB;IAc1B,OAAO,CAAC,SAAS;CAKlB"}
|