@framers/agentos 0.1.175 → 0.1.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/agent.d.ts.map +1 -1
- package/dist/api/agent.js +53 -5
- package/dist/api/agent.js.map +1 -1
- package/dist/api/generateText.d.ts +1 -1
- package/dist/api/generateText.d.ts.map +1 -1
- package/dist/api/generateText.js +1 -0
- package/dist/api/generateText.js.map +1 -1
- package/dist/cognitive_substrate/GMIEvent.d.ts +6 -1
- package/dist/cognitive_substrate/GMIEvent.d.ts.map +1 -1
- package/dist/cognitive_substrate/GMIEvent.js +5 -0
- package/dist/cognitive_substrate/GMIEvent.js.map +1 -1
- package/dist/memory/index.d.ts +2 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +1 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/io/facade/Memory.d.ts +7 -6
- package/dist/memory/io/facade/Memory.d.ts.map +1 -1
- package/dist/memory/io/facade/Memory.js +37 -13
- package/dist/memory/io/facade/Memory.js.map +1 -1
- package/dist/memory/mechanisms/CognitiveMechanismsEngine.d.ts +4 -0
- package/dist/memory/mechanisms/CognitiveMechanismsEngine.d.ts.map +1 -1
- package/dist/memory/mechanisms/CognitiveMechanismsEngine.js +9 -1
- package/dist/memory/mechanisms/CognitiveMechanismsEngine.js.map +1 -1
- package/dist/memory/mechanisms/PersonaDriftMechanism.d.ts +50 -0
- package/dist/memory/mechanisms/PersonaDriftMechanism.d.ts.map +1 -0
- package/dist/memory/mechanisms/PersonaDriftMechanism.js +104 -0
- package/dist/memory/mechanisms/PersonaDriftMechanism.js.map +1 -0
- package/dist/memory/mechanisms/types.d.ts +2 -0
- package/dist/memory/mechanisms/types.d.ts.map +1 -1
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +8 -8
- package/dist/voice-pipeline/WebSocketStreamTransport.js +12 -12
- package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -1
- package/dist/voice-pipeline/index.d.ts +78 -18
- package/dist/voice-pipeline/index.d.ts.map +1 -1
- package/dist/voice-pipeline/index.js +79 -18
- package/dist/voice-pipeline/index.js.map +1 -1
- package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.d.ts +63 -0
- package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.d.ts.map +1 -0
- package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.js +72 -0
- package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.js.map +1 -0
- package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.d.ts +70 -0
- package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.d.ts.map +1 -0
- package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.js +248 -0
- package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.js.map +1 -0
- package/dist/voice-pipeline/providers/index.d.ts +13 -0
- package/dist/voice-pipeline/providers/index.d.ts.map +1 -0
- package/dist/voice-pipeline/providers/index.js +13 -0
- package/dist/voice-pipeline/providers/index.js.map +1 -0
- package/package.json +1 -1
|
@@ -1,39 +1,98 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @module @framers/agentos/voice-pipeline
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Real-time streaming voice pipeline for AgentOS.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
6
|
+
* Provides a complete, provider-agnostic voice conversation system with
|
|
7
|
+
* pluggable STT, TTS, endpoint detection, barge-in handling, and transport.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
* ({@link AudioFrame}, {@link IStreamTransport}, {@link IEndpointDetector}, etc.).
|
|
9
|
+
* ## Architecture
|
|
11
10
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
11
|
+
* ```
|
|
12
|
+
* Browser Mic → Transport → STT → Endpoint Detector → Agent → TTS → Transport → Browser Speaker
|
|
13
|
+
* ↑
|
|
14
|
+
* Barge-in Handler
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* All components are injected via `VoicePipelineOverrides`, making the pipeline
|
|
18
|
+
* fully provider-agnostic. Swap Deepgram for ElevenLabs STT, or ElevenLabs for
|
|
19
|
+
* OpenAI TTS, by changing one line.
|
|
20
|
+
*
|
|
21
|
+
* ## Built-in Providers
|
|
22
|
+
*
|
|
23
|
+
* **STT (Speech-to-Text):**
|
|
24
|
+
* - {@link DeepgramStreamingSTT} — WebSocket streaming via Deepgram Nova-2. Lowest latency.
|
|
25
|
+
* - {@link ElevenLabsStreamingSTT} — Chunked REST via ElevenLabs Scribe. Uses same key as TTS.
|
|
15
26
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
* - {@link AcousticEndpointDetector}: Purely acoustic (silence-only, no transcript analysis).
|
|
27
|
+
* **TTS (Text-to-Speech):**
|
|
28
|
+
* - {@link ElevenLabsStreamingTTS} — WebSocket streaming via ElevenLabs. High quality voices.
|
|
19
29
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
30
|
+
* **Endpoint Detection:**
|
|
31
|
+
* - {@link HeuristicEndpointDetector} — Punctuation + silence timeout. Fast, no model needed.
|
|
32
|
+
* - {@link AcousticEndpointDetector} — Silence-only, no transcript analysis.
|
|
23
33
|
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
34
|
+
* **Barge-in Handling:**
|
|
35
|
+
* - {@link HardCutBargeinHandler} — Immediate TTS cancel above speech threshold.
|
|
36
|
+
* - {@link SoftFadeBargeinHandler} — Three-tier (ignore/pause/cancel) with configurable fade.
|
|
26
37
|
*
|
|
27
|
-
*
|
|
38
|
+
* **Transport:**
|
|
39
|
+
* - {@link WebSocketStreamTransport} — WebSocket bidirectional audio/text.
|
|
40
|
+
* - {@link WebRTCStreamTransport} — WebRTC DataChannel transport.
|
|
41
|
+
*
|
|
42
|
+
* **Agent Adapter:**
|
|
43
|
+
* - {@link AgentSessionVoiceAdapter} — Wraps any AgentOS `AgentSession` as `IVoicePipelineAgentSession`.
|
|
44
|
+
*
|
|
45
|
+
* ## Usage
|
|
28
46
|
*
|
|
29
|
-
* @example
|
|
30
47
|
* ```typescript
|
|
31
48
|
* import {
|
|
32
49
|
* VoicePipelineOrchestrator,
|
|
33
50
|
* HeuristicEndpointDetector,
|
|
34
51
|
* HardCutBargeinHandler,
|
|
35
52
|
* WebSocketStreamTransport,
|
|
53
|
+
* ElevenLabsStreamingSTT,
|
|
54
|
+
* ElevenLabsStreamingTTS,
|
|
55
|
+
* AgentSessionVoiceAdapter,
|
|
36
56
|
* } from '@framers/agentos/voice-pipeline';
|
|
57
|
+
* import { agent } from '@framers/agentos';
|
|
58
|
+
*
|
|
59
|
+
* // Create agent and voice adapter
|
|
60
|
+
* const a = agent({ model: 'gpt-4o-mini', instructions: 'You are a voice companion.' });
|
|
61
|
+
* const session = a.session('voice-1');
|
|
62
|
+
* const voiceAdapter = new AgentSessionVoiceAdapter(session);
|
|
63
|
+
*
|
|
64
|
+
* // Create providers (use whichever API keys you have)
|
|
65
|
+
* const stt = new ElevenLabsStreamingSTT({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
66
|
+
* const tts = new ElevenLabsStreamingTTS({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
67
|
+
*
|
|
68
|
+
* // Create and start the pipeline
|
|
69
|
+
* const orchestrator = new VoicePipelineOrchestrator({
|
|
70
|
+
* stt: 'elevenlabs', tts: 'elevenlabs', language: 'en-US',
|
|
71
|
+
* });
|
|
72
|
+
*
|
|
73
|
+
* const pipelineSession = await orchestrator.startSession(transport, voiceAdapter, {
|
|
74
|
+
* streamingSTT: stt,
|
|
75
|
+
* streamingTTS: tts,
|
|
76
|
+
* endpointDetector: new HeuristicEndpointDetector(),
|
|
77
|
+
* bargeinHandler: new HardCutBargeinHandler(),
|
|
78
|
+
* });
|
|
79
|
+
*
|
|
80
|
+
* // Listen for state changes (idle → listening → processing → speaking → listening)
|
|
81
|
+
* pipelineSession.on('state_change', (state) => console.log('Pipeline:', state));
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
84
|
+
* ## Custom Providers
|
|
85
|
+
*
|
|
86
|
+
* Implement {@link IStreamingSTT} and {@link IStreamingTTS} to add any provider:
|
|
87
|
+
*
|
|
88
|
+
* ```typescript
|
|
89
|
+
* class MyCustomSTT implements IStreamingSTT {
|
|
90
|
+
* readonly providerId = 'my-custom-stt';
|
|
91
|
+
* readonly isStreaming = true;
|
|
92
|
+
* async startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession> {
|
|
93
|
+
* // Connect to your STT service, return a session that emits 'transcript' events
|
|
94
|
+
* }
|
|
95
|
+
* }
|
|
37
96
|
* ```
|
|
38
97
|
*/
|
|
39
98
|
export * from './types.js';
|
|
@@ -45,4 +104,5 @@ export { WebSocketStreamTransport } from './WebSocketStreamTransport.js';
|
|
|
45
104
|
export { WebRTCStreamTransport, createWebRTCTransport } from './WebRTCStreamTransport.js';
|
|
46
105
|
export { VoicePipelineOrchestrator } from './VoicePipelineOrchestrator.js';
|
|
47
106
|
export { VoiceInterruptError } from './VoiceInterruptError.js';
|
|
107
|
+
export { DeepgramStreamingSTT, type DeepgramStreamingSTTConfig, ElevenLabsStreamingSTT, type ElevenLabsStreamingSTTConfig, ElevenLabsStreamingTTS, type ElevenLabsStreamingTTSConfig, AgentSessionVoiceAdapter, } from './providers/index.js';
|
|
48
108
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgGG;AAIH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGrE,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAG1F,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAG3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAG/D,OAAO,EACL,oBAAoB,EACpB,KAAK,0BAA0B,EAC/B,sBAAsB,EACtB,KAAK,4BAA4B,EACjC,sBAAsB,EACtB,KAAK,4BAA4B,EACjC,wBAAwB,GACzB,MAAM,sBAAsB,CAAC"}
|
|
@@ -1,39 +1,98 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @module @framers/agentos/voice-pipeline
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Real-time streaming voice pipeline for AgentOS.
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
6
|
+
* Provides a complete, provider-agnostic voice conversation system with
|
|
7
|
+
* pluggable STT, TTS, endpoint detection, barge-in handling, and transport.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
* ({@link AudioFrame}, {@link IStreamTransport}, {@link IEndpointDetector}, etc.).
|
|
9
|
+
* ## Architecture
|
|
11
10
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
11
|
+
* ```
|
|
12
|
+
* Browser Mic → Transport → STT → Endpoint Detector → Agent → TTS → Transport → Browser Speaker
|
|
13
|
+
* ↑
|
|
14
|
+
* Barge-in Handler
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* All components are injected via `VoicePipelineOverrides`, making the pipeline
|
|
18
|
+
* fully provider-agnostic. Swap Deepgram for ElevenLabs STT, or ElevenLabs for
|
|
19
|
+
* OpenAI TTS, by changing one line.
|
|
20
|
+
*
|
|
21
|
+
* ## Built-in Providers
|
|
22
|
+
*
|
|
23
|
+
* **STT (Speech-to-Text):**
|
|
24
|
+
* - {@link DeepgramStreamingSTT} — WebSocket streaming via Deepgram Nova-2. Lowest latency.
|
|
25
|
+
* - {@link ElevenLabsStreamingSTT} — Chunked REST via ElevenLabs Scribe. Uses same key as TTS.
|
|
15
26
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
* - {@link AcousticEndpointDetector}: Purely acoustic (silence-only, no transcript analysis).
|
|
27
|
+
* **TTS (Text-to-Speech):**
|
|
28
|
+
* - {@link ElevenLabsStreamingTTS} — WebSocket streaming via ElevenLabs. High quality voices.
|
|
19
29
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
30
|
+
* **Endpoint Detection:**
|
|
31
|
+
* - {@link HeuristicEndpointDetector} — Punctuation + silence timeout. Fast, no model needed.
|
|
32
|
+
* - {@link AcousticEndpointDetector} — Silence-only, no transcript analysis.
|
|
23
33
|
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
34
|
+
* **Barge-in Handling:**
|
|
35
|
+
* - {@link HardCutBargeinHandler} — Immediate TTS cancel above speech threshold.
|
|
36
|
+
* - {@link SoftFadeBargeinHandler} — Three-tier (ignore/pause/cancel) with configurable fade.
|
|
26
37
|
*
|
|
27
|
-
*
|
|
38
|
+
* **Transport:**
|
|
39
|
+
* - {@link WebSocketStreamTransport} — WebSocket bidirectional audio/text.
|
|
40
|
+
* - {@link WebRTCStreamTransport} — WebRTC DataChannel transport.
|
|
41
|
+
*
|
|
42
|
+
* **Agent Adapter:**
|
|
43
|
+
* - {@link AgentSessionVoiceAdapter} — Wraps any AgentOS `AgentSession` as `IVoicePipelineAgentSession`.
|
|
44
|
+
*
|
|
45
|
+
* ## Usage
|
|
28
46
|
*
|
|
29
|
-
* @example
|
|
30
47
|
* ```typescript
|
|
31
48
|
* import {
|
|
32
49
|
* VoicePipelineOrchestrator,
|
|
33
50
|
* HeuristicEndpointDetector,
|
|
34
51
|
* HardCutBargeinHandler,
|
|
35
52
|
* WebSocketStreamTransport,
|
|
53
|
+
* ElevenLabsStreamingSTT,
|
|
54
|
+
* ElevenLabsStreamingTTS,
|
|
55
|
+
* AgentSessionVoiceAdapter,
|
|
36
56
|
* } from '@framers/agentos/voice-pipeline';
|
|
57
|
+
* import { agent } from '@framers/agentos';
|
|
58
|
+
*
|
|
59
|
+
* // Create agent and voice adapter
|
|
60
|
+
* const a = agent({ model: 'gpt-4o-mini', instructions: 'You are a voice companion.' });
|
|
61
|
+
* const session = a.session('voice-1');
|
|
62
|
+
* const voiceAdapter = new AgentSessionVoiceAdapter(session);
|
|
63
|
+
*
|
|
64
|
+
* // Create providers (use whichever API keys you have)
|
|
65
|
+
* const stt = new ElevenLabsStreamingSTT({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
66
|
+
* const tts = new ElevenLabsStreamingTTS({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
67
|
+
*
|
|
68
|
+
* // Create and start the pipeline
|
|
69
|
+
* const orchestrator = new VoicePipelineOrchestrator({
|
|
70
|
+
* stt: 'elevenlabs', tts: 'elevenlabs', language: 'en-US',
|
|
71
|
+
* });
|
|
72
|
+
*
|
|
73
|
+
* const pipelineSession = await orchestrator.startSession(transport, voiceAdapter, {
|
|
74
|
+
* streamingSTT: stt,
|
|
75
|
+
* streamingTTS: tts,
|
|
76
|
+
* endpointDetector: new HeuristicEndpointDetector(),
|
|
77
|
+
* bargeinHandler: new HardCutBargeinHandler(),
|
|
78
|
+
* });
|
|
79
|
+
*
|
|
80
|
+
* // Listen for state changes (idle → listening → processing → speaking → listening)
|
|
81
|
+
* pipelineSession.on('state_change', (state) => console.log('Pipeline:', state));
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
84
|
+
* ## Custom Providers
|
|
85
|
+
*
|
|
86
|
+
* Implement {@link IStreamingSTT} and {@link IStreamingTTS} to add any provider:
|
|
87
|
+
*
|
|
88
|
+
* ```typescript
|
|
89
|
+
* class MyCustomSTT implements IStreamingSTT {
|
|
90
|
+
* readonly providerId = 'my-custom-stt';
|
|
91
|
+
* readonly isStreaming = true;
|
|
92
|
+
* async startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession> {
|
|
93
|
+
* // Connect to your STT service, return a session that emits 'transcript' events
|
|
94
|
+
* }
|
|
95
|
+
* }
|
|
37
96
|
* ```
|
|
38
97
|
*/
|
|
39
98
|
// Re-export all type definitions from the types module.
|
|
@@ -53,4 +112,6 @@ export { WebRTCStreamTransport, createWebRTCTransport } from './WebRTCStreamTran
|
|
|
53
112
|
export { VoicePipelineOrchestrator } from './VoicePipelineOrchestrator.js';
|
|
54
113
|
// Typed error for barge-in interruptions
|
|
55
114
|
export { VoiceInterruptError } from './VoiceInterruptError.js';
|
|
115
|
+
// Streaming provider implementations
|
|
116
|
+
export { DeepgramStreamingSTT, ElevenLabsStreamingSTT, ElevenLabsStreamingTTS, AgentSessionVoiceAdapter, } from './providers/index.js';
|
|
56
117
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgGG;AAEH,wDAAwD;AACxD,8EAA8E;AAC9E,cAAc,YAAY,CAAC;AAE3B,4CAA4C;AAC5C,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,6CAA6C;AAC7C,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,2CAA2C;AAC3C,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,oDAAoD;AACpD,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAE1F,gDAAgD;AAChD,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAE3E,yCAAyC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAE/D,qCAAqC;AACrC,OAAO,EACL,oBAAoB,EAEpB,sBAAsB,EAEtB,sBAAsB,EAEtB,wBAAwB,GACzB,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/providers/AgentSessionVoiceAdapter
|
|
3
|
+
*
|
|
4
|
+
* Adapts an AgentOS {@link AgentSession} to the {@link IVoicePipelineAgentSession}
|
|
5
|
+
* interface required by {@link VoicePipelineOrchestrator}.
|
|
6
|
+
*
|
|
7
|
+
* The adapter wraps `AgentSession.stream(text)` and yields the resulting
|
|
8
|
+
* `textStream` (an `AsyncIterable<string>` of token deltas) as the return
|
|
9
|
+
* value of `sendText()`.
|
|
10
|
+
*
|
|
11
|
+
* ## Abort Handling
|
|
12
|
+
*
|
|
13
|
+
* The `abort()` method is implemented by setting an internal flag that causes
|
|
14
|
+
* the `sendText()` iterator to stop yielding tokens. Since `StreamTextResult`
|
|
15
|
+
* does not expose a native cancellation mechanism, the underlying provider
|
|
16
|
+
* stream continues but its output is discarded.
|
|
17
|
+
*/
|
|
18
|
+
import type { AgentSession } from '../../api/agent.js';
|
|
19
|
+
import type { IVoicePipelineAgentSession, VoiceTurnMetadata } from '../types.js';
|
|
20
|
+
/**
|
|
21
|
+
* Wraps an AgentOS `AgentSession` as an `IVoicePipelineAgentSession`.
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```typescript
|
|
25
|
+
* import { agent } from '@framers/agentos';
|
|
26
|
+
* import { AgentSessionVoiceAdapter } from '@framers/agentos/voice-pipeline';
|
|
27
|
+
*
|
|
28
|
+
* const a = agent({ model: 'gpt-4o' });
|
|
29
|
+
* const session = a.session('voice-session-1');
|
|
30
|
+
* const voiceAdapter = new AgentSessionVoiceAdapter(session);
|
|
31
|
+
*
|
|
32
|
+
* // Use with VoicePipelineOrchestrator
|
|
33
|
+
* orchestrator.startSession(transport, voiceAdapter, overrides);
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export declare class AgentSessionVoiceAdapter implements IVoicePipelineAgentSession {
|
|
37
|
+
private readonly session;
|
|
38
|
+
/** Internal abort flag. Set by `abort()`, checked by the token iterator. */
|
|
39
|
+
private aborted;
|
|
40
|
+
constructor(session: AgentSession);
|
|
41
|
+
/**
|
|
42
|
+
* Send user text to the agent and yield response tokens as an async iterable.
|
|
43
|
+
*
|
|
44
|
+
* The `_metadata` parameter carries voice-specific context (speech duration,
|
|
45
|
+
* endpoint reason, confidence, etc.) that could be injected into the agent's
|
|
46
|
+
* context for more informed responses. Currently the metadata is not forwarded
|
|
47
|
+
* to the agent (the AgentSession API doesn't support metadata injection),
|
|
48
|
+
* but it is available for future enhancement.
|
|
49
|
+
*
|
|
50
|
+
* @param text - Transcribed user speech to send to the agent.
|
|
51
|
+
* @param _metadata - Voice turn metadata (reserved for future use).
|
|
52
|
+
* @returns An async iterable of response token strings.
|
|
53
|
+
*/
|
|
54
|
+
sendText(text: string, _metadata: VoiceTurnMetadata): AsyncIterable<string>;
|
|
55
|
+
/**
|
|
56
|
+
* Abort the current generation.
|
|
57
|
+
* Sets an internal flag causing the active `sendText()` iterator to stop
|
|
58
|
+
* yielding tokens. The underlying LLM stream is not explicitly cancelled
|
|
59
|
+
* but its output is discarded.
|
|
60
|
+
*/
|
|
61
|
+
abort(): void;
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=AgentSessionVoiceAdapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AgentSessionVoiceAdapter.d.ts","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/AgentSessionVoiceAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,0BAA0B,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEjF;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,wBAAyB,YAAW,0BAA0B;IAI7D,OAAO,CAAC,QAAQ,CAAC,OAAO;IAHpC,4EAA4E;IAC5E,OAAO,CAAC,OAAO,CAAS;gBAEK,OAAO,EAAE,YAAY;IAElD;;;;;;;;;;;;OAYG;IACI,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,iBAAiB,GAAG,aAAa,CAAC,MAAM,CAAC;IAWlF;;;;;OAKG;IACH,KAAK,IAAI,IAAI;CAGd"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/providers/AgentSessionVoiceAdapter
|
|
3
|
+
*
|
|
4
|
+
* Adapts an AgentOS {@link AgentSession} to the {@link IVoicePipelineAgentSession}
|
|
5
|
+
* interface required by {@link VoicePipelineOrchestrator}.
|
|
6
|
+
*
|
|
7
|
+
* The adapter wraps `AgentSession.stream(text)` and yields the resulting
|
|
8
|
+
* `textStream` (an `AsyncIterable<string>` of token deltas) as the return
|
|
9
|
+
* value of `sendText()`.
|
|
10
|
+
*
|
|
11
|
+
* ## Abort Handling
|
|
12
|
+
*
|
|
13
|
+
* The `abort()` method is implemented by setting an internal flag that causes
|
|
14
|
+
* the `sendText()` iterator to stop yielding tokens. Since `StreamTextResult`
|
|
15
|
+
* does not expose a native cancellation mechanism, the underlying provider
|
|
16
|
+
* stream continues but its output is discarded.
|
|
17
|
+
*/
|
|
18
|
+
/**
|
|
19
|
+
* Wraps an AgentOS `AgentSession` as an `IVoicePipelineAgentSession`.
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```typescript
|
|
23
|
+
* import { agent } from '@framers/agentos';
|
|
24
|
+
* import { AgentSessionVoiceAdapter } from '@framers/agentos/voice-pipeline';
|
|
25
|
+
*
|
|
26
|
+
* const a = agent({ model: 'gpt-4o' });
|
|
27
|
+
* const session = a.session('voice-session-1');
|
|
28
|
+
* const voiceAdapter = new AgentSessionVoiceAdapter(session);
|
|
29
|
+
*
|
|
30
|
+
* // Use with VoicePipelineOrchestrator
|
|
31
|
+
* orchestrator.startSession(transport, voiceAdapter, overrides);
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export class AgentSessionVoiceAdapter {
|
|
35
|
+
constructor(session) {
|
|
36
|
+
this.session = session;
|
|
37
|
+
/** Internal abort flag. Set by `abort()`, checked by the token iterator. */
|
|
38
|
+
this.aborted = false;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Send user text to the agent and yield response tokens as an async iterable.
|
|
42
|
+
*
|
|
43
|
+
* The `_metadata` parameter carries voice-specific context (speech duration,
|
|
44
|
+
* endpoint reason, confidence, etc.) that could be injected into the agent's
|
|
45
|
+
* context for more informed responses. Currently the metadata is not forwarded
|
|
46
|
+
* to the agent (the AgentSession API doesn't support metadata injection),
|
|
47
|
+
* but it is available for future enhancement.
|
|
48
|
+
*
|
|
49
|
+
* @param text - Transcribed user speech to send to the agent.
|
|
50
|
+
* @param _metadata - Voice turn metadata (reserved for future use).
|
|
51
|
+
* @returns An async iterable of response token strings.
|
|
52
|
+
*/
|
|
53
|
+
async *sendText(text, _metadata) {
|
|
54
|
+
this.aborted = false;
|
|
55
|
+
const result = this.session.stream(text);
|
|
56
|
+
for await (const token of result.textStream) {
|
|
57
|
+
if (this.aborted)
|
|
58
|
+
break;
|
|
59
|
+
yield token;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Abort the current generation.
|
|
64
|
+
* Sets an internal flag causing the active `sendText()` iterator to stop
|
|
65
|
+
* yielding tokens. The underlying LLM stream is not explicitly cancelled
|
|
66
|
+
* but its output is discarded.
|
|
67
|
+
*/
|
|
68
|
+
abort() {
|
|
69
|
+
this.aborted = true;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
//# sourceMappingURL=AgentSessionVoiceAdapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AgentSessionVoiceAdapter.js","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/AgentSessionVoiceAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAKH;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,wBAAwB;IAInC,YAA6B,OAAqB;QAArB,YAAO,GAAP,OAAO,CAAc;QAHlD,4EAA4E;QACpE,YAAO,GAAG,KAAK,CAAC;IAE6B,CAAC;IAEtD;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAY,EAAE,SAA4B;QACxD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QAErB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEzC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAC5C,IAAI,IAAI,CAAC,OAAO;gBAAE,MAAM;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/providers/ElevenLabsStreamingSTT
|
|
3
|
+
*
|
|
4
|
+
* Streaming speech-to-text adapter for ElevenLabs' Speech-to-Text API.
|
|
5
|
+
* Implements {@link IStreamingSTT} / {@link StreamingSTTSession} for the
|
|
6
|
+
* voice pipeline orchestrator.
|
|
7
|
+
*
|
|
8
|
+
* ## ElevenLabs STT WebSocket Protocol
|
|
9
|
+
*
|
|
10
|
+
* - **Endpoint:** `wss://api.elevenlabs.io/v1/speech-to-text/stream`
|
|
11
|
+
* - **Authentication:** `xi-api-key` header on upgrade
|
|
12
|
+
* - **Inbound (client → ElevenLabs):** Binary PCM frames (16-bit signed LE, 16kHz mono)
|
|
13
|
+
* - **Outbound (ElevenLabs → client):** JSON transcript results
|
|
14
|
+
* - **Close:** Send JSON `{ "type": "close_stream" }` to finalize
|
|
15
|
+
*
|
|
16
|
+
* ## Fallback: Chunked REST
|
|
17
|
+
*
|
|
18
|
+
* If the WebSocket endpoint is unavailable or errors, this adapter falls back
|
|
19
|
+
* to a chunked REST approach: accumulates audio into ~2s chunks and POSTs each
|
|
20
|
+
* to `/v1/speech-to-text` for batch transcription. This provides near-realtime
|
|
21
|
+
* results (2s latency per chunk) using only the REST API.
|
|
22
|
+
*
|
|
23
|
+
* @see https://elevenlabs.io/docs/api-reference/speech-to-text
|
|
24
|
+
*/
|
|
25
|
+
import type { IStreamingSTT, StreamingSTTSession, StreamingSTTConfig } from '../types.js';
|
|
26
|
+
/**
|
|
27
|
+
* Configuration for the {@link ElevenLabsStreamingSTT} provider.
|
|
28
|
+
*/
|
|
29
|
+
export interface ElevenLabsStreamingSTTConfig {
|
|
30
|
+
/** ElevenLabs API key. */
|
|
31
|
+
apiKey: string;
|
|
32
|
+
/**
|
|
33
|
+
* Base URL for the ElevenLabs API.
|
|
34
|
+
* @default 'https://api.elevenlabs.io/v1'
|
|
35
|
+
*/
|
|
36
|
+
baseUrl?: string;
|
|
37
|
+
/**
|
|
38
|
+
* STT model to use.
|
|
39
|
+
* @default 'scribe_v1'
|
|
40
|
+
*/
|
|
41
|
+
model?: string;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Streaming STT provider using ElevenLabs' Speech-to-Text API.
|
|
45
|
+
*
|
|
46
|
+
* Uses chunked REST transcription (2-second audio windows) to provide
|
|
47
|
+
* near-realtime STT with the same ElevenLabs API key used for TTS.
|
|
48
|
+
* No separate Deepgram key required.
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```typescript
|
|
52
|
+
* const stt = new ElevenLabsStreamingSTT({
|
|
53
|
+
* apiKey: process.env.ELEVENLABS_API_KEY!,
|
|
54
|
+
* });
|
|
55
|
+
* const session = await stt.startSession({ language: 'en' });
|
|
56
|
+
* session.on('transcript', (event) => console.log(event.text));
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
export declare class ElevenLabsStreamingSTT implements IStreamingSTT {
|
|
60
|
+
private readonly config;
|
|
61
|
+
readonly providerId = "elevenlabs-streaming-stt";
|
|
62
|
+
readonly isStreaming = true;
|
|
63
|
+
constructor(config: ElevenLabsStreamingSTTConfig);
|
|
64
|
+
/**
|
|
65
|
+
* Create a new STT session. Uses chunked REST calls to ElevenLabs'
|
|
66
|
+
* batch STT endpoint for near-realtime transcription.
|
|
67
|
+
*/
|
|
68
|
+
startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession>;
|
|
69
|
+
}
|
|
70
|
+
//# sourceMappingURL=ElevenLabsStreamingSTT.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ElevenLabsStreamingSTT.d.ts","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/ElevenLabsStreamingSTT.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,KAAK,EACV,aAAa,EACb,mBAAmB,EACnB,kBAAkB,EAInB,MAAM,aAAa,CAAC;AAMrB;;GAEG;AACH,MAAM,WAAW,4BAA4B;IAC3C,0BAA0B;IAC1B,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAoQD;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,sBAAuB,YAAW,aAAa;IAI9C,OAAO,CAAC,QAAQ,CAAC,MAAM;IAHnC,QAAQ,CAAC,UAAU,8BAA8B;IACjD,QAAQ,CAAC,WAAW,QAAQ;gBAEC,MAAM,EAAE,4BAA4B;IAEjE;;;OAGG;IACG,YAAY,CAAC,MAAM,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAG9E"}
|