@framers/agentos 0.1.175 → 0.1.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/api/agent.d.ts.map +1 -1
  2. package/dist/api/agent.js +53 -5
  3. package/dist/api/agent.js.map +1 -1
  4. package/dist/api/generateText.d.ts +1 -1
  5. package/dist/api/generateText.d.ts.map +1 -1
  6. package/dist/api/generateText.js +1 -0
  7. package/dist/api/generateText.js.map +1 -1
  8. package/dist/cognitive_substrate/GMIEvent.d.ts +6 -1
  9. package/dist/cognitive_substrate/GMIEvent.d.ts.map +1 -1
  10. package/dist/cognitive_substrate/GMIEvent.js +5 -0
  11. package/dist/cognitive_substrate/GMIEvent.js.map +1 -1
  12. package/dist/memory/index.d.ts +2 -0
  13. package/dist/memory/index.d.ts.map +1 -1
  14. package/dist/memory/index.js +1 -0
  15. package/dist/memory/index.js.map +1 -1
  16. package/dist/memory/io/facade/Memory.d.ts +7 -6
  17. package/dist/memory/io/facade/Memory.d.ts.map +1 -1
  18. package/dist/memory/io/facade/Memory.js +37 -13
  19. package/dist/memory/io/facade/Memory.js.map +1 -1
  20. package/dist/memory/mechanisms/CognitiveMechanismsEngine.d.ts +4 -0
  21. package/dist/memory/mechanisms/CognitiveMechanismsEngine.d.ts.map +1 -1
  22. package/dist/memory/mechanisms/CognitiveMechanismsEngine.js +9 -1
  23. package/dist/memory/mechanisms/CognitiveMechanismsEngine.js.map +1 -1
  24. package/dist/memory/mechanisms/PersonaDriftMechanism.d.ts +50 -0
  25. package/dist/memory/mechanisms/PersonaDriftMechanism.d.ts.map +1 -0
  26. package/dist/memory/mechanisms/PersonaDriftMechanism.js +104 -0
  27. package/dist/memory/mechanisms/PersonaDriftMechanism.js.map +1 -0
  28. package/dist/memory/mechanisms/types.d.ts +2 -0
  29. package/dist/memory/mechanisms/types.d.ts.map +1 -1
  30. package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +8 -8
  31. package/dist/voice-pipeline/WebSocketStreamTransport.js +12 -12
  32. package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -1
  33. package/dist/voice-pipeline/index.d.ts +78 -18
  34. package/dist/voice-pipeline/index.d.ts.map +1 -1
  35. package/dist/voice-pipeline/index.js +79 -18
  36. package/dist/voice-pipeline/index.js.map +1 -1
  37. package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.d.ts +63 -0
  38. package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.d.ts.map +1 -0
  39. package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.js +72 -0
  40. package/dist/voice-pipeline/providers/AgentSessionVoiceAdapter.js.map +1 -0
  41. package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.d.ts +70 -0
  42. package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.d.ts.map +1 -0
  43. package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.js +248 -0
  44. package/dist/voice-pipeline/providers/ElevenLabsStreamingSTT.js.map +1 -0
  45. package/dist/voice-pipeline/providers/index.d.ts +13 -0
  46. package/dist/voice-pipeline/providers/index.d.ts.map +1 -0
  47. package/dist/voice-pipeline/providers/index.js +13 -0
  48. package/dist/voice-pipeline/providers/index.js.map +1 -0
  49. package/package.json +1 -1
@@ -1,39 +1,98 @@
1
1
  /**
2
2
  * @module @framers/agentos/voice-pipeline
3
3
  *
4
- * Barrel exports for the AgentOS streaming voice pipeline.
4
+ * Real-time streaming voice pipeline for AgentOS.
5
5
  *
6
- * This module provides all the building blocks needed to assemble a real-time
7
- * voice conversation system:
6
+ * Provides a complete, provider-agnostic voice conversation system with
7
+ * pluggable STT, TTS, endpoint detection, barge-in handling, and transport.
8
8
  *
9
- * - **Types** -- All interfaces and type aliases defining the pipeline's contracts
10
- * ({@link AudioFrame}, {@link IStreamTransport}, {@link IEndpointDetector}, etc.).
9
+ * ## Architecture
11
10
  *
12
- * - **Orchestrator** -- {@link VoicePipelineOrchestrator} is the central state machine
13
- * that wires transport, STT, endpoint detection, TTS, and barge-in handling into
14
- * a coordinated conversation loop.
11
+ * ```
12
+ * Browser Mic → Transport → STT → Endpoint Detector → Agent → TTS → Transport → Browser Speaker
13
+ *
14
+ * Barge-in Handler
15
+ * ```
16
+ *
17
+ * All components are injected via `VoicePipelineOverrides`, making the pipeline
18
+ * fully provider-agnostic. Swap Deepgram for ElevenLabs STT, or ElevenLabs for
19
+ * OpenAI TTS, by changing one line.
20
+ *
21
+ * ## Built-in Providers
22
+ *
23
+ * **STT (Speech-to-Text):**
24
+ * - {@link DeepgramStreamingSTT} — WebSocket streaming via Deepgram Nova-2. Lowest latency.
25
+ * - {@link ElevenLabsStreamingSTT} — Chunked REST via ElevenLabs Scribe. Uses same key as TTS.
15
26
  *
16
- * - **Endpoint Detectors** -- Two strategies for detecting turn boundaries:
17
- * - {@link HeuristicEndpointDetector}: Rule-based (punctuation + silence timeout).
18
- * - {@link AcousticEndpointDetector}: Purely acoustic (silence-only, no transcript analysis).
27
+ * **TTS (Text-to-Speech):**
28
+ * - {@link ElevenLabsStreamingTTS} — WebSocket streaming via ElevenLabs. High quality voices.
19
29
  *
20
- * - **Barge-in Handlers** -- Two strategies for handling user interruptions:
21
- * - {@link HardCutBargeinHandler}: Immediate stop above a speech duration threshold.
22
- * - {@link SoftFadeBargeinHandler}: Three-tier (ignore/pause/cancel) with configurable fade.
30
+ * **Endpoint Detection:**
31
+ * - {@link HeuristicEndpointDetector} — Punctuation + silence timeout. Fast, no model needed.
32
+ * - {@link AcousticEndpointDetector} — Silence-only, no transcript analysis.
23
33
  *
24
- * - **Transport** -- {@link WebSocketStreamTransport}: WebSocket-based bidirectional
25
- * audio/text transport implementing {@link IStreamTransport}.
34
+ * **Barge-in Handling:**
35
+ * - {@link HardCutBargeinHandler} — Immediate TTS cancel above speech threshold.
36
+ * - {@link SoftFadeBargeinHandler} — Three-tier (ignore/pause/cancel) with configurable fade.
26
37
  *
27
- * - **Error** -- {@link VoiceInterruptError}: Typed error for barge-in interruptions.
38
+ * **Transport:**
39
+ * - {@link WebSocketStreamTransport} — WebSocket bidirectional audio/text.
40
+ * - {@link WebRTCStreamTransport} — WebRTC DataChannel transport.
41
+ *
42
+ * **Agent Adapter:**
43
+ * - {@link AgentSessionVoiceAdapter} — Wraps any AgentOS `AgentSession` as `IVoicePipelineAgentSession`.
44
+ *
45
+ * ## Usage
28
46
  *
29
- * @example
30
47
  * ```typescript
31
48
  * import {
32
49
  * VoicePipelineOrchestrator,
33
50
  * HeuristicEndpointDetector,
34
51
  * HardCutBargeinHandler,
35
52
  * WebSocketStreamTransport,
53
+ * ElevenLabsStreamingSTT,
54
+ * ElevenLabsStreamingTTS,
55
+ * AgentSessionVoiceAdapter,
36
56
  * } from '../voice-pipeline';
57
+ * import { agent } from '@framers/agentos';
58
+ *
59
+ * // Create agent and voice adapter
60
+ * const a = agent({ model: 'gpt-4o-mini', instructions: 'You are a voice companion.' });
61
+ * const session = a.session('voice-1');
62
+ * const voiceAdapter = new AgentSessionVoiceAdapter(session);
63
+ *
64
+ * // Create providers (use whichever API keys you have)
65
+ * const stt = new ElevenLabsStreamingSTT({ apiKey: process.env.ELEVENLABS_API_KEY! });
66
+ * const tts = new ElevenLabsStreamingTTS({ apiKey: process.env.ELEVENLABS_API_KEY! });
67
+ *
68
+ * // Create and start the pipeline
69
+ * const orchestrator = new VoicePipelineOrchestrator({
70
+ * stt: 'elevenlabs', tts: 'elevenlabs', language: 'en-US',
71
+ * });
72
+ *
73
+ * const pipelineSession = await orchestrator.startSession(transport, voiceAdapter, {
74
+ * streamingSTT: stt,
75
+ * streamingTTS: tts,
76
+ * endpointDetector: new HeuristicEndpointDetector(),
77
+ * bargeinHandler: new HardCutBargeinHandler(),
78
+ * });
79
+ *
80
+ * // Listen for state changes (idle → listening → processing → speaking → listening)
81
+ * pipelineSession.on('state_change', (state) => console.log('Pipeline:', state));
82
+ * ```
83
+ *
84
+ * ## Custom Providers
85
+ *
86
+ * Implement {@link IStreamingSTT} and {@link IStreamingTTS} to add any provider:
87
+ *
88
+ * ```typescript
89
+ * class MyCustomSTT implements IStreamingSTT {
90
+ * readonly providerId = 'my-custom-stt';
91
+ * readonly isStreaming = true;
92
+ * async startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession> {
93
+ * // Connect to your STT service, return a session that emits 'transcript' events
94
+ * }
95
+ * }
37
96
  * ```
38
97
  */
39
98
  export * from './types.js';
@@ -45,4 +104,5 @@ export { WebSocketStreamTransport } from './WebSocketStreamTransport.js';
45
104
  export { WebRTCStreamTransport, createWebRTCTransport } from './WebRTCStreamTransport.js';
46
105
  export { VoicePipelineOrchestrator } from './VoicePipelineOrchestrator.js';
47
106
  export { VoiceInterruptError } from './VoiceInterruptError.js';
107
+ export { DeepgramStreamingSTT, type DeepgramStreamingSTTConfig, ElevenLabsStreamingSTT, type ElevenLabsStreamingSTTConfig, ElevenLabsStreamingTTS, type ElevenLabsStreamingTTSConfig, AgentSessionVoiceAdapter, } from './providers/index.js';
48
108
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAIH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGrE,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAG1F,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAG3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgGG;AAIH,cAAc,YAAY,CAAC;AAG3B,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGrE,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAGzE,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAG1F,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAG3E,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAG/D,OAAO,EACL,oBAAoB,EACpB,KAAK,0BAA0B,EAC/B,sBAAsB,EACtB,KAAK,4BAA4B,EACjC,sBAAsB,EACtB,KAAK,4BAA4B,EACjC,wBAAwB,GACzB,MAAM,sBAAsB,CAAC"}
@@ -1,39 +1,98 @@
1
1
  /**
2
2
  * @module @framers/agentos/voice-pipeline
3
3
  *
4
- * Barrel exports for the AgentOS streaming voice pipeline.
4
+ * Real-time streaming voice pipeline for AgentOS.
5
5
  *
6
- * This module provides all the building blocks needed to assemble a real-time
7
- * voice conversation system:
6
+ * Provides a complete, provider-agnostic voice conversation system with
7
+ * pluggable STT, TTS, endpoint detection, barge-in handling, and transport.
8
8
  *
9
- * - **Types** -- All interfaces and type aliases defining the pipeline's contracts
10
- * ({@link AudioFrame}, {@link IStreamTransport}, {@link IEndpointDetector}, etc.).
9
+ * ## Architecture
11
10
  *
12
- * - **Orchestrator** -- {@link VoicePipelineOrchestrator} is the central state machine
13
- * that wires transport, STT, endpoint detection, TTS, and barge-in handling into
14
- * a coordinated conversation loop.
11
+ * ```
12
+ * Browser Mic → Transport → STT → Endpoint Detector → Agent → TTS → Transport → Browser Speaker
13
+ *
14
+ * Barge-in Handler
15
+ * ```
16
+ *
17
+ * All components are injected via `VoicePipelineOverrides`, making the pipeline
18
+ * fully provider-agnostic. Swap Deepgram for ElevenLabs STT, or ElevenLabs for
19
+ * OpenAI TTS, by changing one line.
20
+ *
21
+ * ## Built-in Providers
22
+ *
23
+ * **STT (Speech-to-Text):**
24
+ * - {@link DeepgramStreamingSTT} — WebSocket streaming via Deepgram Nova-2. Lowest latency.
25
+ * - {@link ElevenLabsStreamingSTT} — Chunked REST via ElevenLabs Scribe. Uses same key as TTS.
15
26
  *
16
- * - **Endpoint Detectors** -- Two strategies for detecting turn boundaries:
17
- * - {@link HeuristicEndpointDetector}: Rule-based (punctuation + silence timeout).
18
- * - {@link AcousticEndpointDetector}: Purely acoustic (silence-only, no transcript analysis).
27
+ * **TTS (Text-to-Speech):**
28
+ * - {@link ElevenLabsStreamingTTS} — WebSocket streaming via ElevenLabs. High quality voices.
19
29
  *
20
- * - **Barge-in Handlers** -- Two strategies for handling user interruptions:
21
- * - {@link HardCutBargeinHandler}: Immediate stop above a speech duration threshold.
22
- * - {@link SoftFadeBargeinHandler}: Three-tier (ignore/pause/cancel) with configurable fade.
30
+ * **Endpoint Detection:**
31
+ * - {@link HeuristicEndpointDetector} — Punctuation + silence timeout. Fast, no model needed.
32
+ * - {@link AcousticEndpointDetector} — Silence-only, no transcript analysis.
23
33
  *
24
- * - **Transport** -- {@link WebSocketStreamTransport}: WebSocket-based bidirectional
25
- * audio/text transport implementing {@link IStreamTransport}.
34
+ * **Barge-in Handling:**
35
+ * - {@link HardCutBargeinHandler} — Immediate TTS cancel above speech threshold.
36
+ * - {@link SoftFadeBargeinHandler} — Three-tier (ignore/pause/cancel) with configurable fade.
26
37
  *
27
- * - **Error** -- {@link VoiceInterruptError}: Typed error for barge-in interruptions.
38
+ * **Transport:**
39
+ * - {@link WebSocketStreamTransport} — WebSocket bidirectional audio/text.
40
+ * - {@link WebRTCStreamTransport} — WebRTC DataChannel transport.
41
+ *
42
+ * **Agent Adapter:**
43
+ * - {@link AgentSessionVoiceAdapter} — Wraps any AgentOS `AgentSession` as `IVoicePipelineAgentSession`.
44
+ *
45
+ * ## Usage
28
46
  *
29
- * @example
30
47
  * ```typescript
31
48
  * import {
32
49
  * VoicePipelineOrchestrator,
33
50
  * HeuristicEndpointDetector,
34
51
  * HardCutBargeinHandler,
35
52
  * WebSocketStreamTransport,
53
+ * ElevenLabsStreamingSTT,
54
+ * ElevenLabsStreamingTTS,
55
+ * AgentSessionVoiceAdapter,
36
56
  * } from '../voice-pipeline/index.js';
57
+ * import { agent } from '@framers/agentos';
58
+ *
59
+ * // Create agent and voice adapter
60
+ * const a = agent({ model: 'gpt-4o-mini', instructions: 'You are a voice companion.' });
61
+ * const session = a.session('voice-1');
62
+ * const voiceAdapter = new AgentSessionVoiceAdapter(session);
63
+ *
64
+ * // Create providers (use whichever API keys you have)
65
+ * const stt = new ElevenLabsStreamingSTT({ apiKey: process.env.ELEVENLABS_API_KEY! });
66
+ * const tts = new ElevenLabsStreamingTTS({ apiKey: process.env.ELEVENLABS_API_KEY! });
67
+ *
68
+ * // Create and start the pipeline
69
+ * const orchestrator = new VoicePipelineOrchestrator({
70
+ * stt: 'elevenlabs', tts: 'elevenlabs', language: 'en-US',
71
+ * });
72
+ *
73
+ * const pipelineSession = await orchestrator.startSession(transport, voiceAdapter, {
74
+ * streamingSTT: stt,
75
+ * streamingTTS: tts,
76
+ * endpointDetector: new HeuristicEndpointDetector(),
77
+ * bargeinHandler: new HardCutBargeinHandler(),
78
+ * });
79
+ *
80
+ * // Listen for state changes (idle → listening → processing → speaking → listening)
81
+ * pipelineSession.on('state_change', (state) => console.log('Pipeline:', state));
82
+ * ```
83
+ *
84
+ * ## Custom Providers
85
+ *
86
+ * Implement {@link IStreamingSTT} and {@link IStreamingTTS} to add any provider:
87
+ *
88
+ * ```typescript
89
+ * class MyCustomSTT implements IStreamingSTT {
90
+ * readonly providerId = 'my-custom-stt';
91
+ * readonly isStreaming = true;
92
+ * async startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession> {
93
+ * // Connect to your STT service, return a session that emits 'transcript' events
94
+ * }
95
+ * }
37
96
  * ```
38
97
  */
39
98
  // Re-export all type definitions from the types module.
@@ -53,4 +112,6 @@ export { WebRTCStreamTransport, createWebRTCTransport } from './WebRTCStreamTran
53
112
  export { VoicePipelineOrchestrator } from './VoicePipelineOrchestrator.js';
54
113
  // Typed error for barge-in interruptions
55
114
  export { VoiceInterruptError } from './VoiceInterruptError.js';
115
+ // Streaming provider implementations
116
+ export { DeepgramStreamingSTT, ElevenLabsStreamingSTT, ElevenLabsStreamingTTS, AgentSessionVoiceAdapter, } from './providers/index.js';
56
117
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,wDAAwD;AACxD,8EAA8E;AAC9E,cAAc,YAAY,CAAC;AAE3B,4CAA4C;AAC5C,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,6CAA6C;AAC7C,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,2CAA2C;AAC3C,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,oDAAoD;AACpD,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAE1F,gDAAgD;AAChD,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAE3E,yCAAyC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/voice-pipeline/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgGG;AAEH,wDAAwD;AACxD,8EAA8E;AAC9E,cAAc,YAAY,CAAC;AAE3B,4CAA4C;AAC5C,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAErE,6CAA6C;AAC7C,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,2CAA2C;AAC3C,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,oDAAoD;AACpD,OAAO,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAE1F,gDAAgD;AAChD,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAE3E,yCAAyC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAE/D,qCAAqC;AACrC,OAAO,EACL,oBAAoB,EAEpB,sBAAsB,EAEtB,sBAAsB,EAEtB,wBAAwB,GACzB,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * @module voice-pipeline/providers/AgentSessionVoiceAdapter
3
+ *
4
+ * Adapts an AgentOS {@link AgentSession} to the {@link IVoicePipelineAgentSession}
5
+ * interface required by {@link VoicePipelineOrchestrator}.
6
+ *
7
+ * The adapter wraps `AgentSession.stream(text)` and yields the resulting
8
+ * `textStream` (an `AsyncIterable<string>` of token deltas) as the return
9
+ * value of `sendText()`.
10
+ *
11
+ * ## Abort Handling
12
+ *
13
+ * The `abort()` method is implemented by setting an internal flag that causes
14
+ * the `sendText()` iterator to stop yielding tokens. Since `StreamTextResult`
15
+ * does not expose a native cancellation mechanism, the underlying provider
16
+ * stream continues but its output is discarded.
17
+ */
18
+ import type { AgentSession } from '../../api/agent.js';
19
+ import type { IVoicePipelineAgentSession, VoiceTurnMetadata } from '../types.js';
20
+ /**
21
+ * Wraps an AgentOS `AgentSession` as an `IVoicePipelineAgentSession`.
22
+ *
23
+ * @example
24
+ * ```typescript
25
+ * import { agent } from '@framers/agentos';
26
+ * import { AgentSessionVoiceAdapter } from '../../voice-pipeline';
27
+ *
28
+ * const a = agent({ model: 'gpt-4o' });
29
+ * const session = a.session('voice-session-1');
30
+ * const voiceAdapter = new AgentSessionVoiceAdapter(session);
31
+ *
32
+ * // Use with VoicePipelineOrchestrator
33
+ * orchestrator.startSession(transport, voiceAdapter, overrides);
34
+ * ```
35
+ */
36
+ export declare class AgentSessionVoiceAdapter implements IVoicePipelineAgentSession {
37
+ private readonly session;
38
+ /** Internal abort flag. Set by `abort()`, checked by the token iterator. */
39
+ private aborted;
40
+ constructor(session: AgentSession);
41
+ /**
42
+ * Send user text to the agent and yield response tokens as an async iterable.
43
+ *
44
+ * The `metadata` parameter carries voice-specific context (speech duration,
45
+ * endpoint reason, confidence, etc.) that could be injected into the agent's
46
+ * context for more informed responses. Currently the metadata is not forwarded
47
+ * to the agent (the AgentSession API doesn't support metadata injection),
48
+ * but it is available for future enhancement.
49
+ *
50
+ * @param text - Transcribed user speech to send to the agent.
51
+ * @param _metadata - Voice turn metadata (reserved for future use).
52
+ * @returns An async iterable of response token strings.
53
+ */
54
+ sendText(text: string, _metadata: VoiceTurnMetadata): AsyncIterable<string>;
55
+ /**
56
+ * Abort the current generation.
57
+ * Sets an internal flag causing the active `sendText()` iterator to stop
58
+ * yielding tokens. The underlying LLM stream is not explicitly cancelled
59
+ * but its output is discarded.
60
+ */
61
+ abort(): void;
62
+ }
63
+ //# sourceMappingURL=AgentSessionVoiceAdapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AgentSessionVoiceAdapter.d.ts","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/AgentSessionVoiceAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,0BAA0B,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AAEjF;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,wBAAyB,YAAW,0BAA0B;IAI7D,OAAO,CAAC,QAAQ,CAAC,OAAO;IAHpC,4EAA4E;IAC5E,OAAO,CAAC,OAAO,CAAS;gBAEK,OAAO,EAAE,YAAY;IAElD;;;;;;;;;;;;OAYG;IACI,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,iBAAiB,GAAG,aAAa,CAAC,MAAM,CAAC;IAWlF;;;;;OAKG;IACH,KAAK,IAAI,IAAI;CAGd"}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * @module voice-pipeline/providers/AgentSessionVoiceAdapter
3
+ *
4
+ * Adapts an AgentOS {@link AgentSession} to the {@link IVoicePipelineAgentSession}
5
+ * interface required by {@link VoicePipelineOrchestrator}.
6
+ *
7
+ * The adapter wraps `AgentSession.stream(text)` and yields the resulting
8
+ * `textStream` (an `AsyncIterable<string>` of token deltas) as the return
9
+ * value of `sendText()`.
10
+ *
11
+ * ## Abort Handling
12
+ *
13
+ * The `abort()` method is implemented by setting an internal flag that causes
14
+ * the `sendText()` iterator to stop yielding tokens. Since `StreamTextResult`
15
+ * does not expose a native cancellation mechanism, the underlying provider
16
+ * stream continues but its output is discarded.
17
+ */
18
+ /**
19
+ * Wraps an AgentOS `AgentSession` as an `IVoicePipelineAgentSession`.
20
+ *
21
+ * @example
22
+ * ```typescript
23
+ * import { agent } from '@framers/agentos';
24
+ * import { AgentSessionVoiceAdapter } from '../../voice-pipeline/index.js';
25
+ *
26
+ * const a = agent({ model: 'gpt-4o' });
27
+ * const session = a.session('voice-session-1');
28
+ * const voiceAdapter = new AgentSessionVoiceAdapter(session);
29
+ *
30
+ * // Use with VoicePipelineOrchestrator
31
+ * orchestrator.startSession(transport, voiceAdapter, overrides);
32
+ * ```
33
+ */
34
+ export class AgentSessionVoiceAdapter {
35
+ constructor(session) {
36
+ this.session = session;
37
+ /** Internal abort flag. Set by `abort()`, checked by the token iterator. */
38
+ this.aborted = false;
39
+ }
40
+ /**
41
+ * Send user text to the agent and yield response tokens as an async iterable.
42
+ *
43
+ * The `metadata` parameter carries voice-specific context (speech duration,
44
+ * endpoint reason, confidence, etc.) that could be injected into the agent's
45
+ * context for more informed responses. Currently the metadata is not forwarded
46
+ * to the agent (the AgentSession API doesn't support metadata injection),
47
+ * but it is available for future enhancement.
48
+ *
49
+ * @param text - Transcribed user speech to send to the agent.
50
+ * @param _metadata - Voice turn metadata (reserved for future use).
51
+ * @returns An async iterable of response token strings.
52
+ */
53
+ async *sendText(text, _metadata) {
54
+ this.aborted = false;
55
+ const result = this.session.stream(text);
56
+ for await (const token of result.textStream) {
57
+ if (this.aborted)
58
+ break;
59
+ yield token;
60
+ }
61
+ }
62
+ /**
63
+ * Abort the current generation.
64
+ * Sets an internal flag causing the active `sendText()` iterator to stop
65
+ * yielding tokens. The underlying LLM stream is not explicitly cancelled
66
+ * but its output is discarded.
67
+ */
68
+ abort() {
69
+ this.aborted = true;
70
+ }
71
+ }
72
+ //# sourceMappingURL=AgentSessionVoiceAdapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AgentSessionVoiceAdapter.js","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/AgentSessionVoiceAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAKH;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,wBAAwB;IAInC,YAA6B,OAAqB;QAArB,YAAO,GAAP,OAAO,CAAc;QAHlD,4EAA4E;QACpE,YAAO,GAAG,KAAK,CAAC;IAE6B,CAAC;IAEtD;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAY,EAAE,SAA4B;QACxD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QAErB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEzC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAC5C,IAAI,IAAI,CAAC,OAAO;gBAAE,MAAM;YACxB,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;CACF"}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * @module voice-pipeline/providers/ElevenLabsStreamingSTT
3
+ *
4
+ * Streaming speech-to-text adapter for ElevenLabs' WebSocket STT API.
5
+ * Implements {@link IStreamingSTT} / {@link StreamingSTTSession} for the
6
+ * voice pipeline orchestrator.
7
+ *
8
+ * ## ElevenLabs STT WebSocket Protocol
9
+ *
10
+ * - **Endpoint:** `wss://api.elevenlabs.io/v1/speech-to-text/stream`
11
+ * - **Authentication:** `xi-api-key` header on upgrade
12
+ * - **Inbound (client → ElevenLabs):** Binary PCM frames (16-bit signed LE, 16kHz mono)
13
+ * - **Outbound (ElevenLabs → client):** JSON transcript results
14
+ * - **Close:** Send JSON `{ "type": "close_stream" }` to finalize
15
+ *
16
+ * ## Fallback: Chunked REST
17
+ *
18
+ * If the WebSocket endpoint is unavailable or errors, this adapter falls back
19
+ * to a chunked REST approach: accumulates audio into ~2s chunks and POSTs each
20
+ * to `/v1/speech-to-text` for batch transcription. This provides near-realtime
21
+ * results (2s latency per chunk) using only the REST API.
22
+ *
23
+ * @see https://elevenlabs.io/docs/api-reference/speech-to-text
24
+ */
25
+ import type { IStreamingSTT, StreamingSTTSession, StreamingSTTConfig } from '../types.js';
26
+ /**
27
+ * Configuration for the {@link ElevenLabsStreamingSTT} provider.
28
+ */
29
+ export interface ElevenLabsStreamingSTTConfig {
30
+ /** ElevenLabs API key. */
31
+ apiKey: string;
32
+ /**
33
+ * Base URL for the ElevenLabs API.
34
+ * @default 'https://api.elevenlabs.io/v1'
35
+ */
36
+ baseUrl?: string;
37
+ /**
38
+ * STT model to use.
39
+ * @default 'scribe_v1'
40
+ */
41
+ model?: string;
42
+ }
43
+ /**
44
+ * Streaming STT provider using ElevenLabs' Speech-to-Text API.
45
+ *
46
+ * Uses chunked REST transcription (2-second audio windows) to provide
47
+ * near-realtime STT with the same ElevenLabs API key used for TTS.
48
+ * No separate Deepgram key required.
49
+ *
50
+ * @example
51
+ * ```typescript
52
+ * const stt = new ElevenLabsStreamingSTT({
53
+ * apiKey: process.env.ELEVENLABS_API_KEY!,
54
+ * });
55
+ * const session = await stt.startSession({ language: 'en' });
56
+ * session.on('transcript', (event) => console.log(event.text));
57
+ * ```
58
+ */
59
+ export declare class ElevenLabsStreamingSTT implements IStreamingSTT {
60
+ private readonly config;
61
+ readonly providerId = "elevenlabs-streaming-stt";
62
+ readonly isStreaming = true;
63
+ constructor(config: ElevenLabsStreamingSTTConfig);
64
+ /**
65
+ * Create a new STT session. Uses chunked REST calls to ElevenLabs'
66
+ * batch STT endpoint for near-realtime transcription.
67
+ */
68
+ startSession(config?: StreamingSTTConfig): Promise<StreamingSTTSession>;
69
+ }
70
+ //# sourceMappingURL=ElevenLabsStreamingSTT.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ElevenLabsStreamingSTT.d.ts","sourceRoot":"","sources":["../../../src/voice-pipeline/providers/ElevenLabsStreamingSTT.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,KAAK,EACV,aAAa,EACb,mBAAmB,EACnB,kBAAkB,EAInB,MAAM,aAAa,CAAC;AAMrB;;GAEG;AACH,MAAM,WAAW,4BAA4B;IAC3C,0BAA0B;IAC1B,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAoQD;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,sBAAuB,YAAW,aAAa;IAI9C,OAAO,CAAC,QAAQ,CAAC,MAAM;IAHnC,QAAQ,CAAC,UAAU,8BAA8B;IACjD,QAAQ,CAAC,WAAW,QAAQ;gBAEC,MAAM,EAAE,4BAA4B;IAEjE;;;OAGG;IACG,YAAY,CAAC,MAAM,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,mBAAmB,CAAC;CAG9E"}