@ariaflowagents/livekit-plugin-transport-sip 0.9.10 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/audio_input.d.ts +6 -5
  2. package/dist/audio_input.js +9 -7
  3. package/dist/audio_output.d.ts +0 -1
  4. package/dist/audio_output.js +0 -1
  5. package/dist/index.d.ts +1 -1
  6. package/dist/index.js +0 -1
  7. package/dist/native_bridge.d.ts +0 -1
  8. package/dist/native_bridge.js +0 -1
  9. package/dist/rtp/jitter_buffer.d.ts +0 -1
  10. package/dist/rtp/jitter_buffer.js +0 -1
  11. package/dist/rtp/rtp_packet.d.ts +0 -1
  12. package/dist/rtp/rtp_packet.js +0 -1
  13. package/dist/rtp/rtp_session.d.ts +0 -1
  14. package/dist/rtp/rtp_session.js +0 -1
  15. package/dist/sdp_g711.d.ts +0 -1
  16. package/dist/sdp_g711.js +0 -1
  17. package/dist/server.d.ts +86 -13
  18. package/dist/server.js +202 -33
  19. package/dist/sip_signaling.d.ts +0 -1
  20. package/dist/sip_signaling.js +0 -1
  21. package/dist/text_output.d.ts +0 -1
  22. package/dist/text_output.js +0 -1
  23. package/dist/transport_adapter.d.ts +9 -2
  24. package/dist/transport_adapter.js +9 -9
  25. package/dist/types.d.ts +0 -1
  26. package/dist/types.js +0 -1
  27. package/package.json +12 -12
  28. package/dist/audio_input.d.ts.map +0 -1
  29. package/dist/audio_input.js.map +0 -1
  30. package/dist/audio_output.d.ts.map +0 -1
  31. package/dist/audio_output.js.map +0 -1
  32. package/dist/codec/g711.d.ts +0 -17
  33. package/dist/codec/g711.d.ts.map +0 -1
  34. package/dist/codec/g711.js +0 -123
  35. package/dist/codec/g711.js.map +0 -1
  36. package/dist/index.d.ts.map +0 -1
  37. package/dist/index.js.map +0 -1
  38. package/dist/jssip_signaling.d.ts +0 -76
  39. package/dist/jssip_signaling.d.ts.map +0 -1
  40. package/dist/jssip_signaling.js +0 -255
  41. package/dist/jssip_signaling.js.map +0 -1
  42. package/dist/native_bridge.d.ts.map +0 -1
  43. package/dist/native_bridge.js.map +0 -1
  44. package/dist/rtp/jitter_buffer.d.ts.map +0 -1
  45. package/dist/rtp/jitter_buffer.js.map +0 -1
  46. package/dist/rtp/rtp_packet.d.ts.map +0 -1
  47. package/dist/rtp/rtp_packet.js.map +0 -1
  48. package/dist/rtp/rtp_session.d.ts.map +0 -1
  49. package/dist/rtp/rtp_session.js.map +0 -1
  50. package/dist/sdp_g711.d.ts.map +0 -1
  51. package/dist/sdp_g711.js.map +0 -1
  52. package/dist/server.d.ts.map +0 -1
  53. package/dist/server.js.map +0 -1
  54. package/dist/sip_signaling.d.ts.map +0 -1
  55. package/dist/sip_signaling.js.map +0 -1
  56. package/dist/text_output.d.ts.map +0 -1
  57. package/dist/text_output.js.map +0 -1
  58. package/dist/transport_adapter.d.ts.map +0 -1
  59. package/dist/transport_adapter.js.map +0 -1
  60. package/dist/types.d.ts.map +0 -1
  61. package/dist/types.js.map +0 -1
package/dist/audio_input.d.ts CHANGED
@@ -2,10 +2,12 @@ import { AudioInput } from '@ariaflowagents/livekit-plugin';
2
2
  import type { RtpSession } from './rtp/rtp_session.js';
3
3
  /**
4
4
  * Receives audio from an RTP stream and provides it as a
5
- * ReadableStream<AudioFrame> for the STT pipeline.
5
+ * ReadableStream<AudioFrame> for the voice pipeline.
6
6
  *
7
- * RTP delivers G.711 at 8kHz. After decode, we upsample to 16kHz
8
- * to match GeminiLiveSTT's default sample rate expectation.
7
+ * RTP delivers G.711 at 8kHz. After decode, we upsample to the
8
+ * configured output sample rate (default 24kHz, matching Twilio and
9
+ * other transports). Cascaded callers can pass 16kHz for
10
+ * GeminiLiveSTT backward compatibility.
9
11
  *
10
12
  * The resampler is allocated once and reused across all frames
11
13
  * for the lifetime of this input (Sox sinc via local Rust FFI).
@@ -16,9 +18,8 @@ export declare class SIPAudioInput extends AudioInput {
16
18
  private closed;
17
19
  private numChannels;
18
20
  private resampler;
19
- constructor(rtpSession: RtpSession, sampleRate?: number, numChannels?: number);
21
+ constructor(rtpSession: RtpSession, sampleRate?: number, numChannels?: number, outputSampleRate?: number);
20
22
  private startStream;
21
23
  private setupListener;
22
24
  close(): Promise<void>;
23
25
  }
24
- //# sourceMappingURL=audio_input.d.ts.map
package/dist/audio_input.js CHANGED
@@ -2,13 +2,15 @@ import { AudioInput, AudioFrame } from '@ariaflowagents/livekit-plugin';
2
2
  import { createResampler } from '@ariaflowagents/livekit-plugin/utils/resample';
3
3
  import { TransformStream } from 'node:stream/web';
4
4
  const RTP_SAMPLE_RATE = 8000;
5
- const STT_SAMPLE_RATE = 16000;
5
+ const DEFAULT_OUTPUT_SAMPLE_RATE = 24000;
6
6
  /**
7
7
  * Receives audio from an RTP stream and provides it as a
8
- * ReadableStream<AudioFrame> for the STT pipeline.
8
+ * ReadableStream<AudioFrame> for the voice pipeline.
9
9
  *
10
- * RTP delivers G.711 at 8kHz. After decode, we upsample to 16kHz
11
- * to match GeminiLiveSTT's default sample rate expectation.
10
+ * RTP delivers G.711 at 8kHz. After decode, we upsample to the
11
+ * configured output sample rate (default 24kHz, matching Twilio and
12
+ * other transports). Cascaded callers can pass 16kHz for
13
+ * GeminiLiveSTT backward compatibility.
12
14
  *
13
15
  * The resampler is allocated once and reused across all frames
14
16
  * for the lifetime of this input (Sox sinc via local Rust FFI).
@@ -18,12 +20,13 @@ export class SIPAudioInput extends AudioInput {
18
20
  writer = null;
19
21
  closed = false;
20
22
  numChannels;
21
- resampler = createResampler(RTP_SAMPLE_RATE, STT_SAMPLE_RATE);
22
- constructor(rtpSession, sampleRate = RTP_SAMPLE_RATE, numChannels = 1) {
23
+ resampler;
24
+ constructor(rtpSession, sampleRate = RTP_SAMPLE_RATE, numChannels = 1, outputSampleRate = DEFAULT_OUTPUT_SAMPLE_RATE) {
23
25
  super();
24
26
  this.rtpSession = rtpSession;
25
27
  void sampleRate;
26
28
  this.numChannels = numChannels;
29
+ this.resampler = createResampler(RTP_SAMPLE_RATE, outputSampleRate);
27
30
  this.startStream();
28
31
  this.setupListener();
29
32
  }
@@ -55,4 +58,3 @@ export class SIPAudioInput extends AudioInput {
55
58
  await super.close();
56
59
  }
57
60
  }
58
- //# sourceMappingURL=audio_input.js.map
package/dist/audio_output.d.ts CHANGED
@@ -31,4 +31,3 @@ export declare class SIPAudioOutput extends AudioOutput {
31
31
  clearBuffer(): void;
32
32
  close(): Promise<void>;
33
33
  }
34
- //# sourceMappingURL=audio_output.d.ts.map
package/dist/audio_output.js CHANGED
@@ -130,4 +130,3 @@ export class SIPAudioOutput extends AudioOutput {
130
130
  this.sendQueue = [];
131
131
  }
132
132
  }
133
- //# sourceMappingURL=audio_output.js.map
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export { SIPAgentServer } from './server.js';
2
+ export type { SIPNativeSessionOptions, SIPRealtimeSessionOptions, SIPAgentEventSink, } from './server.js';
2
3
  export { SIPSignaling } from './sip_signaling.js';
3
4
  export type { SIPServerOptions, SIPTransport } from './types.js';
4
5
  export { SIPTransportAdapter } from './transport_adapter.js';
@@ -10,4 +11,3 @@ export { RtpSession, type RtpSessionOptions } from './rtp/rtp_session.js';
10
11
  export { parseRtpPacket, buildRtpPacket, type RtpPacket } from './rtp/rtp_packet.js';
11
12
  export { JitterBuffer } from './rtp/jitter_buffer.js';
12
13
  export { createSipNativeAudioTransport } from './native_bridge.js';
13
- //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -9,4 +9,3 @@ export { RtpSession } from './rtp/rtp_session.js';
9
9
  export { parseRtpPacket, buildRtpPacket } from './rtp/rtp_packet.js';
10
10
  export { JitterBuffer } from './rtp/jitter_buffer.js';
11
11
  export { createSipNativeAudioTransport } from './native_bridge.js';
12
- //# sourceMappingURL=index.js.map
package/dist/native_bridge.d.ts CHANGED
@@ -9,4 +9,3 @@ export declare function createSipNativeAudioTransport(rtpSession: RtpSession, op
9
9
  codecSampleRate?: number;
10
10
  packetDurationMs?: number;
11
11
  }): NativeAudioTransport;
12
- //# sourceMappingURL=native_bridge.d.ts.map
package/dist/native_bridge.js CHANGED
@@ -127,4 +127,3 @@ export function createSipNativeAudioTransport(rtpSession, options) {
127
127
  },
128
128
  };
129
129
  }
130
- //# sourceMappingURL=native_bridge.js.map
package/dist/rtp/jitter_buffer.d.ts CHANGED
@@ -16,4 +16,3 @@ export declare class JitterBuffer {
16
16
  pull(): RtpPacket | null;
17
17
  private isOlder;
18
18
  }
19
- //# sourceMappingURL=jitter_buffer.d.ts.map
package/dist/rtp/jitter_buffer.js CHANGED
@@ -48,4 +48,3 @@ export class JitterBuffer {
48
48
  return diff > 0 && diff < 0x8000;
49
49
  }
50
50
  }
51
- //# sourceMappingURL=jitter_buffer.js.map
package/dist/rtp/rtp_packet.d.ts CHANGED
@@ -20,4 +20,3 @@ export interface RtpPacket {
20
20
  }
21
21
  export declare function parseRtpPacket(data: Buffer): RtpPacket | null;
22
22
  export declare function buildRtpPacket(payloadType: number, sequenceNumber: number, timestamp: number, ssrc: number, payload: Uint8Array, marker?: boolean): Buffer;
23
- //# sourceMappingURL=rtp_packet.d.ts.map
package/dist/rtp/rtp_packet.js CHANGED
@@ -60,4 +60,3 @@ export function buildRtpPacket(payloadType, sequenceNumber, timestamp, ssrc, pay
60
60
  header.writeUInt32BE(ssrc >>> 0, 8);
61
61
  return Buffer.concat([header, Buffer.from(payload)]);
62
62
  }
63
- //# sourceMappingURL=rtp_packet.js.map
package/dist/rtp/rtp_session.d.ts CHANGED
@@ -51,4 +51,3 @@ export declare class RtpSession extends EventEmitter {
51
51
  sendAudio(pcm: Int16Array): void;
52
52
  close(): void;
53
53
  }
54
- //# sourceMappingURL=rtp_session.d.ts.map
package/dist/rtp/rtp_session.js CHANGED
@@ -159,4 +159,3 @@ function concatInt16(a, b) {
159
159
  out.set(b, a.length);
160
160
  return out;
161
161
  }
162
- //# sourceMappingURL=rtp_session.js.map
package/dist/sdp_g711.d.ts CHANGED
@@ -15,4 +15,3 @@ export declare function negotiateG711FromRemoteOffer(remoteSdp: string, fallback
15
15
  * SDP answer body advertising a single negotiated G.711 codec (RFC 3264).
16
16
  */
17
17
  export declare function buildG711SdpAnswer(localAddress: string, rtpPort: number, codec: Codec): string;
18
- //# sourceMappingURL=sdp_g711.d.ts.map
package/dist/sdp_g711.js CHANGED
@@ -71,4 +71,3 @@ export function buildG711SdpAnswer(localAddress, rtpPort, codec) {
71
71
  `a=ptime:20\r\n` +
72
72
  `a=sendrecv\r\n`);
73
73
  }
74
- //# sourceMappingURL=sdp_g711.js.map
package/dist/server.d.ts CHANGED
@@ -8,27 +8,79 @@
8
8
  * - Works with PBX systems and SIP gateways
9
9
  * - G.711 codec negotiation
10
10
  *
11
+ * Supports three session modes:
12
+ * - Cascaded: startSession() — STT → LLM → TTS pipeline
13
+ * - Native: startNativeSession() — RealtimeRuntime + RealtimeAudioClient
14
+ * - Realtime: startRealtimeSession() — LiveKit AgentSession + provider RealtimeModel
15
+ *
11
16
  * For WebSocket/WebRTC SIP signaling, use
12
17
  * `@ariaflow/livekit-plugin-transport-sip-jssip`.
13
18
  */
14
19
  import { SessionManager, type AriaFlowVoiceSession } from '@ariaflowagents/livekit-plugin';
15
- import type { voice } from '@livekit/agents';
20
+ import { voice } from '@livekit/agents';
21
+ import type { RealtimeRuntime, RealtimeAudioClient, RealtimeSessionHandle } from '@ariaflowagents/core/realtime';
16
22
  import { SIPTransportAdapter } from './transport_adapter.js';
17
23
  import type { SIPServerOptions } from './types.js';
24
+ /**
25
+ * Optional event sink for AgentSession events in realtime mode.
26
+ * SIP has no text channel — use this callback for observability.
27
+ */
28
+ export type SIPAgentEventSink = (event: {
29
+ type: string;
30
+ callId: string;
31
+ timestamp: number;
32
+ data: unknown;
33
+ }) => void;
34
+ /**
35
+ * Options for starting a native audio session through RealtimeRuntime.
36
+ */
37
+ export interface SIPNativeSessionOptions {
38
+ runtime: RealtimeRuntime;
39
+ createModelClient: () => RealtimeAudioClient;
40
+ sessionId?: string;
41
+ userId?: string;
42
+ agentId?: string;
43
+ }
44
+ /**
45
+ * Options for starting a direct AgentSession backed by a LiveKit realtime LLM.
46
+ */
47
+ export interface SIPRealtimeSessionOptions {
48
+ model: NonNullable<voice.AgentSessionOptions['llm']>;
49
+ agent: voice.Agent;
50
+ maxToolSteps?: number;
51
+ sessionId?: string;
52
+ onSessionEnd?: (reason: string) => void;
53
+ onEvent?: SIPAgentEventSink;
54
+ }
18
55
  /**
19
56
  * A SIP server that accepts incoming calls and creates agent sessions.
20
57
  *
21
- * This server handles SIP signaling (via UDP or WebSocket) and manages
22
- * RTP sessions for audio transmission.
58
+ * Supports three session modes:
23
59
  *
24
- * UDP Example:
25
- * const server = new SIPAgentServer({
26
- * transport: 'udp',
27
- * localAddress: '10.0.0.5',
28
- * sipPort: 5060,
29
- * rtpPortStart: 10000,
60
+ * **Cascaded** (startSession):
61
+ * server.onCall(async (transport, callId) => {
62
+ * const voiceSession = new AriaFlowVoiceSession({ runtime, stt, tts });
63
+ * await server.startSession(callId, voiceSession);
30
64
  * });
31
65
  *
66
+ * **Native** (startNativeSession):
67
+ * server.onCall(async (transport, callId) => {
68
+ * await server.startNativeSession(callId, {
69
+ * runtime: realtimeRuntime,
70
+ * createModelClient: () => new GeminiLiveSession({ apiKey, model }),
71
+ * });
72
+ * });
73
+ *
74
+ * **Realtime** (startRealtimeSession):
75
+ * server.onCall(async (transport, callId) => {
76
+ * const binding = await createAriaFlowRealtimeAgent({ authority, agentId });
77
+ * const session = await server.startRealtimeSession(callId, {
78
+ * model: new google.beta.realtime.RealtimeModel({ apiKey }),
79
+ * agent: binding.agent,
80
+ * onSessionEnd: (reason) => binding.controller.detach(reason),
81
+ * });
82
+ * binding.controller.attach(session);
83
+ * });
32
84
  */
33
85
  export declare class SIPAgentServer {
34
86
  private options;
@@ -39,9 +91,30 @@ export declare class SIPAgentServer {
39
91
  private activeTransports;
40
92
  private transportType;
41
93
  private activeVoiceSessions;
94
+ private nativeSessions;
95
+ private realtimeSessions;
42
96
  constructor(options: SIPServerOptions);
43
97
  onCall(handler: (adapter: SIPTransportAdapter, callId: string) => void | Promise<void>): void;
98
+ /**
99
+ * Start a cascaded voice session (STT → LLM → TTS).
100
+ */
44
101
  startSession(callId: string, voiceSession: AriaFlowVoiceSession): Promise<voice.AgentSession>;
102
+ /**
103
+ * Start a native audio session that routes audio directly through
104
+ * RealtimeRuntime (e.g., Gemini Live, OpenAI Realtime).
105
+ *
106
+ * Uses the SIP native audio bridge to convert RTP 8kHz ↔ 24kHz PCM
107
+ * for the realtime model client.
108
+ */
109
+ startNativeSession(callId: string, options: SIPNativeSessionOptions): Promise<RealtimeSessionHandle>;
110
+ /**
111
+ * Start a direct LiveKit AgentSession over the SIP transport.
112
+ *
113
+ * This path is for native realtime models such as
114
+ * @livekit/agents-plugin-google's RealtimeModel. It wires the transport's
115
+ * AudioInput/AudioOutput/TextOutput directly into AgentSession.
116
+ */
117
+ startRealtimeSession(callId: string, options: SIPRealtimeSessionOptions): Promise<voice.AgentSession>;
45
118
  /**
46
119
  * Allocate a local RTP port and create a transport adapter.
47
120
  * This is called internally when a SIP INVITE is received.
@@ -49,11 +122,12 @@ export declare class SIPAgentServer {
49
122
  private createTransport;
50
123
  /**
51
124
  * Start listening for SIP INVITE requests.
52
- *
53
- * For UDP transport: Binds UDP socket and listens for SIP messages.
54
- * For WebSocket transport: Connects to WebSocket server and listens for SIP messages.
55
125
  */
56
126
  listen(): Promise<void>;
127
+ /**
128
+ * Clean up all resources for a call across all three session modes.
129
+ */
130
+ private cleanupCall;
57
131
  /**
58
132
  * Hang up an active call.
59
133
  */
@@ -73,4 +147,3 @@ export declare class SIPAgentServer {
73
147
  get status(): 'disconnected' | 'connecting' | 'connected';
74
148
  close(): Promise<void>;
75
149
  }
76
- //# sourceMappingURL=server.d.ts.map
package/dist/server.js CHANGED
@@ -8,13 +8,20 @@
8
8
  * - Works with PBX systems and SIP gateways
9
9
  * - G.711 codec negotiation
10
10
  *
11
+ * Supports three session modes:
12
+ * - Cascaded: startSession() — STT → LLM → TTS pipeline
13
+ * - Native: startNativeSession() — RealtimeRuntime + RealtimeAudioClient
14
+ * - Realtime: startRealtimeSession() — LiveKit AgentSession + provider RealtimeModel
15
+ *
11
16
  * For WebSocket/WebRTC SIP signaling, use
12
17
  * `@ariaflow/livekit-plugin-transport-sip-jssip`.
13
18
  */
14
19
  import { SessionManager } from '@ariaflowagents/livekit-plugin';
20
+ import { voice } from '@livekit/agents';
15
21
  import { SIPTransportAdapter } from './transport_adapter.js';
16
22
  import { RtpSession } from './rtp/rtp_session.js';
17
23
  import { SIPSignaling } from './sip_signaling.js';
24
+ import { createSipNativeAudioTransport } from './native_bridge.js';
18
25
  /**
19
26
  * Parse the remote RTP endpoint from an SDP body.
20
27
  * Extracts the connection address (c= line) and audio port (m= line).
@@ -35,17 +42,32 @@ function parseSdpRemoteEndpoint(sdp) {
35
42
  /**
36
43
  * A SIP server that accepts incoming calls and creates agent sessions.
37
44
  *
38
- * This server handles SIP signaling (via UDP or WebSocket) and manages
39
- * RTP sessions for audio transmission.
45
+ * Supports three session modes:
46
+ *
47
+ * **Cascaded** (startSession):
48
+ * server.onCall(async (transport, callId) => {
49
+ * const voiceSession = new AriaFlowVoiceSession({ runtime, stt, tts });
50
+ * await server.startSession(callId, voiceSession);
51
+ * });
40
52
  *
41
- * UDP Example:
42
- * const server = new SIPAgentServer({
43
- * transport: 'udp',
44
- * localAddress: '10.0.0.5',
45
- * sipPort: 5060,
46
- * rtpPortStart: 10000,
53
+ * **Native** (startNativeSession):
54
+ * server.onCall(async (transport, callId) => {
55
+ * await server.startNativeSession(callId, {
56
+ * runtime: realtimeRuntime,
57
+ * createModelClient: () => new GeminiLiveSession({ apiKey, model }),
58
+ * });
47
59
  * });
48
60
  *
61
+ * **Realtime** (startRealtimeSession):
62
+ * server.onCall(async (transport, callId) => {
63
+ * const binding = await createAriaFlowRealtimeAgent({ authority, agentId });
64
+ * const session = await server.startRealtimeSession(callId, {
65
+ * model: new google.beta.realtime.RealtimeModel({ apiKey }),
66
+ * agent: binding.agent,
67
+ * onSessionEnd: (reason) => binding.controller.detach(reason),
68
+ * });
69
+ * binding.controller.attach(session);
70
+ * });
49
71
  */
50
72
  export class SIPAgentServer {
51
73
  options;
@@ -56,6 +78,8 @@ export class SIPAgentServer {
56
78
  activeTransports = new Map();
57
79
  transportType;
58
80
  activeVoiceSessions = new Map();
81
+ nativeSessions = new Map();
82
+ realtimeSessions = new Map();
59
83
  constructor(options) {
60
84
  this.options = options;
61
85
  this.transportType = options.transport ?? 'udp';
@@ -69,6 +93,9 @@ export class SIPAgentServer {
69
93
  onCall(handler) {
70
94
  this.callHandler = handler;
71
95
  }
96
+ /**
97
+ * Start a cascaded voice session (STT → LLM → TTS).
98
+ */
72
99
  async startSession(callId, voiceSession) {
73
100
  const transport = this.activeTransports.get(callId);
74
101
  if (!transport) {
@@ -78,6 +105,123 @@ export class SIPAgentServer {
78
105
  this.activeVoiceSessions.set(callId, voiceSession);
79
106
  return agentSession;
80
107
  }
108
+ /**
109
+ * Start a native audio session that routes audio directly through
110
+ * RealtimeRuntime (e.g., Gemini Live, OpenAI Realtime).
111
+ *
112
+ * Uses the SIP native audio bridge to convert RTP 8kHz ↔ 24kHz PCM
113
+ * for the realtime model client.
114
+ */
115
+ async startNativeSession(callId, options) {
116
+ const adapter = this.activeTransports.get(callId);
117
+ if (!adapter) {
118
+ throw new Error(`Transport not found for call: ${callId}`);
119
+ }
120
+ const nativeTransport = createSipNativeAudioTransport(adapter.rtpSession);
121
+ const modelClient = options.createModelClient();
122
+ let handle;
123
+ try {
124
+ handle = await options.runtime.startSession({
125
+ modelClient,
126
+ transport: nativeTransport,
127
+ sessionId: options.sessionId ?? callId,
128
+ userId: options.userId,
129
+ agentId: options.agentId,
130
+ });
131
+ }
132
+ catch (err) {
133
+ nativeTransport.close();
134
+ throw err;
135
+ }
136
+ this.nativeSessions.set(callId, { handle, transport: nativeTransport });
137
+ console.log(`[SIPAgentServer] native session started for call: ${callId}`);
138
+ return handle;
139
+ }
140
+ /**
141
+ * Start a direct LiveKit AgentSession over the SIP transport.
142
+ *
143
+ * This path is for native realtime models such as
144
+ * @livekit/agents-plugin-google's RealtimeModel. It wires the transport's
145
+ * AudioInput/AudioOutput/TextOutput directly into AgentSession.
146
+ */
147
+ async startRealtimeSession(callId, options) {
148
+ const adapter = this.activeTransports.get(callId);
149
+ if (!adapter) {
150
+ throw new Error(`Transport not found for call: ${callId}`);
151
+ }
152
+ const sessionId = options.sessionId ?? callId;
153
+ let sessionEnded = false;
154
+ const session = new voice.AgentSession({
155
+ llm: options.model,
156
+ maxToolSteps: options.maxToolSteps,
157
+ });
158
+ // Wire I/O directly
159
+ session.input.audio = adapter.audioInput;
160
+ session.output.audio = adapter.audioOutput;
161
+ session.output.transcription = adapter.textOutput;
162
+ const emitEvent = options.onEvent
163
+ ? (type, data) => {
164
+ options.onEvent({ type, callId, timestamp: Date.now(), data });
165
+ }
166
+ : undefined;
167
+ session.on(voice.AgentSessionEventTypes.AgentStateChanged, (event) => {
168
+ emitEvent?.('agent_state', event.newState);
169
+ });
170
+ session.on(voice.AgentSessionEventTypes.UserStateChanged, (event) => {
171
+ emitEvent?.('user_state', event.newState);
172
+ });
173
+ session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (event) => {
174
+ emitEvent?.('user_transcription', { text: event.transcript, isFinal: event.isFinal });
175
+ });
176
+ session.on(voice.AgentSessionEventTypes.FunctionToolsExecuted, (event) => {
177
+ event.functionCalls.forEach((call, index) => {
178
+ emitEvent?.('tool_result', {
179
+ toolName: call.name,
180
+ success: Boolean(event.functionCallOutputs[index]),
181
+ });
182
+ });
183
+ });
184
+ session.on(voice.AgentSessionEventTypes.MetricsCollected, (event) => {
185
+ emitEvent?.('metrics_collected', {
186
+ metricsType: typeof event.metrics?.type === 'string' ? event.metrics.type : undefined,
187
+ });
188
+ });
189
+ session.on(voice.AgentSessionEventTypes.Close, () => {
190
+ if (sessionEnded)
191
+ return;
192
+ sessionEnded = true;
193
+ this.realtimeSessions.delete(callId);
194
+ options.onSessionEnd?.('session_close');
195
+ });
196
+ session.on(voice.AgentSessionEventTypes.Error, (event) => {
197
+ const msg = event.error instanceof Error ? event.error.message : String(event.error);
198
+ console.error(`[SIPAgentServer] realtime session error for ${callId}: ${msg}`);
199
+ emitEvent?.('error', { message: msg });
200
+ });
201
+ const closeRealtimeSession = (reason) => {
202
+ if (sessionEnded)
203
+ return;
204
+ sessionEnded = true;
205
+ this.realtimeSessions.delete(callId);
206
+ options.onSessionEnd?.(reason);
207
+ session.close().catch((err) => {
208
+ console.error(`[SIPAgentServer] realtime session close error for ${callId}:`, err instanceof Error ? err.message : String(err));
209
+ });
210
+ };
211
+ try {
212
+ await session.start({ agent: options.agent });
213
+ }
214
+ catch (err) {
215
+ try {
216
+ await session.close();
217
+ }
218
+ catch { /* swallow cleanup errors */ }
219
+ throw err;
220
+ }
221
+ this.realtimeSessions.set(callId, { session, close: closeRealtimeSession });
222
+ console.log(`[SIPAgentServer] realtime session started for call: ${callId}`);
223
+ return session;
224
+ }
81
225
  /**
82
226
  * Allocate a local RTP port and create a transport adapter.
83
227
  * This is called internally when a SIP INVITE is received.
@@ -103,9 +247,6 @@ export class SIPAgentServer {
103
247
  }
104
248
  /**
105
249
  * Start listening for SIP INVITE requests.
106
- *
107
- * For UDP transport: Binds UDP socket and listens for SIP messages.
108
- * For WebSocket transport: Connects to WebSocket server and listens for SIP messages.
109
250
  */
110
251
  async listen() {
111
252
  const transportName = this.transportType === 'websocket' ? 'WebSocket' : 'UDP';
@@ -131,33 +272,50 @@ export class SIPAgentServer {
131
272
  }, async (callId) => {
132
273
  // Handle BYE from remote party
133
274
  console.log(`[SIPAgentServer] Remote party hung up call: ${callId}`);
134
- const transport = this.activeTransports.get(callId);
135
- if (transport) {
136
- await transport.close();
137
- this.activeTransports.delete(callId);
138
- }
139
- const voiceSession = this.activeVoiceSessions.get(callId);
140
- if (voiceSession) {
141
- await voiceSession.close();
142
- this.activeVoiceSessions.delete(callId);
143
- }
275
+ await this.cleanupCall(callId, 'remote_hangup');
144
276
  });
145
277
  console.log(`[SIPAgentServer] Listening for calls (${transportName})`);
146
278
  }
147
279
  /**
148
- * Hang up an active call.
280
+ * Clean up all resources for a call across all three session modes.
149
281
  */
150
- async hangup(callId) {
151
- const transport = this.activeTransports.get(callId);
152
- if (transport) {
153
- await transport.close();
154
- this.activeTransports.delete(callId);
155
- }
282
+ async cleanupCall(callId, reason) {
283
+ // Cascaded cleanup
156
284
  const voiceSession = this.activeVoiceSessions.get(callId);
157
285
  if (voiceSession) {
158
- await voiceSession.close();
159
286
  this.activeVoiceSessions.delete(callId);
287
+ await voiceSession.close().catch((err) => {
288
+ console.error(`[SIPAgentServer] voice session close error for ${callId}:`, err instanceof Error ? err.message : String(err));
289
+ });
160
290
  }
291
+ // Native cleanup
292
+ const nativeEntry = this.nativeSessions.get(callId);
293
+ if (nativeEntry) {
294
+ this.nativeSessions.delete(callId);
295
+ await nativeEntry.handle.stop().catch((err) => {
296
+ console.error(`[SIPAgentServer] native session stop error for ${callId}:`, err instanceof Error ? err.message : String(err));
297
+ });
298
+ nativeEntry.transport.close();
299
+ }
300
+ // Realtime cleanup
301
+ const realtimeEntry = this.realtimeSessions.get(callId);
302
+ if (realtimeEntry) {
303
+ realtimeEntry.close(reason);
304
+ }
305
+ // Transport cleanup (last — sessions may still be using it)
306
+ const transport = this.activeTransports.get(callId);
307
+ if (transport) {
308
+ this.activeTransports.delete(callId);
309
+ await transport.close().catch((err) => {
310
+ console.error(`[SIPAgentServer] transport close error for ${callId}:`, err instanceof Error ? err.message : String(err));
311
+ });
312
+ }
313
+ }
314
+ /**
315
+ * Hang up an active call.
316
+ */
317
+ async hangup(callId) {
318
+ await this.cleanupCall(callId, 'local_hangup');
161
319
  await this.signaling.hangup(callId);
162
320
  }
163
321
  get sessions() {
@@ -182,16 +340,27 @@ export class SIPAgentServer {
182
340
  return 'connected';
183
341
  }
184
342
  async close() {
185
- // Close all active voice sessions
186
- const voiceSessionPromises = Array.from(this.activeVoiceSessions.values()).map((s) => s.close());
343
+ // Close all cascaded voice sessions
344
+ const voiceSessionPromises = Array.from(this.activeVoiceSessions.values()).map((s) => s.close().catch(() => { }));
187
345
  await Promise.allSettled(voiceSessionPromises);
188
346
  this.activeVoiceSessions.clear();
347
+ // Stop all native sessions
348
+ const nativeStops = Array.from(this.nativeSessions.values()).map(async (entry) => {
349
+ await entry.handle.stop().catch(() => { });
350
+ entry.transport.close();
351
+ });
352
+ await Promise.allSettled(nativeStops);
353
+ this.nativeSessions.clear();
354
+ // Close all realtime sessions
355
+ for (const [, entry] of this.realtimeSessions) {
356
+ entry.close('server_shutdown');
357
+ }
358
+ this.realtimeSessions.clear();
189
359
  // Close all active transports
190
- const closePromises = Array.from(this.activeTransports.values()).map((t) => t.close());
360
+ const closePromises = Array.from(this.activeTransports.values()).map((t) => t.close().catch(() => { }));
191
361
  await Promise.allSettled(closePromises);
192
362
  this.activeTransports.clear();
193
363
  await this.sessionManager.closeAll();
194
364
  await this.signaling.stop();
195
365
  }
196
366
  }
197
- //# sourceMappingURL=server.js.map
package/dist/sip_signaling.d.ts CHANGED
@@ -70,4 +70,3 @@ export declare class SIPSignaling {
70
70
  private generateTag;
71
71
  private generateBranch;
72
72
  }
73
- //# sourceMappingURL=sip_signaling.d.ts.map
package/dist/sip_signaling.js CHANGED
@@ -550,4 +550,3 @@ export class SIPSignaling {
550
550
  return `z9hG4bK${Math.random().toString(36).slice(2, 12)}`;
551
551
  }
552
552
  }
553
- //# sourceMappingURL=sip_signaling.js.map
package/dist/text_output.d.ts CHANGED
@@ -13,4 +13,3 @@ export declare class SIPTextOutput extends TextOutput {
13
13
  captureText(text: string | TimedString): Promise<void>;
14
14
  flush(): void;
15
15
  }
16
- //# sourceMappingURL=text_output.d.ts.map
package/dist/text_output.js CHANGED
@@ -32,4 +32,3 @@ export class SIPTextOutput extends TextOutput {
32
32
  }
33
33
  }
34
34
  }
35
- //# sourceMappingURL=text_output.js.map
package/dist/transport_adapter.d.ts CHANGED
@@ -5,18 +5,25 @@ import { SIPTextOutput } from './text_output.js';
5
5
  import type { RtpSession } from './rtp/rtp_session.js';
6
6
  import type { Codec } from '@ariaflowagents/livekit-plugin/codec/g711';
7
7
  export declare class SIPTransportAdapter extends TransportAdapter {
8
- private rtpSession;
9
8
  readonly id: string;
10
9
  readonly audioInput: SIPAudioInput;
11
10
  readonly audioOutput: SIPAudioOutput;
12
11
  readonly textOutput: SIPTextOutput;
13
12
  readonly config: TransportAdapterConfig;
13
+ private _rtpSession;
14
14
  private _isOpen;
15
15
  constructor(rtpSession: RtpSession, codec: Codec, options: {
16
16
  id: string;
17
17
  packetDurationMs?: number;
18
+ /**
19
+ * Output sample rate for SIPAudioInput. Default 24kHz (matching
20
+ * Twilio and all other transports). Pass 16000 for cascaded mode
21
+ * with GeminiLiveSTT backward compatibility.
22
+ */
23
+ outputSampleRate?: number;
18
24
  });
25
+ /** The underlying RTP session (for native bridge access). */
26
+ get rtpSession(): RtpSession;
19
27
  get isOpen(): boolean;
20
28
  close(): Promise<void>;
21
29
  }
22
- //# sourceMappingURL=transport_adapter.d.ts.map