@dtelecom/agents-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
+ import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
+ import { EventEmitter } from 'events';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Cs5uUoTC.mjs';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, E as Embedder, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as RoomMemory, i as RoomMemoryConfig, j as STTPlugin, k as STTStreamOptions, l as TTSPlugin } from './types-Cs5uUoTC.mjs';
6
+
7
+ declare class VoiceAgent extends EventEmitter { // Public agent entry point: an EventEmitter with a start()/stop() lifecycle, direct TTS via say(), and read-only running/room accessors.
8
+ private readonly config; // Private members carry no type here: declaration output omits the types of private fields.
9
+ private connection;
10
+ private pipeline;
11
+ private audioInputs;
12
+ private audioOutput;
13
+ private memory;
14
+ private _running;
15
+ constructor(config: AgentConfig); // AgentConfig is imported from the shared types chunk at the top of this file.
16
+ get running(): boolean; // NOTE(review): presumably reflects the private `_running` flag — confirm in the implementation.
17
+ get room(): _dtelecom_server_sdk_node.Room | null; // The server-SDK Room, or null; presumably null while not connected — TODO confirm.
18
+ /** Enable saving raw TTS audio as WAV files to `dir` for debugging. */
19
+ enableAudioDump(dir: string): void;
20
+ private _dumpDir; // NOTE(review): presumably the directory passed to enableAudioDump() — confirm.
21
+ /**
22
+ * Speak text directly via TTS, bypassing the LLM.
23
+ * Use for greetings or announcements. Supports barge-in.
24
+ */
25
+ say(text: string): Promise<void>;
26
+ /** Start the agent — connect to room and begin listening. */
27
+ start(options: AgentStartOptions): Promise<void>;
28
+ /** Stop the agent — disconnect and clean up. */
29
+ stop(): Promise<void>;
30
+ private setupDataChannel; // Private methods appear as untyped members in declaration output; their signatures are not visible here.
31
+ private handleTrackSubscribed;
32
+ private handleTrackUnsubscribed;
33
+ private handleParticipantDisconnected;
34
+ private pipeAudioToSTT;
35
+ }
36
+
37
+ interface ContextManagerOptions { // Options object taken by the ContextManager constructor.
38
+ /** System instructions for the agent (the only required field) */
39
+ instructions: string;
40
+ /** Max tokens before triggering summarization (default: 5000) */
41
+ maxContextTokens?: number;
42
+ /** Number of recent turns to keep verbatim (default: 4) */
43
+ recentTurnsToKeep?: number;
44
+ }
45
+ declare class ContextManager { // Tracks conversation turns plus an optional summary and builds the Message[] passed to the LLM.
46
+ private readonly instructions;
47
+ private readonly maxContextTokens;
48
+ private readonly recentTurnsToKeep;
49
+ private turns;
50
+ private summary;
51
+ constructor(options: ContextManagerOptions);
52
+ /** Add a user's speech turn to the conversation, attributed to `speaker` */
53
+ addUserTurn(speaker: string, text: string): void;
54
+ /** Add the agent's response to the conversation */
55
+ addAgentTurn(text: string): void;
56
+ /**
57
+ * Build the messages array for the LLM call.
58
+ *
59
+ * Structure:
60
+ * [system prompt]
61
+ * [memory context, if provided]
62
+ * [conversation summary, if any]
63
+ * [recent verbatim turns]
64
+ *
65
+ * @param memoryContext - Optional relevant context injected by the application
66
+ */
67
+ buildMessages(memoryContext?: string): Message[];
68
+ /** Check if summarization should be triggered. NOTE(review): presumably compares the context size against maxContextTokens — confirm. */
69
+ shouldSummarize(): boolean;
70
+ /**
71
+ * Summarize older turns using the supplied LLM plugin.
72
+ * Keeps the most recent turns verbatim (NOTE(review): presumably recentTurnsToKeep of them — confirm).
73
+ */
74
+ summarize(llm: LLMPlugin): Promise<void>;
75
+ /** Get the full transcript as a single string */
76
+ getFullTranscript(): string;
77
+ /** Reset the context. NOTE(review): presumably clears the stored turns and summary — confirm. */
78
+ reset(): void;
79
+ }
80
+
81
+ /**
82
+ * Pipeline — coordinates the STT -> LLM -> TTS flow.
83
+ *
84
+ * Uses a producer/consumer pattern:
85
+ * - Producer: LLM tokens -> sentence splitter -> sentence queue
86
+ * - Consumer: sentence queue -> TTS -> audio output
87
+ * Both run concurrently so audio playback never blocks LLM consumption.
88
+ *
89
+ * Supports barge-in (interruption cancels both producer and consumer).
90
+ */
91
+
92
+ declare class Pipeline extends EventEmitter { // EventEmitter that holds the STT/LLM/TTS plugins together with turn detection, barge-in and sentence splitting.
93
+ private readonly stt; // Private members carry no type here: declaration output omits the types of private fields.
94
+ private readonly llm;
95
+ private readonly tts;
96
+ private readonly audioOutput;
97
+ private readonly context;
98
+ private readonly turnDetector;
99
+ private readonly bargeIn;
100
+ private readonly splitter;
101
+ private readonly respondMode;
102
+ private readonly agentName;
103
+ private readonly nameVariants;
104
+ private readonly beforeRespond?;
105
+ private readonly memory?;
106
+ /** Active STT streams, keyed by participant identity */
107
+ private sttStreams;
108
+ private _processing;
109
+ private _running;
110
+ private _agentState;
111
+ /** Queued turn while current one is still processing */
112
+ private pendingTurn;
113
+ constructor(options: PipelineOptions);
114
+ get processing(): boolean; // NOTE(review): presumably reflects the private `_processing` flag — confirm.
115
+ get running(): boolean; // NOTE(review): presumably reflects the private `_running` flag — confirm.
116
+ get agentState(): AgentState; // NOTE(review): presumably reflects the private `_agentState` — confirm.
117
+ private setAgentState;
118
+ addParticipant(identity: string): STTStream; // Returns an STTStream for `identity`; presumably also registers it in sttStreams — TODO confirm.
119
+ removeParticipant(identity: string): Promise<void>; // NOTE(review): presumably closes and unregisters that participant's STT stream — confirm.
120
+ stop(): Promise<void>;
121
+ getContextManager(): ContextManager;
122
+ private lastFinalAt;
123
+ private lastSttDuration;
124
+ private handleTranscription;
125
+ /**
126
+ * Determine if the agent should respond to this turn.
127
+ * In 'always' mode: responds to everything.
128
+ * In 'addressed' mode: only when agent name is mentioned + optional beforeRespond hook.
129
+ */
130
+ private shouldRespond;
131
+ private processTurn;
132
+ /**
133
+ * Speak text directly via TTS, bypassing the LLM.
134
+ * Supports barge-in — if a participant speaks during playback, the spoken text is cut short.
135
+ * Adds the text to conversation context so the LLM knows what was said.
136
+ */
137
+ say(text: string): Promise<void>;
138
+ private synthesizeAndPlay;
139
+ }
140
+
141
+ /**
142
+ * SentenceSplitter — buffers streaming LLM tokens into speakable chunks
143
+ * for TTS synthesis.
144
+ *
145
+ * Split strategy:
146
+ * 1. Sentence boundary (.!?) — always split
147
+ * 2. Clause boundary (,;:—) — split if buffer >= MIN_CHUNK chars
148
+ * 3. Word boundary — forced split if buffer >= MAX_CHUNK chars (the MIN_CHUNK / MAX_CHUNK values are not declared in this file)
149
+ */
150
+ declare class SentenceSplitter {
151
+ private buffer;
152
+ /** Add a token and get back any speakable chunks (NOTE(review): presumably an empty array when no boundary has been reached — confirm) */
153
+ push(token: string): string[];
154
+ /** Flush any remaining text as a final chunk (NOTE(review): presumably null when nothing is buffered — confirm) */
155
+ flush(): string | null;
156
+ /** Reset the splitter */
157
+ reset(): void;
158
+ private extractChunks;
159
+ }
160
+
161
+ interface TurnDetectorOptions { // Optional settings object accepted by the TurnDetector constructor.
162
+ /** Silence duration, in milliseconds, after a final transcription before turn end is triggered (default: 800) */
163
+ silenceTimeoutMs?: number;
164
+ }
165
+ declare class TurnDetector { // Decides when a speaker's turn has ended, based on transcription results and a silence timeout.
166
+ private readonly silenceTimeoutMs;
167
+ private silenceTimer;
168
+ private _onTurnEnd;
169
+ private lastFinalText;
170
+ constructor(options?: TurnDetectorOptions);
171
+ /** Set the callback for when a turn ends (the type also accepts null; presumably that clears the callback — confirm) */
172
+ set onTurnEnd(cb: (() => void) | null);
173
+ /**
174
+ * Feed a transcription result (`text`, plus `isFinal` to mark a final rather than interim result).
175
+ * Returns true if this result represents a completed turn.
176
+ */
177
+ handleTranscription(text: string, isFinal: boolean): boolean;
178
+ /** Force-trigger turn end */
179
+ forceTurnEnd(): void;
180
+ /** Reset state */
181
+ reset(): void;
182
+ private clearTimer;
183
+ }
184
+
185
+ declare class BargeIn { // Interruption (barge-in) state for one response cycle, exposed through an AbortSignal.
186
+ private abortController;
187
+ private _interrupted;
188
+ private _onInterrupt;
189
+ get interrupted(): boolean; // NOTE(review): presumably reflects the private `_interrupted` flag — confirm.
190
+ /** Set the callback for when barge-in occurs (the type also accepts null; presumably that clears the callback — confirm) */
191
+ set onInterrupt(cb: (() => void) | null);
192
+ /**
193
+ * Create a new AbortController for the current response cycle and return an AbortSignal (presumably that controller's signal — confirm).
194
+ * Call this at the start of each STT->LLM->TTS cycle.
195
+ */
196
+ startCycle(): AbortSignal;
197
+ /** Trigger barge-in. Called when STT detects speech during agent output. */
198
+ trigger(): void;
199
+ /** Reset after the interrupted cycle is cleaned up */
200
+ reset(): void;
201
+ }
202
+
203
+ /**
204
+ * Abstract base class for STT streams.
205
+ * Provides typed EventEmitter interface for transcription events: `on` and `emit` are overloaded for the 'transcription' and 'error' events only.
206
+ * Provider implementations should extend this class and implement `sendAudio` and `close`.
207
+ */
208
+ declare abstract class BaseSTTStream extends EventEmitter implements STTStream {
209
+ abstract sendAudio(pcm16: Buffer): void; // NOTE(review): sample rate/channel layout of `pcm16` is not stated here; presumably matches AudioInput.frames() — confirm.
210
+ abstract close(): Promise<void>;
211
+ on(event: 'transcription', cb: (result: TranscriptionResult) => void): this;
212
+ on(event: 'error', cb: (error: Error) => void): this;
213
+ emit(event: 'transcription', result: TranscriptionResult): boolean;
214
+ emit(event: 'error', error: Error): boolean;
215
+ }
216
+
217
+ interface RoomConnectionOptions { // Argument object for RoomConnection.connect().
218
+ room: string; // NOTE(review): presumably the name of the room to join — confirm.
219
+ apiKey: string; // NOTE(review): presumably used with apiSecret to create the access token mentioned in connect()'s doc — confirm.
220
+ apiSecret: string;
221
+ identity?: string; // Optional; default not visible in this declaration file.
222
+ name?: string; // Optional; default not visible in this declaration file.
223
+ }
224
+ declare class RoomConnection { // Wraps a server-SDK Room and the audio track the agent publishes into it.
225
+ readonly room: Room;
226
+ private audioSource;
227
+ private localTrack;
228
+ private _connected;
229
+ constructor();
230
+ get connected(): boolean; // NOTE(review): presumably reflects the private `_connected` flag — confirm.
231
+ /**
232
+ * Connect to a dTelecom room.
233
+ *
234
+ * 1. Create an Ed25519 JWT via AccessToken
235
+ * 2. Discover nearest SFU via getWsUrl()
236
+ * 3. Connect Room via WebRTC
237
+ * 4. Publish an audio track for the agent to speak through
238
+ */
239
+ connect(options: RoomConnectionOptions): Promise<void>;
240
+ /**
241
+ * Publish an audio track so the agent can speak.
242
+ * Returns (via the promise) the AudioSource to feed PCM16 audio into.
243
+ */
244
+ publishAudioTrack(): Promise<AudioSource>;
245
+ /** Disconnect from the room and clean up resources. */
246
+ disconnect(): Promise<void>;
247
+ }
248
+
249
+ declare class AudioInput { // Audio reader for one participant's RemoteAudioTrack, exposed as async generators.
250
+ readonly participantIdentity: string;
251
+ private stream;
252
+ private _closed;
253
+ private frameCount;
254
+ constructor(track: RemoteAudioTrack, participantIdentity: string);
255
+ get closed(): boolean; // NOTE(review): presumably reflects the private `_closed` flag — confirm.
256
+ /**
257
+ * Async iterate over PCM16 buffers from this participant.
258
+ * Each yielded Buffer is 16kHz mono PCM16 LE.
259
+ */
260
+ frames(): AsyncGenerator<Buffer>;
261
+ /** Async iterate over AudioFrame objects (the server-SDK frame type, rather than raw Buffers). */
262
+ audioFrames(): AsyncGenerator<AudioFrame>;
263
+ close(): void; // NOTE(review): presumably stops iteration and marks the input closed — confirm.
264
+ }
265
+
266
+ /**
267
+ * MemoryStore — SQLite + sqlite-vec database layer for room memory.
268
+ *
269
+ * Single .db file stores:
270
+ * - turns: every spoken turn (full transcript)
271
+ * - sessions: meeting metadata + LLM-generated summaries
272
+ * - turn_vectors: embedding index for semantic turn search
273
+ * - session_vectors: embedding index for session summary search
274
+ */
275
+ interface TurnRow { // Row shape returned by MemoryStore.getRecentTurns() / getSessionTurns().
276
+ id: number;
277
+ room: string;
278
+ session_id: string;
279
+ speaker: string;
280
+ text: string;
281
+ is_agent: number; // Numeric here although insertTurn() takes a boolean; presumably stored as 0/1 — confirm.
282
+ created_at: number; // NOTE(review): timestamp; unit (seconds vs milliseconds) is not visible here — confirm.
283
+ }
284
+ interface SessionRow { // Shape of a session record; nullable fields may be unset.
285
+ id: string;
286
+ room: string;
287
+ started_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
288
+ ended_at: number | null; // NOTE(review): presumably null until the session has ended — confirm.
289
+ participants: string | null; // A single string although the MemoryStore methods take string[]; presumably serialized — confirm format.
290
+ summary: string | null; // NOTE(review): presumably null when no summary was generated — confirm.
291
+ turn_count: number;
292
+ }
293
+ interface SearchResult { // Element type returned by MemoryStore.searchTurns().
294
+ speaker: string;
295
+ text: string;
296
+ created_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
297
+ session_id: string;
298
+ distance: number; // NOTE(review): presumably the KNN vector distance to the query embedding (smaller = closer) — confirm.
299
+ }
300
+ interface SessionSearchResult { // Element type returned by MemoryStore.searchSessions().
301
+ session_id: string;
302
+ summary: string;
303
+ started_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
304
+ distance: number; // NOTE(review): presumably the KNN vector distance to the query embedding (smaller = closer) — confirm.
305
+ }
306
+ declare class MemoryStore { // Database layer for room memory; its overview comment sits earlier in this file, above TurnRow.
307
+ private db;
308
+ constructor(dbPath: string); // NOTE(review): dbPath is presumably the path of the single .db file — confirm.
309
+ private createTables;
310
+ /** Insert a turn and its embedding vector. NOTE(review): the returned number is presumably the new turn's row id — confirm. */
311
+ insertTurn(room: string, sessionId: string, speaker: string, text: string, isAgent: boolean, embedding: Float32Array): number;
312
+ /** Create a new session record with the given id for `room`. */
313
+ insertSession(id: string, room: string): void;
314
+ /** Update a session with summary and end time; also takes the turn count, participants and the summary's embedding. */
315
+ updateSessionSummary(sessionId: string, summary: string, turnCount: number, participants: string[], embedding: Float32Array): void;
316
+ /** End a session without summary (e.g., too few turns). */
317
+ endSession(sessionId: string, turnCount: number, participants: string[]): void;
318
+ /** KNN search turns by embedding similarity, scoped by `room` and capped at `limit` results. */
319
+ searchTurns(room: string, queryEmbedding: Float32Array, limit: number): SearchResult[];
320
+ /** KNN search session summaries by embedding similarity, scoped by `room` and capped at `limit` results. */
321
+ searchSessions(room: string, queryEmbedding: Float32Array, limit: number): SessionSearchResult[];
322
+ /** Get the last N turns (N = `limit`) from a specific session. */
323
+ getRecentTurns(room: string, sessionId: string, limit: number): TurnRow[];
324
+ /** Get all turns for a session (for summarization). */
325
+ getSessionTurns(sessionId: string): TurnRow[];
326
+ /** Get total turn count for a session. */
327
+ getSessionTurnCount(sessionId: string): number;
328
+ /** Close the database. */
329
+ close(): void;
330
+ }
331
+
332
+ type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'silent'; // Accepted log levels; NOTE(review): 'silent' presumably disables all output — confirm.
333
+ declare function setLogLevel(level: LogLevel): void; // Sets the log level; NOTE(review): scope (global vs per-logger) is not visible here — confirm.
334
+ declare function getLogLevel(): LogLevel; // Returns the current log level.
335
+ interface Logger { // Logger shape returned by createLogger(): one variadic method per severity.
336
+ debug(...args: unknown[]): void;
337
+ info(...args: unknown[]): void;
338
+ warn(...args: unknown[]): void;
339
+ error(...args: unknown[]): void;
340
+ }
341
+ declare function createLogger(tag: string): Logger; // Returns a Logger for `tag`; NOTE(review): presumably the tag prefixes each message — confirm.
342
+
343
+ export { AgentConfig, AgentStartOptions, AgentState, AudioInput, BargeIn, BaseSTTStream, ContextManager, type ContextManagerOptions, LLMPlugin, type LogLevel, type Logger, MemoryStore, Message, Pipeline, PipelineOptions, RoomConnection, type RoomConnectionOptions, STTStream, type SearchResult, SentenceSplitter, type SessionRow, type SessionSearchResult, TranscriptionResult, TurnDetector, type TurnDetectorOptions, type TurnRow, VoiceAgent, createLogger, getLogLevel, setLogLevel };
@@ -0,0 +1,343 @@
1
+ import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
+ import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
+ import { EventEmitter } from 'events';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Cs5uUoTC.js';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, E as Embedder, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as RoomMemory, i as RoomMemoryConfig, j as STTPlugin, k as STTStreamOptions, l as TTSPlugin } from './types-Cs5uUoTC.js';
6
+
7
+ declare class VoiceAgent extends EventEmitter { // Public agent entry point: an EventEmitter with a start()/stop() lifecycle, direct TTS via say(), and read-only running/room accessors.
8
+ private readonly config; // Private members carry no type here: declaration output omits the types of private fields.
9
+ private connection;
10
+ private pipeline;
11
+ private audioInputs;
12
+ private audioOutput;
13
+ private memory;
14
+ private _running;
15
+ constructor(config: AgentConfig); // AgentConfig is imported from the shared types chunk at the top of this file.
16
+ get running(): boolean; // NOTE(review): presumably reflects the private `_running` flag — confirm in the implementation.
17
+ get room(): _dtelecom_server_sdk_node.Room | null; // The server-SDK Room, or null; presumably null while not connected — TODO confirm.
18
+ /** Enable saving raw TTS audio as WAV files to `dir` for debugging. */
19
+ enableAudioDump(dir: string): void;
20
+ private _dumpDir; // NOTE(review): presumably the directory passed to enableAudioDump() — confirm.
21
+ /**
22
+ * Speak text directly via TTS, bypassing the LLM.
23
+ * Use for greetings or announcements. Supports barge-in.
24
+ */
25
+ say(text: string): Promise<void>;
26
+ /** Start the agent — connect to room and begin listening. */
27
+ start(options: AgentStartOptions): Promise<void>;
28
+ /** Stop the agent — disconnect and clean up. */
29
+ stop(): Promise<void>;
30
+ private setupDataChannel; // Private methods appear as untyped members in declaration output; their signatures are not visible here.
31
+ private handleTrackSubscribed;
32
+ private handleTrackUnsubscribed;
33
+ private handleParticipantDisconnected;
34
+ private pipeAudioToSTT;
35
+ }
36
+
37
+ interface ContextManagerOptions { // Options object taken by the ContextManager constructor.
38
+ /** System instructions for the agent (the only required field) */
39
+ instructions: string;
40
+ /** Max tokens before triggering summarization (default: 5000) */
41
+ maxContextTokens?: number;
42
+ /** Number of recent turns to keep verbatim (default: 4) */
43
+ recentTurnsToKeep?: number;
44
+ }
45
+ declare class ContextManager { // Tracks conversation turns plus an optional summary and builds the Message[] passed to the LLM.
46
+ private readonly instructions;
47
+ private readonly maxContextTokens;
48
+ private readonly recentTurnsToKeep;
49
+ private turns;
50
+ private summary;
51
+ constructor(options: ContextManagerOptions);
52
+ /** Add a user's speech turn to the conversation, attributed to `speaker` */
53
+ addUserTurn(speaker: string, text: string): void;
54
+ /** Add the agent's response to the conversation */
55
+ addAgentTurn(text: string): void;
56
+ /**
57
+ * Build the messages array for the LLM call.
58
+ *
59
+ * Structure:
60
+ * [system prompt]
61
+ * [memory context, if provided]
62
+ * [conversation summary, if any]
63
+ * [recent verbatim turns]
64
+ *
65
+ * @param memoryContext - Optional relevant context injected by the application
66
+ */
67
+ buildMessages(memoryContext?: string): Message[];
68
+ /** Check if summarization should be triggered. NOTE(review): presumably compares the context size against maxContextTokens — confirm. */
69
+ shouldSummarize(): boolean;
70
+ /**
71
+ * Summarize older turns using the supplied LLM plugin.
72
+ * Keeps the most recent turns verbatim (NOTE(review): presumably recentTurnsToKeep of them — confirm).
73
+ */
74
+ summarize(llm: LLMPlugin): Promise<void>;
75
+ /** Get the full transcript as a single string */
76
+ getFullTranscript(): string;
77
+ /** Reset the context. NOTE(review): presumably clears the stored turns and summary — confirm. */
78
+ reset(): void;
79
+ }
80
+
81
+ /**
82
+ * Pipeline — coordinates the STT -> LLM -> TTS flow.
83
+ *
84
+ * Uses a producer/consumer pattern:
85
+ * - Producer: LLM tokens -> sentence splitter -> sentence queue
86
+ * - Consumer: sentence queue -> TTS -> audio output
87
+ * Both run concurrently so audio playback never blocks LLM consumption.
88
+ *
89
+ * Supports barge-in (interruption cancels both producer and consumer).
90
+ */
91
+
92
+ declare class Pipeline extends EventEmitter { // EventEmitter that holds the STT/LLM/TTS plugins together with turn detection, barge-in and sentence splitting.
93
+ private readonly stt; // Private members carry no type here: declaration output omits the types of private fields.
94
+ private readonly llm;
95
+ private readonly tts;
96
+ private readonly audioOutput;
97
+ private readonly context;
98
+ private readonly turnDetector;
99
+ private readonly bargeIn;
100
+ private readonly splitter;
101
+ private readonly respondMode;
102
+ private readonly agentName;
103
+ private readonly nameVariants;
104
+ private readonly beforeRespond?;
105
+ private readonly memory?;
106
+ /** Active STT streams, keyed by participant identity */
107
+ private sttStreams;
108
+ private _processing;
109
+ private _running;
110
+ private _agentState;
111
+ /** Queued turn while current one is still processing */
112
+ private pendingTurn;
113
+ constructor(options: PipelineOptions);
114
+ get processing(): boolean; // NOTE(review): presumably reflects the private `_processing` flag — confirm.
115
+ get running(): boolean; // NOTE(review): presumably reflects the private `_running` flag — confirm.
116
+ get agentState(): AgentState; // NOTE(review): presumably reflects the private `_agentState` — confirm.
117
+ private setAgentState;
118
+ addParticipant(identity: string): STTStream; // Returns an STTStream for `identity`; presumably also registers it in sttStreams — TODO confirm.
119
+ removeParticipant(identity: string): Promise<void>; // NOTE(review): presumably closes and unregisters that participant's STT stream — confirm.
120
+ stop(): Promise<void>;
121
+ getContextManager(): ContextManager;
122
+ private lastFinalAt;
123
+ private lastSttDuration;
124
+ private handleTranscription;
125
+ /**
126
+ * Determine if the agent should respond to this turn.
127
+ * In 'always' mode: responds to everything.
128
+ * In 'addressed' mode: only when agent name is mentioned + optional beforeRespond hook.
129
+ */
130
+ private shouldRespond;
131
+ private processTurn;
132
+ /**
133
+ * Speak text directly via TTS, bypassing the LLM.
134
+ * Supports barge-in — if a participant speaks during playback, the spoken text is cut short.
135
+ * Adds the text to conversation context so the LLM knows what was said.
136
+ */
137
+ say(text: string): Promise<void>;
138
+ private synthesizeAndPlay;
139
+ }
140
+
141
+ /**
142
+ * SentenceSplitter — buffers streaming LLM tokens into speakable chunks
143
+ * for TTS synthesis.
144
+ *
145
+ * Split strategy:
146
+ * 1. Sentence boundary (.!?) — always split
147
+ * 2. Clause boundary (,;:—) — split if buffer >= MIN_CHUNK chars
148
+ * 3. Word boundary — forced split if buffer >= MAX_CHUNK chars (the MIN_CHUNK / MAX_CHUNK values are not declared in this file)
149
+ */
150
+ declare class SentenceSplitter {
151
+ private buffer;
152
+ /** Add a token and get back any speakable chunks (NOTE(review): presumably an empty array when no boundary has been reached — confirm) */
153
+ push(token: string): string[];
154
+ /** Flush any remaining text as a final chunk (NOTE(review): presumably null when nothing is buffered — confirm) */
155
+ flush(): string | null;
156
+ /** Reset the splitter */
157
+ reset(): void;
158
+ private extractChunks;
159
+ }
160
+
161
+ interface TurnDetectorOptions { // Optional settings object accepted by the TurnDetector constructor.
162
+ /** Silence duration, in milliseconds, after a final transcription before turn end is triggered (default: 800) */
163
+ silenceTimeoutMs?: number;
164
+ }
165
+ declare class TurnDetector { // Decides when a speaker's turn has ended, based on transcription results and a silence timeout.
166
+ private readonly silenceTimeoutMs;
167
+ private silenceTimer;
168
+ private _onTurnEnd;
169
+ private lastFinalText;
170
+ constructor(options?: TurnDetectorOptions);
171
+ /** Set the callback for when a turn ends (the type also accepts null; presumably that clears the callback — confirm) */
172
+ set onTurnEnd(cb: (() => void) | null);
173
+ /**
174
+ * Feed a transcription result (`text`, plus `isFinal` to mark a final rather than interim result).
175
+ * Returns true if this result represents a completed turn.
176
+ */
177
+ handleTranscription(text: string, isFinal: boolean): boolean;
178
+ /** Force-trigger turn end */
179
+ forceTurnEnd(): void;
180
+ /** Reset state */
181
+ reset(): void;
182
+ private clearTimer;
183
+ }
184
+
185
+ declare class BargeIn { // Interruption (barge-in) state for one response cycle, exposed through an AbortSignal.
186
+ private abortController;
187
+ private _interrupted;
188
+ private _onInterrupt;
189
+ get interrupted(): boolean; // NOTE(review): presumably reflects the private `_interrupted` flag — confirm.
190
+ /** Set the callback for when barge-in occurs (the type also accepts null; presumably that clears the callback — confirm) */
191
+ set onInterrupt(cb: (() => void) | null);
192
+ /**
193
+ * Create a new AbortController for the current response cycle and return an AbortSignal (presumably that controller's signal — confirm).
194
+ * Call this at the start of each STT->LLM->TTS cycle.
195
+ */
196
+ startCycle(): AbortSignal;
197
+ /** Trigger barge-in. Called when STT detects speech during agent output. */
198
+ trigger(): void;
199
+ /** Reset after the interrupted cycle is cleaned up */
200
+ reset(): void;
201
+ }
202
+
203
+ /**
204
+ * Abstract base class for STT streams.
205
+ * Provides typed EventEmitter interface for transcription events: `on` and `emit` are overloaded for the 'transcription' and 'error' events only.
206
+ * Provider implementations should extend this class and implement `sendAudio` and `close`.
207
+ */
208
+ declare abstract class BaseSTTStream extends EventEmitter implements STTStream {
209
+ abstract sendAudio(pcm16: Buffer): void; // NOTE(review): sample rate/channel layout of `pcm16` is not stated here; presumably matches AudioInput.frames() — confirm.
210
+ abstract close(): Promise<void>;
211
+ on(event: 'transcription', cb: (result: TranscriptionResult) => void): this;
212
+ on(event: 'error', cb: (error: Error) => void): this;
213
+ emit(event: 'transcription', result: TranscriptionResult): boolean;
214
+ emit(event: 'error', error: Error): boolean;
215
+ }
216
+
217
+ interface RoomConnectionOptions { // Argument object for RoomConnection.connect().
218
+ room: string; // NOTE(review): presumably the name of the room to join — confirm.
219
+ apiKey: string; // NOTE(review): presumably used with apiSecret to create the access token mentioned in connect()'s doc — confirm.
220
+ apiSecret: string;
221
+ identity?: string; // Optional; default not visible in this declaration file.
222
+ name?: string; // Optional; default not visible in this declaration file.
223
+ }
224
+ declare class RoomConnection { // Wraps a server-SDK Room and the audio track the agent publishes into it.
225
+ readonly room: Room;
226
+ private audioSource;
227
+ private localTrack;
228
+ private _connected;
229
+ constructor();
230
+ get connected(): boolean; // NOTE(review): presumably reflects the private `_connected` flag — confirm.
231
+ /**
232
+ * Connect to a dTelecom room.
233
+ *
234
+ * 1. Create an Ed25519 JWT via AccessToken
235
+ * 2. Discover nearest SFU via getWsUrl()
236
+ * 3. Connect Room via WebRTC
237
+ * 4. Publish an audio track for the agent to speak through
238
+ */
239
+ connect(options: RoomConnectionOptions): Promise<void>;
240
+ /**
241
+ * Publish an audio track so the agent can speak.
242
+ * Returns (via the promise) the AudioSource to feed PCM16 audio into.
243
+ */
244
+ publishAudioTrack(): Promise<AudioSource>;
245
+ /** Disconnect from the room and clean up resources. */
246
+ disconnect(): Promise<void>;
247
+ }
248
+
249
+ declare class AudioInput { // Audio reader for one participant's RemoteAudioTrack, exposed as async generators.
250
+ readonly participantIdentity: string;
251
+ private stream;
252
+ private _closed;
253
+ private frameCount;
254
+ constructor(track: RemoteAudioTrack, participantIdentity: string);
255
+ get closed(): boolean; // NOTE(review): presumably reflects the private `_closed` flag — confirm.
256
+ /**
257
+ * Async iterate over PCM16 buffers from this participant.
258
+ * Each yielded Buffer is 16kHz mono PCM16 LE.
259
+ */
260
+ frames(): AsyncGenerator<Buffer>;
261
+ /** Async iterate over AudioFrame objects (the server-SDK frame type, rather than raw Buffers). */
262
+ audioFrames(): AsyncGenerator<AudioFrame>;
263
+ close(): void; // NOTE(review): presumably stops iteration and marks the input closed — confirm.
264
+ }
265
+
266
+ /**
267
+ * MemoryStore — SQLite + sqlite-vec database layer for room memory.
268
+ *
269
+ * Single .db file stores:
270
+ * - turns: every spoken turn (full transcript)
271
+ * - sessions: meeting metadata + LLM-generated summaries
272
+ * - turn_vectors: embedding index for semantic turn search
273
+ * - session_vectors: embedding index for session summary search
274
+ */
275
+ interface TurnRow { // Row shape returned by MemoryStore.getRecentTurns() / getSessionTurns().
276
+ id: number;
277
+ room: string;
278
+ session_id: string;
279
+ speaker: string;
280
+ text: string;
281
+ is_agent: number; // Numeric here although insertTurn() takes a boolean; presumably stored as 0/1 — confirm.
282
+ created_at: number; // NOTE(review): timestamp; unit (seconds vs milliseconds) is not visible here — confirm.
283
+ }
284
+ interface SessionRow { // Shape of a session record; nullable fields may be unset.
285
+ id: string;
286
+ room: string;
287
+ started_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
288
+ ended_at: number | null; // NOTE(review): presumably null until the session has ended — confirm.
289
+ participants: string | null; // A single string although the MemoryStore methods take string[]; presumably serialized — confirm format.
290
+ summary: string | null; // NOTE(review): presumably null when no summary was generated — confirm.
291
+ turn_count: number;
292
+ }
293
+ interface SearchResult { // Element type returned by MemoryStore.searchTurns().
294
+ speaker: string;
295
+ text: string;
296
+ created_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
297
+ session_id: string;
298
+ distance: number; // NOTE(review): presumably the KNN vector distance to the query embedding (smaller = closer) — confirm.
299
+ }
300
+ interface SessionSearchResult { // Element type returned by MemoryStore.searchSessions().
301
+ session_id: string;
302
+ summary: string;
303
+ started_at: number; // NOTE(review): timestamp; unit is not visible here — confirm.
304
+ distance: number; // NOTE(review): presumably the KNN vector distance to the query embedding (smaller = closer) — confirm.
305
+ }
306
+ declare class MemoryStore { // Database layer for room memory; its overview comment sits earlier in this file, above TurnRow.
307
+ private db;
308
+ constructor(dbPath: string); // NOTE(review): dbPath is presumably the path of the single .db file — confirm.
309
+ private createTables;
310
+ /** Insert a turn and its embedding vector. NOTE(review): the returned number is presumably the new turn's row id — confirm. */
311
+ insertTurn(room: string, sessionId: string, speaker: string, text: string, isAgent: boolean, embedding: Float32Array): number;
312
+ /** Create a new session record with the given id for `room`. */
313
+ insertSession(id: string, room: string): void;
314
+ /** Update a session with summary and end time; also takes the turn count, participants and the summary's embedding. */
315
+ updateSessionSummary(sessionId: string, summary: string, turnCount: number, participants: string[], embedding: Float32Array): void;
316
+ /** End a session without summary (e.g., too few turns). */
317
+ endSession(sessionId: string, turnCount: number, participants: string[]): void;
318
+ /** KNN search turns by embedding similarity, scoped by `room` and capped at `limit` results. */
319
+ searchTurns(room: string, queryEmbedding: Float32Array, limit: number): SearchResult[];
320
+ /** KNN search session summaries by embedding similarity, scoped by `room` and capped at `limit` results. */
321
+ searchSessions(room: string, queryEmbedding: Float32Array, limit: number): SessionSearchResult[];
322
+ /** Get the last N turns (N = `limit`) from a specific session. */
323
+ getRecentTurns(room: string, sessionId: string, limit: number): TurnRow[];
324
+ /** Get all turns for a session (for summarization). */
325
+ getSessionTurns(sessionId: string): TurnRow[];
326
+ /** Get total turn count for a session. */
327
+ getSessionTurnCount(sessionId: string): number;
328
+ /** Close the database. */
329
+ close(): void;
330
+ }
331
+
332
+ type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'silent'; // Accepted log levels; NOTE(review): 'silent' presumably disables all output — confirm.
333
+ declare function setLogLevel(level: LogLevel): void; // Sets the log level; NOTE(review): scope (global vs per-logger) is not visible here — confirm.
334
+ declare function getLogLevel(): LogLevel; // Returns the current log level.
335
+ interface Logger { // Logger shape returned by createLogger(): one variadic method per severity.
336
+ debug(...args: unknown[]): void;
337
+ info(...args: unknown[]): void;
338
+ warn(...args: unknown[]): void;
339
+ error(...args: unknown[]): void;
340
+ }
341
+ declare function createLogger(tag: string): Logger; // Returns a Logger for `tag`; NOTE(review): presumably the tag prefixes each message — confirm.
342
+
343
+ export { AgentConfig, AgentStartOptions, AgentState, AudioInput, BargeIn, BaseSTTStream, ContextManager, type ContextManagerOptions, LLMPlugin, type LogLevel, type Logger, MemoryStore, Message, Pipeline, PipelineOptions, RoomConnection, type RoomConnectionOptions, STTStream, type SearchResult, SentenceSplitter, type SessionRow, type SessionSearchResult, TranscriptionResult, TurnDetector, type TurnDetectorOptions, type TurnRow, VoiceAgent, createLogger, getLogLevel, setLogLevel };