@voxdiscover/voiceserver 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,580 @@
1
+ import EventEmitter from 'eventemitter3';
2
+
3
+ /**
4
+  * Connection states for a voice session.
5
+  * Type of the read-only `VoiceAgent.state` getter and payload of the 'connection:state' event.
6
+  */
7
+ type ConnectionState = 'connecting' | 'connected' | 'reconnecting' | 'disconnected' | 'failed';
8
+ /**
9
+  * Configuration object accepted by the VoiceAgent constructor.
10
+  * Per user decision: Minimal required config (only `token` is required; every other field is optional).
11
+  */
12
+ interface VoiceAgentConfig {
13
+     /** Session token from backend (POST /v1/sessions) */
14
+     token: string;
15
+     /** Optional base URL for session validation (defaults to production) */
16
+     baseUrl?: string;
17
+     /** Optional reconnection configuration (both nested fields are optional) */
18
+     reconnection?: {
19
+         enabled?: boolean;
20
+         maxAttempts?: number;
21
+     };
22
+     /**
23
+      * Optional mem0 API key for client-side memory integration.
24
+      * Requires `npm install mem0ai` in the consuming app.
25
+      *
26
+      * When set, conversation is synced to mem0 on disconnect() for the specified userId.
27
+      *
28
+      * SECURITY WARNING: API keys in client code can be extracted from DevTools.
29
+      * For production, prefer the server-side pattern: backend handles mem0 with
30
+      * user_id from session context.
31
+      *
32
+      * @see https://docs.mem0.ai/platform/quickstart
33
+      */
34
+     mem0ApiKey?: string;
35
+     /**
36
+      * Optional user identifier for mem0 memory scoping.
37
+      * Required when mem0ApiKey is set (not enforced by this type: both fields are declared optional).
38
+      * Maps to mem0 user_id parameter for per-user memory isolation.
39
+      */
40
+     userId?: string;
41
+ }
42
+ /**
43
+  * Transcript data from conversation; payload of the 'transcript:interim' and 'transcript:final' events.
44
+  * Per user decision: Streaming with interim/final separation.
45
+  */
46
+ interface TranscriptData {
47
+     text: string;
48
+     speaker: 'user' | 'agent';
49
+     timestamp?: Date;
50
+ }
51
+ /**
52
+  * Analytics event types for lifecycle and error tracking (the type of `AnalyticsEvent.eventType`).
53
+  * Per RESEARCH.md Pattern 5 (Analytics Hooks with Lifecycle Events).
54
+  * Per user decision: lifecycle + error events only (not full event stream).
55
+  */
56
+ type AnalyticsEventType = 'session_started' | 'session_ended' | 'connection_failed' | 'agent_swap_completed' | 'agent_swap_failed' | 'error';
57
+ /**
58
+  * Analytics event payload with standard fields; the single argument passed to an AnalyticsCallback.
59
+  * Per user decision: timestamp, event_type, session_id, agent_id, user_id, custom_context.
60
+  *
61
+  * Analytics callbacks MUST be read-only (no SDK method calls) to prevent infinite
62
+  * loops. Calling SDK methods from inside a callback will trigger circuit breaker
63
+  * and disable analytics. See RESEARCH.md Pitfall #5.
64
+  */
65
+ interface AnalyticsEvent {
66
+     /** Unix timestamp in milliseconds when the event occurred */
67
+     timestamp: number;
68
+     /** Type of lifecycle or error event */
69
+     eventType: AnalyticsEventType;
70
+     /** Session identifier from session token */
71
+     sessionId: string;
72
+     /** Agent identifier from session token */
73
+     agentId?: string;
74
+     /** User identifier from session context */
75
+     userId?: string;
76
+     /** Custom context data from session creation */
77
+     customContext?: Record<string, any>;
78
+     /** Error details for connection_failed and error events */
79
+     error?: {
80
+         /** Error code for programmatic handling */
81
+         code: string;
82
+         /** Human-readable error message */
83
+         message: string;
84
+         /** Whether the operation that caused the error can be retried */
85
+         retryable: boolean;
86
+     };
87
+ }
88
+ /**
89
+  * Callback type for analytics event handlers (the parameter type of VoiceAgent.onAnalyticsEvent).
90
+  * IMPORTANT: Callbacks MUST be read-only - do NOT call SDK methods (connect,
91
+  * disconnect, mute, etc.) inside a callback. This causes infinite loops.
92
+  */
93
+ type AnalyticsCallback = (event: AnalyticsEvent) => void;
94
+ /**
95
+  * Custom context for session-specific data injection; VoiceAgent.updateContext accepts a Partial of it.
96
+  * Per Phase 15 ADV-02: custom context with predefined fields and arbitrary JSON.
97
+  *
98
+  * Predefined fields map to backend CustomContext model.
99
+  * The custom field holds arbitrary JSON for agent context (max 10KB, 50 fields).
100
+  */
101
+ interface CustomContext {
102
+     /** User identifier (used for mem0 memory integration) */
103
+     user_id?: string;
104
+     /** Customer/tenant identifier */
105
+     customer_id?: string;
106
+     /** Structured session metadata */
107
+     session_metadata?: Record<string, unknown>;
108
+     /** Arbitrary JSON for agent context (10KB max, 50 fields max) */
109
+     custom?: Record<string, unknown>;
110
+ }
111
+ /**
112
+  * Payload emitted with the 'context:updated' event.
113
+  */
114
+ interface ContextUpdateData {
115
+     /** The context updates that were applied */
116
+     updates: Partial<CustomContext>;
117
+     /** Timestamp of the update (milliseconds since epoch) */
118
+     timestamp: number;
119
+ }
120
+ /**
121
+  * Individual cost breakdown for a single provider call; element type of `CostSummary.breakdown`.
122
+  * Per RESEARCH.md Pattern 4 (Web SDK - Cost Aggregation).
123
+  */
124
+ interface CostBreakdown {
125
+     /** Provider name (openai, elevenlabs, deepgram, cartesia, etc.) */
126
+     provider: string;
127
+     /** Service type for the provider call */
128
+     serviceType: 'stt' | 'llm' | 'tts';
129
+     /** Model name used for this call */
130
+     model: string;
131
+     /** Cost in USD for this single call */
132
+     costUsd: number;
133
+     /** Unix timestamp when this cost was incurred. NOTE(review): unit (seconds vs. milliseconds) is not stated - confirm */
134
+     timestamp: number;
135
+ }
136
+ /**
137
+  * Cumulative cost summary for the session; returned by VoiceAgent.getCostSummary() and sent with 'cost:update'.
138
+  * Aggregated from all CostBreakdown entries during the session.
139
+  */
140
+ interface CostSummary {
141
+     /** Total session cost in USD (sum of all breakdown entries) */
142
+     totalUsd: number;
143
+     /** All individual cost breakdown entries */
144
+     breakdown: CostBreakdown[];
145
+     /** Total cost grouped by provider name */
146
+     byProvider: Record<string, number>;
147
+     /** Total cost grouped by service type (stt, llm, tts) */
148
+     byServiceType: Record<string, number>;
149
+ }
150
+ /**
151
+  * Options for agent hot-swap; the optional second argument of VoiceAgent.switchAgent.
152
+  * Per Phase 15 ADV-01: controls context/transcript preservation and timeout.
153
+  */
154
+ interface AgentSwapOptions {
155
+     /** Transfer LLM context to new agent (default: true) */
156
+     preserveContext?: boolean;
157
+     /** Transfer transcript history to new agent (default: true) */
158
+     preserveTranscripts?: boolean;
159
+     /** Milliseconds to wait for new agent to connect before failing (default: 5000) */
160
+     timeout?: number;
161
+ }
162
+ /**
163
+  * Payload for the agent swap events ('agent:swapping', 'agent:swapped', 'agent:swap-failed').
164
+  */
165
+ interface AgentSwapEventData {
166
+     /** New agent ID being swapped to */
167
+     newAgentId: string;
168
+     /** Unix timestamp in milliseconds */
169
+     timestamp: number;
170
+     /** Error message (only on agent:swap-failed) */
171
+     error?: string;
172
+ }
173
+ /**
174
+  * Event map for the typed EventEmitter that VoiceAgent extends: event name -> listener signature.
175
+  * Per RESEARCH.md Pattern 2 and user decision.
176
+  */
177
+ interface VoiceAgentEvents {
178
+     'connection:state': (state: ConnectionState) => void;
179
+     'connection:error': (error: Error) => void;
180
+     'transcript:interim': (data: TranscriptData) => void;
181
+     'transcript:final': (data: TranscriptData) => void;
182
+     'audio:muted': () => void;
183
+     'audio:unmuted': () => void;
184
+     'session:expiring': (expiresIn: number) => void; // NOTE(review): unit of expiresIn is not stated here - confirm
185
+     'context:updated': (data: ContextUpdateData) => void;
186
+     'cost:update': (summary: CostSummary) => void;
187
+     'agent:swapping': (data: AgentSwapEventData) => void;
188
+     'agent:swapped': (data: AgentSwapEventData) => void;
189
+     'agent:swap-failed': (data: AgentSwapEventData) => void;
190
+ }
191
+
192
+ /**
193
+  * Error codes for VoiceAgent SDK (the type of `VoiceAgentError.code`).
194
+  * Per user decision: both instanceof checks AND code property checks.
195
+  */
196
+ type VoiceAgentErrorCode = 'TOKEN_EXPIRED' | 'TOKEN_INVALID' | 'CONNECTION_FAILED' | 'PERMISSION_DENIED' | 'NETWORK_ERROR' | 'UNKNOWN_ERROR';
197
+ /**
198
+  * Base error class for VoiceAgent SDK.
199
+  *
200
+  * Per user decision: includes human-readable message, suggested fix/action,
201
+  * original cause, context data, and retryability indicator.
202
+  *
203
+  * Supports both error handling patterns:
204
+  * - Pattern 1: instanceof checks
205
+  * - Pattern 2: code property checks
206
+  *
207
+  * @example
208
+  * ```typescript
209
+  * try {
210
+  *   await agent.connect();
211
+  * } catch (err) {
212
+  *   // Pattern 1: instanceof
213
+  *   if (err instanceof TokenExpiredError) {
214
+  *     console.log(err.context?.suggestion);
215
+  *   }
216
+  *
217
+  *   // Pattern 2: code property (narrow `err` first if catch variables are typed `unknown`)
218
+  *   if (err.code === 'TOKEN_EXPIRED') {
219
+  *     refreshToken();
220
+  *   }
221
+  *
222
+  *   // Retryability check (same narrowing caveat as Pattern 2)
223
+  *   if (err.retryable) {
224
+  *     retry();
225
+  *   }
226
+  * }
227
+  * ```
228
+  */
229
+ declare class VoiceAgentError extends Error {
230
+     readonly code: VoiceAgentErrorCode;
231
+     readonly cause?: Error;
232
+     readonly context?: Record<string, any>;
233
+     readonly retryable: boolean;
234
+     constructor(message: string, code: VoiceAgentErrorCode, options?: {
235
+         cause?: Error;
236
+         context?: Record<string, any>;
237
+         retryable?: boolean;
238
+     });
239
+ }
240
+ /**
241
+  * Session token has expired. Subclass of VoiceAgentError; both constructor arguments are optional.
242
+  * Per user decision: non-retryable, includes suggested action.
243
+  */
244
+ declare class TokenExpiredError extends VoiceAgentError {
245
+     constructor(message?: string, cause?: Error);
246
+ }
247
+ /**
248
+  * Session token is invalid or malformed. Subclass of VoiceAgentError; both constructor arguments are optional.
249
+  * Per user decision: non-retryable, includes suggested action.
250
+  */
251
+ declare class TokenInvalidError extends VoiceAgentError {
252
+     constructor(message?: string, cause?: Error);
253
+ }
254
+ /**
255
+  * WebRTC connection failed. Subclass of VoiceAgentError; `message` is required, `cause` is optional.
256
+  * Per user decision: retryable, includes suggested action.
257
+  */
258
+ declare class ConnectionFailedError extends VoiceAgentError {
259
+     constructor(message: string, cause?: Error);
260
+ }
261
+ /**
262
+  * Microphone or camera permission denied by user. Constructed from the denied permission name only.
263
+  * Per user decision: non-retryable, includes suggested action.
264
+  */
265
+ declare class PermissionDeniedError extends VoiceAgentError {
266
+     constructor(permission: 'microphone' | 'camera');
267
+ }
268
+ /**
269
+  * Network error occurred during API call or WebRTC connection. `message` is required, `cause` is optional.
270
+  * Per user decision: retryable.
271
+  */
272
+ declare class NetworkError extends VoiceAgentError {
273
+     constructor(message: string, cause?: Error);
274
+ }
275
+
276
+ /**
277
+  * VoiceAgent SDK for Voice_server voice conversations.
278
+  *
279
+  * Per user decisions:
280
+  * - Constructor pattern with minimal required config (only token)
281
+  * - Explicit connect() - no auto-connect on init
282
+  * - disconnect() performs full cleanup (leave + destroy)
283
+  * - Read-only state property
284
+  * - Async validation before connect
285
+  *
286
+  * @example
287
+  * ```typescript
288
+  * const agent = new VoiceAgent({ token: sessionToken });
289
+  *
290
+  * agent.on('connection:state', (state) => {
291
+  *   console.log('State:', state);
292
+  * });
293
+  *
294
+  * await agent.connect();
295
+  * agent.mute();
296
+  * await agent.disconnect();
297
+  * ```
298
+  */
299
+ declare class VoiceAgent extends EventEmitter<VoiceAgentEvents> {
300
+     private config;
301
+     private sessionData;
302
+     private dailyCall;
303
+     private _state;
304
+     private reconnectionManager;
305
+     /**
306
+      * Audio elements for remote participants.
307
+      * Daily.js createCallObject() does not auto-play remote audio — we must
308
+      * create <audio> elements ourselves in the track-started handler.
309
+      */
310
+     private remoteAudioElements;
311
+     /**
312
+      * Cumulative cost breakdown for the current session.
313
+      * Per RESEARCH.md Pattern 4: appended on each cost-update app-message.
314
+      * Reset to [] on disconnect() for clean state on reconnection.
315
+      */
316
+     private costBreakdown;
317
+     /**
318
+      * Current agent ID tracked for duplicate-swap guard.
319
+      * Initialized from session token on connect().
320
+      */
321
+     private currentAgentId;
322
+     /**
323
+      * Registered analytics callbacks.
324
+      * Per RESEARCH.md Pattern 5: observer pattern for lifecycle and error events.
325
+      */
326
+     private analyticsCallbacks;
327
+     /**
328
+      * Circuit breaker counter for analytics to prevent infinite loops.
329
+      * Per RESEARCH.md Pitfall #5: if same event emitted >10 times/second, disable analytics.
330
+      */
331
+     private analyticsCallCount;
332
+     private analyticsCallResetTimer;
333
+     private analyticsDisabled;
334
+     /**
335
+      * Optional mem0 memory client for client-side conversation memory.
336
+      * Initialized from config.mem0ApiKey via dynamic import (optional peer dependency).
337
+      * Per RESEARCH.md Pattern 3: client-side pattern syncs on session end.
338
+      */
339
+     private memoryClient;
340
+     /**
341
+      * Transcript history for mem0 sync on session end.
342
+      * Accumulates final transcripts as {role, content} pairs.
343
+      * Reset on disconnect() for clean state on reconnection.
344
+      */
345
+     private transcriptHistory;
346
+     /**
347
+      * Create VoiceAgent instance.
348
+      * Per user decision: constructor pattern, minimal config (only token required).
349
+      */
350
+     constructor(config: VoiceAgentConfig);
351
+     /**
352
+      * Get current connection state.
353
+      * Per user decision: read-only state property (getter only; no setter is declared).
354
+      */
355
+     get state(): ConnectionState;
356
+     /**
357
+      * Register an analytics callback for lifecycle and error events.
358
+      *
359
+      * Emits: session_started, session_ended, connection_failed, and error events.
360
+      * NOTE(review): AnalyticsEventType also declares agent_swap_completed / agent_swap_failed - confirm whether they are delivered here.
361
+      * IMPORTANT: Analytics callbacks MUST be read-only. Do NOT call SDK methods
362
+      * (connect, disconnect, mute, etc.) inside a callback. Doing so will trigger
363
+      * the circuit breaker and disable analytics for the remainder of the session.
364
+      * See RESEARCH.md Pitfall #5.
365
+      *
366
+      * @param callback Function called with each analytics event
367
+      *
368
+      * @example
369
+      * ```typescript
370
+      * agent.onAnalyticsEvent((event) => {
371
+      *   // Integrate with Segment, DataDog, PostHog, etc.
372
+      *   analytics.track(event.eventType, {
373
+      *     session_id: event.sessionId,
374
+      *     user_id: event.userId,
375
+      *   });
376
+      * });
377
+      * ```
378
+      */
379
+     onAnalyticsEvent(callback: AnalyticsCallback): void;
380
+     /**
381
+      * Search user memories from mem0 for context-aware responses.
382
+      *
383
+      * Requires mem0ApiKey and userId in config. Returns empty array if mem0
384
+      * is not configured or an error occurs.
385
+      *
386
+      * @param query Search query (e.g., "user preferences", "previous orders")
387
+      * @param limit Maximum number of results (default: 5)
388
+      * @returns Array of memory objects with 'memory' and 'score' fields (typed as any[], so the compiler does not check them)
389
+      *
390
+      * @example
391
+      * ```typescript
392
+      * const memories = await agent.searchMemories('user preferences', 3);
393
+      * memories.forEach(m => console.log(m.memory));
394
+      * ```
395
+      */
396
+     searchMemories(query: string, limit?: number): Promise<any[]>;
397
+     /**
398
+      * Initialize optional mem0 memory client via dynamic import.
399
+      * Uses dynamic import so missing mem0ai package is a soft warning, not an error.
400
+      * Per RESEARCH.md Pattern 3 (Client-side - Web SDK).
401
+      */
402
+     private initMemoryClient;
403
+     /**
404
+      * Emit analytics event to all registered callbacks.
405
+      * Per RESEARCH.md Pattern 5: try/catch per callback prevents analytics errors from breaking SDK.
406
+      * Per RESEARCH.md Pitfall #5: circuit breaker disables analytics on excessive calls.
407
+      */
408
+     private emitAnalyticsEvent;
409
+     /**
410
+      * Connect to voice session.
411
+      * Per user decision: explicit connect(), async validation before connect.
412
+      */
413
+     connect(): Promise<void>;
414
+     /**
415
+      * Disconnect from voice session.
416
+      * Per user decision: full cleanup (leave + destroy + remove listeners).
417
+      * Per RESEARCH.md Pitfall 2: Both leave() and destroy() required.
418
+      */
419
+     disconnect(): Promise<void>;
420
+     /**
421
+      * Mute microphone.
422
+      */
423
+     mute(): void;
424
+     /**
425
+      * Unmute microphone.
426
+      */
427
+     unmute(): void;
428
+     /**
429
+      * Update session context mid-session.
430
+      *
431
+      * Per Phase 15 ADV-02: allows developer to update user_id, customer_id,
432
+      * session_metadata, and custom JSON during an active session.
433
+      *
434
+      * Validates context client-side (10KB size, 50 fields) with warnings.
435
+      * POSTs to /v1/sessions/{session_id}/context endpoint.
436
+      * Emits 'context:updated' event on success.
437
+      *
438
+      * @param contextUpdates Partial context to merge into existing session context
439
+      * @throws Error if no active session
440
+      * @throws Error if the context update API call fails
441
+      *
442
+      * @example
443
+      * ```typescript
444
+      * await agent.updateContext({
445
+      *   user_id: 'user_123',
446
+      *   custom: { preferred_language: 'Spanish', loyalty_tier: 'gold' }
447
+      * });
448
+      * ```
449
+      */
450
+     updateContext(contextUpdates: Partial<CustomContext>): Promise<void>;
451
+     /**
452
+      * Validate context client-side for size (10KB) and field count (50 max).
453
+      * Validation is permissive: warns but does not throw on limits exceeded.
454
+      * Per user decision: don't break session on validation warning.
455
+      *
456
+      * @param context Context object to validate
457
+      */
458
+     private validateContext;
459
+     /**
460
+      * Switch agent mid-session without disconnecting WebRTC.
461
+      *
462
+      * Per Phase 15 ADV-01: hot-swap the backend bot while keeping the Daily room
463
+      * connection alive. The new agent receives full conversation context and
464
+      * transcript history.
465
+      *
466
+      * Protocol:
467
+      * 1. Validate connected and not already using newAgentId
468
+      * 2. Emit agent:swapping event (for analytics / UI loading indicator)
469
+      * 3. Send Daily app-message to backend bot requesting swap
470
+      * 4. Wait for agent-swap-complete confirmation (with timeout)
471
+      * 5. Update currentAgentId and emit agent:swapped on success
472
+      * 6. Emit agent:swap-failed and throw on timeout or backend error
473
+      *
474
+      * @param newAgentId UUID of the agent to swap to
475
+      * @param options Swap options (preserveContext, preserveTranscripts, timeout)
476
+      * @throws Error if not connected, already using newAgentId, or swap fails/times out
477
+      *
478
+      * @example
479
+      * ```typescript
480
+      * try {
481
+      *   await agent.switchAgent('specialist-agent-uuid');
482
+      *   console.log('Agent switched successfully');
483
+      * } catch (err) {
484
+      *   console.error('Swap failed:', err);
485
+      *   // Old agent is still active (resilient fallback)
486
+      * }
487
+      * ```
488
+      */
489
+     switchAgent(newAgentId: string, options?: AgentSwapOptions): Promise<void>;
490
+     /**
491
+      * Wait for backend agent-swap-complete confirmation via Daily app-message.
492
+      *
493
+      * Resolves when agent-swap-complete is received with matching agent_id.
494
+      * Rejects on agent-swap-failed message or timeout.
495
+      *
496
+      * @param agentId Expected new agent ID in confirmation
497
+      * @param timeoutMs Milliseconds before timing out
498
+      */
499
+     private waitForSwapConfirmation;
500
+     /**
501
+      * Subscribe to real-time cost updates for the session.
502
+      *
503
+      * Registers a callback that is called after each provider API call (STT, LLM, TTS)
504
+      * with the cumulative cost summary including a breakdown by provider and service type.
505
+      *
506
+      * Per RESEARCH.md Pattern 4: Backend streams cost events; SDK aggregates and exposes
507
+      * via this callback for developer integration (budget alerts, user-facing displays).
508
+      *
509
+      * @param callback Function called with CostSummary on each cost update
510
+      *
511
+      * @example
512
+      * ```typescript
513
+      * agent.onCostUpdate((summary) => {
514
+      *   console.log(`Session cost: $${summary.totalUsd.toFixed(4)}`);
515
+      *   console.log('By provider:', summary.byProvider);
516
+      *   console.log('By service:', summary.byServiceType);
517
+      * });
518
+      * ```
519
+      */
520
+     onCostUpdate(callback: (summary: CostSummary) => void): void;
521
+     /**
522
+      * Get the current cumulative cost summary for the session.
523
+      *
524
+      * Returns a snapshot of all costs incurred so far, aggregated by provider
525
+      * and service type. Returns empty summary if no costs have been tracked.
526
+      *
527
+      * @returns CostSummary with totalUsd, breakdown array, byProvider, and byServiceType
528
+      */
529
+     getCostSummary(): CostSummary;
530
+     /**
531
+      * Set up cost tracking subscription on the Daily call object.
532
+      * Per RESEARCH.md Pattern 4: subscribe to cost-update app-messages from backend.
533
+      * Called from setupDailyEventListeners() after Daily call object is created.
534
+      */
535
+     private setupCostTracking;
536
+     /**
537
+      * Handle cost-update app-message from backend bot.
538
+      * Parses cost event, appends to costBreakdown, and emits cost:update event.
539
+      * Per RESEARCH.md Pattern 4: cost events contain provider, service_type, model, cost_usd.
540
+      */
541
+     private handleCostUpdate;
542
+     private setState;
543
+     private setupDailyEventListeners;
544
+     private handleJoined;
545
+     private handleLeft;
546
+     private handleParticipantJoined;
547
+     private handleParticipantLeft;
548
+     /**
549
+      * Create and play an <audio> element when a remote participant's audio track starts.
550
+      * Daily.js createCallObject() does NOT auto-play remote audio in headless mode —
551
+      * the app must handle track-started and wire the track to an audio element.
552
+      */
553
+     private handleTrackStarted;
554
+     private handleTrackStopped;
555
+     private handleDailyError;
556
+     private handleTranscript;
557
+     private attemptReconnect;
558
+ }
559
+
560
+ /**
561
+  * Manages automatic reconnection with exponential backoff and jitter.
562
+  * Per RESEARCH.md Pattern 4 and research recommendations. All constructor options are optional.
563
+  */
564
+ declare class ReconnectionManager {
565
+     private maxRetries;
566
+     private initialDelayMs;
567
+     private maxDelayMs;
568
+     constructor(config?: {
569
+         maxRetries?: number;
570
+         initialDelayMs?: number;
571
+         maxDelayMs?: number;
572
+     });
573
+     /**
574
+      * Execute connection function with exponential backoff retry logic.
575
+      * Adds jitter to prevent thundering herd problem. The optional `onRetry` hook is passed (attempt, delayMs).
576
+      */
577
+     reconnectWithBackoff(connectFn: () => Promise<void>, onRetry?: (attempt: number, delayMs: number) => void): Promise<void>;
578
+ }
579
+
580
+ export { type AgentSwapEventData, type AgentSwapOptions, type AnalyticsCallback, type AnalyticsEvent, type AnalyticsEventType, ConnectionFailedError, type ConnectionState, type ContextUpdateData, type CostBreakdown, type CostSummary, type CustomContext, NetworkError, PermissionDeniedError, ReconnectionManager, TokenExpiredError, TokenInvalidError, type TranscriptData, VoiceAgent, type VoiceAgentConfig, VoiceAgentError, type VoiceAgentErrorCode, type VoiceAgentEvents }; // AgentSwap* types are exported because the public VoiceAgentEvents map and VoiceAgent.switchAgent signature reference them.