@absolutejs/voice 0.0.22-beta.472 → 0.0.22-beta.474

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,107 @@ Use it when you want Vapi/Retell/Bland-style voice-agent capability, but you wan
8
8
 
9
9
  ## What's new
10
10
 
11
+ ### 0.0.22-beta.474 · Phase 5 runtime hookup — auto-wire monitor sockets to live sessions
12
+
13
+ The Phase 5 monitor primitive is now first-class in the voice runtime. Pass a `monitor` binding to `voice({...})` and every session opened against the voice plugin auto-registers in the monitor registry, outbound TTS audio auto-fans-out to all listeners, and the close/superseded/session-switch paths deregister automatically. Supervisors connecting to the listen route get a live audio stream without any manual `record.emit()` calls.
14
+
15
+ ```ts
16
+ import { Elysia } from "elysia";
17
+ import {
18
+ createVoiceInMemoryMonitorRegistry,
19
+ createVoiceLiveMonitorRoutes,
20
+ createVoiceMonitorRuntimeBinding,
21
+ voice,
22
+ } from "@absolutejs/voice";
23
+
24
+ const monitorRegistry = createVoiceInMemoryMonitorRegistry();
25
+
26
+ const app = new Elysia()
27
+ .use(
28
+ voice({
29
+ path: "/voice/realtime",
30
+ stt: deepgram({ apiKey: process.env.DEEPGRAM_API_KEY! }),
31
+ tts: elevenlabs({ apiKey: process.env.ELEVENLABS_API_KEY! }),
32
+ onTurn: async (session, turn, api) => {
33
+ // your business logic
34
+ },
35
+ session: sessionStore,
36
+ monitor: createVoiceMonitorRuntimeBinding(monitorRegistry, {
37
+ audioFormat: { channels: 1, container: "raw", encoding: "pcm_s16le", sampleRateHz: 24_000 },
38
+ }),
39
+ }),
40
+ )
41
+ .use(
42
+ createVoiceLiveMonitorRoutes({
43
+ registry: monitorRegistry,
44
+ authenticate: async ({ request }) => await verifySupervisorJWT(request),
45
+ }),
46
+ );
47
+ ```
48
+
49
+ Surface additions:
50
+ - **`createVoiceMonitorRuntimeBinding(registry, { audioFormat?, defaultSource? })`** — returns a `VoiceMonitorRuntimeBinding` you pass to the voice plugin's new `monitor` option. Internally, each session open calls `registerSession({ handle, sessionId })`, audio fans out via `emitAudio()` on every binary `socket.send`, and the close/superseded/session-switch paths call `deregister(reason)`.
51
+ - **`VoicePluginConfig.monitor?: VoiceMonitorRuntimeBinding`** — new optional field on the main voice plugin config.
52
+ - **`VoiceMonitorMutableRegistry.deregister(sessionId, reason?)`** — explicit deregister path so the runtime binding can tear down stale records on re-register without throwing. The `register()`-returned deregister fn now also accepts an optional reason.
53
+ - The runtime binding is **opt-in** — if you don't pass `monitor`, voice behaves exactly as before. No overhead on the audio-send hot path beyond a single `Map.get`.
54
+
55
+ 4 new tests cover the runtime binding's register/emit/deregister cycle, no-op-after-deregister, re-registration tear-down, and the audioFormat + defaultSource option threading. Full voice suite now 963 pass / 1 pre-existing fail.
56
+
57
+ ### 0.0.22-beta.473 · Phase 5 — live listen + control monitor sockets (Vapi `monitorPlan` parity)
58
+
59
+ Two new WebSocket routes per session that mirror Vapi's `monitorPlan.listenUrl` + `monitorPlan.controlUrl`. Supervisors can subscribe to a live call's outbound audio and send control commands (transfer, hangup, escalate, voicemail, no-answer, plus caller-defined mute/say/inject) without touching the call itself.
60
+
61
+ ```ts
62
+ import { Elysia } from "elysia";
63
+ import {
64
+ buildVoiceMonitorPlan,
65
+ createVoiceInMemoryMonitorRegistry,
66
+ createVoiceLiveMonitorRoutes,
67
+ createVoiceMonitorSession,
68
+ } from "@absolutejs/voice";
69
+
70
+ const registry = createVoiceInMemoryMonitorRegistry();
71
+
72
+ // In your runtime: when a session opens, register it so supervisors can listen.
73
+ const record = createVoiceMonitorSession({ handle, sessionId: handle.id });
74
+ const deregister = registry.register(record);
75
+ // When audio leaves the assistant, fan it out:
76
+ // record.emit({ at: Date.now(), chunk, format, source: 'assistant' });
77
+ // On call end:
78
+ // deregister();
79
+
80
+ const app = new Elysia()
81
+ .use(
82
+ createVoiceLiveMonitorRoutes({
83
+ authenticate: async ({ sessionId, route, request }) =>
84
+ await verifySupervisorJWT(request),
85
+ controlHandlers: {
86
+ say: async ({ message, session }) => {
87
+ await yourTtsRuntime.sayInSession(session.sessionId, message.text);
88
+ return { detail: `Said: ${message.text}`, ok: true, type: "say" };
89
+ },
90
+ },
91
+ htmlPath: "/voice/monitor",
92
+ registry,
93
+ }),
94
+ );
95
+
96
+ const plan = buildVoiceMonitorPlan({
97
+ baseUrl: "wss://api.example.com",
98
+ sessionId: handle.id,
99
+ });
100
+ // plan.listenUrl → wss://api.example.com/api/voice/monitor/<sessionId>/listen
101
+ // plan.controlUrl → wss://api.example.com/api/voice/monitor/<sessionId>/control
102
+ ```
103
+
104
+ Surface summary:
105
+ - **`createVoiceInMemoryMonitorRegistry()`** — `{ register, get, list, emit, emitClose }`. Voice's session runtime (or any caller) wires `register()` on open + the returned deregister on close; `emit()` fans out outbound audio frames; `emitClose()` notifies listeners that the call ended.
106
+ - **`createVoiceLiveMonitorRoutes(options)`** — Elysia plugin that mounts two `.ws()` routes per session: `:sessionId/listen` (read-only outbound audio as binary frames) and `:sessionId/control` (JSON control messages). Default handlers map `transfer`/`hangup`/`escalate`/`voicemail`/`no-answer` onto `VoiceSessionHandle` verbs; `mute`/`say`/`inject` require caller-supplied handlers via `controlHandlers`.
107
+ - **`buildVoiceMonitorPlan(input)`** — Vapi-shaped helper returning `{ listenUrl, controlUrl }` for inclusion in your call-create API response.
108
+ - **Auth hook** — `authenticate?: ({ sessionId, route, request }) => Promise<boolean>` runs at the start of both routes; rejected connections get a 4401 close.
109
+
110
+ This is a **primitive**: wiring voice's existing `activeSessions` runtime into the registry (so audio actually flows from a real call without manual `record.emit()` calls) is the next step and intentionally left out to keep this change additive. Until then, registries built by hand (e.g. from a custom telephony adapter where you control the outbound audio buffer) work out of the box.
111
+
11
112
  ### 0.0.22-beta.472 · Phase 6 — multilingual STT proof gate
12
113
 
13
114
  `runVoiceMultilingualProof(...)` turns the `voice-fixtures-multilingual` corpus (FLEURS + BSC Catalan-Spanish code-switch + CoSHE Hindi-English code-switch) into a gateable readiness/proof artifact. Buyers evaluating Vapi-replacement can now run any combination of STT adapters against the multilingual corpus and assert per-language WER / pass-rate / term-recall budgets in CI.
package/dist/index.d.ts CHANGED
@@ -225,4 +225,6 @@ export { buildVoiceProofPackInput, buildVoiceProofPack, buildVoiceProofPackFromO
225
225
  export type { VoiceProofPack, VoiceProofPackBuildContext, VoiceProofPackBuildContextOptions, VoiceProofPackBuildTiming, VoiceProofPackEvidence, VoiceProofPackInput, VoiceProofPackInputBuilderLoaderInput, VoiceProofPackInputBuilderOperationsLoaderInput, VoiceProofPackInputBuilderOptions, VoiceProofPackInputBuilderSupportBundle, VoiceProofPackRefreshState, VoiceProofPackRefreshStatus, VoiceProofPackRoutesOptions, VoiceProofPackSection, VoiceProofPackSourceValue, VoiceProofPackStatus, VoiceProofPackStaleWhileRefreshSource, VoiceProofPackStaleWhileRefreshSourceOptions, VoiceProofPackWriteResult, VoiceProofRefreshSnapshot, VoiceProofRefreshSnapshotOptions, } from "./proofPack";
226
226
  export { buildVoiceMultilingualProofReadinessCheck, renderVoiceMultilingualProofMarkdown, runVoiceMultilingualProof, } from "./multilingualProof";
227
227
  export type { VoiceMultilingualLanguageCode, VoiceMultilingualProofAdapterEntry, VoiceMultilingualProofAdapterReport, VoiceMultilingualProofDefaultThresholds, VoiceMultilingualProofLanguageMetrics, VoiceMultilingualProofLanguageReport, VoiceMultilingualProofLanguageThresholds, VoiceMultilingualProofOptions, VoiceMultilingualProofReadinessCheck, VoiceMultilingualProofReadinessOptions, VoiceMultilingualProofReport, } from "./multilingualProof";
228
+ export { buildVoiceMonitorPlan, createVoiceInMemoryMonitorRegistry, createVoiceLiveMonitorRoutes, createVoiceMonitorRuntimeBinding, createVoiceMonitorSession, } from "./monitor";
229
+ export type { VoiceMonitorAudioEvent, VoiceMonitorAudioSource, VoiceMonitorAuthenticate, VoiceMonitorAuthenticateInput, VoiceMonitorControlAck, VoiceMonitorControlHandler, VoiceMonitorControlHandlerInput, VoiceMonitorControlMessage, VoiceMonitorMutableRegistry, VoiceMonitorPlan, VoiceMonitorPlanInput, VoiceMonitorRegistry, VoiceMonitorRegistryRegisterInput, VoiceLiveMonitorRoutesOptions, VoiceMonitorRuntimeBindingOptions, VoiceMonitorSessionRecord, } from "./monitor";
228
230
  export * from "./types";
package/dist/index.js CHANGED
@@ -6265,6 +6265,7 @@ var voice = (config) => {
6265
6265
  if (!config.stt && !config.realtime) {
6266
6266
  throw new Error("voice requires either an stt or realtime adapter.");
6267
6267
  }
6268
+ const monitorBindings = new Map;
6268
6269
  const runtime = {
6269
6270
  activeSessions: new Map,
6270
6271
  logger: resolveLogger(config.logger),
@@ -6272,6 +6273,57 @@ var voice = (config) => {
6272
6273
  profileSwitchGuardedSessions: new Set,
6273
6274
  socketSessions: new WeakMap
6274
6275
  };
6276
+ const monitor = config.monitor;
6277
+ const registerMonitorSession = (sessionId, handle) => {
6278
+ if (!monitor)
6279
+ return;
6280
+ const existing = monitorBindings.get(sessionId);
6281
+ if (existing) {
6282
+ try {
6283
+ existing.deregister("superseded");
6284
+ } catch {}
6285
+ monitorBindings.delete(sessionId);
6286
+ }
6287
+ try {
6288
+ const binding = monitor.registerSession({ handle, sessionId });
6289
+ monitorBindings.set(sessionId, binding);
6290
+ } catch (error) {
6291
+ runtime.logger.warn?.(`[voice] failed to register session "${sessionId}" with monitor runtime: ${error instanceof Error ? error.message : String(error)}`);
6292
+ }
6293
+ };
6294
+ const deregisterMonitorSession = (sessionId, reason) => {
6295
+ const binding = monitorBindings.get(sessionId);
6296
+ if (!binding)
6297
+ return;
6298
+ monitorBindings.delete(sessionId);
6299
+ try {
6300
+ binding.deregister(reason);
6301
+ } catch (error) {
6302
+ runtime.logger.warn?.(`[voice] failed to deregister monitor binding for session "${sessionId}": ${error instanceof Error ? error.message : String(error)}`);
6303
+ }
6304
+ };
6305
+ const buildSocketAdapter = (ws, sessionId) => {
6306
+ if (!monitor)
6307
+ return createSocketAdapter(ws);
6308
+ return {
6309
+ close: async (code, reason) => {
6310
+ ws.close(code, reason);
6311
+ },
6312
+ send: async (data) => {
6313
+ if (typeof data !== "string") {
6314
+ const binding = monitorBindings.get(sessionId);
6315
+ if (binding) {
6316
+ try {
6317
+ binding.emitAudio(data);
6318
+ } catch (error) {
6319
+ runtime.logger.warn?.(`[voice] monitor emitAudio failed for session "${sessionId}": ${error instanceof Error ? error.message : String(error)}`);
6320
+ }
6321
+ }
6322
+ }
6323
+ ws.send(data);
6324
+ }
6325
+ };
6326
+ };
6275
6327
  const onTurn = normalizeOnTurn(config.onTurn);
6276
6328
  const sessionOptions = resolveSessionOptions(config);
6277
6329
  const htmxOptions = config.htmx && typeof config.htmx === "object" ? config.htmx : undefined;
@@ -6394,6 +6446,7 @@ var voice = (config) => {
6394
6446
  }
6395
6447
  const session = runtime.activeSessions.get(socketState.sessionId);
6396
6448
  runtime.activeSessions.delete(socketState.sessionId);
6449
+ deregisterMonitorSession(socketState.sessionId, reason ?? `ws-close-${String(code)}`);
6397
6450
  if (session) {
6398
6451
  await session.disconnect({
6399
6452
  code,
@@ -6417,6 +6470,7 @@ var voice = (config) => {
6417
6470
  if (message.type === "close" && current) {
6418
6471
  await current.close(message.reason);
6419
6472
  runtime.activeSessions.delete(sessionState.sessionId);
6473
+ deregisterMonitorSession(sessionState.sessionId, message.reason);
6420
6474
  }
6421
6475
  if (message.type === "call_control" && current) {
6422
6476
  if (message.action === "transfer") {
@@ -6459,6 +6513,7 @@ var voice = (config) => {
6459
6513
  if (currentSession) {
6460
6514
  await currentSession.close("session-switch");
6461
6515
  runtime.activeSessions.delete(sessionState.sessionId);
6516
+ deregisterMonitorSession(sessionState.sessionId, "session-switch");
6462
6517
  }
6463
6518
  sessionState.sessionId = message.sessionId;
6464
6519
  runtime.socketSessions.set(ws, {
@@ -6482,8 +6537,10 @@ var voice = (config) => {
6482
6537
  }
6483
6538
  const session = current ?? await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
6484
6539
  if (!current) {
6485
- runtime.activeSessions.set(sessionState.sessionId, session);
6486
- await session.connect(createSocketAdapter(ws));
6540
+ const typedSession = session;
6541
+ runtime.activeSessions.set(sessionState.sessionId, typedSession);
6542
+ registerMonitorSession(sessionState.sessionId, typedSession);
6543
+ await session.connect(buildSocketAdapter(ws, sessionState.sessionId));
6487
6544
  }
6488
6545
  await session.receiveAudio(audio);
6489
6546
  },
@@ -6493,10 +6550,13 @@ var voice = (config) => {
6493
6550
  if (existing) {
6494
6551
  await existing.close("superseded");
6495
6552
  runtime.activeSessions.delete(sessionState.sessionId);
6553
+ deregisterMonitorSession(sessionState.sessionId, "superseded");
6496
6554
  }
6497
6555
  const session = await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
6498
- runtime.activeSessions.set(sessionState.sessionId, session);
6499
- await session.connect(createSocketAdapter(ws));
6556
+ const typedSession = session;
6557
+ runtime.activeSessions.set(sessionState.sessionId, typedSession);
6558
+ registerMonitorSession(sessionState.sessionId, typedSession);
6559
+ await session.connect(buildSocketAdapter(ws, sessionState.sessionId));
6500
6560
  }
6501
6561
  }).use(htmxRoutes());
6502
6562
  };
@@ -44454,6 +44514,466 @@ var buildVoiceMultilingualProofReadinessCheck = (report, options = {}) => {
44454
44514
  value: failedAdapters.length
44455
44515
  };
44456
44516
  };
44517
+ // src/monitor.ts
44518
+ import { Elysia as Elysia70 } from "elysia";
44519
+ var buildAudioFanout = () => {
44520
+ const handlers = new Set;
44521
+ return {
44522
+ emit: (event) => {
44523
+ for (const handler of handlers)
44524
+ handler(event);
44525
+ },
44526
+ onAudio: (handler) => {
44527
+ handlers.add(handler);
44528
+ return () => {
44529
+ handlers.delete(handler);
44530
+ };
44531
+ }
44532
+ };
44533
+ };
44534
+ var buildCloseFanout = () => {
44535
+ const handlers = new Set;
44536
+ return {
44537
+ emitClose: (reason) => {
44538
+ for (const handler of handlers)
44539
+ handler(reason);
44540
+ },
44541
+ onClose: (handler) => {
44542
+ handlers.add(handler);
44543
+ return () => {
44544
+ handlers.delete(handler);
44545
+ };
44546
+ }
44547
+ };
44548
+ };
/**
 * Builds a monitor session record: the caller-supplied `handle`, `metadata`
 * and `sessionId`, plus a fresh audio channel (`emit` / `onAudio`) and a fresh
 * close channel (`emitClose` / `onClose`).
 *
 * @param input Object carrying `handle`, optional `metadata`, and `sessionId`.
 * @returns The assembled session record.
 */
var createVoiceMonitorSession = (input) => {
  const { emit, onAudio } = buildAudioFanout();
  const { emitClose, onClose } = buildCloseFanout();
  const { handle, metadata, sessionId } = input;
  return { emit, emitClose, handle, metadata, onAudio, onClose, sessionId };
};
44562
+ var createVoiceInMemoryMonitorRegistry = () => {
44563
+ const records = new Map;
44564
+ const deregister = (sessionId, reason) => {
44565
+ const existing = records.get(sessionId);
44566
+ if (!existing)
44567
+ return;
44568
+ records.delete(sessionId);
44569
+ existing.emitClose(reason ?? "deregistered");
44570
+ };
44571
+ return {
44572
+ deregister,
44573
+ emit: (sessionId, event) => {
44574
+ records.get(sessionId)?.emit(event);
44575
+ },
44576
+ emitClose: (sessionId, reason) => {
44577
+ records.get(sessionId)?.emitClose(reason);
44578
+ },
44579
+ get: (sessionId) => records.get(sessionId),
44580
+ list: () => Array.from(records.keys()).map((sessionId) => ({ sessionId })),
44581
+ register: (record) => {
44582
+ const existing = records.get(record.sessionId);
44583
+ if (existing) {
44584
+ throw new Error(`VoiceMonitorRegistry already has a session "${record.sessionId}"; deregister it before re-registering.`);
44585
+ }
44586
+ const wrapped = "emit" in record && typeof record.emit === "function" ? record : createVoiceMonitorSession({
44587
+ handle: record.handle,
44588
+ metadata: record.metadata,
44589
+ sessionId: record.sessionId
44590
+ });
44591
+ if (wrapped !== record) {
44592
+ record.onAudio((event) => wrapped.emit(event));
44593
+ record.onClose((reason) => wrapped.emitClose(reason));
44594
+ }
44595
+ records.set(record.sessionId, wrapped);
44596
+ return (reason) => deregister(record.sessionId, reason);
44597
+ }
44598
+ };
44599
+ };
44600
+ var buildDefaultControlHandler = (type) => {
44601
+ if (type === "transfer") {
44602
+ return async ({ message, session }) => {
44603
+ if (message.type !== "transfer") {
44604
+ return { error: "internal: type mismatch", ok: false, type };
44605
+ }
44606
+ await session.handle.transfer({
44607
+ metadata: message.metadata,
44608
+ reason: message.reason,
44609
+ target: message.target
44610
+ });
44611
+ return { detail: `Transferred to ${message.target}.`, ok: true, type };
44612
+ };
44613
+ }
44614
+ if (type === "hangup") {
44615
+ return async ({ message, session }) => {
44616
+ if (message.type !== "hangup") {
44617
+ return { error: "internal: type mismatch", ok: false, type };
44618
+ }
44619
+ await session.handle.complete();
44620
+ return {
44621
+ detail: message.reason ? `Hangup: ${message.reason}` : "Hangup.",
44622
+ ok: true,
44623
+ type
44624
+ };
44625
+ };
44626
+ }
44627
+ if (type === "escalate") {
44628
+ return async ({ message, session }) => {
44629
+ if (message.type !== "escalate") {
44630
+ return { error: "internal: type mismatch", ok: false, type };
44631
+ }
44632
+ await session.handle.escalate({
44633
+ metadata: message.metadata,
44634
+ reason: message.reason ?? "monitor-requested-escalation"
44635
+ });
44636
+ return { detail: "Escalated.", ok: true, type };
44637
+ };
44638
+ }
44639
+ if (type === "voicemail") {
44640
+ return async ({ message, session }) => {
44641
+ if (message.type !== "voicemail") {
44642
+ return { error: "internal: type mismatch", ok: false, type };
44643
+ }
44644
+ await session.handle.markVoicemail({ metadata: message.metadata });
44645
+ return { detail: "Voicemail marked.", ok: true, type };
44646
+ };
44647
+ }
44648
+ if (type === "no-answer") {
44649
+ return async ({ message, session }) => {
44650
+ if (message.type !== "no-answer") {
44651
+ return { error: "internal: type mismatch", ok: false, type };
44652
+ }
44653
+ await session.handle.markNoAnswer({ metadata: message.metadata });
44654
+ return { detail: "Marked no-answer.", ok: true, type };
44655
+ };
44656
+ }
44657
+ return;
44658
+ };
/**
 * Validates an incoming control payload and normalises it into a control
 * message object.
 *
 * @param raw The decoded payload; anything that is not a non-null object with
 *   a string `type` is rejected.
 * @returns The normalised message for a known `type` whose required fields
 *   are valid, otherwise `undefined`.
 */
var parseControlMessage = (raw) => {
  if (!raw || typeof raw !== "object") {
    return;
  }
  const record = raw;
  const { type } = record;
  if (typeof type !== "string") {
    return;
  }
  const optionalString = (value) => typeof value === "string" ? value : undefined;
  switch (type) {
    case "transfer": {
      if (typeof record.target !== "string") {
        return;
      }
      return {
        metadata: record.metadata,
        reason: optionalString(record.reason),
        target: record.target,
        type
      };
    }
    case "hangup":
      return { reason: optionalString(record.reason), type };
    case "escalate":
      return {
        metadata: record.metadata,
        reason: optionalString(record.reason),
        type
      };
    case "voicemail":
    case "no-answer":
      return { metadata: record.metadata, type };
    case "mute": {
      const targetIsValid = record.target === "assistant" || record.target === "caller";
      if (typeof record.muted !== "boolean" || !targetIsValid) {
        return;
      }
      return { muted: record.muted, target: record.target, type };
    }
    case "say": {
      if (typeof record.text !== "string") {
        return;
      }
      return {
        interrupt: typeof record.interrupt === "boolean" ? record.interrupt : undefined,
        text: record.text,
        type
      };
    }
    case "inject": {
      const roleIsValid = record.role === "assistant" || record.role === "system" || record.role === "user";
      if (typeof record.text !== "string" || !roleIsValid) {
        return;
      }
      return { role: record.role, text: record.text, type };
    }
    default:
      return;
  }
};
// Default mount point and per-session route templates for the monitor sockets.
var DEFAULT_BASE_PATH = "/api/voice/monitor";
var DEFAULT_LISTEN_PATH = ":sessionId/listen";
var DEFAULT_CONTROL_PATH = ":sessionId/control";

/**
 * Joins path fragments into a single path with a leading slash.
 *
 * Leading and trailing slashes are stripped from each fragment, fragments that
 * end up empty are skipped, and the result is `""` when nothing remains.
 */
var joinPath = (...parts) => {
  let joined = "";
  for (const part of parts) {
    if (!(part.length > 0)) {
      continue;
    }
    const trimmed = part.replace(/^\/+|\/+$/g, "");
    if (trimmed.length > 0) {
      joined += `/${trimmed}`;
    }
  }
  return joined;
};
/**
 * Fills every `:sessionId` placeholder in `template` with the URI-encoded
 * session id.
 *
 * Split/join is used so that all occurrences are replaced (a custom template
 * may repeat the placeholder) and the encoded id is always inserted literally.
 */
var substituteSessionId = (template, sessionId) => template.split(":sessionId").join(encodeURIComponent(sessionId));
/**
 * Builds the `{ controlUrl, listenUrl }` pair for one session.
 *
 * @param input `baseUrl` and `sessionId`, plus optional `basePath`,
 *   `listenPath` and `controlPath` overrides. Without overrides the templates
 *   are the default listen/control paths joined onto the base path.
 * @returns URLs made of `baseUrl` (trailing slashes removed) followed by the
 *   session-specific path.
 */
var buildVoiceMonitorPlan = (input) => {
  const basePath = input.basePath ?? DEFAULT_BASE_PATH;
  const listenTemplate = input.listenPath ?? joinPath(basePath, DEFAULT_LISTEN_PATH);
  const controlTemplate = input.controlPath ?? joinPath(basePath, DEFAULT_CONTROL_PATH);
  const baseUrl = input.baseUrl.replace(/\/+$/, "");
  const toUrl = (template) => {
    const path = substituteSessionId(template, input.sessionId);
    if (path.length === 0) {
      return baseUrl;
    }
    // The routes plugin mounts `/${path.replace(/^\/+/, "")}`, so an override
    // written without a leading slash is valid there. Apply the same
    // normalisation here so the advertised URL matches the mounted route
    // instead of being glued directly onto the host.
    return `${baseUrl}/${path.replace(/^\/+/, "")}`;
  };
  return {
    controlUrl: toUrl(controlTemplate),
    listenUrl: toUrl(listenTemplate)
  };
};
// Audio format reported on monitor events when the runtime binding is created
// without an explicit `audioFormat` option.
var DEFAULT_RUNTIME_AUDIO_FORMAT = {
  channels: 1,
  container: "raw",
  encoding: "pcm_s16le",
  sampleRateHz: 16000
};

/**
 * Returns the bytes of `chunk` as a `Uint8Array`.
 *
 * - A `Uint8Array` is returned as-is.
 * - Any other ArrayBuffer view (`Int16Array`, `DataView`, ...) is
 *   reinterpreted over the same memory. Passing a typed array straight to
 *   `new Uint8Array(...)` would instead convert element by element, truncating
 *   16-bit samples to 8 bits and halving the byte length.
 * - Anything else (for example an `ArrayBuffer`) goes through the
 *   `Uint8Array` constructor unchanged.
 */
var toUint8 = (chunk) => {
  if (chunk instanceof Uint8Array) {
    return chunk;
  }
  if (ArrayBuffer.isView(chunk)) {
    return new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
  }
  return new Uint8Array(chunk);
};
/**
 * Creates the binding object handed to the voice plugin's `monitor` option.
 *
 * @param registry Mutable monitor registry; its `deregister` and `register`
 *   methods are used.
 * @param options Optional `audioFormat` (defaults to
 *   `DEFAULT_RUNTIME_AUDIO_FORMAT`) and `defaultSource` (defaults to
 *   `"assistant"`).
 * @returns `{ registerSession }`. Each `registerSession(input)` call evicts any
 *   record already stored under the session id (reason `"superseded"`),
 *   registers a fresh record, and returns `{ deregister, emitAudio }` for it.
 */
var createVoiceMonitorRuntimeBinding = (registry, options = {}) => {
  const audioFormat = options.audioFormat ?? DEFAULT_RUNTIME_AUDIO_FORMAT;
  const defaultSource = options.defaultSource ?? "assistant";

  const registerSession = (input) => {
    registry.deregister(input.sessionId, "superseded");
    const record = createVoiceMonitorSession({
      handle: input.handle,
      metadata: input.metadata,
      sessionId: input.sessionId
    });
    const removeFromRegistry = registry.register(record);
    let active = true;

    // Idempotent: only the first call reaches the registry, and a failure
    // there is deliberately ignored.
    const deregister = (reason) => {
      if (!active) {
        return;
      }
      active = false;
      try {
        removeFromRegistry(reason);
      } catch {}
    };

    // Publishes one chunk to the record's listeners; ignored once the binding
    // has been deregistered or when the chunk is empty.
    const emitAudio = (chunk, opts) => {
      if (!active) {
        return;
      }
      const bytes = toUint8(chunk);
      if (bytes.byteLength === 0) {
        return;
      }
      record.emit({
        at: Date.now(),
        chunk: bytes,
        format: audioFormat,
        source: opts?.source ?? defaultSource
      });
    };

    return { deregister, emitAudio };
  };

  return { registerSession };
};
/**
 * Reads the `sessionId` route parameter from `ws.data.params`.
 *
 * @returns The parameter when it is a non-empty string, otherwise `undefined`.
 */
var resolveSessionId3 = (ws) => {
  const candidate = ws.data?.params?.sessionId;
  return typeof candidate === "string" && candidate.length > 0 ? candidate : undefined;
};
44788
+ var resolveAuthenticate = async (authenticate, input) => {
44789
+ if (!authenticate)
44790
+ return true;
44791
+ return await authenticate(input);
44792
+ };
/**
 * Builds the Elysia plugin that exposes the live monitor endpoints.
 *
 * Routes (each can be disabled by passing `false` for its path option):
 *   - listen socket: after authentication, streams the session's audio chunks
 *     as binary frames and announces `session-closed` when the record closes.
 *   - control socket: after authentication, accepts JSON control messages and
 *     answers each with an ack produced by the matching handler.
 *   - optional HTML page (`htmlPath`) listing the registered session ids.
 *
 * @param options `registry` (required) plus optional `authenticate`,
 *   `basePath`, `listenPath`, `controlPath`, `controlHandlers`, `htmlPath`.
 * @returns The configured Elysia app.
 */
var createVoiceLiveMonitorRoutes = (options) => {
  const basePath = options.basePath ?? DEFAULT_BASE_PATH;
  const listenPath = options.listenPath === undefined ? joinPath(basePath, DEFAULT_LISTEN_PATH) : options.listenPath;
  const controlPath = options.controlPath === undefined ? joinPath(basePath, DEFAULT_CONTROL_PATH) : options.controlPath;
  const handlers = {
    escalate: options.controlHandlers?.escalate ?? buildDefaultControlHandler("escalate"),
    hangup: options.controlHandlers?.hangup ?? buildDefaultControlHandler("hangup"),
    inject: options.controlHandlers?.inject,
    mute: options.controlHandlers?.mute,
    "no-answer": options.controlHandlers?.["no-answer"] ?? buildDefaultControlHandler("no-answer"),
    say: options.controlHandlers?.say,
    transfer: options.controlHandlers?.transfer ?? buildDefaultControlHandler("transfer"),
    voicemail: options.controlHandlers?.voicemail ?? buildDefaultControlHandler("voicemail")
  };
  const app = new Elysia70({ name: "absolutejs-voice-monitor" });

  // Per-connection state, keyed by `socketKey(ws)`.
  const unsubscribers = new WeakMap();
  const closedSockets = new WeakSet();
  const controlAdmissions = new WeakMap();

  // NOTE(review): the framework may pass a fresh wrapper object to each
  // lifecycle callback — confirm against the Elysia version in use. Keying
  // state by the underlying `raw` socket works either way; fall back to the
  // wrapper itself when `raw` is not an object.
  const socketKey = (ws) => {
    const raw = ws.raw;
    return typeof raw === "object" && raw !== null ? raw : ws;
  };

  const HTML_ESCAPES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  // Session ids and paths come from outside this module, so they are escaped
  // before being interpolated into the HTML page.
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (char) => HTML_ESCAPES[char]);

  // Runs the authentication hook and fails closed: a hook that throws is
  // treated as a rejected connection instead of leaving the socket open.
  const authenticateSocket = async (webSocket, route, sessionId) => {
    try {
      return await resolveAuthenticate(options.authenticate, {
        // NOTE(review): falls back to `data.request` in case the raw socket
        // does not expose `request` — confirm where Elysia puts it.
        request: webSocket.raw?.request ?? webSocket.data?.request,
        route,
        sessionId
      });
    } catch {
      return false;
    }
  };

  // Shared admission sequence for both sockets. Resolves to the session
  // record when the socket may proceed, or `undefined` after the socket has
  // been rejected (or has already gone away).
  const admitSocket = async (webSocket, route) => {
    const sessionId = resolveSessionId3(webSocket);
    if (!sessionId) {
      webSocket.send(JSON.stringify({
        error: "missing sessionId in path params",
        type: "error"
      }));
      webSocket.close(4400, "missing sessionId");
      return;
    }
    const authed = await authenticateSocket(webSocket, route, sessionId);
    // The peer may have disconnected while authentication was pending.
    if (closedSockets.has(socketKey(webSocket))) {
      return;
    }
    if (!authed) {
      webSocket.send(JSON.stringify({ error: "unauthorized", type: "error" }));
      webSocket.close(4401, "unauthorized");
      return;
    }
    const record = options.registry.get(sessionId);
    if (!record) {
      webSocket.send(JSON.stringify({
        error: `session "${sessionId}" not found`,
        type: "error"
      }));
      webSocket.close(4404, "session not found");
      return;
    }
    return { record, sessionId };
  };

  if (listenPath !== false && listenPath.length > 0) {
    app.ws(`/${listenPath.replace(/^\/+/, "")}`, {
      close: (ws) => {
        const key = socketKey(ws);
        closedSockets.add(key);
        const subs = unsubscribers.get(key);
        if (subs) {
          for (const unsub of subs) {
            unsub();
          }
          unsubscribers.delete(key);
        }
      },
      open: async (ws) => {
        const webSocket = ws;
        const admitted = await admitSocket(webSocket, "listen");
        if (!admitted) {
          return;
        }
        const { record, sessionId } = admitted;
        const subs = [];
        webSocket.send(JSON.stringify({
          sessionId,
          type: "subscribed"
        }));
        subs.push(record.onAudio((event) => {
          webSocket.send(event.chunk);
        }));
        subs.push(record.onClose((reason) => {
          webSocket.send(JSON.stringify({
            reason,
            sessionId,
            type: "session-closed"
          }));
          webSocket.close(1000, reason ?? "session-closed");
        }));
        unsubscribers.set(socketKey(webSocket), subs);
      }
    });
  }

  if (controlPath !== false && controlPath.length > 0) {
    // Performs the control-socket handshake; resolves to `true` only once the
    // socket has been authenticated and told it is ready.
    const admitControlSocket = async (webSocket) => {
      const admitted = await admitSocket(webSocket, "control");
      if (!admitted) {
        return false;
      }
      webSocket.send(JSON.stringify({
        sessionId: admitted.sessionId,
        supports: Object.entries(handlers).filter(([, value]) => value !== undefined).map(([key]) => key),
        type: "ready"
      }));
      return true;
    };

    app.ws(`/${controlPath.replace(/^\/+/, "")}`, {
      close: (ws) => {
        const key = socketKey(ws);
        closedSockets.add(key);
        controlAdmissions.delete(key);
      },
      message: async (ws, raw) => {
        const webSocket = ws;
        // Commands are only honoured once the handshake in `open` has
        // succeeded. Awaiting the stored promise keeps early messages from an
        // authorised client working while ensuring nothing runs before (or
        // without) authentication.
        const admission = controlAdmissions.get(socketKey(webSocket));
        const admitted = admission ? await admission.catch(() => false) : false;
        if (!admitted) {
          webSocket.send(JSON.stringify({
            error: "unauthorized",
            ok: false,
            type: "error"
          }));
          return;
        }
        const sessionId = resolveSessionId3(webSocket);
        if (!sessionId) {
          webSocket.send(JSON.stringify({
            error: "missing sessionId in path params",
            ok: false,
            type: "error"
          }));
          return;
        }
        const message = parseControlMessage(raw);
        if (!message) {
          webSocket.send(JSON.stringify({
            error: "invalid control message",
            ok: false,
            type: "error"
          }));
          return;
        }
        const record = options.registry.get(sessionId);
        if (!record) {
          webSocket.send(JSON.stringify({
            error: `session "${sessionId}" not found`,
            ok: false,
            type: message.type
          }));
          return;
        }
        const handler = handlers[message.type];
        if (!handler) {
          webSocket.send(JSON.stringify({
            error: `no handler registered for control type "${message.type}"`,
            ok: false,
            type: message.type
          }));
          return;
        }
        try {
          const ack = await handler({
            message,
            raw,
            session: record
          });
          webSocket.send(JSON.stringify(ack));
        } catch (error) {
          webSocket.send(JSON.stringify({
            error: error instanceof Error ? error.message : String(error),
            ok: false,
            type: message.type
          }));
        }
      },
      open: async (ws) => {
        const webSocket = ws;
        // Store the promise synchronously so a message that arrives while the
        // handshake is still pending can wait for its outcome.
        const admission = admitControlSocket(webSocket);
        controlAdmissions.set(socketKey(webSocket), admission);
        await admission;
      }
    });
  }

  if (options.htmlPath !== undefined && options.htmlPath !== false) {
    const path = options.htmlPath;
    app.get(path, () => {
      const sessions = options.registry.list().map((entry) => `<li><code>${escapeHtml(entry.sessionId)}</code></li>`).join("");
      const body = `<!doctype html><html lang="en"><head><meta charset="utf-8" /><title>Voice Monitor</title><style>body{background:#0b1216;color:#f6f1e7;font-family:ui-sans-serif,system-ui,sans-serif;margin:0;padding:32px}main{margin:auto;max-width:960px}h1{font-size:clamp(2rem,5vw,3.2rem);letter-spacing:-.04em;margin:.2rem 0 1rem}code{background:#171f25;border:1px solid #2c3a44;border-radius:8px;padding:2px 6px}ul{margin:8px 0;padding-left:18px}p.muted{color:#9aa8b2}</style></head><body><main><h1>Voice Monitor</h1><p class="muted">Active sessions registered with this monitor registry.</p><ul>${sessions || "<li><em>None.</em></li>"}</ul><p class="muted">Open <code>${escapeHtml(listenPath)}</code> and <code>${escapeHtml(controlPath)}</code> via WebSocket per session for live listen + control.</p></main></body></html>`;
      return new Response(body, {
        headers: { "content-type": "text/html; charset=utf-8" }
      });
    });
  }
  return app;
};
44457
44977
  export {
44458
44978
  writeVoiceProofPack,
44459
44979
  writeVoiceMediaPipelineArtifacts,
@@ -44948,6 +45468,8 @@ export {
44948
45468
  createVoiceObservabilityExportRoutes,
44949
45469
  createVoiceObservabilityExportReplayRoutes,
44950
45470
  createVoiceMonitorWebhookNotifier,
45471
+ createVoiceMonitorSession,
45472
+ createVoiceMonitorRuntimeBinding,
44951
45473
  createVoiceMonitorRunnerRoutes,
44952
45474
  createVoiceMonitorRunner,
44953
45475
  createVoiceMonitorRoutes,
@@ -44967,6 +45489,7 @@ export {
44967
45489
  createVoiceMediaPipelineRoutes,
44968
45490
  createVoiceLiveOpsRoutes,
44969
45491
  createVoiceLiveOpsController,
45492
+ createVoiceLiveMonitorRoutes,
44970
45493
  createVoiceLiveLatencyRoutes,
44971
45494
  createVoiceLinearIssueUpdateSink,
44972
45495
  createVoiceLinearIssueSyncSinks,
@@ -44978,6 +45501,7 @@ export {
44978
45501
  createVoiceIncidentTimelineRoutes,
44979
45502
  createVoiceIncidentBundleRoutes,
44980
45503
  createVoiceInMemoryRealCallProfileRecoveryJobStore,
45504
+ createVoiceInMemoryMonitorRegistry,
44981
45505
  createVoiceHubSpotTaskUpdateSink,
44982
45506
  createVoiceHubSpotTaskSyncSinks,
44983
45507
  createVoiceHubSpotTaskSink,
@@ -45153,6 +45677,7 @@ export {
45153
45677
  buildVoiceObservabilityArtifactIndex,
45154
45678
  buildVoiceMultilingualProofReadinessCheck,
45155
45679
  buildVoiceMonitorRunReport,
45680
+ buildVoiceMonitorPlan,
45156
45681
  buildVoiceMediaPipelineReport,
45157
45682
  buildVoiceMediaPipelineReadinessChecks,
45158
45683
  buildVoiceMediaPipelineIncidentEvents,
@@ -0,0 +1,148 @@
1
+ import { Elysia } from "elysia";
2
+ import type { AudioFormat, VoiceMonitorRuntimeBinding, VoiceSessionHandle, VoiceSessionRecord } from "./types";
3
+ export type VoiceMonitorAudioSource = "assistant" | "caller" | (string & {}); // Label for who produced a chunk; "(string & {})" admits any other string without collapsing the two literals into plain `string`.
4
+ export type VoiceMonitorAudioEvent = { // One audio chunk as delivered to monitor `onAudio` handlers.
5
+ at: number; // NOTE(review): presumably a timestamp; unit/epoch is not visible in this declaration — confirm.
6
+ chunk: Uint8Array; // The audio bytes for this event.
7
+ format: AudioFormat; // Format descriptor for `chunk` (type imported from ./types).
8
+ source: VoiceMonitorAudioSource; // Who produced the chunk ("assistant", "caller", or a custom label).
9
+ };
10
+ export type VoiceMonitorSessionRecord = { // A monitored session: its handle plus audio/close subscription hooks.
11
+ handle: VoiceSessionHandle<unknown, VoiceSessionRecord, unknown>; // The underlying voice session handle.
12
+ metadata?: Record<string, unknown>; // Optional free-form data attached at registration.
13
+ onAudio: (handler: (event: VoiceMonitorAudioEvent) => void) => () => void; // Subscribe to audio events; the returned function is presumably the unsubscribe — confirm.
14
+ onClose: (handler: (reason?: string) => void) => () => void; // Subscribe to session close (with optional reason); returned function is presumably the unsubscribe — confirm.
15
+ sessionId: string; // Key used for registry lookups.
16
+ };
17
+ export type VoiceMonitorRegistry = { // Read-only registry surface: lookup and enumeration only.
18
+ get: (sessionId: string) => VoiceMonitorSessionRecord | undefined; // Returns the record for `sessionId`, or undefined when there is none.
19
+ list: () => readonly { // Enumerates entries; each entry exposes only its `sessionId`.
20
+ sessionId: string;
21
+ }[];
22
+ };
23
+ export type VoiceMonitorMutableRegistry = VoiceMonitorRegistry & { // Read surface plus the write operations used by the runtime side.
24
+ emit: (sessionId: string, event: VoiceMonitorAudioEvent) => void; // Publish an audio event for the given session id.
25
+ emitClose: (sessionId: string, reason?: string) => void; // Publish a close notification for the given session id.
26
+ /**
27
+ * Deregister a session by id. No-op if the id isn't registered. The
28
+ * `reason` flows into the close fan-out exactly once.
29
+ */
30
+ deregister: (sessionId: string, reason?: string) => void;
31
+ register: (record: VoiceMonitorSessionRecord) => (reason?: string) => void; // Adds a record; the returned function takes an optional reason and is presumably the matching deregister — confirm.
32
+ };
33
+ export type VoiceMonitorRegistryRegisterInput = { // Input accepted by createVoiceMonitorSession.
34
+ handle: VoiceSessionHandle<unknown, VoiceSessionRecord, unknown>; // Voice session handle to wrap.
35
+ metadata?: Record<string, unknown>; // Optional free-form data carried on the record.
36
+ sessionId: string; // Id the session will be known by.
37
+ };
38
+ export declare const createVoiceMonitorSession: (input: VoiceMonitorRegistryRegisterInput) => VoiceMonitorSessionRecord & { // Builds a session record from the input; the result additionally exposes emit/emitClose.
39
+ emit: (event: VoiceMonitorAudioEvent) => void; // NOTE(review): presumably delivers the event to this record's onAudio subscribers — implementation not shown here.
40
+ emitClose: (reason?: string) => void; // NOTE(review): presumably notifies this record's onClose subscribers — implementation not shown here.
41
+ };
42
+ export declare const createVoiceInMemoryMonitorRegistry: () => VoiceMonitorMutableRegistry; // Zero-argument factory returning a mutable registry; in-memory storage is implied by the name only (implementation not shown here).
43
+ export type VoiceMonitorControlMessage = { // Discriminated union of control commands; `type` is the tag that selects the variant.
44
+ metadata?: Record<string, unknown>;
45
+ reason?: string;
46
+ target: string; // Required for "transfer"; format of the target is not specified here.
47
+ type: "transfer";
48
+ } | {
49
+ reason?: string;
50
+ type: "hangup";
51
+ } | {
52
+ metadata?: Record<string, unknown>;
53
+ reason?: string;
54
+ type: "escalate";
55
+ } | {
56
+ metadata?: Record<string, unknown>;
57
+ type: "voicemail";
58
+ } | {
59
+ metadata?: Record<string, unknown>;
60
+ type: "no-answer";
61
+ } | {
62
+ muted: boolean; // Desired mute state for `target`.
63
+ target: "assistant" | "caller"; // Unlike "transfer", the target here is restricted to these two literals.
64
+ type: "mute";
65
+ } | {
66
+ interrupt?: boolean; // NOTE(review): presumably whether to cut off in-progress speech — confirm against the handler.
67
+ text: string;
68
+ type: "say";
69
+ } | {
70
+ role: "assistant" | "system" | "user"; // Role the injected text is attributed to.
71
+ text: string;
72
+ type: "inject";
73
+ };
74
+ export type VoiceMonitorControlAck = { // Result of handling a control message; `ok` discriminates success from failure.
75
+ detail?: string; // Optional extra information on success.
76
+ ok: true;
77
+ type: VoiceMonitorControlMessage["type"]; // Any control message `type` value.
78
+ } | {
79
+ error: string; // Required failure description.
80
+ ok: false;
81
+ type: VoiceMonitorControlMessage["type"];
82
+ };
83
+ export type VoiceMonitorControlHandlerInput = { // Argument object passed to a control handler.
84
+ message: VoiceMonitorControlMessage; // The typed control message.
85
+ raw: unknown; // NOTE(review): presumably the payload as received, before typing — confirm; must be narrowed before use.
86
+ session: VoiceMonitorSessionRecord; // The session record the message applies to.
87
+ };
88
+ export type VoiceMonitorControlHandler = (input: VoiceMonitorControlHandlerInput) => Promise<VoiceMonitorControlAck> | VoiceMonitorControlAck; // Handler for a control message; may answer synchronously or via a promise.
89
+ export type VoiceMonitorAuthenticateInput = { // Argument object passed to the authenticate callback.
90
+ request: unknown; // Typed as unknown here; callers must narrow it themselves.
91
+ route: "control" | "listen"; // Which monitor route is being accessed.
92
+ sessionId: string; // Session the connection targets.
93
+ };
94
+ export type VoiceMonitorAuthenticate = (input: VoiceMonitorAuthenticateInput) => boolean | Promise<boolean>; // Sync or async access check; presumably true allows the connection — confirm against the route implementation.
95
+ export type VoiceLiveMonitorRoutesOptions = { // Options for createVoiceLiveMonitorRoutes; only `registry` is required.
96
+ authenticate?: VoiceMonitorAuthenticate; // Optional access check; behavior when omitted is not visible here.
97
+ basePath?: string; // NOTE(review): presumably a prefix for the route paths below — default not visible here.
98
+ controlHandlers?: Partial<Record<VoiceMonitorControlMessage["type"], VoiceMonitorControlHandler>>; // Optional handler per control message type; any subset may be supplied.
99
+ controlPath?: false | string; // NOTE(review): `false` presumably disables the control route — confirm.
100
+ htmlPath?: false | string; // NOTE(review): `false` presumably disables the HTML page — confirm.
101
+ listenPath?: false | string; // NOTE(review): `false` presumably disables the listen route — confirm.
102
+ registry: VoiceMonitorRegistry; // Only the read-only registry surface is required by the routes.
103
+ };
104
+ export type VoiceMonitorPlanInput = { // Input for buildVoiceMonitorPlan.
105
+ basePath?: string; // Optional path prefix; default not visible here.
106
+ baseUrl: string; // Required base URL the plan's URLs are built from.
107
+ controlPath?: string; // Optional override for the control path.
108
+ listenPath?: string; // Optional override for the listen path.
109
+ sessionId: string; // Session the URLs are built for.
110
+ };
111
+ export type VoiceMonitorPlan = { // Output of buildVoiceMonitorPlan: one URL per monitor route.
112
+ controlUrl: string; // URL for the control route.
113
+ listenUrl: string; // URL for the listen route.
114
+ };
115
+ export declare const buildVoiceMonitorPlan: (input: VoiceMonitorPlanInput) => VoiceMonitorPlan; // Maps a base URL, optional path overrides and a session id to control/listen URLs; the exact URL shape is not visible in this declaration.
116
+ export type VoiceMonitorRuntimeBindingOptions = { // Optional settings for createVoiceMonitorRuntimeBinding.
117
+ audioFormat?: AudioFormat; // NOTE(review): presumably the format stamped on emitted audio events — default not visible here.
118
+ defaultSource?: VoiceMonitorAudioSource; // NOTE(review): presumably the source used when emitAudio is called without one — confirm.
119
+ };
120
+ export declare const createVoiceMonitorRuntimeBinding: (registry: VoiceMonitorMutableRegistry, options?: VoiceMonitorRuntimeBindingOptions) => VoiceMonitorRuntimeBinding; // Wraps a mutable registry as a VoiceMonitorRuntimeBinding (the type of the plugin config's `monitor` field); `options` may be omitted.
121
+ export declare const createVoiceLiveMonitorRoutes: (options: VoiceLiveMonitorRoutesOptions) => Elysia<"", { // Returns an Elysia instance for the monitor routes; the type arguments that follow are all empty object types.
122
+ decorator: {};
123
+ store: {};
124
+ derive: {};
125
+ resolve: {};
126
+ }, {
127
+ typebox: {};
128
+ error: {};
129
+ }, {
130
+ schema: {};
131
+ standaloneSchema: {};
132
+ macro: {};
133
+ macroFn: {};
134
+ parser: {};
135
+ response: {};
136
+ }, {}, {
137
+ derive: {};
138
+ resolve: {};
139
+ schema: {};
140
+ standaloneSchema: {};
141
+ response: {};
142
+ }, {
143
+ derive: {};
144
+ resolve: {};
145
+ schema: {};
146
+ standaloneSchema: {};
147
+ response: {};
148
+ }>;
package/dist/types.d.ts CHANGED
@@ -411,6 +411,20 @@ export type VoiceSocket = {
411
411
  send: (data: string | Uint8Array | ArrayBuffer) => void | Promise<void>;
412
412
  close: (code?: number, reason?: string) => void | Promise<void>;
413
413
  };
414
+ export type VoiceMonitorRuntimeSessionBinding = { // Per-session object returned by VoiceMonitorRuntimeBinding.registerSession.
415
+ deregister: (reason?: string) => void; // Removes this session from monitoring, with an optional reason.
416
+ emitAudio: (chunk: Uint8Array | ArrayBuffer, options?: { // Publishes one audio chunk; accepts either a Uint8Array or an ArrayBuffer.
417
+ source?: "assistant" | "caller" | (string & {}); // Optional producer label; fallback when omitted is not visible here.
418
+ }) => void;
419
+ };
420
+ export type VoiceMonitorRuntimeRegisterInput = { // Input to VoiceMonitorRuntimeBinding.registerSession.
421
+ handle: VoiceSessionHandle<unknown, VoiceSessionRecord, unknown>; // Handle of the session being registered.
422
+ metadata?: Record<string, unknown>; // Optional free-form data for the session.
423
+ sessionId: string; // Id the session is registered under.
424
+ };
425
+ export type VoiceMonitorRuntimeBinding = { // Shape of the plugin config's optional `monitor` field.
426
+ registerSession: (input: VoiceMonitorRuntimeRegisterInput) => VoiceMonitorRuntimeSessionBinding; // Registers a session and returns its per-session emit/deregister handle.
427
+ };
414
428
  export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
415
429
  id: string;
416
430
  connect: (socket: VoiceSocket) => Promise<void>;
@@ -679,6 +693,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
679
693
  handoff?: VoiceHandoffConfig<TContext, TSession, TResult>;
680
694
  ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
681
695
  liveOps?: VoiceLiveOpsRuntimeConfig;
696
+ monitor?: VoiceMonitorRuntimeBinding;
682
697
  profileSwitchGuard?: VoicePluginProfileSwitchGuardConfig<TContext, TSession, TResult>;
683
698
  trace?: VoiceTraceEventStore;
684
699
  } & VoiceRouteConfig<TContext, TSession, TResult>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.472",
3
+ "version": "0.0.22-beta.474",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",