npm - @cognidesk/voice-websocket - Versions diffs - 0.0.1 - Mend

@cognidesk/voice-websocket 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.js ADDED Viewed

@@ -0,0 +1,841 @@
+// src/index.ts
+import { randomUUID } from "node:crypto";
+import { WebSocketServer } from "ws";
+var COGNIDESK_VOICE_PROTOCOL = "cognidesk.voice.v1"; // Protocol identifier reported in socket descriptors and in the `cognidesk.connection.ready` event.
+function createInMemoryVoiceSessionStore(options = {}) {
+  const sessions = /* @__PURE__ */ new Map();
+  const tokens = /* @__PURE__ */ new Map();
+  const createToken = options.createToken ?? (() => createId("voice_socket_token"));
+  return {
+    async createSession(input) {
+      const now = input.now ?? /* @__PURE__ */ new Date();
+      const result = input.result;
+      const session = {
+        id: result.connection.id,
+        conversation: result.conversation,
+        channelSegment: result.channelSegment,
+        connection: result.connection,
+        events: result.events,
+        createdAt: now.toISOString(),
+        updatedAt: now.toISOString(),
+        status: "pending",
+        lastAckSequence: 0
+      };
+      sessions.set(session.id, session);
+      const token = createTokenRecord({
+        createToken,
+        connectionId: session.connection.id,
+        sessionId: session.id,
+        purpose: "start",
+        ttlMs: input.tokenTtlMs,
+        now
+      });
+      tokens.set(token.token, token);
+      return {
+        session,
+        socket: {
+          url: "",
+          token: token.token,
+          expiresAt: token.expiresAt,
+          protocol: COGNIDESK_VOICE_PROTOCOL
+        }
+      };
+    },
+    async claimToken(input) {
+      const now = input.now ?? /* @__PURE__ */ new Date();
+      const token = tokens.get(input.token);
+      if (!token) return null;
+      if (token.connectionId !== input.connectionId) return null;
+      if (token.consumedAt) return null;
+      if (Date.parse(token.expiresAt) <= now.getTime()) return null;
+      const session = sessions.get(token.sessionId);
+      if (!session || session.status === "ended") return null;
+      token.consumedAt = now.toISOString();
+      tokens.set(token.token, token);
+      return {
+        session,
+        token,
+        reconnect: token.purpose === "reconnect"
+      };
+    },
+    async issueReconnectToken(input) {
+      const session = sessions.get(input.sessionId);
+      if (!session) throw new Error(`Voice session '${input.sessionId}' was not found.`);
+      const token = createTokenRecord({
+        createToken,
+        connectionId: session.connection.id,
+        sessionId: session.id,
+        purpose: "reconnect",
+        ttlMs: input.ttlMs,
+        now: input.now ?? /* @__PURE__ */ new Date()
+      });
+      tokens.set(token.token, token);
+      return token;
+    },
+    async acknowledgeAudio(input) {
+      const session = requireSession(sessions, input.sessionId);
+      if (input.sequence > session.lastAckSequence) {
+        session.lastAckSequence = input.sequence;
+        session.updatedAt = (input.now ?? /* @__PURE__ */ new Date()).toISOString();
+        sessions.set(session.id, session);
+      }
+      return session;
+    },
+    async markConnected(sessionId, now = /* @__PURE__ */ new Date()) {
+      const session = requireSession(sessions, sessionId);
+      session.status = "connected";
+      session.updatedAt = now.toISOString();
+      delete session.reconnectGraceUntil;
+      sessions.set(session.id, session);
+      return session;
+    },
+    async markReconnecting(sessionId, now = /* @__PURE__ */ new Date(), graceMs = 3e4) {
+      const session = requireSession(sessions, sessionId);
+      session.status = "reconnecting";
+      session.updatedAt = now.toISOString();
+      session.reconnectGraceUntil = new Date(now.getTime() + graceMs).toISOString();
+      sessions.set(session.id, session);
+      return session;
+    },
+    async markEnded(sessionId, now = /* @__PURE__ */ new Date()) {
+      const session = requireSession(sessions, sessionId);
+      session.status = "ended";
+      session.updatedAt = now.toISOString();
+      sessions.set(session.id, session);
+      return session;
+    },
+    async getSession(sessionId) {
+      return sessions.get(sessionId) ?? null;
+    }
+  };
+}
+function createVoiceSocketHandshake(options) {
+  const tokenTtlMs = options.tokenTtlMs ?? 6e4;
+  const pathPrefix = normalizePathPrefix(options.pathPrefix ?? "/voice/connections");
+  return {
+    async createSocket(input) {
+      const created = await options.store.createSession({
+        result: input.result,
+        tokenTtlMs
+      });
+      return {
+        ...created.socket,
+        url: buildSocketUrl({
+          requestUrl: input.request.url,
+          basePath: input.basePath,
+          pathPrefix,
+          connectionId: input.result.connection.id,
+          token: created.socket.token,
+          ...options.baseUrl ? { baseUrl: options.baseUrl } : {}
+        })
+      };
+    }
+  };
+}
+async function handleVoiceSocket(options) {
+  const claimed = await options.store.claimToken({
+    connectionId: options.connectionId,
+    token: options.token
+  });
+  if (!claimed) {
+    send(options.socket, {
+      type: "error",
+      event_id: createId("voice_event"),
+      error: {
+        code: "invalid_voice_socket_token",
+        message: "Voice socket token is invalid, expired, or already used."
+      }
+    });
+    options.socket.close(4401, "Invalid voice socket token");
+    return;
+  }
+  const controller = new AbortController();
+  const abort = () => controller.abort();
+  options.signal?.addEventListener("abort", abort, { once: true });
+  const session = await options.store.markConnected(claimed.session.id);
+  let providerSession = null;
+  let closed = false;
+  const inputTranscriptDebounceMs = Math.max(0, options.inputTranscriptDebounceMs ?? 350);
+  const turnPreambleMs = Math.max(0, options.turnPreambleMs ?? 1200);
+  const useRealtimeControl = Boolean(options.control);
+  let pendingInputTranscript = null;
+  let pendingInputTranscriptTimer = null;
+  let turnPreambleTimer = null;
+  let inputTranscriptQueue = Promise.resolve();
+  let speechQueue = Promise.resolve();
+  let speechGeneration = 0;
+  const sendRuntimeEvents = (events) => {
+    for (const event of events) {
+      send(options.socket, {
+        type: "cognidesk.runtime_event",
+        event_id: createId("voice_event"),
+        event
+      });
+    }
+  };
+  const issueReconnect = async () => {
+    const token = await options.store.issueReconnectToken({
+      sessionId: session.id,
+      ttlMs: options.reconnectTokenTtlMs ?? 3e4
+    });
+    send(options.socket, {
+      type: "cognidesk.connection.reconnect_token",
+      event_id: createId("voice_event"),
+      token: token.token,
+      expiresAt: token.expiresAt
+    });
+  };
+  const clearTurnPreambleTimer = () => {
+    if (!turnPreambleTimer) return;
+    clearTimeout(turnPreambleTimer);
+    turnPreambleTimer = null;
+  };
+  const queueSpeechAction = (generation, action) => {
+    const queued = speechQueue.catch(() => void 0).then(async () => {
+      if (closed || generation !== speechGeneration) return;
+      await action();
+    });
+    speechQueue = queued.catch((error) => {
+      send(options.socket, {
+        type: "error",
+        event_id: createId("voice_event"),
+        error: {
+          code: "voice_speech_failed",
+          message: error instanceof Error ? error.message : "Failed to queue voice speech."
+        }
+      });
+    });
+  };
+  const startTurnPreambleTimer = (text, generation) => {
+    clearTurnPreambleTimer();
+    if (!providerSession?.preamble) return;
+    if (turnPreambleMs === 0) return;
+    turnPreambleTimer = setTimeout(() => {
+      turnPreambleTimer = null;
+      queueSpeechAction(generation, () => providerSession?.preamble?.({ text }));
+    }, turnPreambleMs);
+  };
+  const handleProviderEvent = async (event) => {
+    if (event.kind === "runtime_events") {
+      sendRuntimeEvents(event.events);
+      return;
+    }
+    if (event.kind === "server_event") {
+      if (isAgentResponseSignal(event.event)) clearTurnPreambleTimer();
+      send(options.socket, event.event);
+      if (event.event.type === "response.output_audio.delta") {
+        await options.recorder?.onAudio?.({
+          session,
+          speaker: "assistant",
+          audio: event.event.delta
+        });
+      }
+      if (useRealtimeControl && event.event.type === "response.output_audio_transcript.done") {
+        await commitControlAssistantTranscript(event.event.transcript, "openai-realtime");
+      }
+      return;
+    }
+    if (event.kind === "error") {
+      send(options.socket, {
+        type: "error",
+        event_id: createId("voice_event"),
+        error: {
+          code: event.code ?? "voice_provider_error",
+          message: event.message,
+          ...event.retryable !== void 0 ? { retryable: event.retryable } : {},
+          ...event.details !== void 0 ? { details: event.details } : {}
+        }
+      });
+      return;
+    }
+    scheduleInputTranscript(event);
+  };
+  const scheduleInputTranscript = (event) => {
+    const text = event.text.trim();
+    if (!text) return;
+    sendInputTranscriptCompleted(event, text);
+    pendingInputTranscript = mergeInputTranscript(
+      pendingInputTranscript,
+      {
+        ...event,
+        text
+      }
+    );
+    if (pendingInputTranscriptTimer) clearTimeout(pendingInputTranscriptTimer);
+    if (inputTranscriptDebounceMs === 0) {
+      const transcript = pendingInputTranscript;
+      pendingInputTranscript = null;
+      if (transcript) queueInputTranscript(transcript);
+      return;
+    }
+    const waitMs = debounceMsForTranscript(pendingInputTranscript.text, inputTranscriptDebounceMs);
+    pendingInputTranscriptTimer = setTimeout(() => {
+      const transcript = pendingInputTranscript;
+      pendingInputTranscript = null;
+      pendingInputTranscriptTimer = null;
+      if (transcript) queueInputTranscript(transcript);
+    }, waitMs);
+  };
+  const sendInputTranscriptCompleted = (event, text) => {
+    send(options.socket, {
+      type: "input_audio_transcription.completed",
+      event_id: createId("voice_event"),
+      text,
+      ...optionalStringField("item_id", event.itemId),
+      ...optionalNumberField("startedAtMs", event.startedAtMs),
+      ...optionalNumberField("endedAtMs", event.endedAtMs),
+      ...optionalStringField("transcriptionSource", event.transcriptionSource),
+      ...event.metadata !== void 0 ? { metadata: event.metadata } : {}
+    });
+  };
+  const queueInputTranscript = (event) => {
+    inputTranscriptQueue = inputTranscriptQueue.then(() => useRealtimeControl ? commitControlInputTranscript(event) : commitInputTranscript(event)).catch((error) => {
+      send(options.socket, {
+        type: "error",
+        event_id: createId("voice_event"),
+        error: {
+          code: "voice_transcript_commit_failed",
+          message: error instanceof Error ? error.message : "Failed to commit voice transcript."
+        }
+      });
+    });
+  };
+  const flushPendingInputTranscript = async () => {
+    if (pendingInputTranscriptTimer) {
+      clearTimeout(pendingInputTranscriptTimer);
+      pendingInputTranscriptTimer = null;
+    }
+    const transcript = pendingInputTranscript;
+    pendingInputTranscript = null;
+    if (transcript) queueInputTranscript(transcript);
+    await inputTranscriptQueue;
+  };
+  const commitControlInputTranscript = async (event) => {
+    if (!options.runtime.commitVoiceTranscript) return;
+    const committed = await options.runtime.commitVoiceTranscript({
+      conversationId: session.conversation.id,
+      channelSegmentId: session.channelSegment.id,
+      speaker: "user",
+      text: event.text,
+      transcriptionSource: event.transcriptionSource ?? "provider",
+      ...optionalNumberField("startedAtMs", event.startedAtMs),
+      ...optionalNumberField("endedAtMs", event.endedAtMs),
+      ...event.metadata !== void 0 ? { metadata: event.metadata } : {}
+    });
+    sendRuntimeEvents(committed.events);
+    await options.recorder?.onTranscript?.({
+      session,
+      speaker: "user",
+      text: event.text,
+      runtimeEvent: committed.event
+    });
+  };
+  const commitControlAssistantTranscript = async (text, transcriptionSource) => {
+    const normalized = normalizeSpeechText(text ?? "");
+    if (!normalized || !options.runtime.commitVoiceTranscript) return;
+    await flushPendingInputTranscript();
+    const committed = await options.runtime.commitVoiceTranscript({
+      conversationId: session.conversation.id,
+      channelSegmentId: session.channelSegment.id,
+      speaker: "assistant",
+      text: normalized,
+      transcriptionSource
+    });
+    sendRuntimeEvents(committed.events);
+    await options.recorder?.onTranscript?.({
+      session,
+      speaker: "assistant",
+      text: normalized,
+      runtimeEvent: committed.event
+    });
+    send(options.socket, {
+      type: "cognidesk.turn.completed",
+      event_id: createId("voice_event"),
+      text: normalized
+    });
+  };
+  const controlSurface = options.control ? {
+    ...options.control,
+    handleToolCall: async (call) => {
+      await flushPendingInputTranscript();
+      return options.control.handleToolCall(call);
+    }
+  } : void 0;
+  const commitInputTranscript = async (event) => {
+    const generation = ++speechGeneration;
+    let assistantSpeechBuffer = "";
+    let assistantSpeechQueued = false;
+    const queueAssistantSpeech = (text, result2) => {
+      const normalized = normalizeSpeechText(text);
+      if (!normalized) return;
+      clearTurnPreambleTimer();
+      assistantSpeechQueued = true;
+      queueSpeechAction(generation, () => providerSession?.speak({ text: normalized, ...result2 ? { result: result2 } : {} }));
+    };
+    const flushAssistantSpeech = (force) => {
+      while (true) {
+        const chunk = takeSpeakablePrefix(assistantSpeechBuffer, force);
+        if (!chunk) return;
+        assistantSpeechBuffer = assistantSpeechBuffer.slice(chunk.consumed).trimStart();
+        queueAssistantSpeech(chunk.text);
+        if (!force) return;
+      }
+    };
+    startTurnPreambleTimer(event.text, generation);
+    const result = await options.runtime.handleVoiceUserMessage({
+      conversationId: session.conversation.id,
+      channelSegmentId: session.channelSegment.id,
+      connectionId: session.connection.id,
+      text: event.text,
+      transcriptionSource: event.transcriptionSource ?? "provider",
+      ...optionalNumberField("startedAtMs", event.startedAtMs),
+      ...optionalNumberField("endedAtMs", event.endedAtMs),
+      ...event.metadata !== void 0 ? { metadata: event.metadata } : {},
+      onAssistantTextDelta: (textDelta) => {
+        assistantSpeechBuffer += textDelta;
+        flushAssistantSpeech(false);
+      }
+    });
+    clearTurnPreambleTimer();
+    flushAssistantSpeech(true);
+    if (!assistantSpeechQueued) {
+      queueAssistantSpeech(result.text, result);
+    }
+    sendRuntimeEvents(result.events);
+    const userRuntimeEvent = result.voiceEvents.find(
+      (candidate) => candidate.type === "voice.transcript.committed" && candidate.data.speaker === "user"
+    );
+    await options.recorder?.onTranscript?.({
+      session,
+      speaker: "user",
+      text: event.text,
+      ...userRuntimeEvent ? { runtimeEvent: userRuntimeEvent } : {}
+    });
+    const assistantRuntimeEvent = result.voiceEvents.find(
+      (candidate) => candidate.type === "voice.transcript.committed" && candidate.data.speaker === "assistant"
+    );
+    await options.recorder?.onTranscript?.({
+      session,
+      speaker: "assistant",
+      text: result.text,
+      ...assistantRuntimeEvent ? { runtimeEvent: assistantRuntimeEvent } : {}
+    });
+    send(options.socket, {
+      type: "cognidesk.turn.completed",
+      event_id: createId("voice_event"),
+      text: result.text,
+      ...result.activeJourneyId ? { activeJourneyId: result.activeJourneyId } : {}
+    });
+  };
+  try {
+    const controlInstructions = await options.control?.createSessionInstructions?.({ session });
+    providerSession = await options.provider.connect({
+      session,
+      ...options.profile ? { profile: options.profile } : {},
+      ...controlSurface ? {
+        control: {
+          ...controlSurface,
+          instructions: [
+            controlSurface.instructions,
+            controlInstructions
+          ].filter(Boolean).join("\n\n")
+        }
+      } : {},
+      signal: controller.signal,
+      onEvent: handleProviderEvent
+    });
+  } catch (error) {
+    send(options.socket, {
+      type: "error",
+      event_id: createId("voice_event"),
+      error: {
+        code: "voice_provider_connect_failed",
+        message: error instanceof Error ? error.message : "Voice provider connection failed."
+      }
+    });
+    options.socket.close(1011, "Voice provider connection failed");
+    return;
+  }
+  send(options.socket, {
+    type: "cognidesk.connection.ready",
+    event_id: createId("voice_event"),
+    protocol: COGNIDESK_VOICE_PROTOCOL,
+    conversation: session.conversation,
+    channelSegment: session.channelSegment,
+    connection: session.connection,
+    lastAckSequence: session.lastAckSequence
+  });
+  await issueReconnect();
+  if (options.initialGreeting?.trim()) {
+    queueSpeechAction(speechGeneration, () => providerSession?.speak({ text: options.initialGreeting.trim() }));
+  }
+  options.socket.on("message", (data) => {
+    void handleClientMessage(String(data)).catch((error) => {
+      send(options.socket, {
+        type: "error",
+        event_id: createId("voice_event"),
+        error: {
+          code: "voice_socket_message_failed",
+          message: error instanceof Error ? error.message : "Failed to handle voice socket message."
+        }
+      });
+    });
+  });
+  options.socket.on("error", (error) => {
+    send(options.socket, {
+      type: "error",
+      event_id: createId("voice_event"),
+      error: {
+        code: "voice_socket_error",
+        message: error instanceof Error ? error.message : "Voice socket error."
+      }
+    });
+  });
+  options.socket.on("close", (code) => {
+    if (closed) return;
+    closed = true;
+    speechGeneration++;
+    if (pendingInputTranscriptTimer) clearTimeout(pendingInputTranscriptTimer);
+    clearTurnPreambleTimer();
+    pendingInputTranscript = null;
+    controller.abort();
+    options.signal?.removeEventListener("abort", abort);
+    void providerSession?.close();
+    const normalClose = code === void 0 || code === 1e3 || code === 1001;
+    if (normalClose) {
+      void options.store.markEnded(session.id).then(
+        () => options.runtime.endVoiceSegment({
+          conversationId: session.conversation.id,
+          channelSegmentId: session.channelSegment.id,
+          connectionId: session.connection.id,
+          reason: "socket_closed"
+        })
+      );
+    } else {
+      void options.store.markReconnecting(
+        session.id,
+        /* @__PURE__ */ new Date(),
+        options.reconnectGraceMs ?? 3e4
+      );
+    }
+  });
+  async function handleClientMessage(raw) {
+    const event = parseClientEvent(raw);
+    if (event.type === "input_audio_buffer.append") {
+      assertBase64Audio(event.audio);
+      const sequence = event.sequence;
+      const previousAckSequence = session.lastAckSequence;
+      if (sequence !== void 0) {
+        await options.store.acknowledgeAudio({ sessionId: session.id, sequence });
+        send(options.socket, {
+          type: "cognidesk.audio.ack",
+          event_id: createId("voice_event"),
+          sequence
+        });
+      }
+      await options.recorder?.onAudio?.({
+        session,
+        speaker: "user",
+        audio: event.audio,
+        ...sequence !== void 0 ? { sequence } : {}
+      });
+      if (sequence === void 0 || sequence > previousAckSequence) {
+        await providerSession?.send(event);
+      }
+      return;
+    }
+    if (event.type === "response.cancel") {
+      speechGeneration++;
+      clearTurnPreambleTimer();
+      await providerSession?.send(event);
+      const interruption = await options.runtime.recordVoiceInterruption({
+        conversationId: session.conversation.id,
+        channelSegmentId: session.channelSegment.id,
+        connectionId: session.connection.id,
+        source: "userSpeech",
+        reason: event.reason ?? "client_cancelled_response",
+        ...optionalStringField("interruptedMessageId", event.interruptedMessageId),
+        ...optionalNumberField("offsetMs", event.playedUntilMs ?? event.audioEndMs)
+      });
+      send(options.socket, {
+        type: "cognidesk.interruption.recorded",
+        event_id: createId("voice_event"),
+        event: interruption
+      });
+      sendRuntimeEvents([interruption]);
+      return;
+    }
+    await providerSession?.send(event);
+  }
+}
+function attachNodeVoiceWebSocketAdapter(options) {
+  const pathPrefix = normalizePathPrefix(options.pathPrefix ?? "/voice/connections");
+  const webSocketServer = new WebSocketServer({ noServer: true });
+  const upgradeListener = (request, socket, head) => {
+    const parsed = parseVoiceSocketRequest(request, pathPrefix);
+    if (!parsed) return;
+    webSocketServer.handleUpgrade(request, socket, head, (webSocket) => {
+      webSocketServer.emit("connection", webSocket, request, parsed);
+    });
+  };
+  options.server.on("upgrade", upgradeListener);
+  webSocketServer.on("connection", (webSocket, _request, parsed) => {
+    void handleVoiceSocket({
+      socket: adaptNodeWebSocket(webSocket),
+      connectionId: parsed.connectionId,
+      token: parsed.token,
+      store: options.store,
+      runtime: options.runtime,
+      provider: options.provider,
+      ...options.control ? { control: options.control } : {},
+      ...options.profile ? { profile: options.profile } : {},
+      ...options.recorder ? { recorder: options.recorder } : {},
+      ...options.initialGreeting !== void 0 ? { initialGreeting: options.initialGreeting } : {},
+      ...options.reconnectTokenTtlMs !== void 0 ? { reconnectTokenTtlMs: options.reconnectTokenTtlMs } : {},
+      ...options.reconnectGraceMs !== void 0 ? { reconnectGraceMs: options.reconnectGraceMs } : {},
+      ...options.turnPreambleMs !== void 0 ? { turnPreambleMs: options.turnPreambleMs } : {}
+    });
+  });
+  return {
+    close() {
+      options.server.off("upgrade", upgradeListener);
+      webSocketServer.close();
+    },
+    webSocketServer
+  };
+}
+function parseVoiceSocketRequest(request, pathPrefix) {
+  if (!request.url) return null;
+  const url = new URL(request.url, "http://localhost");
+  const expectedPrefix = `${pathPrefix}/`;
+  if (!url.pathname.startsWith(expectedPrefix) || !url.pathname.endsWith("/socket")) return null;
+  const connectionId = decodeURIComponent(url.pathname.slice(expectedPrefix.length, -"/socket".length));
+  if (!connectionId) return null;
+  const token = url.searchParams.get("token") ?? parseTokenFromProtocol(request.headers["sec-websocket-protocol"]);
+  if (!token) return null;
+  return { connectionId, token };
+}
+function adaptNodeWebSocket(socket) {
+  return {
+    send(data) {
+      socket.send(data);
+    },
+    close(code, reason) {
+      socket.close(code, reason);
+    },
+    on(event, listener) {
+      if (event === "message") {
+        socket.on("message", (data) => {
+          listener(rawDataToString(data));
+        });
+        return;
+      }
+      if (event === "close") {
+        socket.on("close", (code, reason) => {
+          listener(code, reason.toString("utf8"));
+        });
+        return;
+      }
+      socket.on("error", listener);
+    }
+  };
+}
+function parseClientEvent(raw) {
+  let parsed;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    throw new Error("Voice socket message must be valid JSON.");
+  }
+  if (!isRecord(parsed)) throw new Error("Voice socket message must be a JSON object.");
+  const type = parsed.type;
+  if (typeof type !== "string") throw new Error("Voice socket message type is required.");
+  switch (type) {
+    case "session.update":
+      return { type, ...optionalEventId(parsed), ...isRecord(parsed.session) ? { session: parsed.session } : {} };
+    case "input_audio_buffer.append": {
+      const audio = requiredString(parsed, "audio");
+      const sequence = optionalInteger(parsed, "sequence");
+      return { type, audio, ...optionalEventId(parsed), ...sequence !== void 0 ? { sequence } : {} };
+    }
+    case "input_audio_buffer.commit":
+    case "input_audio_buffer.clear":
+      return { type, ...optionalEventId(parsed) };
+    case "response.cancel":
+      return {
+        type,
+        ...optionalEventId(parsed),
+        ...optionalStringField("response_id", optionalString(parsed, "response_id")),
+        ...optionalStringField("interruptedMessageId", optionalString(parsed, "interruptedMessageId")),
+        ...optionalStringField("reason", optionalString(parsed, "reason")),
+        ...optionalNumberField("playedUntilMs", optionalInteger(parsed, "playedUntilMs")),
+        ...optionalNumberField("audioEndMs", optionalInteger(parsed, "audioEndMs"))
+      };
+    case "conversation.item.truncate":
+      return {
+        type,
+        ...optionalEventId(parsed),
+        ...optionalStringField("item_id", optionalString(parsed, "item_id")),
+        ...optionalNumberField("content_index", optionalInteger(parsed, "content_index")),
+        ...optionalNumberField("audio_end_ms", optionalInteger(parsed, "audio_end_ms"))
+      };
+    default:
+      throw new Error(`Unsupported voice socket event '${type}'.`);
+  }
+}
+function buildSocketUrl(input) {
+  const requestUrl = new URL(input.requestUrl);
+  const base = input.baseUrl ? new URL(input.baseUrl) : requestUrl;
+  const protocol = base.protocol === "https:" ? "wss:" : "ws:";
+  const url = new URL(`${input.basePath}${input.pathPrefix}/${encodeURIComponent(input.connectionId)}/socket`, base);
+  url.protocol = protocol;
+  url.searchParams.set("token", input.token);
+  return url.toString();
+}
+function normalizePathPrefix(path) {
+  const withSlash = path.startsWith("/") ? path : `/${path}`;
+  return withSlash.endsWith("/") ? withSlash.slice(0, -1) : withSlash;
+}
+function createTokenRecord(input) {
+  return {
+    token: input.createToken(),
+    connectionId: input.connectionId,
+    sessionId: input.sessionId,
+    purpose: input.purpose,
+    expiresAt: new Date(input.now.getTime() + input.ttlMs).toISOString()
+  };
+}
+function requireSession(sessions, sessionId) {
+  const session = sessions.get(sessionId);
+  if (!session) throw new Error(`Voice session '${sessionId}' was not found.`);
+  return session;
+}
+function send(socket, event) {
+  socket.send(JSON.stringify(event));
+}
+function mergeInputTranscript(current, next) {
+  if (!current) return next;
+  const merged = {
+    kind: "input_transcript.completed",
+    text: `${current.text} ${next.text}`.trim()
+  };
+  const itemId = next.itemId ?? current.itemId;
+  if (itemId) merged.itemId = itemId;
+  const startedAtMs = current.startedAtMs ?? next.startedAtMs;
+  if (startedAtMs !== void 0) merged.startedAtMs = startedAtMs;
+  const endedAtMs = next.endedAtMs ?? current.endedAtMs;
+  if (endedAtMs !== void 0) merged.endedAtMs = endedAtMs;
+  const transcriptionSource = next.transcriptionSource ?? current.transcriptionSource;
+  if (transcriptionSource) merged.transcriptionSource = transcriptionSource;
+  const metadata = {
+    ...current.metadata ?? {},
+    ...next.metadata ?? {}
+  };
+  if (Object.keys(metadata).length > 0) merged.metadata = metadata;
+  return merged;
+}
+function isAgentResponseSignal(event) {
+  return event.type === "response.output_audio.delta" || event.type === "response.output_audio_transcript.delta" || event.type === "response.output_audio_transcript.done" || event.type === "response.done";
+}
+function takeSpeakablePrefix(text, force) {
+  if (!text.trim()) return null;
+  if (force) return { text: normalizeSpeechText(text), consumed: text.length };
+  const sentenceBoundary = findLastSentenceBoundary(text);
+  if (sentenceBoundary > 0) {
+    return {
+      text: normalizeSpeechText(text.slice(0, sentenceBoundary)),
+      consumed: sentenceBoundary
+    };
+  }
+  if (text.length < 180) return null;
+  const softBoundary = findSoftBoundary(text, 140);
+  if (softBoundary <= 0) return null;
+  return {
+    text: normalizeSpeechText(text.slice(0, softBoundary)),
+    consumed: softBoundary
+  };
+}
+function findLastSentenceBoundary(text) {
+  let boundary = -1;
+  const pattern = /[.!?。！？](?:["')\]]+)?\s+/g;
+  let match;
+  while ((match = pattern.exec(text)) !== null) {
+    boundary = match.index + match[0].length;
+  }
+  return boundary;
+}
+function findSoftBoundary(text, minIndex) {
+  const candidates = [", ", "; ", ": ", "\n", " "];
+  for (const candidate of candidates) {
+    const boundary = text.lastIndexOf(candidate);
+    if (boundary >= minIndex) return boundary + candidate.length;
+  }
+  return -1;
+}
+function normalizeSpeechText(text) {
+  return text.replace(/\s+/g, " ").trim();
+}
+function debounceMsForTranscript(text, baseMs) {
+  const wordCount = text.trim().split(" ").filter(Boolean).length;
+  return wordCount <= 2 ? Math.max(baseMs, 900) : baseMs;
+}
+function rawDataToString(data) {
+  if (typeof data === "string") return data;
+  if (Buffer.isBuffer(data)) return data.toString("utf8");
+  if (Array.isArray(data)) return Buffer.concat(data).toString("utf8");
+  return Buffer.from(data).toString("utf8");
+}
+function parseTokenFromProtocol(value) {
+  const raw = Array.isArray(value) ? value.join(",") : value;
+  if (!raw) return void 0;
+  const protocols = raw.split(",").map((candidate) => candidate.trim()).filter(Boolean);
+  const bearer = protocols.find((candidate) => candidate.startsWith("cognidesk.voice.token."));
+  return bearer?.slice("cognidesk.voice.token.".length);
+}
+/**
+ * Rejects audio payloads that are empty or contain characters outside the
+ * base64 alphabet (with up to two trailing "=" padding characters).
+ *
+ * @param {string} value Audio payload from the client.
+ * @throws {Error} When the payload is empty or not base64 text.
+ */
+function assertBase64Audio(value) {
+  if (value.length === 0) throw new Error("audio must not be empty.");
+  const looksLikeBase64 = /^[A-Za-z0-9+/]+={0,2}$/.test(value);
+  if (!looksLikeBase64) throw new Error("audio must be base64 encoded.");
+}
+function optionalEventId(value) {
+  return optionalStringField("event_id", optionalString(value, "event_id"));
+}
+function requiredString(value, key) {
+  const result = optionalString(value, key);
+  if (!result) throw new Error(`${key} must be a non-empty string.`);
+  return result;
+}
+function optionalString(value, key) {
+  const candidate = value[key];
+  if (candidate === void 0 || candidate === null) return void 0;
+  if (typeof candidate !== "string") throw new Error(`${key} must be a string.`);
+  const trimmed = candidate.trim();
+  return trimmed.length > 0 ? trimmed : void 0;
+}
+function optionalInteger(value, key) {
+  const candidate = value[key];
+  if (candidate === void 0 || candidate === null) return void 0;
+  if (typeof candidate !== "number" || !Number.isSafeInteger(candidate) || candidate < 0) {
+    throw new Error(`${key} must be a non-negative integer.`);
+  }
+  return candidate;
+}
+function optionalStringField(key, value) {
+  return value ? { [key]: value } : {};
+}
+function optionalNumberField(key, value) {
+  return value !== void 0 ? { [key]: value } : {};
+}
+function isRecord(value) {
+  return Boolean(value && typeof value === "object" && !Array.isArray(value));
+}
+function createId(prefix) {
+  const random = globalThis.crypto?.randomUUID?.() ?? Math.random().toString(36).slice(2);
+  return `${prefix}_${random}`;
+}
+export {
+  COGNIDESK_VOICE_PROTOCOL,
+  attachNodeVoiceWebSocketAdapter,
+  createInMemoryVoiceSessionStore,
+  createVoiceSocketHandshake,
+  handleVoiceSocket
+};
+//# sourceMappingURL=index.js.map