@absolutejs/voice 0.0.22-beta.475 → 0.0.22-beta.477

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ import type { AIProviderConfig } from "@absolutejs/ai";
2
+ import type { VoiceAgentModel } from "./agent";
3
+ import type { VoiceSessionRecord } from "./types";
4
/** Options accepted by `createAIVoiceModel`. */
+ export type CreateAIVoiceModelOptions = {
5
/** Model identifier passed through to `provider.stream` on every generate call. */
+ model: string;
6
/** Provider whose `stream` method produces the response chunks. */
+ provider: AIProviderConfig;
7
/** Optional abort signal; the same signal is forwarded to every `provider.stream` call. */
+ signal?: AbortSignal;
8
/** Fallback system prompt, used only when the generate input does not supply its own `system`. */
+ systemPrompt?: string;
9
+ };
10
/**
 * Builds a `VoiceAgentModel` backed by `options.provider`.
 *
 * Its `generate` streams from the provider, concatenates `text` chunks into
 * `assistantText`, and collects `tool_use` chunks as `toolCalls` (the key is
 * only present when at least one tool call was streamed).
 */
+ export declare const createAIVoiceModel: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: CreateAIVoiceModelOptions) => VoiceAgentModel<TContext, TSession, TResult>;
@@ -7,6 +7,7 @@ import { type StoredVoiceTraceEvent, type VoiceTraceSinkDeliveryRecord, type Voi
7
7
  import type { StoredVoiceIntegrationEvent, StoredVoiceExternalObjectMap, StoredVoiceOpsTask, VoiceExternalObjectMap, VoiceExternalObjectMapStore, VoiceIntegrationEvent, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from "./ops";
8
8
  import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from "./testing/review";
9
9
  import type { VoiceSessionRecord, VoiceSessionStore } from "./types";
10
+ import type { VoiceRecordingStore } from "./recordingStore";
10
11
  export type VoiceFileStoreOptions = {
11
12
  directory: string;
12
13
  pretty?: boolean;
@@ -50,3 +51,4 @@ export declare const createStoredVoiceIntegrationEvent: <TEvent extends Omit<Voi
50
51
  export declare const createStoredVoiceExternalObjectMap: <TMapping extends Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt"> = Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt">>(mapping: TMapping & {
51
52
  at?: number;
52
53
  }) => VoiceExternalObjectMap;
54
/**
 * Creates a `VoiceRecordingStore` that keeps one `.wav` file and one `.json`
 * metadata file per session/channel inside `options.directory` (created on
 * demand). `options.pretty` controls whether the metadata JSON is indented.
 */
+ export declare const createVoiceFileRecordingStore: (options: VoiceFileStoreOptions) => VoiceRecordingStore;
package/dist/index.d.ts CHANGED
@@ -69,6 +69,8 @@ export { assertVoiceSimulationSuiteEvidence, createVoiceSimulationSuiteRoutes, e
69
69
  export { createVoiceWorkflowContract, createVoiceWorkflowContractHandler, createVoiceWorkflowContractPreset, createVoiceWorkflowScenario, recordVoiceWorkflowContractTrace, validateVoiceWorkflowRouteResult, } from "./workflowContract";
70
70
  export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, createVoiceSessionReplayJSONHandler, createVoiceSessionReplayRoutes, createVoiceSessionsHTMLHandler, createVoiceSessionsJSONHandler, renderVoiceSessionsHTML, summarizeVoiceProviderFallbackRecovery, summarizeVoiceSessions, summarizeVoiceSessionReplay, } from "./sessionReplay";
71
71
  export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool, } from "./agent";
72
+ export { createAIVoiceModel } from "./aiVoiceModel";
73
+ export type { CreateAIVoiceModelOptions } from "./aiVoiceModel";
72
74
  export { createVoiceRAGTool } from "./ragTool";
73
75
  export type { VoiceRAGCollectionLike, VoiceRAGQueryResult, VoiceRAGSearchInput, VoiceRAGToolArgs, VoiceRAGToolOptions, VoiceRAGToolResult, } from "./ragTool";
74
76
  export { createVoiceApiRequestTool, createVoiceDTMFTool, createVoiceEndCallTool, createVoiceTransferCallTool, createVoiceVoicemailDetectionTool, } from "./agentTools";
@@ -85,7 +87,9 @@ export { createVoiceTurnQualityHTMLHandler, createVoiceTurnQualityJSONHandler, c
85
87
  export { assertVoiceOutcomeContractEvidence, createVoiceOutcomeContractHTMLHandler, createVoiceOutcomeContractJSONHandler, createVoiceOutcomeContractRoutes, evaluateVoiceOutcomeContractEvidence, renderVoiceOutcomeContractHTML, runVoiceOutcomeContractSuite, } from "./outcomeContract";
86
88
  export { applyVoiceTelephonyOutcome, assertVoiceTelephonyWebhookNormalizationEvidence, createMemoryVoiceTelephonyWebhookIdempotencyStore, createVoiceTelephonyOutcomePolicy, createVoiceTelephonyWebhookHandler, createVoiceTelephonyWebhookRoutes, evaluateVoiceTelephonyWebhookNormalizationEvidence, parseVoiceTelephonyWebhookEvent, resolveVoiceTelephonyOutcome, signVoiceTwilioWebhook, verifyVoiceTwilioWebhookSignature, voiceTelephonyOutcomeToRouteResult, } from "./telephonyOutcome";
87
89
  export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence, createVoicePhoneAgent, evaluateVoicePhoneCallControlEvidence, evaluateVoicePhoneAssistantEvidence, } from "./phoneAgent";
88
- export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
90
+ export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileRecordingStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
91
+ export { computePcmDurationMs, createVoiceMemoryRecordingStore, encodePcmAsWav, } from "./recordingStore";
92
+ export type { StoredVoiceRecordingArtifact, VoiceRecordingArtifact, VoiceRecordingChannel, VoiceRecordingStore, } from "./recordingStore";
89
93
  export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace, } from "./assistantMemory";
90
94
  export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter, } from "./modelAdapters";
91
95
  export { createOpenAIVoiceTTS } from "./openaiTTS";
package/dist/index.js CHANGED
@@ -3369,6 +3369,77 @@ var buildTurnText = (transcripts, partialText, options = {}) => {
3369
3369
  return selectPreferredTranscriptText(finalText, nextPartial);
3370
3370
  };
3371
3371
 
3372
+ // src/types.ts
3373
/**
 * Reports whether a TTS adapter session exposes a callable `cancel` method.
 *
 * A missing session (`null` / `undefined`) answers `false` rather than
 * throwing, so the guard can be used before the session is known to exist.
 *
 * @param session - Candidate TTS adapter session; may be absent.
 * @returns `true` only when `session.cancel` is a function.
 */
var ttsAdapterSessionCanCancel = (session) => typeof session?.cancel === "function";
3374
+
3375
+ // src/recordingStore.ts
3376
/** Writes `value` at `offset` as an unsigned 32-bit little-endian integer. */
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
/** Writes `value` at `offset` as an unsigned 16-bit little-endian integer. */
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
/** Writes one byte per character of `value` starting at `offset` (used for the four-character chunk tags). */
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw 16-bit little-endian PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * RIFF chunks must occupy an even number of bytes. When `pcm` has an odd
 * length, a single zero pad byte is appended after the sample data; the pad is
 * counted in the RIFF size but not in the `data` chunk size. Even-length input
 * produces exactly `44 + pcm.byteLength` bytes, as before.
 *
 * @param pcm - Raw PCM bytes (`Uint8Array`).
 * @param format - Audio format; must be `container: "raw"`, `encoding: "pcm_s16le"`.
 *   `channels` and `sampleRateHz` are written into the header as-is.
 * @returns A new `Uint8Array` holding header + samples (+ optional pad byte).
 * @throws Error when the format is not raw `pcm_s16le`.
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const byteRate = sampleRate * channels * bitsPerSample / 8;
  const blockAlign = channels * bitsPerSample / 8;
  const dataSize = pcm.byteLength;
  // RIFF word alignment: odd-sized chunk data is followed by one zero byte.
  const padSize = dataSize % 2;
  // ArrayBuffer is zero-filled, so the pad byte needs no explicit write.
  const buffer = new ArrayBuffer(44 + dataSize + padSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, 36 + dataSize + padSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // fmt chunk size for PCM
  writeUint16LE(view, 20, 1); // audio format tag 1 = PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize); // excludes the pad byte
  const output = new Uint8Array(buffer);
  output.set(pcm, 44);
  return output;
};
3416
/**
 * Computes the playback duration of a raw 16-bit PCM buffer.
 *
 * @param pcmByteLength - Number of PCM bytes.
 * @param format - Audio format; only `container: "raw"` with
 *   `encoding: "pcm_s16le"` is measured.
 * @returns Duration in whole milliseconds (rounded). Returns `0` for any other
 *   format, and also when the byte rate or the result is not a positive finite
 *   number (missing, zero or negative `sampleRateHz` / `channels`, negative or
 *   non-numeric byte length), so callers never receive `NaN` or a negative value.
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per sample per channel.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
3426
/**
 * Creates an in-memory recording store holding at most one artifact per
 * session/channel pair; a later `put` for the same pair replaces the earlier one.
 *
 * Stored artifacts receive a synthetic `memory://recording/<sessionId>/<channel>.wav`
 * URL. The session id is percent-encoded so ids containing `/`, spaces or
 * other reserved characters still yield a well-formed, unambiguous URL (the
 * file-backed store encodes session ids in its file names the same way).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  const records = new Map();
  const key = (sessionId, channel) => `${sessionId}::${channel}`;
  return {
    // Resolves to `undefined` when nothing was stored for the pair.
    get: async (sessionId, channel) => records.get(key(sessionId, channel)),
    // Matches on the artifact's own `sessionId`, in insertion order.
    list: async (sessionId) => Array.from(records.values()).filter((record) => record.sessionId === sessionId),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      records.set(key(artifact.sessionId, artifact.channel), stored);
      return stored;
    }
  };
};
3442
+
3372
3443
  // src/session.ts
3373
3444
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
3374
3445
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
@@ -3611,6 +3682,39 @@ var createVoiceSession = (options) => {
3611
3682
  const currentTurnAudio = [];
3612
3683
  let fallbackAttemptsForCurrentTurn = 0;
3613
3684
  let fallbackReplayAudioMsForCurrentTurn = 0;
3685
// Recording capture state for this session. Recording is active only when
// `options.recording` is provided; by default both channels are captured and
// each channel is capped at 50 MiB of buffered audio.
const recordingConfig = options.recording;
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
const recordingBuffers = { assistant: [], user: [] };
const recordingByteTotals = { assistant: 0, user: 0 };
// Last format seen per channel; filled in as chunks arrive.
const recordingFormats = {};
let recordingPersisted = false;
/**
 * Buffers one audio chunk for `channel`.
 *
 * The chunk is ignored when recording is off, recordings were already
 * persisted, the channel is not selected, the format is not raw `pcm_s16le`,
 * or the channel has reached its byte cap. A chunk that would cross the cap is
 * truncated to the remaining budget. The accepted bytes are copied before
 * being stored.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted || !recordingChannels.has(channel)) {
    return;
  }
  const isRawPcm16 = format.container === "raw" && format.encoding === "pcm_s16le";
  if (!isRawPcm16) {
    return;
  }
  const bytesSoFar = recordingByteTotals[channel];
  if (bytesSoFar >= recordingMaxBytes) {
    return;
  }
  const budget = recordingMaxBytes - bytesSoFar;
  const fitsInBudget = bytes.byteLength <= budget;
  const accepted = fitsInBudget ? bytes : bytes.subarray(0, budget);
  const copy = new Uint8Array(accepted);
  recordingBuffers[channel].push(copy);
  recordingByteTotals[channel] = bytesSoFar + accepted.byteLength;
  recordingFormats[channel] = format;
};
3614
3718
  const pruneTurnAudio = () => {
3615
3719
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
3616
3720
  const cutoffAt = Date.now() - replayWindowMs;
@@ -3789,8 +3893,83 @@ var createVoiceSession = (options) => {
3789
3893
  });
3790
3894
  }
3791
3895
  };
3896
/**
 * Writes the buffered audio of each recorded channel to `recordingConfig.store`.
 *
 * Runs at most once per session: the `recordingPersisted` latch is set before
 * the first await, and later calls return immediately. For every selected
 * channel with buffered audio, the chunks are concatenated, stored, and a
 * `recording.ready` trace is appended. A failure for one channel is logged as a
 * warning and does not stop the other channel; the channel's buffer is cleared
 * either way.
 */
const persistRecordings = async () => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const pendingChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (pendingChunks.length === 0 || !channelFormat) {
      continue;
    }
    const byteCount = recordingByteTotals[channel];
    const audioBytes = new Uint8Array(byteCount);
    // Lay the chunks end to end inside the single output buffer.
    pendingChunks.reduce((writeAt, piece) => {
      audioBytes.set(piece, writeAt);
      return writeAt + piece.byteLength;
    }, 0);
    try {
      const artifact = {
        audioBytes,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(byteCount, channelFormat),
        format: channelFormat,
        sessionId: options.id
      };
      const stored = await recordingConfig.store.put(artifact);
      const payload = {
        channel,
        durationMs: stored.durationMs,
        recordingUrl: stored.recordingUrl,
        sessionId: options.id,
        sizeBytes: audioBytes.byteLength
      };
      await appendTrace({ payload, type: "recording.ready" });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store accepted it.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
3949
/**
 * Cancels the TTS output of the currently active turn, if there is one.
 *
 * Clears `activeTTSTurnId` first, then calls the adapter session's `cancel`
 * when the session provides one. A failing `cancel` is logged as a warning and
 * never rejects the returned promise.
 *
 * @param reason - Passed to the adapter's `cancel` and included in the warning log.
 */
const cancelActiveTTS = async (reason) => {
  const sessionToCancel = ttsSession;
  const turnId = activeTTSTurnId;
  const nothingActive = !sessionToCancel || turnId === undefined;
  if (nothingActive) {
    return;
  }
  activeTTSTurnId = undefined;
  if (ttsAdapterSessionCanCancel(sessionToCancel)) {
    try {
      await sessionToCancel.cancel(reason);
    } catch (error) {
      const details = {
        error: toError(error).message,
        reason,
        sessionId: options.id,
        turnId
      };
      logger.warn("voice tts adapter cancel failed", details);
    }
  }
};
3792
3970
  const sendAssistantAudio = async (chunk, input) => {
3793
3971
  const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
3972
+ captureRecordingChunk("assistant", normalizedChunk, input.format);
3794
3973
  await send({
3795
3974
  chunkBase64: encodeBase64(normalizedChunk),
3796
3975
  format: input.format,
@@ -3887,6 +4066,7 @@ var createVoiceSession = (options) => {
3887
4066
  });
3888
4067
  await closeTTSSession("failed");
3889
4068
  await closeAdapter("failed");
4069
+ await persistRecordings();
3890
4070
  speechDetected = false;
3891
4071
  rewindFallbackTurnAudio();
3892
4072
  await options.route.onError?.({
@@ -3955,6 +4135,7 @@ var createVoiceSession = (options) => {
3955
4135
  });
3956
4136
  await closeTTSSession("complete");
3957
4137
  await closeAdapter("complete");
4138
+ await persistRecordings();
3958
4139
  speechDetected = false;
3959
4140
  rewindFallbackTurnAudio();
3960
4141
  if (disposition === "transferred" && input.target) {
@@ -5096,7 +5277,14 @@ var createVoiceSession = (options) => {
5096
5277
  if (shouldStoreAudio) {
5097
5278
  pushTurnAudio(conditionedAudio);
5098
5279
  }
5280
+ if (recordingConfig?.userInputFormat) {
5281
+ const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
5282
+ captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
5283
+ }
5099
5284
  if (audioLevel >= turnDetection.speechThreshold) {
5285
+ if (!speechDetected && activeTTSTurnId !== undefined) {
5286
+ cancelActiveTTS("barge-in");
5287
+ }
5100
5288
  speechDetected = true;
5101
5289
  clearSilenceTimer();
5102
5290
  } else if (speechDetected) {
@@ -5129,6 +5317,7 @@ var createVoiceSession = (options) => {
5129
5317
  clearSilenceTimer();
5130
5318
  await closeTTSSession(reason);
5131
5319
  await closeAdapter(reason);
5320
+ await persistRecordings();
5132
5321
  await Promise.resolve(socket.close(1000, reason));
5133
5322
  if (session.call?.endedAt && session.call.disposition === "closed") {
5134
5323
  await appendTrace({
@@ -34559,6 +34748,76 @@ var createVoiceWorkflowContractHandler = (input) => {
34559
34748
  return result;
34560
34749
  };
34561
34750
  };
34751
+ // src/aiVoiceModel.ts
34752
/**
 * Converts voice-agent messages into the provider's message shape.
 *
 * - `tool` messages become a `user` message carrying a single `tool_result`
 *   part whose id is `toolCallId`, else `name`, else an empty string.
 * - `system` messages are sent with the `user` role.
 * - Every other message keeps its role and content.
 *
 * @param messages - Iterable of messages with `role` and `content`.
 * @returns A new array of provider messages in the same order.
 */
var toProviderMessages = (messages) => Array.from(messages, (message) => {
  switch (message.role) {
    case "tool": {
      const toolResult = {
        content: message.content,
        tool_use_id: message.toolCallId ?? message.name ?? "",
        type: "tool_result"
      };
      return { content: [toolResult], role: "user" };
    }
    case "system":
      return { content: message.content, role: "user" };
    default:
      return { content: message.content, role: message.role };
  }
});
34776
/**
 * Converts voice-agent tool definitions into the provider's tool shape.
 *
 * @param tools - Tool definitions with `name` and optional `description` /
 *   `parameters`; may be `undefined` or `null`.
 * @returns `undefined` when there are no tools (absent or empty list) so the
 *   request carries no tool section; otherwise one entry per tool, with a
 *   missing description defaulting to `""` and missing parameters defaulting
 *   to an empty object schema.
 */
var toProviderTools = (tools) => {
  // Treat an absent list like an empty one instead of throwing on `.length`.
  if (tools == null || tools.length === 0) {
    return;
  }
  return tools.map((tool) => ({
    description: tool.description ?? "",
    input_schema: tool.parameters ?? {
      properties: {},
      type: "object"
    },
    name: tool.name
  }));
};
34789
/**
 * Builds a voice-agent model on top of a streaming AI provider.
 *
 * `generate` opens one `options.provider.stream(...)` call per invocation,
 * using the input's own `system` prompt when present and
 * `options.systemPrompt` otherwise. `text` chunks are concatenated into
 * `assistantText`; `tool_use` chunks are collected as tool calls (a missing
 * `input` becomes `{}`). Other chunk types are ignored.
 *
 * @param options - Provider, model id, optional abort signal and fallback system prompt.
 * @returns An object whose `generate(input)` resolves to `{ assistantText }`,
 *   plus `toolCalls` only when at least one tool call was streamed.
 */
var createAIVoiceModel = (options) => {
  const generate = async (input) => {
    const systemPrompt = input.system ?? options.systemPrompt;
    const request = {
      messages: toProviderMessages(input.messages),
      model: options.model,
      signal: options.signal,
      systemPrompt,
      tools: toProviderTools(input.tools)
    };
    let assistantText = "";
    const toolCalls = [];
    for await (const chunk of options.provider.stream(request)) {
      switch (chunk.type) {
        case "text":
          assistantText = assistantText + chunk.content;
          break;
        case "tool_use":
          toolCalls.push({ args: chunk.input ?? {}, id: chunk.id, name: chunk.name });
          break;
        default:
          break;
      }
    }
    return toolCalls.length > 0 ? { assistantText, toolCalls } : { assistantText };
  };
  return { generate };
};
34562
34821
  // src/ragTool.ts
34563
34822
  var DEFAULT_TOOL_NAME = "searchKnowledgeBase";
34564
34823
  var DEFAULT_DESCRIPTION = "Search the knowledge base and return short grounded citations. Use this whenever the caller asks a question that may be answered by indexed reference material.";
@@ -37004,6 +37263,66 @@ var createStoredVoiceExternalObjectMap = (mapping) => createVoiceExternalObjectM
37004
37263
  sourceId: mapping.sourceId,
37005
37264
  sourceType: mapping.sourceType
37006
37265
  });
37266
/** Builds the WAV file name for a session/channel pair; the session id is percent-encoded. */
var recordingFileName = (sessionId, channel) => "".concat(encodeURIComponent(sessionId), "_", channel, ".wav");
37267
/** Builds the JSON metadata file name for a session/channel pair; the session id is percent-encoded. */
var recordingMetadataFileName = (sessionId, channel) => "".concat(encodeURIComponent(sessionId), "_", channel, ".json");
37268
/**
 * Creates a file-backed recording store.
 *
 * Each session/channel pair is kept as two files inside `options.directory`:
 * a `.wav` file (the PCM wrapped by `encodePcmAsWav`) and a `.json` file with
 * the artifact metadata. The directory is created on first write.
 *
 * `get` and `list` return `audioBytes` as the raw PCM payload, i.e. with the
 * WAV header removed. That matches what `put` accepts and what the stored
 * `format` (raw `pcm_s16le`) describes, so an artifact read from this store
 * can be measured, re-encoded or stored again without double-wrapping.
 *
 * @param options - `directory` to write into; `pretty` indents the metadata JSON.
 * @returns An object with `get`, `list` and `put`.
 */
var createVoiceFileRecordingStore = (options) => {
  // Size of the fixed header that `encodePcmAsWav` writes in front of the samples.
  const WAV_HEADER_BYTES = 44;
  const ensureDir = async () => {
    await mkdir4(options.directory, { recursive: true });
  };
  // Returns the sample bytes of a WAV file written by `put`. Bytes that do not
  // carry the expected RIFF/data layout are returned unchanged.
  const extractPcmPayload = (wavBytes) => {
    const bytes = new Uint8Array(wavBytes.buffer, wavBytes.byteOffset, wavBytes.byteLength);
    if (bytes.byteLength < WAV_HEADER_BYTES) {
      return bytes;
    }
    const riffTag = String.fromCharCode(bytes[0], bytes[1], bytes[2], bytes[3]);
    const dataTag = String.fromCharCode(bytes[36], bytes[37], bytes[38], bytes[39]);
    if (riffTag !== "RIFF" || dataTag !== "data") {
      return bytes;
    }
    const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const declaredSize = view.getUint32(40, true);
    // Never read past the end of the file, even if the header claims more.
    const payloadSize = Math.min(declaredSize, bytes.byteLength - WAV_HEADER_BYTES);
    return bytes.subarray(WAV_HEADER_BYTES, WAV_HEADER_BYTES + payloadSize);
  };
  const put = async (artifact) => {
    await ensureDir();
    const wavPath = join3(options.directory, recordingFileName(artifact.sessionId, artifact.channel));
    const metadataPath = join3(options.directory, recordingMetadataFileName(artifact.sessionId, artifact.channel));
    const wav = encodePcmAsWav(artifact.audioBytes, artifact.format);
    await writeFile(wavPath, wav);
    const recordingUrl = `file://${wavPath}`;
    const metadata = {
      capturedAt: artifact.capturedAt,
      channel: artifact.channel,
      durationMs: artifact.durationMs,
      format: artifact.format,
      recordingUrl,
      sessionId: artifact.sessionId
    };
    await writeFile(metadataPath, options.pretty ? JSON.stringify(metadata, null, 2) : JSON.stringify(metadata));
    return {
      ...artifact,
      recordingUrl
    };
  };
  // Loads metadata and audio for one pair; resolves to `undefined` when either file is missing.
  const readMetadata = async (sessionId, channel) => {
    const metadataPath = join3(options.directory, recordingMetadataFileName(sessionId, channel));
    const wavPath = join3(options.directory, recordingFileName(sessionId, channel));
    try {
      const [metaText, wavBytes] = await Promise.all([
        readFile2(metadataPath, "utf8"),
        readFile2(wavPath)
      ]);
      const meta = JSON.parse(metaText);
      return {
        audioBytes: extractPcmPayload(wavBytes),
        capturedAt: meta.capturedAt,
        channel: meta.channel,
        durationMs: meta.durationMs,
        format: meta.format,
        recordingUrl: meta.recordingUrl,
        sessionId: meta.sessionId
      };
    } catch (error) {
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = (sessionId, channel) => readMetadata(sessionId, channel);
  const list = async (sessionId) => {
    const channels = ["assistant", "user"];
    const records = await Promise.all(channels.map((channel) => readMetadata(sessionId, channel)));
    return records.filter((record) => record !== undefined);
  };
  return { get, list, put };
};
37007
37326
  // src/modelAdapters.ts
37008
37327
  var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
37009
37328
  var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
@@ -44991,6 +45310,7 @@ export {
44991
45310
  verifyVoiceOpsWebhookSignature,
44992
45311
  validateVoiceWorkflowRouteResult,
44993
45312
  validateVoiceObservabilityExportRecord,
45313
+ ttsAdapterSessionCanCancel,
44994
45314
  transcodeTwilioInboundPayloadToPCM16,
44995
45315
  transcodePCMToTwilioOutboundPayload,
44996
45316
  summarizeVoiceTurnQuality,
@@ -45263,6 +45583,7 @@ export {
45263
45583
  evaluateVoiceBrowserCallProfileEvidence,
45264
45584
  evaluateVoiceAgentSquadContractEvidence,
45265
45585
  encodeTwilioMulawBase64,
45586
+ encodePcmAsWav,
45266
45587
  deliverVoiceTraceEventsToSinks,
45267
45588
  deliverVoiceObservabilityExport,
45268
45589
  deliverVoiceMonitorIssueNotifications,
@@ -45476,6 +45797,7 @@ export {
45476
45797
  createVoiceMemoryTraceSinkDeliveryStore,
45477
45798
  createVoiceMemoryTraceEventStore,
45478
45799
  createVoiceMemoryStore,
45800
+ createVoiceMemoryRecordingStore,
45479
45801
  createVoiceMemoryObservabilityExportDeliveryReceiptStore,
45480
45802
  createVoiceMemoryMonitorNotifierDeliveryReceiptStore,
45481
45803
  createVoiceMemoryMonitorIssueStore,
@@ -45522,6 +45844,7 @@ export {
45522
45844
  createVoiceFileScenarioFixtureStore,
45523
45845
  createVoiceFileRuntimeStorage,
45524
45846
  createVoiceFileReviewStore,
45847
+ createVoiceFileRecordingStore,
45525
45848
  createVoiceFileObservabilityExportDeliveryReceiptStore,
45526
45849
  createVoiceFileIntegrationEventStore,
45527
45850
  createVoiceFileIncidentBundleStore,
@@ -45614,7 +45937,9 @@ export {
45614
45937
  createDomainPhraseHints,
45615
45938
  createDomainLexicon,
45616
45939
  createAnthropicVoiceAssistantModel,
45940
+ createAIVoiceModel,
45617
45941
  conditionAudioChunk,
45942
+ computePcmDurationMs,
45618
45943
  completeVoiceOpsTask,
45619
45944
  compareVoiceEvalBaseline,
45620
45945
  claimVoiceOpsTask,
@@ -0,0 +1,21 @@
1
+ import type { AudioFormat } from "./types";
2
/** Side of the conversation a recording belongs to. */
+ export type VoiceRecordingChannel = "assistant" | "user";
3
/** One channel's recorded audio for a session, as handed to `VoiceRecordingStore.put`. */
+ export type VoiceRecordingArtifact = {
4
/** Audio payload; the voice session passes the concatenated PCM chunks of the channel. */
+ audioBytes: Uint8Array;
5
/** Timestamp of persistence; the voice session sets it from `Date.now()`. */
+ capturedAt: number;
6
+ channel: VoiceRecordingChannel;
7
/** Duration in milliseconds; the voice session derives it with `computePcmDurationMs`. */
+ durationMs: number;
8
/** Format describing `audioBytes`. */
+ format: AudioFormat;
9
+ sessionId: string;
10
+ };
11
/** A recording artifact as returned by a store, optionally annotated with where it can be found. */
+ export type StoredVoiceRecordingArtifact = VoiceRecordingArtifact & {
12
/** Store-specific locator; the bundled stores use `memory://` and `file://` URLs. */
+ recordingUrl?: string;
13
+ };
14
/** Persistence contract for per-session, per-channel recordings. */
+ export type VoiceRecordingStore = {
15
/** Resolves to the stored artifact for the session/channel pair, or `undefined` when there is none. */
+ get: (sessionId: string, channel: VoiceRecordingChannel) => Promise<StoredVoiceRecordingArtifact | undefined>;
16
/** Resolves to every stored artifact of the session (empty array when there is none). */
+ list: (sessionId: string) => Promise<StoredVoiceRecordingArtifact[]>;
17
/** Stores the artifact and resolves to its stored form. */
+ put: (artifact: VoiceRecordingArtifact) => Promise<StoredVoiceRecordingArtifact>;
18
+ };
19
/**
 * Wraps raw `pcm_s16le` bytes in a 44-byte RIFF/WAVE header and returns the result.
 * Throws an `Error` when `format` is not `container: "raw"` with `encoding: "pcm_s16le"`.
 */
+ export declare const encodePcmAsWav: (pcm: Uint8Array, format: AudioFormat) => Uint8Array;
20
/**
 * Returns the duration, in rounded milliseconds, of `pcmByteLength` bytes of raw
 * `pcm_s16le` audio (2 bytes per sample per channel). Returns `0` for any other
 * format or when the computed byte rate is zero.
 */
+ export declare const computePcmDurationMs: (pcmByteLength: number, format: AudioFormat) => number;
21
/**
 * Creates an in-memory `VoiceRecordingStore` that keeps one artifact per
 * session/channel pair and tags stored artifacts with a `memory://recording/...` URL.
 */
+ export declare const createVoiceMemoryRecordingStore: () => VoiceRecordingStore;
@@ -5337,6 +5337,77 @@ var resolveLogger = (logger) => ({
5337
5337
  ...logger
5338
5338
  });
5339
5339
 
5340
+ // src/types.ts
5341
/**
 * Reports whether a TTS adapter session exposes a callable `cancel` method.
 *
 * A missing session (`null` / `undefined`) answers `false` rather than
 * throwing, so the guard can be used before the session is known to exist.
 *
 * @param session - Candidate TTS adapter session; may be absent.
 * @returns `true` only when `session.cancel` is a function.
 */
var ttsAdapterSessionCanCancel = (session) => typeof session?.cancel === "function";
5342
+
5343
+ // src/recordingStore.ts
5344
/** Writes `value` at `offset` as an unsigned 32-bit little-endian integer. */
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
/** Writes `value` at `offset` as an unsigned 16-bit little-endian integer. */
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
/** Writes one byte per character of `value` starting at `offset` (used for the four-character chunk tags). */
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw 16-bit little-endian PCM bytes in a 44-byte RIFF/WAVE header.
 *
 * RIFF chunks must occupy an even number of bytes. When `pcm` has an odd
 * length, a single zero pad byte is appended after the sample data; the pad is
 * counted in the RIFF size but not in the `data` chunk size. Even-length input
 * produces exactly `44 + pcm.byteLength` bytes, as before.
 *
 * @param pcm - Raw PCM bytes (`Uint8Array`).
 * @param format - Audio format; must be `container: "raw"`, `encoding: "pcm_s16le"`.
 *   `channels` and `sampleRateHz` are written into the header as-is.
 * @returns A new `Uint8Array` holding header + samples (+ optional pad byte).
 * @throws Error when the format is not raw `pcm_s16le`.
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const byteRate = sampleRate * channels * bitsPerSample / 8;
  const blockAlign = channels * bitsPerSample / 8;
  const dataSize = pcm.byteLength;
  // RIFF word alignment: odd-sized chunk data is followed by one zero byte.
  const padSize = dataSize % 2;
  // ArrayBuffer is zero-filled, so the pad byte needs no explicit write.
  const buffer = new ArrayBuffer(44 + dataSize + padSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, 36 + dataSize + padSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // fmt chunk size for PCM
  writeUint16LE(view, 20, 1); // audio format tag 1 = PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize); // excludes the pad byte
  const output = new Uint8Array(buffer);
  output.set(pcm, 44);
  return output;
};
5384
/**
 * Computes the playback duration of a raw 16-bit PCM buffer.
 *
 * @param pcmByteLength - Number of PCM bytes.
 * @param format - Audio format; only `container: "raw"` with
 *   `encoding: "pcm_s16le"` is measured.
 * @returns Duration in whole milliseconds (rounded). Returns `0` for any other
 *   format, and also when the byte rate or the result is not a positive finite
 *   number (missing, zero or negative `sampleRateHz` / `channels`, negative or
 *   non-numeric byte length), so callers never receive `NaN` or a negative value.
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per sample per channel.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
5394
/**
 * Creates an in-memory recording store holding at most one artifact per
 * session/channel pair; a later `put` for the same pair replaces the earlier one.
 *
 * Stored artifacts receive a synthetic `memory://recording/<sessionId>/<channel>.wav`
 * URL. The session id is percent-encoded so ids containing `/`, spaces or
 * other reserved characters still yield a well-formed, unambiguous URL (the
 * file-backed store encodes session ids in its file names the same way).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  const records = new Map();
  const key = (sessionId, channel) => `${sessionId}::${channel}`;
  return {
    // Resolves to `undefined` when nothing was stored for the pair.
    get: async (sessionId, channel) => records.get(key(sessionId, channel)),
    // Matches on the artifact's own `sessionId`, in insertion order.
    list: async (sessionId) => Array.from(records.values()).filter((record) => record.sessionId === sessionId),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      records.set(key(artifact.sessionId, artifact.channel), stored);
      return stored;
    }
  };
};
5410
+
5340
5411
  // src/session.ts
5341
5412
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
5342
5413
  var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
@@ -5579,6 +5650,39 @@ var createVoiceSession = (options) => {
5579
5650
  const currentTurnAudio = [];
5580
5651
  let fallbackAttemptsForCurrentTurn = 0;
5581
5652
  let fallbackReplayAudioMsForCurrentTurn = 0;
5653
// Recording capture state for this session. Recording is active only when
// `options.recording` is provided; by default both channels are captured and
// each channel is capped at 50 MiB of buffered audio.
const recordingConfig = options.recording;
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
const recordingBuffers = { assistant: [], user: [] };
const recordingByteTotals = { assistant: 0, user: 0 };
// Last format seen per channel; filled in as chunks arrive.
const recordingFormats = {};
let recordingPersisted = false;
/**
 * Buffers one audio chunk for `channel`.
 *
 * The chunk is ignored when recording is off, recordings were already
 * persisted, the channel is not selected, the format is not raw `pcm_s16le`,
 * or the channel has reached its byte cap. A chunk that would cross the cap is
 * truncated to the remaining budget. The accepted bytes are copied before
 * being stored.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted || !recordingChannels.has(channel)) {
    return;
  }
  const isRawPcm16 = format.container === "raw" && format.encoding === "pcm_s16le";
  if (!isRawPcm16) {
    return;
  }
  const bytesSoFar = recordingByteTotals[channel];
  if (bytesSoFar >= recordingMaxBytes) {
    return;
  }
  const budget = recordingMaxBytes - bytesSoFar;
  const fitsInBudget = bytes.byteLength <= budget;
  const accepted = fitsInBudget ? bytes : bytes.subarray(0, budget);
  const copy = new Uint8Array(accepted);
  recordingBuffers[channel].push(copy);
  recordingByteTotals[channel] = bytesSoFar + accepted.byteLength;
  recordingFormats[channel] = format;
};
5582
5686
  const pruneTurnAudio = () => {
5583
5687
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
5584
5688
  const cutoffAt = Date.now() - replayWindowMs;
@@ -5757,8 +5861,83 @@ var createVoiceSession = (options) => {
5757
5861
  });
5758
5862
  }
5759
5863
  };
5864
/**
 * Writes the buffered audio of each recorded channel to `recordingConfig.store`.
 *
 * Runs at most once per session: the `recordingPersisted` latch is set before
 * the first await, and later calls return immediately. For every selected
 * channel with buffered audio, the chunks are concatenated, stored, and a
 * `recording.ready` trace is appended. A failure for one channel is logged as a
 * warning and does not stop the other channel; the channel's buffer is cleared
 * either way.
 */
const persistRecordings = async () => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const pendingChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (pendingChunks.length === 0 || !channelFormat) {
      continue;
    }
    const byteCount = recordingByteTotals[channel];
    const audioBytes = new Uint8Array(byteCount);
    // Lay the chunks end to end inside the single output buffer.
    pendingChunks.reduce((writeAt, piece) => {
      audioBytes.set(piece, writeAt);
      return writeAt + piece.byteLength;
    }, 0);
    try {
      const artifact = {
        audioBytes,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(byteCount, channelFormat),
        format: channelFormat,
        sessionId: options.id
      };
      const stored = await recordingConfig.store.put(artifact);
      const payload = {
        channel,
        durationMs: stored.durationMs,
        recordingUrl: stored.recordingUrl,
        sessionId: options.id,
        sizeBytes: audioBytes.byteLength
      };
      await appendTrace({ payload, type: "recording.ready" });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store accepted it.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
5917
/**
 * Cancels the TTS output of the currently active turn, if there is one.
 *
 * Clears `activeTTSTurnId` first, then calls the adapter session's `cancel`
 * when the session provides one. A failing `cancel` is logged as a warning and
 * never rejects the returned promise.
 *
 * @param reason - Passed to the adapter's `cancel` and included in the warning log.
 */
const cancelActiveTTS = async (reason) => {
  const sessionToCancel = ttsSession;
  const turnId = activeTTSTurnId;
  const nothingActive = !sessionToCancel || turnId === undefined;
  if (nothingActive) {
    return;
  }
  activeTTSTurnId = undefined;
  if (ttsAdapterSessionCanCancel(sessionToCancel)) {
    try {
      await sessionToCancel.cancel(reason);
    } catch (error) {
      const details = {
        error: toError(error).message,
        reason,
        sessionId: options.id,
        turnId
      };
      logger.warn("voice tts adapter cancel failed", details);
    }
  }
};
5760
5938
  const sendAssistantAudio = async (chunk, input) => {
5761
5939
  const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
5940
+ captureRecordingChunk("assistant", normalizedChunk, input.format);
5762
5941
  await send({
5763
5942
  chunkBase64: encodeBase64(normalizedChunk),
5764
5943
  format: input.format,
@@ -5855,6 +6034,7 @@ var createVoiceSession = (options) => {
5855
6034
  });
5856
6035
  await closeTTSSession("failed");
5857
6036
  await closeAdapter("failed");
6037
+ await persistRecordings();
5858
6038
  speechDetected = false;
5859
6039
  rewindFallbackTurnAudio();
5860
6040
  await options.route.onError?.({
@@ -5923,6 +6103,7 @@ var createVoiceSession = (options) => {
5923
6103
  });
5924
6104
  await closeTTSSession("complete");
5925
6105
  await closeAdapter("complete");
6106
+ await persistRecordings();
5926
6107
  speechDetected = false;
5927
6108
  rewindFallbackTurnAudio();
5928
6109
  if (disposition === "transferred" && input.target) {
@@ -7064,7 +7245,14 @@ var createVoiceSession = (options) => {
7064
7245
  if (shouldStoreAudio) {
7065
7246
  pushTurnAudio(conditionedAudio);
7066
7247
  }
7248
+ if (recordingConfig?.userInputFormat) {
7249
+ const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
7250
+ captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
7251
+ }
7067
7252
  if (audioLevel >= turnDetection.speechThreshold) {
7253
+ if (!speechDetected && activeTTSTurnId !== undefined) {
7254
+ cancelActiveTTS("barge-in");
7255
+ }
7068
7256
  speechDetected = true;
7069
7257
  clearSilenceTimer();
7070
7258
  } else if (speechDetected) {
@@ -7097,6 +7285,7 @@ var createVoiceSession = (options) => {
7097
7285
  clearSilenceTimer();
7098
7286
  await closeTTSSession(reason);
7099
7287
  await closeAdapter(reason);
7288
+ await persistRecordings();
7100
7289
  await Promise.resolve(socket.close(1000, reason));
7101
7290
  if (session.call?.endedAt && session.call.disposition === "closed") {
7102
7291
  await appendTrace({
@@ -13154,8 +13343,9 @@ var runTTSAdapterFixture = async (adapter, fixture, options = {}) => {
13154
13343
  sessionId: `tts-benchmark:${fixture.id}`,
13155
13344
  ...openOptions ?? {}
13156
13345
  });
13346
+ const sessionOn = session.on;
13157
13347
  const unsubscribers = [
13158
- session.on("audio", ({ chunk, format, receivedAt }) => {
13348
+ sessionOn("audio", ({ chunk, format, receivedAt }) => {
13159
13349
  const normalizedChunk = chunk instanceof Uint8Array ? chunk : chunk instanceof ArrayBuffer ? new Uint8Array(chunk) : new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
13160
13350
  audioChunkCount += 1;
13161
13351
  totalAudioBytes += normalizedChunk.byteLength;
@@ -13175,10 +13365,10 @@ var runTTSAdapterFixture = async (adapter, fixture, options = {}) => {
13175
13365
  }, options.interruptAfterFirstAudioMs);
13176
13366
  }
13177
13367
  }),
13178
- session.on("error", () => {
13368
+ sessionOn("error", () => {
13179
13369
  errorCount += 1;
13180
13370
  }),
13181
- session.on("close", () => {
13371
+ sessionOn("close", () => {
13182
13372
  closeCount += 1;
13183
13373
  closed = true;
13184
13374
  closedAt = Date.now();
package/dist/trace.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { S3Client, S3Options } from "bun";
2
- export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
2
+ export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
3
3
  export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
4
4
  at: number;
5
5
  id?: string;
package/dist/types.d.ts CHANGED
@@ -166,8 +166,12 @@ export type TTSSessionEventMap = {
166
166
  export type TTSAdapterSession = {
167
167
  on: <K extends keyof TTSSessionEventMap>(event: K, handler: (payload: TTSSessionEventMap[K]) => void | Promise<void>) => () => void;
168
168
  send: (text: string) => Promise<void>;
169
+ cancel?: (reason?: string) => Promise<void>;
169
170
  close: (reason?: string) => Promise<void>;
170
171
  };
172
+ export declare const ttsAdapterSessionCanCancel: (session: TTSAdapterSession) => session is TTSAdapterSession & {
173
+ cancel: (reason?: string) => Promise<void>;
174
+ };
171
175
  export type TTSAdapterOpenOptions = {
172
176
  sessionId: string;
173
177
  lexicon?: VoiceLexiconEntry[];
@@ -697,6 +701,12 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
697
701
  profileSwitchGuard?: VoicePluginProfileSwitchGuardConfig<TContext, TSession, TResult>;
698
702
  trace?: VoiceTraceEventStore;
699
703
  } & VoiceRouteConfig<TContext, TSession, TResult>;
704
+ export type VoiceSessionRecordingConfig = {
705
+ channels?: ReadonlyArray<"assistant" | "user">;
706
+ maxBytesPerChannel?: number;
707
+ store: import("./recordingStore").VoiceRecordingStore;
708
+ userInputFormat?: AudioFormat;
709
+ };
700
710
  export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
701
711
  costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
702
712
  id: string;
@@ -711,6 +721,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
711
721
  sttFallback?: VoiceResolvedSTTFallbackConfig;
712
722
  store: VoiceSessionStore<TSession>;
713
723
  trace?: VoiceTraceEventStore;
724
+ recording?: VoiceSessionRecordingConfig;
714
725
  reconnect: Required<VoiceReconnectConfig>;
715
726
  phraseHints?: VoicePhraseHint[];
716
727
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.475",
3
+ "version": "0.0.22-beta.477",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",
@@ -160,7 +160,7 @@
160
160
  "bench:stt": "bun run ./scripts/benchmark-stt.ts all",
161
161
  "bench:assemblyai:sessions": "bun run ./scripts/benchmark-session.ts assemblyai",
162
162
  "bench:openai:sessions": "bun run ./scripts/benchmark-session.ts openai",
163
- "build": "bun run ./scripts/build-htmx-bootstrap-asset.ts && rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute --external @absolutejs/media && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
163
+ "build": "bun run ./scripts/build-htmx-bootstrap-asset.ts && rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute --external @absolutejs/ai --external @absolutejs/media && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
164
164
  "format": "prettier --write \"./**/*.{js,jsx,ts,tsx,json,md}\"",
165
165
  "lint": "eslint ./src",
166
166
  "release": "bun run format && bun run build && bun publish",
@@ -229,12 +229,16 @@
229
229
  },
230
230
  "peerDependencies": {
231
231
  "@absolutejs/absolute": ">=0.19.0-beta.646",
232
+ "@absolutejs/ai": ">=0.0.5",
232
233
  "@angular/core": ">=21.0.0",
233
234
  "elysia": ">=1.4.18",
234
235
  "react": ">=19.0.0",
235
236
  "vue": ">=3.5.0"
236
237
  },
237
238
  "peerDependenciesMeta": {
239
+ "@absolutejs/ai": {
240
+ "optional": true
241
+ },
238
242
  "@angular/core": {
239
243
  "optional": true
240
244
  },
@@ -250,6 +254,7 @@
250
254
  },
251
255
  "devDependencies": {
252
256
  "@absolutejs/absolute": "0.19.0-beta.646",
257
+ "@absolutejs/ai": "0.0.5",
253
258
  "@angular/core": "^21.0.0",
254
259
  "@types/bun": "1.3.9",
255
260
  "@types/react": "19.2.0",