@absolutejs/voice 0.0.22-beta.476 → 0.0.22-beta.477

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ import { type StoredVoiceTraceEvent, type VoiceTraceSinkDeliveryRecord, type Voi
7
7
  import type { StoredVoiceIntegrationEvent, StoredVoiceExternalObjectMap, StoredVoiceOpsTask, VoiceExternalObjectMap, VoiceExternalObjectMapStore, VoiceIntegrationEvent, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from "./ops";
8
8
  import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from "./testing/review";
9
9
  import type { VoiceSessionRecord, VoiceSessionStore } from "./types";
10
+ import type { VoiceRecordingStore } from "./recordingStore";
10
11
  export type VoiceFileStoreOptions = {
11
12
  directory: string;
12
13
  pretty?: boolean;
@@ -50,3 +51,4 @@ export declare const createStoredVoiceIntegrationEvent: <TEvent extends Omit<Voi
50
51
  export declare const createStoredVoiceExternalObjectMap: <TMapping extends Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt"> = Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt">>(mapping: TMapping & {
51
52
  at?: number;
52
53
  }) => VoiceExternalObjectMap;
54
+ export declare const createVoiceFileRecordingStore: (options: VoiceFileStoreOptions) => VoiceRecordingStore;
package/dist/index.d.ts CHANGED
@@ -87,7 +87,9 @@ export { createVoiceTurnQualityHTMLHandler, createVoiceTurnQualityJSONHandler, c
87
87
  export { assertVoiceOutcomeContractEvidence, createVoiceOutcomeContractHTMLHandler, createVoiceOutcomeContractJSONHandler, createVoiceOutcomeContractRoutes, evaluateVoiceOutcomeContractEvidence, renderVoiceOutcomeContractHTML, runVoiceOutcomeContractSuite, } from "./outcomeContract";
88
88
  export { applyVoiceTelephonyOutcome, assertVoiceTelephonyWebhookNormalizationEvidence, createMemoryVoiceTelephonyWebhookIdempotencyStore, createVoiceTelephonyOutcomePolicy, createVoiceTelephonyWebhookHandler, createVoiceTelephonyWebhookRoutes, evaluateVoiceTelephonyWebhookNormalizationEvidence, parseVoiceTelephonyWebhookEvent, resolveVoiceTelephonyOutcome, signVoiceTwilioWebhook, verifyVoiceTwilioWebhookSignature, voiceTelephonyOutcomeToRouteResult, } from "./telephonyOutcome";
89
89
  export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence, createVoicePhoneAgent, evaluateVoicePhoneCallControlEvidence, evaluateVoicePhoneAssistantEvidence, } from "./phoneAgent";
90
- export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
90
+ export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileRecordingStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
91
+ export { computePcmDurationMs, createVoiceMemoryRecordingStore, encodePcmAsWav, } from "./recordingStore";
92
+ export type { StoredVoiceRecordingArtifact, VoiceRecordingArtifact, VoiceRecordingChannel, VoiceRecordingStore, } from "./recordingStore";
91
93
  export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace, } from "./assistantMemory";
92
94
  export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter, } from "./modelAdapters";
93
95
  export { createOpenAIVoiceTTS } from "./openaiTTS";
package/dist/index.js CHANGED
@@ -3372,6 +3372,74 @@ var buildTurnText = (transcripts, partialText, options = {}) => {
3372
3372
  // src/types.ts
3373
3373
  var ttsAdapterSessionCanCancel = (session) => typeof session.cancel === "function";
3374
3374
 
3375
+ // src/recordingStore.ts
3376
// Writes a 32-bit unsigned integer in little-endian order (the byte order used by RIFF/WAVE headers).
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
// Writes a 16-bit unsigned integer in little-endian order.
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
// Writes one byte per UTF-16 code unit of `value`; used for the four-character chunk tags.
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw signed 16-bit little-endian PCM in a WAV (RIFF/WAVE) container.
 *
 * The output is a 12-byte RIFF header, a 16-byte PCM "fmt " chunk and a "data"
 * chunk holding `pcm` verbatim. When `pcm` has an odd length a single zero pad
 * byte follows the data chunk, as RIFF requires chunks to be word-aligned; the
 * pad is counted in the RIFF size but not in the data chunk size.
 *
 * @param pcm Uint8Array of PCM bytes to embed.
 * @param format Audio format; must be container "raw" with encoding "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header.
 * @returns A new Uint8Array containing the complete WAV file.
 * @throws Error when the format is not raw pcm_s16le.
 * @throws RangeError when the channel count, sample rate or payload size cannot be
 *   represented in the fixed-width header fields (they would otherwise wrap silently).
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const blockAlign = channels * bitsPerSample / 8;
  const byteRate = sampleRate * blockAlign;
  // blockAlign is a 16-bit header field and byteRate a 32-bit one.
  if (!Number.isInteger(channels) || channels < 1 || blockAlign > 65535) {
    throw new RangeError(`encodePcmAsWav requires a positive integer channel count that fits the WAV header (got channels=${channels})`);
  }
  if (!Number.isInteger(sampleRate) || sampleRate < 1 || byteRate > 4294967295) {
    throw new RangeError(`encodePcmAsWav requires a positive integer sample rate that fits the WAV header (got sampleRateHz=${sampleRate})`);
  }
  const dataSize = pcm.byteLength;
  const padSize = dataSize % 2;
  const riffSize = 36 + dataSize + padSize;
  if (riffSize > 4294967295) {
    throw new RangeError(`encodePcmAsWav cannot store ${dataSize} bytes of audio in a single RIFF file`);
  }
  // 8 bytes ("RIFF" + size field) precede the region that riffSize counts.
  const buffer = new ArrayBuffer(8 + riffSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, riffSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // size of the PCM fmt chunk body
  writeUint16LE(view, 20, 1); // audio format tag 1 = integer PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize);
  const output = new Uint8Array(buffer);
  // The ArrayBuffer is zero-initialised, so the optional pad byte is already 0.
  output.set(pcm, 44);
  return output;
};
3416
/**
 * Computes the playback duration of a raw signed 16-bit PCM payload.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only container "raw" with encoding "pcm_s16le" is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 for
 *   unsupported formats and whenever the inputs do not yield a finite, positive
 *   duration (zero, missing, NaN or negative rate/channel count/length).
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per 16-bit sample, one sample per channel per tick.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  // A zero rate yields Infinity/NaN and malformed inputs yield NaN or a negative value;
  // collapse all of those to 0 so callers never persist a non-finite duration.
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
3426
/**
 * Creates a recording store that keeps artifacts in process memory.
 *
 * Artifacts are indexed by session id and then by channel, so a later `put` for
 * the same session/channel replaces the earlier one. The stored object keeps the
 * caller's `audioBytes` reference (no copy is made).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  // sessionId -> (channel -> stored artifact); avoids scanning other sessions in `list`.
  const recordingsBySession = new Map();
  return {
    // Resolves to undefined when nothing is stored for the session/channel.
    get: async (sessionId, channel) => recordingsBySession.get(sessionId)?.get(channel),
    // Resolves to the session's artifacts in first-insertion order, or [] when none exist.
    list: async (sessionId) => Array.from(recordingsBySession.get(sessionId)?.values() ?? []),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        // Percent-encode the session id so "/", "?", "#" or spaces cannot break the URL.
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      let sessionRecordings = recordingsBySession.get(artifact.sessionId);
      if (!sessionRecordings) {
        sessionRecordings = new Map();
        recordingsBySession.set(artifact.sessionId, sessionRecordings);
      }
      sessionRecordings.set(artifact.channel, stored);
      return stored;
    }
  };
};
3442
+
3375
3443
  // src/session.ts
3376
3444
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
3377
3445
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
@@ -3614,6 +3682,39 @@ var createVoiceSession = (options) => {
3614
3682
  const currentTurnAudio = [];
3615
3683
  let fallbackAttemptsForCurrentTurn = 0;
3616
3684
  let fallbackReplayAudioMsForCurrentTurn = 0;
3685
+ const recordingConfig = options.recording;
3686
+ const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
3687
+ const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
3688
+ const recordingBuffers = {
3689
+ assistant: [],
3690
+ user: []
3691
+ };
3692
+ const recordingByteTotals = {
3693
+ assistant: 0,
3694
+ user: 0
3695
+ };
3696
+ const recordingFormats = {};
3697
+ let recordingPersisted = false;
3698
/**
 * Buffers one chunk of session audio for the recording that is persisted later.
 *
 * The chunk is ignored when recording is not configured, has already been
 * persisted, the channel is not enabled, or the format is not raw pcm_s16le.
 * Each channel stops accumulating once it reaches the configured byte cap.
 *
 * @param channel "assistant" or "user".
 * @param bytes Uint8Array of audio bytes; the retained part is copied.
 * @param format Audio format of `bytes`; remembered as the channel's format.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  if (!recordingChannels.has(channel)) {
    return;
  }
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return;
  }
  // One frame is one 16-bit sample per channel. Round the cap down to a whole number
  // of frames so truncating the final chunk can never cut through a sample.
  const frameBytes = Number.isInteger(format.channels) && format.channels > 0 ? format.channels * 2 : 2;
  const channelCap = recordingMaxBytes - recordingMaxBytes % frameBytes;
  const remaining = channelCap - recordingByteTotals[channel];
  // Written as a negated comparison so a NaN cap is also treated as "no room left".
  if (!(remaining > 0)) {
    return;
  }
  const slice = bytes.byteLength <= remaining ? bytes : bytes.subarray(0, remaining);
  if (slice.byteLength === 0) {
    // An empty chunk carries no audio; buffering it could only produce an empty recording.
    return;
  }
  // Copy so later mutation or reuse of the caller's buffer cannot alter the recording.
  recordingBuffers[channel].push(new Uint8Array(slice));
  recordingByteTotals[channel] += slice.byteLength;
  recordingFormats[channel] = format;
};
3617
3718
  const pruneTurnAudio = () => {
3618
3719
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
3619
3720
  const cutoffAt = Date.now() - replayWindowMs;
@@ -3792,6 +3893,59 @@ var createVoiceSession = (options) => {
3792
3893
  });
3793
3894
  }
3794
3895
  };
3896
/**
 * Flushes the buffered audio of every enabled channel to the configured recording store.
 *
 * Runs at most once per session: the persisted flag is set before the first await,
 * so a second call made while this one is still in flight returns immediately.
 * For each channel with buffered audio the chunks are concatenated, handed to
 * `recordingConfig.store.put`, and a "recording.ready" trace event is appended.
 * A failure for one channel is logged as a warning and does not stop the other;
 * the channel's buffers are released either way.
 */
const persistRecordings = async () => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const bufferedChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (bufferedChunks.length === 0 || !channelFormat) {
      continue;
    }
    // Concatenate the buffered chunks into one contiguous payload.
    const totalBytes = recordingByteTotals[channel];
    const merged = new Uint8Array(totalBytes);
    bufferedChunks.reduce((writeAt, chunk) => {
      merged.set(chunk, writeAt);
      return writeAt + chunk.byteLength;
    }, 0);
    try {
      const stored = await recordingConfig.store.put({
        audioBytes: merged,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(totalBytes, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      const payload = {
        channel,
        durationMs: stored.durationMs,
        recordingUrl: stored.recordingUrl,
        sessionId: options.id,
        sizeBytes: merged.byteLength
      };
      await appendTrace({ payload, type: "recording.ready" });
    } catch (error) {
      // Best effort: a failed store or trace write must not break session teardown.
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
3795
3949
  const cancelActiveTTS = async (reason) => {
3796
3950
  const activeSession = ttsSession;
3797
3951
  const cancelledTurnId = activeTTSTurnId;
@@ -3815,6 +3969,7 @@ var createVoiceSession = (options) => {
3815
3969
  };
3816
3970
  const sendAssistantAudio = async (chunk, input) => {
3817
3971
  const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
3972
+ captureRecordingChunk("assistant", normalizedChunk, input.format);
3818
3973
  await send({
3819
3974
  chunkBase64: encodeBase64(normalizedChunk),
3820
3975
  format: input.format,
@@ -3911,6 +4066,7 @@ var createVoiceSession = (options) => {
3911
4066
  });
3912
4067
  await closeTTSSession("failed");
3913
4068
  await closeAdapter("failed");
4069
+ await persistRecordings();
3914
4070
  speechDetected = false;
3915
4071
  rewindFallbackTurnAudio();
3916
4072
  await options.route.onError?.({
@@ -3979,6 +4135,7 @@ var createVoiceSession = (options) => {
3979
4135
  });
3980
4136
  await closeTTSSession("complete");
3981
4137
  await closeAdapter("complete");
4138
+ await persistRecordings();
3982
4139
  speechDetected = false;
3983
4140
  rewindFallbackTurnAudio();
3984
4141
  if (disposition === "transferred" && input.target) {
@@ -5120,6 +5277,10 @@ var createVoiceSession = (options) => {
5120
5277
  if (shouldStoreAudio) {
5121
5278
  pushTurnAudio(conditionedAudio);
5122
5279
  }
5280
+ if (recordingConfig?.userInputFormat) {
5281
+ const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
5282
+ captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
5283
+ }
5123
5284
  if (audioLevel >= turnDetection.speechThreshold) {
5124
5285
  if (!speechDetected && activeTTSTurnId !== undefined) {
5125
5286
  cancelActiveTTS("barge-in");
@@ -5156,6 +5317,7 @@ var createVoiceSession = (options) => {
5156
5317
  clearSilenceTimer();
5157
5318
  await closeTTSSession(reason);
5158
5319
  await closeAdapter(reason);
5320
+ await persistRecordings();
5159
5321
  await Promise.resolve(socket.close(1000, reason));
5160
5322
  if (session.call?.endedAt && session.call.disposition === "closed") {
5161
5323
  await appendTrace({
@@ -37101,6 +37263,66 @@ var createStoredVoiceExternalObjectMap = (mapping) => createVoiceExternalObjectM
37101
37263
  sourceId: mapping.sourceId,
37102
37264
  sourceType: mapping.sourceType
37103
37265
  });
37266
// File name of the WAV audio for a session/channel; the session id is percent-encoded
// so it cannot introduce path separators.
var recordingFileName = (sessionId, channel) => "".concat(encodeURIComponent(sessionId), "_", channel, ".wav");
// File name of the JSON metadata sidecar that sits next to the WAV file.
var recordingMetadataFileName = (sessionId, channel) => "".concat(encodeURIComponent(sessionId), "_", channel, ".json");
37268
/**
 * Creates a recording store backed by files in `options.directory`.
 *
 * Each session/channel pair is stored as two files: a WAV file with the audio and
 * a JSON sidecar with the metadata (written pretty-printed when `options.pretty`
 * is set). `get` and `list` resolve artifacts whose `audioBytes` is the PCM payload
 * of the WAV file, i.e. the same bytes that were passed to `put`, matching the
 * artifact's `format`; the playable WAV itself is referenced by `recordingUrl`.
 *
 * @param options File store options; `directory` is created on demand.
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceFileRecordingStore = (options) => {
  const ensureDir = async () => {
    await mkdir4(options.directory, { recursive: true });
  };
  const pathsFor = (sessionId, channel) => ({
    metadataPath: join3(options.directory, recordingMetadataFileName(sessionId, channel)),
    wavPath: join3(options.directory, recordingFileName(sessionId, channel))
  });
  // Returns the payload of the first "data" chunk of a RIFF/WAVE file. Input that is
  // not RIFF/WAVE, or that has no data chunk, is returned unchanged.
  const extractWavData = (fileBytes) => {
    const tagAt = (offset) => String.fromCharCode(fileBytes[offset], fileBytes[offset + 1], fileBytes[offset + 2], fileBytes[offset + 3]);
    if (fileBytes.byteLength < 12 || tagAt(0) !== "RIFF" || tagAt(8) !== "WAVE") {
      return fileBytes;
    }
    const view = new DataView(fileBytes.buffer, fileBytes.byteOffset, fileBytes.byteLength);
    let offset = 12;
    while (offset + 8 <= fileBytes.byteLength) {
      const chunkSize = view.getUint32(offset + 4, true);
      const bodyStart = offset + 8;
      if (tagAt(offset) === "data") {
        // Clamp in case the file was truncated after the header was written.
        return fileBytes.subarray(bodyStart, Math.min(bodyStart + chunkSize, fileBytes.byteLength));
      }
      // RIFF chunks are word-aligned: an odd-sized body is followed by one pad byte.
      offset = bodyStart + chunkSize + chunkSize % 2;
    }
    return fileBytes;
  };
  const put = async (artifact) => {
    await ensureDir();
    const { metadataPath, wavPath } = pathsFor(artifact.sessionId, artifact.channel);
    const wav = encodePcmAsWav(artifact.audioBytes, artifact.format);
    await writeFile(wavPath, wav);
    // NOTE(review): this is only a well-formed file URL when options.directory is an
    // absolute path without characters that need escaping — confirm with callers.
    const recordingUrl = `file://${wavPath}`;
    const metadata = {
      capturedAt: artifact.capturedAt,
      channel: artifact.channel,
      durationMs: artifact.durationMs,
      format: artifact.format,
      recordingUrl,
      sessionId: artifact.sessionId
    };
    await writeFile(metadataPath, options.pretty ? JSON.stringify(metadata, null, 2) : JSON.stringify(metadata));
    return {
      ...artifact,
      recordingUrl
    };
  };
  // Loads one stored artifact; resolves to undefined when either file is missing.
  const readMetadata = async (sessionId, channel) => {
    const { metadataPath, wavPath } = pathsFor(sessionId, channel);
    try {
      const [metaText, wavBytes] = await Promise.all([
        readFile2(metadataPath, "utf8"),
        readFile2(wavPath)
      ]);
      const meta = JSON.parse(metaText);
      const fileBytes = new Uint8Array(wavBytes.buffer, wavBytes.byteOffset, wavBytes.byteLength);
      return {
        // Strip the WAV container so the bytes agree with `format` (raw pcm_s16le)
        // and with what `put` originally received.
        audioBytes: extractWavData(fileBytes),
        capturedAt: meta.capturedAt,
        channel: meta.channel,
        durationMs: meta.durationMs,
        format: meta.format,
        recordingUrl: meta.recordingUrl,
        sessionId: meta.sessionId
      };
    } catch (error) {
      if (error && error.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = (sessionId, channel) => readMetadata(sessionId, channel);
  const list = async (sessionId) => {
    const channels = ["assistant", "user"];
    const records = await Promise.all(channels.map((channel) => readMetadata(sessionId, channel)));
    return records.filter((record) => record !== undefined);
  };
  return { get, list, put };
};
37104
37326
  // src/modelAdapters.ts
37105
37327
  var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
37106
37328
  var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
@@ -45361,6 +45583,7 @@ export {
45361
45583
  evaluateVoiceBrowserCallProfileEvidence,
45362
45584
  evaluateVoiceAgentSquadContractEvidence,
45363
45585
  encodeTwilioMulawBase64,
45586
+ encodePcmAsWav,
45364
45587
  deliverVoiceTraceEventsToSinks,
45365
45588
  deliverVoiceObservabilityExport,
45366
45589
  deliverVoiceMonitorIssueNotifications,
@@ -45574,6 +45797,7 @@ export {
45574
45797
  createVoiceMemoryTraceSinkDeliveryStore,
45575
45798
  createVoiceMemoryTraceEventStore,
45576
45799
  createVoiceMemoryStore,
45800
+ createVoiceMemoryRecordingStore,
45577
45801
  createVoiceMemoryObservabilityExportDeliveryReceiptStore,
45578
45802
  createVoiceMemoryMonitorNotifierDeliveryReceiptStore,
45579
45803
  createVoiceMemoryMonitorIssueStore,
@@ -45620,6 +45844,7 @@ export {
45620
45844
  createVoiceFileScenarioFixtureStore,
45621
45845
  createVoiceFileRuntimeStorage,
45622
45846
  createVoiceFileReviewStore,
45847
+ createVoiceFileRecordingStore,
45623
45848
  createVoiceFileObservabilityExportDeliveryReceiptStore,
45624
45849
  createVoiceFileIntegrationEventStore,
45625
45850
  createVoiceFileIncidentBundleStore,
@@ -45714,6 +45939,7 @@ export {
45714
45939
  createAnthropicVoiceAssistantModel,
45715
45940
  createAIVoiceModel,
45716
45941
  conditionAudioChunk,
45942
+ computePcmDurationMs,
45717
45943
  completeVoiceOpsTask,
45718
45944
  compareVoiceEvalBaseline,
45719
45945
  claimVoiceOpsTask,
@@ -0,0 +1,21 @@
1
+ import type { AudioFormat } from "./types";
2
/** Side of the conversation that a recording belongs to. */
export type VoiceRecordingChannel = "assistant" | "user";
/** One channel's audio for one session, as handed to a recording store. */
export type VoiceRecordingArtifact = {
    /** Audio payload described by `format`. */
    audioBytes: Uint8Array;
    /** Timestamp attached to the artifact by its producer. */
    capturedAt: number;
    channel: VoiceRecordingChannel;
    /** Length of the audio in milliseconds. */
    durationMs: number;
    format: AudioFormat;
    sessionId: string;
};
/** An artifact as returned by a store, optionally carrying a URL for the persisted audio. */
export type StoredVoiceRecordingArtifact = VoiceRecordingArtifact & {
    recordingUrl?: string;
};
/** Persistence contract for session recordings. */
export type VoiceRecordingStore = {
    /** Resolves to the artifact for a session/channel, or `undefined` when there is none. */
    get: (sessionId: string, channel: VoiceRecordingChannel) => Promise<StoredVoiceRecordingArtifact | undefined>;
    /** Resolves to the artifacts stored for a session. */
    list: (sessionId: string) => Promise<StoredVoiceRecordingArtifact[]>;
    /** Persists an artifact and resolves to its stored form. */
    put: (artifact: VoiceRecordingArtifact) => Promise<StoredVoiceRecordingArtifact>;
};
/** Wraps raw `pcm_s16le` bytes in a WAV container. */
export declare const encodePcmAsWav: (pcm: Uint8Array, format: AudioFormat) => Uint8Array;
/** Duration in milliseconds of a raw `pcm_s16le` payload of the given byte length. */
export declare const computePcmDurationMs: (pcmByteLength: number, format: AudioFormat) => number;
/** Creates a recording store that keeps artifacts in memory. */
export declare const createVoiceMemoryRecordingStore: () => VoiceRecordingStore;
@@ -5340,6 +5340,74 @@ var resolveLogger = (logger) => ({
5340
5340
  // src/types.ts
5341
5341
  var ttsAdapterSessionCanCancel = (session) => typeof session.cancel === "function";
5342
5342
 
5343
+ // src/recordingStore.ts
5344
// Writes a 32-bit unsigned integer in little-endian order (the byte order used by RIFF/WAVE headers).
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
// Writes a 16-bit unsigned integer in little-endian order.
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
// Writes one byte per UTF-16 code unit of `value`; used for the four-character chunk tags.
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw signed 16-bit little-endian PCM in a WAV (RIFF/WAVE) container.
 *
 * The output is a 12-byte RIFF header, a 16-byte PCM "fmt " chunk and a "data"
 * chunk holding `pcm` verbatim. When `pcm` has an odd length a single zero pad
 * byte follows the data chunk, as RIFF requires chunks to be word-aligned; the
 * pad is counted in the RIFF size but not in the data chunk size.
 *
 * @param pcm Uint8Array of PCM bytes to embed.
 * @param format Audio format; must be container "raw" with encoding "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header.
 * @returns A new Uint8Array containing the complete WAV file.
 * @throws Error when the format is not raw pcm_s16le.
 * @throws RangeError when the channel count, sample rate or payload size cannot be
 *   represented in the fixed-width header fields (they would otherwise wrap silently).
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const blockAlign = channels * bitsPerSample / 8;
  const byteRate = sampleRate * blockAlign;
  // blockAlign is a 16-bit header field and byteRate a 32-bit one.
  if (!Number.isInteger(channels) || channels < 1 || blockAlign > 65535) {
    throw new RangeError(`encodePcmAsWav requires a positive integer channel count that fits the WAV header (got channels=${channels})`);
  }
  if (!Number.isInteger(sampleRate) || sampleRate < 1 || byteRate > 4294967295) {
    throw new RangeError(`encodePcmAsWav requires a positive integer sample rate that fits the WAV header (got sampleRateHz=${sampleRate})`);
  }
  const dataSize = pcm.byteLength;
  const padSize = dataSize % 2;
  const riffSize = 36 + dataSize + padSize;
  if (riffSize > 4294967295) {
    throw new RangeError(`encodePcmAsWav cannot store ${dataSize} bytes of audio in a single RIFF file`);
  }
  // 8 bytes ("RIFF" + size field) precede the region that riffSize counts.
  const buffer = new ArrayBuffer(8 + riffSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, riffSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // size of the PCM fmt chunk body
  writeUint16LE(view, 20, 1); // audio format tag 1 = integer PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize);
  const output = new Uint8Array(buffer);
  // The ArrayBuffer is zero-initialised, so the optional pad byte is already 0.
  output.set(pcm, 44);
  return output;
};
5384
/**
 * Computes the playback duration of a raw signed 16-bit PCM payload.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only container "raw" with encoding "pcm_s16le" is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 for
 *   unsupported formats and whenever the inputs do not yield a finite, positive
 *   duration (zero, missing, NaN or negative rate/channel count/length).
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per 16-bit sample, one sample per channel per tick.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  // A zero rate yields Infinity/NaN and malformed inputs yield NaN or a negative value;
  // collapse all of those to 0 so callers never persist a non-finite duration.
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
5394
/**
 * Creates a recording store that keeps artifacts in process memory.
 *
 * Artifacts are indexed by session id and then by channel, so a later `put` for
 * the same session/channel replaces the earlier one. The stored object keeps the
 * caller's `audioBytes` reference (no copy is made).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  // sessionId -> (channel -> stored artifact); avoids scanning other sessions in `list`.
  const recordingsBySession = new Map();
  return {
    // Resolves to undefined when nothing is stored for the session/channel.
    get: async (sessionId, channel) => recordingsBySession.get(sessionId)?.get(channel),
    // Resolves to the session's artifacts in first-insertion order, or [] when none exist.
    list: async (sessionId) => Array.from(recordingsBySession.get(sessionId)?.values() ?? []),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        // Percent-encode the session id so "/", "?", "#" or spaces cannot break the URL.
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      let sessionRecordings = recordingsBySession.get(artifact.sessionId);
      if (!sessionRecordings) {
        sessionRecordings = new Map();
        recordingsBySession.set(artifact.sessionId, sessionRecordings);
      }
      sessionRecordings.set(artifact.channel, stored);
      return stored;
    }
  };
};
5410
+
5343
5411
  // src/session.ts
5344
5412
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
5345
5413
  var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
@@ -5582,6 +5650,39 @@ var createVoiceSession = (options) => {
5582
5650
  const currentTurnAudio = [];
5583
5651
  let fallbackAttemptsForCurrentTurn = 0;
5584
5652
  let fallbackReplayAudioMsForCurrentTurn = 0;
5653
+ const recordingConfig = options.recording;
5654
+ const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
5655
+ const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
5656
+ const recordingBuffers = {
5657
+ assistant: [],
5658
+ user: []
5659
+ };
5660
+ const recordingByteTotals = {
5661
+ assistant: 0,
5662
+ user: 0
5663
+ };
5664
+ const recordingFormats = {};
5665
+ let recordingPersisted = false;
5666
/**
 * Buffers one chunk of session audio for the recording that is persisted later.
 *
 * The chunk is ignored when recording is not configured, has already been
 * persisted, the channel is not enabled, or the format is not raw pcm_s16le.
 * Each channel stops accumulating once it reaches the configured byte cap.
 *
 * @param channel "assistant" or "user".
 * @param bytes Uint8Array of audio bytes; the retained part is copied.
 * @param format Audio format of `bytes`; remembered as the channel's format.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  if (!recordingChannels.has(channel)) {
    return;
  }
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return;
  }
  // One frame is one 16-bit sample per channel. Round the cap down to a whole number
  // of frames so truncating the final chunk can never cut through a sample.
  const frameBytes = Number.isInteger(format.channels) && format.channels > 0 ? format.channels * 2 : 2;
  const channelCap = recordingMaxBytes - recordingMaxBytes % frameBytes;
  const remaining = channelCap - recordingByteTotals[channel];
  // Written as a negated comparison so a NaN cap is also treated as "no room left".
  if (!(remaining > 0)) {
    return;
  }
  const slice = bytes.byteLength <= remaining ? bytes : bytes.subarray(0, remaining);
  if (slice.byteLength === 0) {
    // An empty chunk carries no audio; buffering it could only produce an empty recording.
    return;
  }
  // Copy so later mutation or reuse of the caller's buffer cannot alter the recording.
  recordingBuffers[channel].push(new Uint8Array(slice));
  recordingByteTotals[channel] += slice.byteLength;
  recordingFormats[channel] = format;
};
5585
5686
  const pruneTurnAudio = () => {
5586
5687
  const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
5587
5688
  const cutoffAt = Date.now() - replayWindowMs;
@@ -5760,6 +5861,59 @@ var createVoiceSession = (options) => {
5760
5861
  });
5761
5862
  }
5762
5863
  };
5864
/**
 * Flushes the buffered audio of every enabled channel to the configured recording store.
 *
 * Runs at most once per session: the persisted flag is set before the first await,
 * so a second call made while this one is still in flight returns immediately.
 * For each channel with buffered audio the chunks are concatenated, handed to
 * `recordingConfig.store.put`, and a "recording.ready" trace event is appended.
 * A failure for one channel is logged as a warning and does not stop the other;
 * the channel's buffers are released either way.
 */
const persistRecordings = async () => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const bufferedChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (bufferedChunks.length === 0 || !channelFormat) {
      continue;
    }
    // Concatenate the buffered chunks into one contiguous payload.
    const totalBytes = recordingByteTotals[channel];
    const merged = new Uint8Array(totalBytes);
    bufferedChunks.reduce((writeAt, chunk) => {
      merged.set(chunk, writeAt);
      return writeAt + chunk.byteLength;
    }, 0);
    try {
      const stored = await recordingConfig.store.put({
        audioBytes: merged,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(totalBytes, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      const payload = {
        channel,
        durationMs: stored.durationMs,
        recordingUrl: stored.recordingUrl,
        sessionId: options.id,
        sizeBytes: merged.byteLength
      };
      await appendTrace({ payload, type: "recording.ready" });
    } catch (error) {
      // Best effort: a failed store or trace write must not break session teardown.
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
5763
5917
  const cancelActiveTTS = async (reason) => {
5764
5918
  const activeSession = ttsSession;
5765
5919
  const cancelledTurnId = activeTTSTurnId;
@@ -5783,6 +5937,7 @@ var createVoiceSession = (options) => {
5783
5937
  };
5784
5938
  const sendAssistantAudio = async (chunk, input) => {
5785
5939
  const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
5940
+ captureRecordingChunk("assistant", normalizedChunk, input.format);
5786
5941
  await send({
5787
5942
  chunkBase64: encodeBase64(normalizedChunk),
5788
5943
  format: input.format,
@@ -5879,6 +6034,7 @@ var createVoiceSession = (options) => {
5879
6034
  });
5880
6035
  await closeTTSSession("failed");
5881
6036
  await closeAdapter("failed");
6037
+ await persistRecordings();
5882
6038
  speechDetected = false;
5883
6039
  rewindFallbackTurnAudio();
5884
6040
  await options.route.onError?.({
@@ -5947,6 +6103,7 @@ var createVoiceSession = (options) => {
5947
6103
  });
5948
6104
  await closeTTSSession("complete");
5949
6105
  await closeAdapter("complete");
6106
+ await persistRecordings();
5950
6107
  speechDetected = false;
5951
6108
  rewindFallbackTurnAudio();
5952
6109
  if (disposition === "transferred" && input.target) {
@@ -7088,6 +7245,10 @@ var createVoiceSession = (options) => {
7088
7245
  if (shouldStoreAudio) {
7089
7246
  pushTurnAudio(conditionedAudio);
7090
7247
  }
7248
+ if (recordingConfig?.userInputFormat) {
7249
+ const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
7250
+ captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
7251
+ }
7091
7252
  if (audioLevel >= turnDetection.speechThreshold) {
7092
7253
  if (!speechDetected && activeTTSTurnId !== undefined) {
7093
7254
  cancelActiveTTS("barge-in");
@@ -7124,6 +7285,7 @@ var createVoiceSession = (options) => {
7124
7285
  clearSilenceTimer();
7125
7286
  await closeTTSSession(reason);
7126
7287
  await closeAdapter(reason);
7288
+ await persistRecordings();
7127
7289
  await Promise.resolve(socket.close(1000, reason));
7128
7290
  if (session.call?.endedAt && session.call.disposition === "closed") {
7129
7291
  await appendTrace({
package/dist/trace.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { S3Client, S3Options } from "bun";
2
- export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
2
+ export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
3
3
  export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
4
4
  at: number;
5
5
  id?: string;
package/dist/types.d.ts CHANGED
@@ -701,6 +701,12 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
701
701
  profileSwitchGuard?: VoicePluginProfileSwitchGuardConfig<TContext, TSession, TResult>;
702
702
  trace?: VoiceTraceEventStore;
703
703
  } & VoiceRouteConfig<TContext, TSession, TResult>;
704
/** Opt-in configuration for capturing a session's audio into a recording store. */
export type VoiceSessionRecordingConfig = {
    /** Channels to capture; when omitted the session captures both "assistant" and "user". */
    channels?: readonly ("assistant" | "user")[];
    /** Per-channel cap on buffered bytes; the session defaults to 50 * 1024 * 1024 when omitted. */
    maxBytesPerChannel?: number;
    /** Store that receives one artifact per captured channel. */
    store: import("./recordingStore").VoiceRecordingStore;
    /** Format of incoming user audio; user audio is only captured when this is provided. */
    userInputFormat?: AudioFormat;
};
704
710
  export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
705
711
  costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
706
712
  id: string;
@@ -715,6 +721,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
715
721
  sttFallback?: VoiceResolvedSTTFallbackConfig;
716
722
  store: VoiceSessionStore<TSession>;
717
723
  trace?: VoiceTraceEventStore;
724
+ recording?: VoiceSessionRecordingConfig;
718
725
  reconnect: Required<VoiceReconnectConfig>;
719
726
  phraseHints?: VoicePhraseHint[];
720
727
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.476",
3
+ "version": "0.0.22-beta.477",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",