@absolutejs/voice 0.0.22-beta.476 → 0.0.22-beta.477
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/fileStore.d.ts +2 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +226 -0
- package/dist/recordingStore.d.ts +21 -0
- package/dist/testing/index.js +162 -0
- package/dist/trace.d.ts +1 -1
- package/dist/types.d.ts +7 -0
- package/package.json +1 -1
package/dist/fileStore.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { type StoredVoiceTraceEvent, type VoiceTraceSinkDeliveryRecord, type Voi
|
|
|
7
7
|
import type { StoredVoiceIntegrationEvent, StoredVoiceExternalObjectMap, StoredVoiceOpsTask, VoiceExternalObjectMap, VoiceExternalObjectMapStore, VoiceIntegrationEvent, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from "./ops";
|
|
8
8
|
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from "./testing/review";
|
|
9
9
|
import type { VoiceSessionRecord, VoiceSessionStore } from "./types";
|
|
10
|
+
import type { VoiceRecordingStore } from "./recordingStore";
|
|
10
11
|
export type VoiceFileStoreOptions = {
|
|
11
12
|
directory: string;
|
|
12
13
|
pretty?: boolean;
|
|
@@ -50,3 +51,4 @@ export declare const createStoredVoiceIntegrationEvent: <TEvent extends Omit<Voi
|
|
|
50
51
|
export declare const createStoredVoiceExternalObjectMap: <TMapping extends Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt"> = Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt">>(mapping: TMapping & {
|
|
51
52
|
at?: number;
|
|
52
53
|
}) => VoiceExternalObjectMap;
|
|
54
|
+
/**
 * Creates a recording store backed by `options.directory`.
 * Each recording is written as a `.wav` file plus a `.json` metadata file named after the
 * URI-encoded session id and the channel.
 */
export declare const createVoiceFileRecordingStore: (options: VoiceFileStoreOptions) => VoiceRecordingStore;
|
package/dist/index.d.ts
CHANGED
|
@@ -87,7 +87,9 @@ export { createVoiceTurnQualityHTMLHandler, createVoiceTurnQualityJSONHandler, c
|
|
|
87
87
|
export { assertVoiceOutcomeContractEvidence, createVoiceOutcomeContractHTMLHandler, createVoiceOutcomeContractJSONHandler, createVoiceOutcomeContractRoutes, evaluateVoiceOutcomeContractEvidence, renderVoiceOutcomeContractHTML, runVoiceOutcomeContractSuite, } from "./outcomeContract";
|
|
88
88
|
export { applyVoiceTelephonyOutcome, assertVoiceTelephonyWebhookNormalizationEvidence, createMemoryVoiceTelephonyWebhookIdempotencyStore, createVoiceTelephonyOutcomePolicy, createVoiceTelephonyWebhookHandler, createVoiceTelephonyWebhookRoutes, evaluateVoiceTelephonyWebhookNormalizationEvidence, parseVoiceTelephonyWebhookEvent, resolveVoiceTelephonyOutcome, signVoiceTwilioWebhook, verifyVoiceTwilioWebhookSignature, voiceTelephonyOutcomeToRouteResult, } from "./telephonyOutcome";
|
|
89
89
|
export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence, createVoicePhoneAgent, evaluateVoicePhoneCallControlEvidence, evaluateVoicePhoneAssistantEvidence, } from "./phoneAgent";
|
|
90
|
-
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
|
|
90
|
+
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileRecordingStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
|
|
91
|
+
export { computePcmDurationMs, createVoiceMemoryRecordingStore, encodePcmAsWav, } from "./recordingStore";
|
|
92
|
+
export type { StoredVoiceRecordingArtifact, VoiceRecordingArtifact, VoiceRecordingChannel, VoiceRecordingStore, } from "./recordingStore";
|
|
91
93
|
export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace, } from "./assistantMemory";
|
|
92
94
|
export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter, } from "./modelAdapters";
|
|
93
95
|
export { createOpenAIVoiceTTS } from "./openaiTTS";
|
package/dist/index.js
CHANGED
|
@@ -3372,6 +3372,74 @@ var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
|
3372
3372
|
// src/types.ts
|
|
3373
3373
|
var ttsAdapterSessionCanCancel = (session) => typeof session.cancel === "function";
|
|
3374
3374
|
|
|
3375
|
+
// src/recordingStore.ts
|
|
3376
|
+
/** Writes `value` as an unsigned 32-bit little-endian integer at `offset`. */
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
/** Writes `value` as an unsigned 16-bit little-endian integer at `offset`. */
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
/** Writes one byte per character of `value` starting at `offset` (used for the 4-character chunk tags). */
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw signed 16-bit little-endian PCM in a 44-byte RIFF/WAVE header.
 *
 * @param pcm Raw PCM bytes; copied into the output, never mutated.
 * @param format Audio format; `container` must be "raw" and `encoding` "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header as given.
 * @returns A new Uint8Array holding the header, the PCM payload and, when the payload has an
 *   odd byte length, one trailing zero pad byte.
 * @throws Error when the format is not raw pcm_s16le.
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const headerSize = 44;
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const byteRate = sampleRate * channels * bitsPerSample / 8;
  const blockAlign = channels * bitsPerSample / 8;
  const dataSize = pcm.byteLength;
  // RIFF chunks are word-aligned: an odd-sized chunk is followed by one zero pad byte that is
  // counted in the enclosing RIFF size but NOT in the chunk's own size field.
  const padSize = dataSize % 2;
  const buffer = new ArrayBuffer(headerSize + dataSize + padSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, headerSize - 8 + dataSize + padSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // size of the fmt chunk body
  writeUint16LE(view, 20, 1); // audio format tag 1 = linear PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize);
  const output = new Uint8Array(buffer);
  output.set(pcm, headerSize); // pad byte (if any) stays zero from the ArrayBuffer initialisation
  return output;
};
|
|
3416
|
+
/**
 * Computes the playback duration of a raw pcm_s16le buffer.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only raw pcm_s16le is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 when the format
 *   is not raw pcm_s16le or when the byte rate derived from `sampleRateHz` and `channels` is
 *   missing, non-finite, zero or negative (so the result is never NaN or negative because of
 *   a malformed format).
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per 16-bit sample.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  return Math.round(pcmByteLength / bytesPerSecond * 1000);
};
|
|
3426
|
+
/**
 * Creates a recording store that keeps artifacts in a process-local Map.
 *
 * Records are keyed by session id and channel, so a later `put` for the same pair replaces
 * the earlier record. Stored artifacts get a synthetic `memory://recording/...` URL; the
 * session id is URI-encoded in that URL so ids containing "/", "?", "#" or spaces still yield
 * one unambiguous path segment (ids made of unreserved characters are unchanged).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  const records = new Map();
  const key = (sessionId, channel) => `${sessionId}::${channel}`;
  return {
    // Resolves to undefined when nothing was stored for the pair.
    get: async (sessionId, channel) => records.get(key(sessionId, channel)),
    // Returns every stored channel of the session, in insertion order.
    list: async (sessionId) => Array.from(records.values()).filter((record) => record.sessionId === sessionId),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      records.set(key(artifact.sessionId, artifact.channel), stored);
      return stored;
    }
  };
};
|
|
3442
|
+
|
|
3375
3443
|
// src/session.ts
|
|
3376
3444
|
var DEFAULT_RECONNECT_TIMEOUT = 30000;
|
|
3377
3445
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
@@ -3614,6 +3682,39 @@ var createVoiceSession = (options) => {
|
|
|
3614
3682
|
const currentTurnAudio = [];
|
|
3615
3683
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
3616
3684
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
3685
|
+
const recordingConfig = options.recording;
|
|
3686
|
+
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
|
|
3687
|
+
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
|
|
3688
|
+
const recordingBuffers = {
|
|
3689
|
+
assistant: [],
|
|
3690
|
+
user: []
|
|
3691
|
+
};
|
|
3692
|
+
const recordingByteTotals = {
|
|
3693
|
+
assistant: 0,
|
|
3694
|
+
user: 0
|
|
3695
|
+
};
|
|
3696
|
+
const recordingFormats = {};
|
|
3697
|
+
let recordingPersisted = false;
|
|
3698
|
+
/**
 * Buffers one audio chunk for the session recording.
 *
 * The chunk is ignored when recording is not configured, when recordings were already
 * persisted, when the channel is not selected, or when the format is not raw pcm_s16le.
 * Each channel is capped at `recordingMaxBytes`; the cap is rounded down to a whole PCM frame
 * (2 bytes per channel) so that truncating the final chunk never leaves half a sample at the
 * end of the recording.
 *
 * @param channel "assistant" or "user".
 * @param bytes Chunk bytes; copied before buffering, so the caller may reuse its buffer.
 * @param format Format of the chunk; the most recent one is remembered per channel.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  if (!recordingChannels.has(channel)) {
    return;
  }
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return;
  }
  // Fall back to mono framing when the channel count is missing or invalid.
  const channelCount = Number.isInteger(format.channels) && format.channels > 0 ? format.channels : 1;
  const frameBytes = channelCount * 2;
  const alignedCap = recordingMaxBytes - recordingMaxBytes % frameBytes;
  const currentTotal = recordingByteTotals[channel];
  if (currentTotal >= alignedCap) {
    return;
  }
  const remaining = alignedCap - currentTotal;
  const slice = bytes.byteLength <= remaining ? bytes : bytes.subarray(0, remaining);
  recordingBuffers[channel].push(new Uint8Array(slice));
  recordingByteTotals[channel] += slice.byteLength;
  recordingFormats[channel] = format;
};
|
|
3617
3718
|
const pruneTurnAudio = () => {
|
|
3618
3719
|
const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
|
|
3619
3720
|
const cutoffAt = Date.now() - replayWindowMs;
|
|
@@ -3792,6 +3893,59 @@ var createVoiceSession = (options) => {
|
|
|
3792
3893
|
});
|
|
3793
3894
|
}
|
|
3794
3895
|
};
|
|
3896
|
+
/**
 * Flushes the buffered recording of every selected channel to the configured store.
 *
 * Runs at most once per session: the persisted flag is raised before any I/O, which also
 * stops further chunk capture. For each channel with buffered audio the chunks are merged
 * into one byte array, handed to `store.put`, and a "recording.ready" trace event is appended.
 * A failure for one channel is logged as a warning and does not stop the other channel; the
 * channel's buffers are released either way.
 */
const persistRecordings = async () => {
  if (recordingPersisted || !recordingConfig) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const pendingChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (!channelFormat || pendingChunks.length === 0) {
      continue;
    }
    const byteCount = recordingByteTotals[channel];
    const combined = new Uint8Array(byteCount);
    // Lay the chunks end to end; the accumulator is the write cursor.
    pendingChunks.reduce((cursor, piece) => {
      combined.set(piece, cursor);
      return cursor + piece.byteLength;
    }, 0);
    try {
      const saved = await recordingConfig.store.put({
        audioBytes: combined,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(byteCount, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      await appendTrace({
        payload: {
          channel,
          durationMs: saved.durationMs,
          recordingUrl: saved.recordingUrl,
          sessionId: options.id,
          sizeBytes: combined.byteLength
        },
        type: "recording.ready"
      });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store accepted it.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
|
|
3795
3949
|
const cancelActiveTTS = async (reason) => {
|
|
3796
3950
|
const activeSession = ttsSession;
|
|
3797
3951
|
const cancelledTurnId = activeTTSTurnId;
|
|
@@ -3815,6 +3969,7 @@ var createVoiceSession = (options) => {
|
|
|
3815
3969
|
};
|
|
3816
3970
|
const sendAssistantAudio = async (chunk, input) => {
|
|
3817
3971
|
const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
|
|
3972
|
+
captureRecordingChunk("assistant", normalizedChunk, input.format);
|
|
3818
3973
|
await send({
|
|
3819
3974
|
chunkBase64: encodeBase64(normalizedChunk),
|
|
3820
3975
|
format: input.format,
|
|
@@ -3911,6 +4066,7 @@ var createVoiceSession = (options) => {
|
|
|
3911
4066
|
});
|
|
3912
4067
|
await closeTTSSession("failed");
|
|
3913
4068
|
await closeAdapter("failed");
|
|
4069
|
+
await persistRecordings();
|
|
3914
4070
|
speechDetected = false;
|
|
3915
4071
|
rewindFallbackTurnAudio();
|
|
3916
4072
|
await options.route.onError?.({
|
|
@@ -3979,6 +4135,7 @@ var createVoiceSession = (options) => {
|
|
|
3979
4135
|
});
|
|
3980
4136
|
await closeTTSSession("complete");
|
|
3981
4137
|
await closeAdapter("complete");
|
|
4138
|
+
await persistRecordings();
|
|
3982
4139
|
speechDetected = false;
|
|
3983
4140
|
rewindFallbackTurnAudio();
|
|
3984
4141
|
if (disposition === "transferred" && input.target) {
|
|
@@ -5120,6 +5277,10 @@ var createVoiceSession = (options) => {
|
|
|
5120
5277
|
if (shouldStoreAudio) {
|
|
5121
5278
|
pushTurnAudio(conditionedAudio);
|
|
5122
5279
|
}
|
|
5280
|
+
if (recordingConfig?.userInputFormat) {
|
|
5281
|
+
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
5282
|
+
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
5283
|
+
}
|
|
5123
5284
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
5124
5285
|
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
5125
5286
|
cancelActiveTTS("barge-in");
|
|
@@ -5156,6 +5317,7 @@ var createVoiceSession = (options) => {
|
|
|
5156
5317
|
clearSilenceTimer();
|
|
5157
5318
|
await closeTTSSession(reason);
|
|
5158
5319
|
await closeAdapter(reason);
|
|
5320
|
+
await persistRecordings();
|
|
5159
5321
|
await Promise.resolve(socket.close(1000, reason));
|
|
5160
5322
|
if (session.call?.endedAt && session.call.disposition === "closed") {
|
|
5161
5323
|
await appendTrace({
|
|
@@ -37101,6 +37263,66 @@ var createStoredVoiceExternalObjectMap = (mapping) => createVoiceExternalObjectM
|
|
|
37101
37263
|
sourceId: mapping.sourceId,
|
|
37102
37264
|
sourceType: mapping.sourceType
|
|
37103
37265
|
});
|
|
37266
|
+
var recordingFileName = (sessionId, channel) => `${encodeURIComponent(sessionId)}_${channel}.wav`;
|
|
37267
|
+
var recordingMetadataFileName = (sessionId, channel) => `${encodeURIComponent(sessionId)}_${channel}.json`;
|
|
37268
|
+
var createVoiceFileRecordingStore = (options) => {
|
|
37269
|
+
const ensureDir = async () => {
|
|
37270
|
+
await mkdir4(options.directory, { recursive: true });
|
|
37271
|
+
};
|
|
37272
|
+
const put = async (artifact) => {
|
|
37273
|
+
await ensureDir();
|
|
37274
|
+
const wavPath = join3(options.directory, recordingFileName(artifact.sessionId, artifact.channel));
|
|
37275
|
+
const metadataPath = join3(options.directory, recordingMetadataFileName(artifact.sessionId, artifact.channel));
|
|
37276
|
+
const wav = encodePcmAsWav(artifact.audioBytes, artifact.format);
|
|
37277
|
+
await writeFile(wavPath, wav);
|
|
37278
|
+
const recordingUrl = `file://${wavPath}`;
|
|
37279
|
+
const metadata = {
|
|
37280
|
+
capturedAt: artifact.capturedAt,
|
|
37281
|
+
channel: artifact.channel,
|
|
37282
|
+
durationMs: artifact.durationMs,
|
|
37283
|
+
format: artifact.format,
|
|
37284
|
+
recordingUrl,
|
|
37285
|
+
sessionId: artifact.sessionId
|
|
37286
|
+
};
|
|
37287
|
+
await writeFile(metadataPath, options.pretty ? JSON.stringify(metadata, null, 2) : JSON.stringify(metadata));
|
|
37288
|
+
return {
|
|
37289
|
+
...artifact,
|
|
37290
|
+
recordingUrl
|
|
37291
|
+
};
|
|
37292
|
+
};
|
|
37293
|
+
const readMetadata = async (sessionId, channel) => {
|
|
37294
|
+
const metadataPath = join3(options.directory, recordingMetadataFileName(sessionId, channel));
|
|
37295
|
+
const wavPath = join3(options.directory, recordingFileName(sessionId, channel));
|
|
37296
|
+
try {
|
|
37297
|
+
const [metaText, wavBytes] = await Promise.all([
|
|
37298
|
+
readFile2(metadataPath, "utf8"),
|
|
37299
|
+
readFile2(wavPath)
|
|
37300
|
+
]);
|
|
37301
|
+
const meta = JSON.parse(metaText);
|
|
37302
|
+
return {
|
|
37303
|
+
audioBytes: new Uint8Array(wavBytes.buffer, wavBytes.byteOffset, wavBytes.byteLength),
|
|
37304
|
+
capturedAt: meta.capturedAt,
|
|
37305
|
+
channel: meta.channel,
|
|
37306
|
+
durationMs: meta.durationMs,
|
|
37307
|
+
format: meta.format,
|
|
37308
|
+
recordingUrl: meta.recordingUrl,
|
|
37309
|
+
sessionId: meta.sessionId
|
|
37310
|
+
};
|
|
37311
|
+
} catch (error) {
|
|
37312
|
+
if (error.code === "ENOENT") {
|
|
37313
|
+
return;
|
|
37314
|
+
}
|
|
37315
|
+
throw error;
|
|
37316
|
+
}
|
|
37317
|
+
};
|
|
37318
|
+
const get = (sessionId, channel) => readMetadata(sessionId, channel);
|
|
37319
|
+
const list = async (sessionId) => {
|
|
37320
|
+
const channels = ["assistant", "user"];
|
|
37321
|
+
const records = await Promise.all(channels.map((channel) => readMetadata(sessionId, channel)));
|
|
37322
|
+
return records.filter((record) => record !== undefined);
|
|
37323
|
+
};
|
|
37324
|
+
return { get, list, put };
|
|
37325
|
+
};
|
|
37104
37326
|
// src/modelAdapters.ts
|
|
37105
37327
|
var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
|
|
37106
37328
|
var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
|
|
@@ -45361,6 +45583,7 @@ export {
|
|
|
45361
45583
|
evaluateVoiceBrowserCallProfileEvidence,
|
|
45362
45584
|
evaluateVoiceAgentSquadContractEvidence,
|
|
45363
45585
|
encodeTwilioMulawBase64,
|
|
45586
|
+
encodePcmAsWav,
|
|
45364
45587
|
deliverVoiceTraceEventsToSinks,
|
|
45365
45588
|
deliverVoiceObservabilityExport,
|
|
45366
45589
|
deliverVoiceMonitorIssueNotifications,
|
|
@@ -45574,6 +45797,7 @@ export {
|
|
|
45574
45797
|
createVoiceMemoryTraceSinkDeliveryStore,
|
|
45575
45798
|
createVoiceMemoryTraceEventStore,
|
|
45576
45799
|
createVoiceMemoryStore,
|
|
45800
|
+
createVoiceMemoryRecordingStore,
|
|
45577
45801
|
createVoiceMemoryObservabilityExportDeliveryReceiptStore,
|
|
45578
45802
|
createVoiceMemoryMonitorNotifierDeliveryReceiptStore,
|
|
45579
45803
|
createVoiceMemoryMonitorIssueStore,
|
|
@@ -45620,6 +45844,7 @@ export {
|
|
|
45620
45844
|
createVoiceFileScenarioFixtureStore,
|
|
45621
45845
|
createVoiceFileRuntimeStorage,
|
|
45622
45846
|
createVoiceFileReviewStore,
|
|
45847
|
+
createVoiceFileRecordingStore,
|
|
45623
45848
|
createVoiceFileObservabilityExportDeliveryReceiptStore,
|
|
45624
45849
|
createVoiceFileIntegrationEventStore,
|
|
45625
45850
|
createVoiceFileIncidentBundleStore,
|
|
@@ -45714,6 +45939,7 @@ export {
|
|
|
45714
45939
|
createAnthropicVoiceAssistantModel,
|
|
45715
45940
|
createAIVoiceModel,
|
|
45716
45941
|
conditionAudioChunk,
|
|
45942
|
+
computePcmDurationMs,
|
|
45717
45943
|
completeVoiceOpsTask,
|
|
45718
45944
|
compareVoiceEvalBaseline,
|
|
45719
45945
|
claimVoiceOpsTask,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { AudioFormat } from "./types";
|
|
2
|
+
/** Names the two sides of a session that can be recorded. */
export type VoiceRecordingChannel = "assistant" | "user";
|
|
3
|
+
/** One channel's audio for one session, in the shape accepted by `VoiceRecordingStore.put`. */
export type VoiceRecordingArtifact = {
|
|
4
|
+
/** Audio payload described by `format`. */
audioBytes: Uint8Array;
|
|
5
|
+
/** Numeric timestamp; the session runtime fills it with `Date.now()` when it persists the recording. */
capturedAt: number;
|
|
6
|
+
channel: VoiceRecordingChannel;
|
|
7
|
+
/** Length of the audio in milliseconds. */
durationMs: number;
|
|
8
|
+
format: AudioFormat;
|
|
9
|
+
sessionId: string;
|
|
10
|
+
};
|
|
11
|
+
/** A recording artifact as returned by a store, optionally tagged with where it was stored. */
export type StoredVoiceRecordingArtifact = VoiceRecordingArtifact & {
|
|
12
|
+
/** Store-assigned location; the bundled stores produce `memory://` and `file://` strings. */
recordingUrl?: string;
|
|
13
|
+
};
|
|
14
|
+
/** Persistence contract for session recordings, addressed by session id and channel. */
export type VoiceRecordingStore = {
|
|
15
|
+
/** Resolves to the recording for the pair, or `undefined` when there is none. */
get: (sessionId: string, channel: VoiceRecordingChannel) => Promise<StoredVoiceRecordingArtifact | undefined>;
|
|
16
|
+
/** Resolves to the recordings stored for the session. */
list: (sessionId: string) => Promise<StoredVoiceRecordingArtifact[]>;
|
|
17
|
+
/** Stores the artifact and resolves to its stored form. */
put: (artifact: VoiceRecordingArtifact) => Promise<StoredVoiceRecordingArtifact>;
|
|
18
|
+
};
|
|
19
|
+
/**
 * Prepends a 44-byte RIFF/WAVE header to raw pcm_s16le bytes and returns the result as a new array.
 * Throws when `format` is not container "raw" with encoding "pcm_s16le".
 */
export declare const encodePcmAsWav: (pcm: Uint8Array, format: AudioFormat) => Uint8Array;
|
|
20
|
+
/**
 * Returns the duration in milliseconds (rounded) of `pcmByteLength` bytes of raw pcm_s16le audio.
 * Returns 0 for any other format or when the derived byte rate is zero.
 */
export declare const computePcmDurationMs: (pcmByteLength: number, format: AudioFormat) => number;
|
|
21
|
+
/** Creates a recording store that keeps artifacts in an in-process Map keyed by session id and channel. */
export declare const createVoiceMemoryRecordingStore: () => VoiceRecordingStore;
|
package/dist/testing/index.js
CHANGED
|
@@ -5340,6 +5340,74 @@ var resolveLogger = (logger) => ({
|
|
|
5340
5340
|
// src/types.ts
|
|
5341
5341
|
var ttsAdapterSessionCanCancel = (session) => typeof session.cancel === "function";
|
|
5342
5342
|
|
|
5343
|
+
// src/recordingStore.ts
|
|
5344
|
+
/** Writes `value` as an unsigned 32-bit little-endian integer at `offset`. */
var writeUint32LE = (view, offset, value) => {
  view.setUint32(offset, value, true);
};
/** Writes `value` as an unsigned 16-bit little-endian integer at `offset`. */
var writeUint16LE = (view, offset, value) => {
  view.setUint16(offset, value, true);
};
/** Writes one byte per character of `value` starting at `offset` (used for the 4-character chunk tags). */
var writeAscii = (view, offset, value) => {
  for (let index = 0; index < value.length; index += 1) {
    view.setUint8(offset + index, value.charCodeAt(index));
  }
};
/**
 * Wraps raw signed 16-bit little-endian PCM in a 44-byte RIFF/WAVE header.
 *
 * @param pcm Raw PCM bytes; copied into the output, never mutated.
 * @param format Audio format; `container` must be "raw" and `encoding` "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header as given.
 * @returns A new Uint8Array holding the header, the PCM payload and, when the payload has an
 *   odd byte length, one trailing zero pad byte.
 * @throws Error when the format is not raw pcm_s16le.
 */
var encodePcmAsWav = (pcm, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const headerSize = 44;
  const channels = format.channels;
  const sampleRate = format.sampleRateHz;
  const bitsPerSample = 16;
  const byteRate = sampleRate * channels * bitsPerSample / 8;
  const blockAlign = channels * bitsPerSample / 8;
  const dataSize = pcm.byteLength;
  // RIFF chunks are word-aligned: an odd-sized chunk is followed by one zero pad byte that is
  // counted in the enclosing RIFF size but NOT in the chunk's own size field.
  const padSize = dataSize % 2;
  const buffer = new ArrayBuffer(headerSize + dataSize + padSize);
  const view = new DataView(buffer);
  writeAscii(view, 0, "RIFF");
  writeUint32LE(view, 4, headerSize - 8 + dataSize + padSize);
  writeAscii(view, 8, "WAVE");
  writeAscii(view, 12, "fmt ");
  writeUint32LE(view, 16, 16); // size of the fmt chunk body
  writeUint16LE(view, 20, 1); // audio format tag 1 = linear PCM
  writeUint16LE(view, 22, channels);
  writeUint32LE(view, 24, sampleRate);
  writeUint32LE(view, 28, byteRate);
  writeUint16LE(view, 32, blockAlign);
  writeUint16LE(view, 34, bitsPerSample);
  writeAscii(view, 36, "data");
  writeUint32LE(view, 40, dataSize);
  const output = new Uint8Array(buffer);
  output.set(pcm, headerSize); // pad byte (if any) stays zero from the ArrayBuffer initialisation
  return output;
};
|
|
5384
|
+
/**
 * Computes the playback duration of a raw pcm_s16le buffer.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only raw pcm_s16le is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 when the format
 *   is not raw pcm_s16le or when the byte rate derived from `sampleRateHz` and `channels` is
 *   missing, non-finite, zero or negative (so the result is never NaN or negative because of
 *   a malformed format).
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per 16-bit sample.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  return Math.round(pcmByteLength / bytesPerSecond * 1000);
};
|
|
5394
|
+
/**
 * Creates a recording store that keeps artifacts in a process-local Map.
 *
 * Records are keyed by session id and channel, so a later `put` for the same pair replaces
 * the earlier record. Stored artifacts get a synthetic `memory://recording/...` URL; the
 * session id is URI-encoded in that URL so ids containing "/", "?", "#" or spaces still yield
 * one unambiguous path segment (ids made of unreserved characters are unchanged).
 *
 * @returns An object with async `get`, `list` and `put` methods.
 */
var createVoiceMemoryRecordingStore = () => {
  const records = new Map();
  const key = (sessionId, channel) => `${sessionId}::${channel}`;
  return {
    // Resolves to undefined when nothing was stored for the pair.
    get: async (sessionId, channel) => records.get(key(sessionId, channel)),
    // Returns every stored channel of the session, in insertion order.
    list: async (sessionId) => Array.from(records.values()).filter((record) => record.sessionId === sessionId),
    put: async (artifact) => {
      const stored = {
        ...artifact,
        recordingUrl: `memory://recording/${encodeURIComponent(artifact.sessionId)}/${artifact.channel}.wav`
      };
      records.set(key(artifact.sessionId, artifact.channel), stored);
      return stored;
    }
  };
};
|
|
5410
|
+
|
|
5343
5411
|
// src/session.ts
|
|
5344
5412
|
var DEFAULT_RECONNECT_TIMEOUT = 30000;
|
|
5345
5413
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
|
|
@@ -5582,6 +5650,39 @@ var createVoiceSession = (options) => {
|
|
|
5582
5650
|
const currentTurnAudio = [];
|
|
5583
5651
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
5584
5652
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
5653
|
+
const recordingConfig = options.recording;
|
|
5654
|
+
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
|
|
5655
|
+
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
|
|
5656
|
+
const recordingBuffers = {
|
|
5657
|
+
assistant: [],
|
|
5658
|
+
user: []
|
|
5659
|
+
};
|
|
5660
|
+
const recordingByteTotals = {
|
|
5661
|
+
assistant: 0,
|
|
5662
|
+
user: 0
|
|
5663
|
+
};
|
|
5664
|
+
const recordingFormats = {};
|
|
5665
|
+
let recordingPersisted = false;
|
|
5666
|
+
/**
 * Buffers one audio chunk for the session recording.
 *
 * The chunk is ignored when recording is not configured, when recordings were already
 * persisted, when the channel is not selected, or when the format is not raw pcm_s16le.
 * Each channel is capped at `recordingMaxBytes`; the cap is rounded down to a whole PCM frame
 * (2 bytes per channel) so that truncating the final chunk never leaves half a sample at the
 * end of the recording.
 *
 * @param channel "assistant" or "user".
 * @param bytes Chunk bytes; copied before buffering, so the caller may reuse its buffer.
 * @param format Format of the chunk; the most recent one is remembered per channel.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  if (!recordingChannels.has(channel)) {
    return;
  }
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return;
  }
  // Fall back to mono framing when the channel count is missing or invalid.
  const channelCount = Number.isInteger(format.channels) && format.channels > 0 ? format.channels : 1;
  const frameBytes = channelCount * 2;
  const alignedCap = recordingMaxBytes - recordingMaxBytes % frameBytes;
  const currentTotal = recordingByteTotals[channel];
  if (currentTotal >= alignedCap) {
    return;
  }
  const remaining = alignedCap - currentTotal;
  const slice = bytes.byteLength <= remaining ? bytes : bytes.subarray(0, remaining);
  recordingBuffers[channel].push(new Uint8Array(slice));
  recordingByteTotals[channel] += slice.byteLength;
  recordingFormats[channel] = format;
};
|
|
5585
5686
|
const pruneTurnAudio = () => {
|
|
5586
5687
|
const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
|
|
5587
5688
|
const cutoffAt = Date.now() - replayWindowMs;
|
|
@@ -5760,6 +5861,59 @@ var createVoiceSession = (options) => {
|
|
|
5760
5861
|
});
|
|
5761
5862
|
}
|
|
5762
5863
|
};
|
|
5864
|
+
/**
 * Flushes the buffered recording of every selected channel to the configured store.
 *
 * Runs at most once per session: the persisted flag is raised before any I/O, which also
 * stops further chunk capture. For each channel with buffered audio the chunks are merged
 * into one byte array, handed to `store.put`, and a "recording.ready" trace event is appended.
 * A failure for one channel is logged as a warning and does not stop the other channel; the
 * channel's buffers are released either way.
 */
const persistRecordings = async () => {
  if (recordingPersisted || !recordingConfig) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const pendingChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    if (!channelFormat || pendingChunks.length === 0) {
      continue;
    }
    const byteCount = recordingByteTotals[channel];
    const combined = new Uint8Array(byteCount);
    // Lay the chunks end to end; the accumulator is the write cursor.
    pendingChunks.reduce((cursor, piece) => {
      combined.set(piece, cursor);
      return cursor + piece.byteLength;
    }, 0);
    try {
      const saved = await recordingConfig.store.put({
        audioBytes: combined,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(byteCount, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      await appendTrace({
        payload: {
          channel,
          durationMs: saved.durationMs,
          recordingUrl: saved.recordingUrl,
          sessionId: options.id,
          sizeBytes: combined.byteLength
        },
        type: "recording.ready"
      });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store accepted it.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
|
|
5763
5917
|
const cancelActiveTTS = async (reason) => {
|
|
5764
5918
|
const activeSession = ttsSession;
|
|
5765
5919
|
const cancelledTurnId = activeTTSTurnId;
|
|
@@ -5783,6 +5937,7 @@ var createVoiceSession = (options) => {
|
|
|
5783
5937
|
};
|
|
5784
5938
|
const sendAssistantAudio = async (chunk, input) => {
|
|
5785
5939
|
const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
|
|
5940
|
+
captureRecordingChunk("assistant", normalizedChunk, input.format);
|
|
5786
5941
|
await send({
|
|
5787
5942
|
chunkBase64: encodeBase64(normalizedChunk),
|
|
5788
5943
|
format: input.format,
|
|
@@ -5879,6 +6034,7 @@ var createVoiceSession = (options) => {
|
|
|
5879
6034
|
});
|
|
5880
6035
|
await closeTTSSession("failed");
|
|
5881
6036
|
await closeAdapter("failed");
|
|
6037
|
+
await persistRecordings();
|
|
5882
6038
|
speechDetected = false;
|
|
5883
6039
|
rewindFallbackTurnAudio();
|
|
5884
6040
|
await options.route.onError?.({
|
|
@@ -5947,6 +6103,7 @@ var createVoiceSession = (options) => {
|
|
|
5947
6103
|
});
|
|
5948
6104
|
await closeTTSSession("complete");
|
|
5949
6105
|
await closeAdapter("complete");
|
|
6106
|
+
await persistRecordings();
|
|
5950
6107
|
speechDetected = false;
|
|
5951
6108
|
rewindFallbackTurnAudio();
|
|
5952
6109
|
if (disposition === "transferred" && input.target) {
|
|
@@ -7088,6 +7245,10 @@ var createVoiceSession = (options) => {
|
|
|
7088
7245
|
if (shouldStoreAudio) {
|
|
7089
7246
|
pushTurnAudio(conditionedAudio);
|
|
7090
7247
|
}
|
|
7248
|
+
if (recordingConfig?.userInputFormat) {
|
|
7249
|
+
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
7250
|
+
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
7251
|
+
}
|
|
7091
7252
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
7092
7253
|
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
7093
7254
|
cancelActiveTTS("barge-in");
|
|
@@ -7124,6 +7285,7 @@ var createVoiceSession = (options) => {
|
|
|
7124
7285
|
clearSilenceTimer();
|
|
7125
7286
|
await closeTTSSession(reason);
|
|
7126
7287
|
await closeAdapter(reason);
|
|
7288
|
+
await persistRecordings();
|
|
7127
7289
|
await Promise.resolve(socket.close(1000, reason));
|
|
7128
7290
|
if (session.call?.endedAt && session.call.disposition === "closed") {
|
|
7129
7291
|
await appendTrace({
|
package/dist/trace.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { S3Client, S3Options } from "bun";
|
|
2
|
-
export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
|
|
2
|
+
export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
|
|
3
3
|
export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
|
|
4
4
|
at: number;
|
|
5
5
|
id?: string;
|
package/dist/types.d.ts
CHANGED
|
@@ -701,6 +701,12 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
701
701
|
profileSwitchGuard?: VoicePluginProfileSwitchGuardConfig<TContext, TSession, TResult>;
|
|
702
702
|
trace?: VoiceTraceEventStore;
|
|
703
703
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
704
|
+
/** Opt-in configuration for capturing a session's audio into a recording store. */
export type VoiceSessionRecordingConfig = {
|
|
705
|
+
/** Channels to capture; the session runtime records both "assistant" and "user" when omitted. */
channels?: ReadonlyArray<"assistant" | "user">;
|
|
706
|
+
/** Byte cap per channel; the session runtime uses 50 * 1024 * 1024 when omitted and drops audio beyond the cap. */
maxBytesPerChannel?: number;
|
|
707
|
+
/** Store that receives one artifact per captured channel when the session persists its recordings. */
store: import("./recordingStore").VoiceRecordingStore;
|
|
708
|
+
/** Format of incoming user audio; user audio is only captured when this is set, and only raw pcm_s16le is buffered. */
userInputFormat?: AudioFormat;
|
|
709
|
+
};
|
|
704
710
|
export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
705
711
|
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
706
712
|
id: string;
|
|
@@ -715,6 +721,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
715
721
|
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
716
722
|
store: VoiceSessionStore<TSession>;
|
|
717
723
|
trace?: VoiceTraceEventStore;
|
|
724
|
+
recording?: VoiceSessionRecordingConfig;
|
|
718
725
|
reconnect: Required<VoiceReconnectConfig>;
|
|
719
726
|
phraseHints?: VoicePhraseHint[];
|
|
720
727
|
sessionMetadata?: Record<string, unknown>;
|