@absolutejs/voice 0.0.22-beta.475 → 0.0.22-beta.477
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/aiVoiceModel.d.ts +10 -0
- package/dist/fileStore.d.ts +2 -0
- package/dist/index.d.ts +5 -1
- package/dist/index.js +325 -0
- package/dist/recordingStore.d.ts +21 -0
- package/dist/testing/index.js +193 -3
- package/dist/trace.d.ts +1 -1
- package/dist/types.d.ts +11 -0
- package/package.json +7 -2
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { AIProviderConfig } from "@absolutejs/ai";
|
|
2
|
+
import type { VoiceAgentModel } from "./agent";
|
|
3
|
+
import type { VoiceSessionRecord } from "./types";
|
|
4
|
+
/**
 * Options accepted by `createAIVoiceModel`.
 *
 * - `model`: forwarded unchanged as the `model` field of every `provider.stream(...)` request.
 * - `provider`: object whose `stream(...)` method is invoked once per `generate` call.
 * - `signal`: optional abort signal forwarded unchanged as the `signal` field of the request.
 * - `systemPrompt`: fallback system prompt, used only when the generate input's `system` is null/undefined.
 */
export type CreateAIVoiceModelOptions = {
|
|
5
|
+
model: string;
|
|
6
|
+
provider: AIProviderConfig;
|
|
7
|
+
signal?: AbortSignal;
|
|
8
|
+
systemPrompt?: string;
|
|
9
|
+
};
|
|
10
|
+
/**
 * Builds a `VoiceAgentModel` whose `generate` streams from `options.provider`,
 * concatenating `text` chunks into `assistantText` and collecting `tool_use`
 * chunks as `toolCalls` (the `toolCalls` key is only present when at least one was received).
 */
export declare const createAIVoiceModel: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: CreateAIVoiceModelOptions) => VoiceAgentModel<TContext, TSession, TResult>;
|
package/dist/fileStore.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ import { type StoredVoiceTraceEvent, type VoiceTraceSinkDeliveryRecord, type Voi
|
|
|
7
7
|
import type { StoredVoiceIntegrationEvent, StoredVoiceExternalObjectMap, StoredVoiceOpsTask, VoiceExternalObjectMap, VoiceExternalObjectMapStore, VoiceIntegrationEvent, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from "./ops";
|
|
8
8
|
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from "./testing/review";
|
|
9
9
|
import type { VoiceSessionRecord, VoiceSessionStore } from "./types";
|
|
10
|
+
import type { VoiceRecordingStore } from "./recordingStore";
|
|
10
11
|
export type VoiceFileStoreOptions = {
|
|
11
12
|
directory: string;
|
|
12
13
|
pretty?: boolean;
|
|
@@ -50,3 +51,4 @@ export declare const createStoredVoiceIntegrationEvent: <TEvent extends Omit<Voi
|
|
|
50
51
|
export declare const createStoredVoiceExternalObjectMap: <TMapping extends Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt"> = Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt">>(mapping: TMapping & {
|
|
51
52
|
at?: number;
|
|
52
53
|
}) => VoiceExternalObjectMap;
|
|
54
|
+
/**
 * Creates a `VoiceRecordingStore` backed by the filesystem: each recording is written
 * under `options.directory` as a `.wav` file plus a `.json` metadata file, both named
 * from the URI-encoded session id and the channel.
 */
export declare const createVoiceFileRecordingStore: (options: VoiceFileStoreOptions) => VoiceRecordingStore;
|
package/dist/index.d.ts
CHANGED
|
@@ -69,6 +69,8 @@ export { assertVoiceSimulationSuiteEvidence, createVoiceSimulationSuiteRoutes, e
|
|
|
69
69
|
export { createVoiceWorkflowContract, createVoiceWorkflowContractHandler, createVoiceWorkflowContractPreset, createVoiceWorkflowScenario, recordVoiceWorkflowContractTrace, validateVoiceWorkflowRouteResult, } from "./workflowContract";
|
|
70
70
|
export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, createVoiceSessionReplayJSONHandler, createVoiceSessionReplayRoutes, createVoiceSessionsHTMLHandler, createVoiceSessionsJSONHandler, renderVoiceSessionsHTML, summarizeVoiceProviderFallbackRecovery, summarizeVoiceSessions, summarizeVoiceSessionReplay, } from "./sessionReplay";
|
|
71
71
|
export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool, } from "./agent";
|
|
72
|
+
export { createAIVoiceModel } from "./aiVoiceModel";
|
|
73
|
+
export type { CreateAIVoiceModelOptions } from "./aiVoiceModel";
|
|
72
74
|
export { createVoiceRAGTool } from "./ragTool";
|
|
73
75
|
export type { VoiceRAGCollectionLike, VoiceRAGQueryResult, VoiceRAGSearchInput, VoiceRAGToolArgs, VoiceRAGToolOptions, VoiceRAGToolResult, } from "./ragTool";
|
|
74
76
|
export { createVoiceApiRequestTool, createVoiceDTMFTool, createVoiceEndCallTool, createVoiceTransferCallTool, createVoiceVoicemailDetectionTool, } from "./agentTools";
|
|
@@ -85,7 +87,9 @@ export { createVoiceTurnQualityHTMLHandler, createVoiceTurnQualityJSONHandler, c
|
|
|
85
87
|
export { assertVoiceOutcomeContractEvidence, createVoiceOutcomeContractHTMLHandler, createVoiceOutcomeContractJSONHandler, createVoiceOutcomeContractRoutes, evaluateVoiceOutcomeContractEvidence, renderVoiceOutcomeContractHTML, runVoiceOutcomeContractSuite, } from "./outcomeContract";
|
|
86
88
|
export { applyVoiceTelephonyOutcome, assertVoiceTelephonyWebhookNormalizationEvidence, createMemoryVoiceTelephonyWebhookIdempotencyStore, createVoiceTelephonyOutcomePolicy, createVoiceTelephonyWebhookHandler, createVoiceTelephonyWebhookRoutes, evaluateVoiceTelephonyWebhookNormalizationEvidence, parseVoiceTelephonyWebhookEvent, resolveVoiceTelephonyOutcome, signVoiceTwilioWebhook, verifyVoiceTwilioWebhookSignature, voiceTelephonyOutcomeToRouteResult, } from "./telephonyOutcome";
|
|
87
89
|
export { assertVoicePhoneCallControlEvidence, assertVoicePhoneAssistantEvidence, createVoicePhoneAgent, evaluateVoicePhoneCallControlEvidence, evaluateVoicePhoneAssistantEvidence, } from "./phoneAgent";
|
|
88
|
-
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
|
|
90
|
+
export { createStoredVoiceCallReviewArtifact, createStoredVoiceExternalObjectMap, createStoredVoiceIntegrationEvent, createStoredVoiceOpsTask, createVoiceFileIncidentBundleStore, createVoiceFileExternalObjectMapStore, createVoiceFileAssistantMemoryStore, createVoiceFileAuditEventStore, createVoiceFileAuditSinkDeliveryStore, createVoiceFileCampaignStore, createVoiceFileIntegrationEventStore, createVoiceFileRecordingStore, createVoiceFileReviewStore, createVoiceFileRuntimeStorage, createVoiceFileSessionStore, createVoiceFileTaskStore, createVoiceFileTraceSinkDeliveryStore, createVoiceFileTraceEventStore, } from "./fileStore";
|
|
91
|
+
export { computePcmDurationMs, createVoiceMemoryRecordingStore, encodePcmAsWav, } from "./recordingStore";
|
|
92
|
+
export type { StoredVoiceRecordingArtifact, VoiceRecordingArtifact, VoiceRecordingChannel, VoiceRecordingStore, } from "./recordingStore";
|
|
89
93
|
export { createVoiceAssistantMemoryHandle, createVoiceAssistantMemoryRecord, createVoiceMemoryAssistantMemoryStore, resolveVoiceAssistantMemoryNamespace, } from "./assistantMemory";
|
|
90
94
|
export { createAnthropicVoiceAssistantModel, createGeminiVoiceAssistantModel, createJSONVoiceAssistantModel, createOpenAIVoiceAssistantModel, createVoiceProviderOrchestrationProfile, resolveVoiceProviderRoutingPolicyPreset, createVoiceProviderRouter, } from "./modelAdapters";
|
|
91
95
|
export { createOpenAIVoiceTTS } from "./openaiTTS";
|
package/dist/index.js
CHANGED
|
@@ -3369,6 +3369,77 @@ var buildTurnText = (transcripts, partialText, options = {}) => {
|
|
|
3369
3369
|
return selectPreferredTranscriptText(finalText, nextPartial);
|
|
3370
3370
|
};
|
|
3371
3371
|
|
|
3372
|
+
// src/types.ts
|
|
3373
|
+
/**
 * Reports whether a TTS adapter session exposes a callable `cancel` member.
 *
 * @param session TTS adapter session object to inspect (must not be null/undefined).
 * @returns `true` when `session.cancel` is a function, `false` otherwise.
 */
var ttsAdapterSessionCanCancel = (session) => {
  const cancelMember = session.cancel;
  return typeof cancelMember === "function";
};
|
|
3374
|
+
|
|
3375
|
+
// src/recordingStore.ts
|
|
3376
|
+
/**
 * Stores an unsigned 32-bit integer into a DataView using little-endian byte order.
 *
 * @param view DataView to write into.
 * @param offset Byte offset inside the view.
 * @param value Number to store.
 */
var writeUint32LE = (view, offset, value) => {
  const littleEndian = true;
  view.setUint32(offset, value, littleEndian);
};
|
|
3379
|
+
/**
 * Stores an unsigned 16-bit integer into a DataView using little-endian byte order.
 *
 * @param view DataView to write into.
 * @param offset Byte offset inside the view.
 * @param value Number to store.
 */
var writeUint16LE = (view, offset, value) => {
  const littleEndian = true;
  view.setUint16(offset, value, littleEndian);
};
|
|
3382
|
+
/**
 * Writes each UTF-16 code unit of `value` as a single byte, starting at `offset`.
 *
 * @param view DataView to write into.
 * @param offset Byte offset of the first character.
 * @param value Text to write; one byte is written per code unit.
 */
var writeAscii = (view, offset, value) => {
  let position = 0;
  while (position < value.length) {
    view.setUint8(offset + position, value.charCodeAt(position));
    position += 1;
  }
};
|
|
3387
|
+
/**
 * Wraps raw signed 16-bit little-endian PCM in a 44-byte RIFF/WAVE header.
 *
 * @param pcm PCM bytes to wrap; copied into the output after the header.
 * @param format Audio format; `container` must be "raw" and `encoding` "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header as given.
 * @returns A new Uint8Array of `44 + pcm.byteLength` bytes (header followed by the PCM data).
 * @throws Error when the format is not raw pcm_s16le.
 */
var encodePcmAsWav = (pcm, format) => {
  const isRawPcm16 = format.container === "raw" && format.encoding === "pcm_s16le";
  if (!isRawPcm16) {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const headerBytes = 44;
  const sampleBits = 16;
  const channelCount = format.channels;
  const sampleRateHz = format.sampleRateHz;
  const payloadBytes = pcm.byteLength;
  const header = new DataView(new ArrayBuffer(headerBytes + payloadBytes));
  // Writes a chunk tag one byte per character.
  const putTag = (at, tag) => {
    let position = 0;
    while (position < tag.length) {
      header.setUint8(at + position, tag.charCodeAt(position));
      position += 1;
    }
  };
  // RIFF container: the size field counts everything after the first 8 bytes.
  putTag(0, "RIFF");
  header.setUint32(4, 36 + payloadBytes, true);
  putTag(8, "WAVE");
  // "fmt " chunk: 16-byte body, format tag 1.
  putTag(12, "fmt ");
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true);
  header.setUint16(22, channelCount, true);
  header.setUint32(24, sampleRateHz, true);
  header.setUint32(28, sampleRateHz * channelCount * sampleBits / 8, true);
  header.setUint16(32, channelCount * sampleBits / 8, true);
  header.setUint16(34, sampleBits, true);
  // "data" chunk: the PCM payload follows immediately.
  putTag(36, "data");
  header.setUint32(40, payloadBytes, true);
  const wav = new Uint8Array(header.buffer);
  wav.set(pcm, headerBytes);
  return wav;
};
|
|
3416
|
+
/**
 * Computes the playback duration of a raw signed 16-bit PCM buffer.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only `container: "raw"` with `encoding: "pcm_s16le"` is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 when the
 *   format is not raw pcm_s16le, when the byte rate is not a positive finite number
 *   (for example `channels` or `sampleRateHz` is missing, zero or negative), or when the
 *   result would not be a positive finite number.
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per sample for 16-bit PCM.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  // Reject NaN/Infinity/negative rates as well as zero so callers never see NaN
  // (which would serialize to null in JSON metadata) or a negative duration.
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
|
|
3426
|
+
/**
 * Creates an in-memory recording store that keeps at most one artifact per
 * (sessionId, channel) pair; a later `put` for the same pair replaces the earlier one.
 *
 * @returns An object with async `get`, `list` and `put` methods. `put` returns the stored
 *   artifact with a `memory://recording/<sessionId>/<channel>.wav` `recordingUrl` added.
 */
var createVoiceMemoryRecordingStore = () => {
  const artifactsByKey = new Map();
  const toKey = (sessionId, channel) => `${sessionId}::${channel}`;
  const get = async (sessionId, channel) => {
    return artifactsByKey.get(toKey(sessionId, channel));
  };
  const list = async (sessionId) => {
    const matches = [];
    for (const entry of artifactsByKey.values()) {
      if (entry.sessionId === sessionId) {
        matches.push(entry);
      }
    }
    return matches;
  };
  const put = async (artifact) => {
    const saved = {
      ...artifact,
      recordingUrl: `memory://recording/${artifact.sessionId}/${artifact.channel}.wav`
    };
    artifactsByKey.set(toKey(artifact.sessionId, artifact.channel), saved);
    return saved;
  };
  return { get, list, put };
};
|
|
3442
|
+
|
|
3372
3443
|
// src/session.ts
|
|
3373
3444
|
var DEFAULT_RECONNECT_TIMEOUT = 30000;
|
|
3374
3445
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
@@ -3611,6 +3682,39 @@ var createVoiceSession = (options) => {
|
|
|
3611
3682
|
const currentTurnAudio = [];
|
|
3612
3683
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
3613
3684
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
3685
|
+
const recordingConfig = options.recording;
|
|
3686
|
+
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
|
|
3687
|
+
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
|
|
3688
|
+
const recordingBuffers = {
|
|
3689
|
+
assistant: [],
|
|
3690
|
+
user: []
|
|
3691
|
+
};
|
|
3692
|
+
const recordingByteTotals = {
|
|
3693
|
+
assistant: 0,
|
|
3694
|
+
user: 0
|
|
3695
|
+
};
|
|
3696
|
+
const recordingFormats = {};
|
|
3697
|
+
let recordingPersisted = false;
|
|
3698
|
+
/**
 * Buffers a copy of one audio chunk for the given recording channel.
 *
 * The chunk is ignored when recording is not configured, recordings were already
 * persisted, the channel is not enabled, the format is not raw pcm_s16le, or the
 * channel has already reached `recordingMaxBytes`. A chunk that would exceed the
 * limit is truncated to the remaining byte budget.
 *
 * @param channel Recording channel ("assistant" or "user").
 * @param bytes Uint8Array holding the chunk.
 * @param format Audio format of the chunk; remembered as the channel's format.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  const skipCapture = !recordingConfig || recordingPersisted || !recordingChannels.has(channel) || format.container !== "raw" || format.encoding !== "pcm_s16le";
  if (skipCapture) {
    return;
  }
  const bytesSoFar = recordingByteTotals[channel];
  if (bytesSoFar >= recordingMaxBytes) {
    return;
  }
  const budget = recordingMaxBytes - bytesSoFar;
  let accepted = bytes;
  if (!(bytes.byteLength <= budget)) {
    accepted = bytes.subarray(0, budget);
  }
  // Copy so later mutation of the caller's buffer cannot alter the recording.
  recordingBuffers[channel].push(new Uint8Array(accepted));
  recordingByteTotals[channel] += accepted.byteLength;
  recordingFormats[channel] = format;
};
|
|
3614
3718
|
const pruneTurnAudio = () => {
|
|
3615
3719
|
const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
|
|
3616
3720
|
const cutoffAt = Date.now() - replayWindowMs;
|
|
@@ -3789,8 +3893,83 @@ var createVoiceSession = (options) => {
|
|
|
3789
3893
|
});
|
|
3790
3894
|
}
|
|
3791
3895
|
};
|
|
3896
|
+
/**
 * Writes the buffered audio of every enabled channel to `recordingConfig.store`
 * and emits a "recording.ready" trace for each stored artifact.
 *
 * Runs at most once per session: `recordingPersisted` is set before the first await,
 * so later calls return immediately and `captureRecordingChunk` stops buffering.
 * A failure while storing or tracing a channel is logged as a warning and does not
 * stop the remaining channels; the channel's buffer is cleared either way.
 */
const persistRecordings = async () => {
  if (!recordingConfig || recordingPersisted) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    if (!recordingChannels.has(channel)) {
      continue;
    }
    const bufferedChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    const nothingBuffered = bufferedChunks.length === 0 || !channelFormat;
    if (nothingBuffered) {
      continue;
    }
    // Concatenate the buffered chunks into one contiguous byte array.
    const byteCount = recordingByteTotals[channel];
    const audioBytes = new Uint8Array(byteCount);
    bufferedChunks.reduce((writeAt, piece) => {
      audioBytes.set(piece, writeAt);
      return writeAt + piece.byteLength;
    }, 0);
    try {
      const saved = await recordingConfig.store.put({
        audioBytes,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(byteCount, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      const payload = {
        channel,
        durationMs: saved.durationMs,
        recordingUrl: saved.recordingUrl,
        sessionId: options.id,
        sizeBytes: audioBytes.byteLength
      };
      await appendTrace({ payload, type: "recording.ready" });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store call succeeded.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
|
|
3949
|
+
/**
 * Cancels the TTS output of the currently active turn, if there is one.
 *
 * Does nothing when there is no TTS session or no active TTS turn. Otherwise the
 * active turn id is cleared first, and `cancel(reason)` is invoked only when the
 * session exposes a `cancel` function. A failing `cancel` is logged as a warning
 * and not rethrown.
 *
 * @param reason Reason forwarded to the adapter's `cancel` and included in the warning log.
 */
const cancelActiveTTS = async (reason) => {
  const turnId = activeTTSTurnId;
  const tts = ttsSession;
  const nothingToCancel = !tts || turnId === undefined;
  if (nothingToCancel) {
    return;
  }
  activeTTSTurnId = undefined;
  if (ttsAdapterSessionCanCancel(tts)) {
    try {
      await tts.cancel(reason);
    } catch (error) {
      const details = {
        error: toError(error).message,
        reason,
        sessionId: options.id,
        turnId
      };
      logger.warn("voice tts adapter cancel failed", details);
    }
  }
};
|
|
3792
3970
|
const sendAssistantAudio = async (chunk, input) => {
|
|
3793
3971
|
const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
|
|
3972
|
+
captureRecordingChunk("assistant", normalizedChunk, input.format);
|
|
3794
3973
|
await send({
|
|
3795
3974
|
chunkBase64: encodeBase64(normalizedChunk),
|
|
3796
3975
|
format: input.format,
|
|
@@ -3887,6 +4066,7 @@ var createVoiceSession = (options) => {
|
|
|
3887
4066
|
});
|
|
3888
4067
|
await closeTTSSession("failed");
|
|
3889
4068
|
await closeAdapter("failed");
|
|
4069
|
+
await persistRecordings();
|
|
3890
4070
|
speechDetected = false;
|
|
3891
4071
|
rewindFallbackTurnAudio();
|
|
3892
4072
|
await options.route.onError?.({
|
|
@@ -3955,6 +4135,7 @@ var createVoiceSession = (options) => {
|
|
|
3955
4135
|
});
|
|
3956
4136
|
await closeTTSSession("complete");
|
|
3957
4137
|
await closeAdapter("complete");
|
|
4138
|
+
await persistRecordings();
|
|
3958
4139
|
speechDetected = false;
|
|
3959
4140
|
rewindFallbackTurnAudio();
|
|
3960
4141
|
if (disposition === "transferred" && input.target) {
|
|
@@ -5096,7 +5277,14 @@ var createVoiceSession = (options) => {
|
|
|
5096
5277
|
if (shouldStoreAudio) {
|
|
5097
5278
|
pushTurnAudio(conditionedAudio);
|
|
5098
5279
|
}
|
|
5280
|
+
if (recordingConfig?.userInputFormat) {
|
|
5281
|
+
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
5282
|
+
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
5283
|
+
}
|
|
5099
5284
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
5285
|
+
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
5286
|
+
cancelActiveTTS("barge-in");
|
|
5287
|
+
}
|
|
5100
5288
|
speechDetected = true;
|
|
5101
5289
|
clearSilenceTimer();
|
|
5102
5290
|
} else if (speechDetected) {
|
|
@@ -5129,6 +5317,7 @@ var createVoiceSession = (options) => {
|
|
|
5129
5317
|
clearSilenceTimer();
|
|
5130
5318
|
await closeTTSSession(reason);
|
|
5131
5319
|
await closeAdapter(reason);
|
|
5320
|
+
await persistRecordings();
|
|
5132
5321
|
await Promise.resolve(socket.close(1000, reason));
|
|
5133
5322
|
if (session.call?.endedAt && session.call.disposition === "closed") {
|
|
5134
5323
|
await appendTrace({
|
|
@@ -34559,6 +34748,76 @@ var createVoiceWorkflowContractHandler = (input) => {
|
|
|
34559
34748
|
return result;
|
|
34560
34749
|
};
|
|
34561
34750
|
};
|
|
34751
|
+
// src/aiVoiceModel.ts
|
|
34752
|
+
/**
 * Converts voice-agent messages into the message shape passed to `provider.stream`.
 *
 * - `tool` messages become a `user` message holding a single `tool_result` part whose
 *   `tool_use_id` is the message's `toolCallId`, else its `name`, else "".
 * - `system` messages are re-labelled as `user` messages.
 * - Every other message keeps its role and content.
 *
 * @param messages Iterable of messages with `role` and `content`.
 * @returns A new array of provider messages, in the same order.
 */
var toProviderMessages = (messages) => {
  const convert = (message) => {
    switch (message.role) {
      case "tool": {
        const toolResult = {
          content: message.content,
          tool_use_id: message.toolCallId ?? message.name ?? "",
          type: "tool_result"
        };
        return { content: [toolResult], role: "user" };
      }
      case "system":
        return { content: message.content, role: "user" };
      default:
        return { content: message.content, role: message.role };
    }
  };
  const converted = [];
  for (const message of messages) {
    converted.push(convert(message));
  }
  return converted;
};
|
|
34776
|
+
/**
 * Converts voice-agent tool definitions into the tool shape passed to `provider.stream`.
 *
 * @param tools Array of tools with `name` and optional `description` / `parameters`.
 *   May be `undefined` or `null`, which is treated like an empty list.
 * @returns `undefined` when there are no tools; otherwise one entry per tool with
 *   `description` (defaulting to ""), `input_schema` (defaulting to an empty object
 *   schema) and `name`.
 */
var toProviderTools = (tools) => {
  // A missing tool list must not throw on `.length`; it simply means "no tools".
  if (!tools || tools.length === 0) {
    return;
  }
  return tools.map((tool) => ({
    description: tool.description ?? "",
    input_schema: tool.parameters ?? {
      properties: {},
      type: "object"
    },
    name: tool.name
  }));
};
|
|
34789
|
+
/**
 * Builds a voice-agent model backed by `options.provider.stream`.
 *
 * The returned object's `generate(input)` sends the converted messages and tools to
 * the provider, then consumes the stream: `text` chunks are concatenated into
 * `assistantText` and `tool_use` chunks are collected as tool calls (missing
 * `input` becomes `{}`). Other chunk types are ignored.
 *
 * @param options `model`, `provider`, optional `signal` and optional `systemPrompt`
 *   (used only when `input.system` is null/undefined).
 * @returns An object with an async `generate` method resolving to `{ assistantText }`,
 *   plus `toolCalls` when at least one tool call was streamed.
 */
var createAIVoiceModel = (options) => {
  const generate = async (input) => {
    const systemPrompt = input.system ?? options.systemPrompt;
    const request = {
      messages: toProviderMessages(input.messages),
      model: options.model,
      signal: options.signal,
      systemPrompt,
      tools: toProviderTools(input.tools)
    };
    const textParts = [];
    const toolCalls = [];
    for await (const chunk of options.provider.stream(request)) {
      switch (chunk.type) {
        case "text":
          textParts.push(`${chunk.content}`);
          break;
        case "tool_use":
          toolCalls.push({ args: chunk.input ?? {}, id: chunk.id, name: chunk.name });
          break;
        default:
          break;
      }
    }
    const assistantText = textParts.join("");
    return toolCalls.length > 0 ? { assistantText, toolCalls } : { assistantText };
  };
  return { generate };
};
|
|
34562
34821
|
// src/ragTool.ts
|
|
34563
34822
|
var DEFAULT_TOOL_NAME = "searchKnowledgeBase";
|
|
34564
34823
|
var DEFAULT_DESCRIPTION = "Search the knowledge base and return short grounded citations. Use this whenever the caller asks a question that may be answered by indexed reference material.";
|
|
@@ -37004,6 +37263,66 @@ var createStoredVoiceExternalObjectMap = (mapping) => createVoiceExternalObjectM
|
|
|
37004
37263
|
sourceId: mapping.sourceId,
|
|
37005
37264
|
sourceType: mapping.sourceType
|
|
37006
37265
|
});
|
|
37266
|
+
/**
 * Builds the WAV file name for a recording: the URI-encoded session id, an
 * underscore, the channel, and a ".wav" extension.
 *
 * @param sessionId Session identifier (URI-component encoded before use).
 * @param channel Recording channel, inserted as-is.
 * @returns The file name (no directory part).
 */
var recordingFileName = (sessionId, channel) => {
  const encodedSessionId = encodeURIComponent(sessionId);
  return `${encodedSessionId}_${channel}.wav`;
};
|
|
37267
|
+
/**
 * Builds the metadata file name for a recording: the URI-encoded session id, an
 * underscore, the channel, and a ".json" extension.
 *
 * @param sessionId Session identifier (URI-component encoded before use).
 * @param channel Recording channel, inserted as-is.
 * @returns The file name (no directory part).
 */
var recordingMetadataFileName = (sessionId, channel) => {
  const encodedSessionId = encodeURIComponent(sessionId);
  return `${encodedSessionId}_${channel}.json`;
};
|
|
37268
|
+
/**
 * Creates a recording store that persists each recording under `options.directory`
 * as a `.wav` file (PCM wrapped by `encodePcmAsWav`) plus a `.json` metadata file.
 *
 * `get` / `list` return artifacts whose `audioBytes` are the raw PCM payload (the
 * 44-byte WAV header written by `put` is removed), so the bytes agree with the
 * stored `format` and with what was originally passed to `put`.
 *
 * @param options `directory` to write into (created on demand) and optional `pretty`
 *   flag that indents the JSON metadata.
 * @returns An object with `get`, `list` and `put` methods.
 */
var createVoiceFileRecordingStore = (options) => {
  // Size of the header that encodePcmAsWav prepends to the PCM payload.
  const WAV_HEADER_BYTES = 44;
  const ensureDir = async () => {
    await mkdir4(options.directory, { recursive: true });
  };
  // Returns the PCM payload of a file written by `put`. Files that do not start
  // with a "RIFF" tag (or are shorter than a header) are returned untouched.
  const stripWavHeader = (fileBytes) => {
    const hasRiffHeader = fileBytes.byteLength >= WAV_HEADER_BYTES && fileBytes[0] === 82 && fileBytes[1] === 73 && fileBytes[2] === 70 && fileBytes[3] === 70;
    return hasRiffHeader ? fileBytes.subarray(WAV_HEADER_BYTES) : fileBytes;
  };
  const put = async (artifact) => {
    await ensureDir();
    const wavPath = join3(options.directory, recordingFileName(artifact.sessionId, artifact.channel));
    const metadataPath = join3(options.directory, recordingMetadataFileName(artifact.sessionId, artifact.channel));
    const wav = encodePcmAsWav(artifact.audioBytes, artifact.format);
    await writeFile(wavPath, wav);
    const recordingUrl = `file://${wavPath}`;
    const metadata = {
      capturedAt: artifact.capturedAt,
      channel: artifact.channel,
      durationMs: artifact.durationMs,
      format: artifact.format,
      recordingUrl,
      sessionId: artifact.sessionId
    };
    await writeFile(metadataPath, options.pretty ? JSON.stringify(metadata, null, 2) : JSON.stringify(metadata));
    return {
      ...artifact,
      recordingUrl
    };
  };
  // Loads one recording; resolves to undefined when either file is missing.
  const readMetadata = async (sessionId, channel) => {
    const metadataPath = join3(options.directory, recordingMetadataFileName(sessionId, channel));
    const wavPath = join3(options.directory, recordingFileName(sessionId, channel));
    try {
      const [metaText, wavBytes] = await Promise.all([
        readFile2(metadataPath, "utf8"),
        readFile2(wavPath)
      ]);
      const meta = JSON.parse(metaText);
      const fileBytes = new Uint8Array(wavBytes.buffer, wavBytes.byteOffset, wavBytes.byteLength);
      return {
        audioBytes: stripWavHeader(fileBytes),
        capturedAt: meta.capturedAt,
        channel: meta.channel,
        durationMs: meta.durationMs,
        format: meta.format,
        recordingUrl: meta.recordingUrl,
        sessionId: meta.sessionId
      };
    } catch (error) {
      // Optional chaining keeps a thrown null/undefined from masking the real failure.
      if (error?.code === "ENOENT") {
        return;
      }
      throw error;
    }
  };
  const get = (sessionId, channel) => readMetadata(sessionId, channel);
  const list = async (sessionId) => {
    const channels = ["assistant", "user"];
    const records = await Promise.all(channels.map((channel) => readMetadata(sessionId, channel)));
    return records.filter((record) => record !== undefined);
  };
  return { get, list, put };
};
|
|
37007
37326
|
// src/modelAdapters.ts
|
|
37008
37327
|
var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
|
|
37009
37328
|
var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
|
|
@@ -44991,6 +45310,7 @@ export {
|
|
|
44991
45310
|
verifyVoiceOpsWebhookSignature,
|
|
44992
45311
|
validateVoiceWorkflowRouteResult,
|
|
44993
45312
|
validateVoiceObservabilityExportRecord,
|
|
45313
|
+
ttsAdapterSessionCanCancel,
|
|
44994
45314
|
transcodeTwilioInboundPayloadToPCM16,
|
|
44995
45315
|
transcodePCMToTwilioOutboundPayload,
|
|
44996
45316
|
summarizeVoiceTurnQuality,
|
|
@@ -45263,6 +45583,7 @@ export {
|
|
|
45263
45583
|
evaluateVoiceBrowserCallProfileEvidence,
|
|
45264
45584
|
evaluateVoiceAgentSquadContractEvidence,
|
|
45265
45585
|
encodeTwilioMulawBase64,
|
|
45586
|
+
encodePcmAsWav,
|
|
45266
45587
|
deliverVoiceTraceEventsToSinks,
|
|
45267
45588
|
deliverVoiceObservabilityExport,
|
|
45268
45589
|
deliverVoiceMonitorIssueNotifications,
|
|
@@ -45476,6 +45797,7 @@ export {
|
|
|
45476
45797
|
createVoiceMemoryTraceSinkDeliveryStore,
|
|
45477
45798
|
createVoiceMemoryTraceEventStore,
|
|
45478
45799
|
createVoiceMemoryStore,
|
|
45800
|
+
createVoiceMemoryRecordingStore,
|
|
45479
45801
|
createVoiceMemoryObservabilityExportDeliveryReceiptStore,
|
|
45480
45802
|
createVoiceMemoryMonitorNotifierDeliveryReceiptStore,
|
|
45481
45803
|
createVoiceMemoryMonitorIssueStore,
|
|
@@ -45522,6 +45844,7 @@ export {
|
|
|
45522
45844
|
createVoiceFileScenarioFixtureStore,
|
|
45523
45845
|
createVoiceFileRuntimeStorage,
|
|
45524
45846
|
createVoiceFileReviewStore,
|
|
45847
|
+
createVoiceFileRecordingStore,
|
|
45525
45848
|
createVoiceFileObservabilityExportDeliveryReceiptStore,
|
|
45526
45849
|
createVoiceFileIntegrationEventStore,
|
|
45527
45850
|
createVoiceFileIncidentBundleStore,
|
|
@@ -45614,7 +45937,9 @@ export {
|
|
|
45614
45937
|
createDomainPhraseHints,
|
|
45615
45938
|
createDomainLexicon,
|
|
45616
45939
|
createAnthropicVoiceAssistantModel,
|
|
45940
|
+
createAIVoiceModel,
|
|
45617
45941
|
conditionAudioChunk,
|
|
45942
|
+
computePcmDurationMs,
|
|
45618
45943
|
completeVoiceOpsTask,
|
|
45619
45944
|
compareVoiceEvalBaseline,
|
|
45620
45945
|
claimVoiceOpsTask,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { AudioFormat } from "./types";
|
|
2
|
+
/** Recording channel: "assistant" is audio sent by the assistant, "user" is incoming user audio. */
export type VoiceRecordingChannel = "assistant" | "user";
|
|
3
|
+
/**
 * One recorded channel of a voice session, as handed to `VoiceRecordingStore.put`.
 *
 * When produced by the voice session: `audioBytes` is the concatenation of the buffered
 * chunks for the channel, `capturedAt` is `Date.now()` at persist time, `durationMs`
 * comes from `computePcmDurationMs`, `format` is the format of the last captured chunk,
 * and `sessionId` is the session's id.
 */
export type VoiceRecordingArtifact = {
|
|
4
|
+
audioBytes: Uint8Array;
|
|
5
|
+
capturedAt: number;
|
|
6
|
+
channel: VoiceRecordingChannel;
|
|
7
|
+
durationMs: number;
|
|
8
|
+
format: AudioFormat;
|
|
9
|
+
sessionId: string;
|
|
10
|
+
};
|
|
11
|
+
/**
 * A recording artifact as returned by a store. `recordingUrl` is set by the store:
 * the memory store uses `memory://recording/<sessionId>/<channel>.wav`, the file store
 * uses `file://` followed by the WAV file path.
 */
export type StoredVoiceRecordingArtifact = VoiceRecordingArtifact & {
|
|
12
|
+
recordingUrl?: string;
|
|
13
|
+
};
|
|
14
|
+
/**
 * Storage contract for session recordings.
 *
 * - `get`: resolves to the recording for one session/channel pair, or `undefined` if none exists.
 * - `list`: resolves to all recordings stored for a session (possibly empty).
 * - `put`: stores a recording and resolves to the stored artifact.
 */
export type VoiceRecordingStore = {
|
|
15
|
+
get: (sessionId: string, channel: VoiceRecordingChannel) => Promise<StoredVoiceRecordingArtifact | undefined>;
|
|
16
|
+
list: (sessionId: string) => Promise<StoredVoiceRecordingArtifact[]>;
|
|
17
|
+
put: (artifact: VoiceRecordingArtifact) => Promise<StoredVoiceRecordingArtifact>;
|
|
18
|
+
};
|
|
19
|
+
/**
 * Prepends a 44-byte RIFF/WAVE header to raw 16-bit PCM and returns the result as a new array.
 * Throws an `Error` unless `format.container` is "raw" and `format.encoding` is "pcm_s16le".
 */
export declare const encodePcmAsWav: (pcm: Uint8Array, format: AudioFormat) => Uint8Array;
|
|
20
|
+
/**
 * Returns the duration in milliseconds (rounded) of `pcmByteLength` bytes of raw 16-bit PCM,
 * using `sampleRateHz * channels * 2` bytes per second. Returns 0 when the format is not
 * raw pcm_s16le or when the byte rate is zero.
 */
export declare const computePcmDurationMs: (pcmByteLength: number, format: AudioFormat) => number;
|
|
21
|
+
/**
 * Creates a `VoiceRecordingStore` that keeps recordings in a `Map` in process memory,
 * one entry per session/channel pair.
 */
export declare const createVoiceMemoryRecordingStore: () => VoiceRecordingStore;
|
package/dist/testing/index.js
CHANGED
|
@@ -5337,6 +5337,77 @@ var resolveLogger = (logger) => ({
|
|
|
5337
5337
|
...logger
|
|
5338
5338
|
});
|
|
5339
5339
|
|
|
5340
|
+
// src/types.ts
|
|
5341
|
+
/**
 * Reports whether a TTS adapter session exposes a callable `cancel` member.
 *
 * @param session TTS adapter session object to inspect (must not be null/undefined).
 * @returns `true` when `session.cancel` is a function, `false` otherwise.
 */
var ttsAdapterSessionCanCancel = (session) => {
  const cancelMember = session.cancel;
  return typeof cancelMember === "function";
};
|
|
5342
|
+
|
|
5343
|
+
// src/recordingStore.ts
|
|
5344
|
+
/**
 * Stores an unsigned 32-bit integer into a DataView using little-endian byte order.
 *
 * @param view DataView to write into.
 * @param offset Byte offset inside the view.
 * @param value Number to store.
 */
var writeUint32LE = (view, offset, value) => {
  const littleEndian = true;
  view.setUint32(offset, value, littleEndian);
};
|
|
5347
|
+
/**
 * Stores an unsigned 16-bit integer into a DataView using little-endian byte order.
 *
 * @param view DataView to write into.
 * @param offset Byte offset inside the view.
 * @param value Number to store.
 */
var writeUint16LE = (view, offset, value) => {
  const littleEndian = true;
  view.setUint16(offset, value, littleEndian);
};
|
|
5350
|
+
/**
 * Writes each UTF-16 code unit of `value` as a single byte, starting at `offset`.
 *
 * @param view DataView to write into.
 * @param offset Byte offset of the first character.
 * @param value Text to write; one byte is written per code unit.
 */
var writeAscii = (view, offset, value) => {
  let position = 0;
  while (position < value.length) {
    view.setUint8(offset + position, value.charCodeAt(position));
    position += 1;
  }
};
|
|
5355
|
+
/**
 * Wraps raw signed 16-bit little-endian PCM in a 44-byte RIFF/WAVE header.
 *
 * @param pcm PCM bytes to wrap; copied into the output after the header.
 * @param format Audio format; `container` must be "raw" and `encoding` "pcm_s16le".
 *   `channels` and `sampleRateHz` are written into the header as given.
 * @returns A new Uint8Array of `44 + pcm.byteLength` bytes (header followed by the PCM data).
 * @throws Error when the format is not raw pcm_s16le.
 */
var encodePcmAsWav = (pcm, format) => {
  const isRawPcm16 = format.container === "raw" && format.encoding === "pcm_s16le";
  if (!isRawPcm16) {
    throw new Error(`encodePcmAsWav only supports raw pcm_s16le input (got container=${format.container}, encoding=${format.encoding})`);
  }
  const headerBytes = 44;
  const sampleBits = 16;
  const channelCount = format.channels;
  const sampleRateHz = format.sampleRateHz;
  const payloadBytes = pcm.byteLength;
  const header = new DataView(new ArrayBuffer(headerBytes + payloadBytes));
  // Writes a chunk tag one byte per character.
  const putTag = (at, tag) => {
    let position = 0;
    while (position < tag.length) {
      header.setUint8(at + position, tag.charCodeAt(position));
      position += 1;
    }
  };
  // RIFF container: the size field counts everything after the first 8 bytes.
  putTag(0, "RIFF");
  header.setUint32(4, 36 + payloadBytes, true);
  putTag(8, "WAVE");
  // "fmt " chunk: 16-byte body, format tag 1.
  putTag(12, "fmt ");
  header.setUint32(16, 16, true);
  header.setUint16(20, 1, true);
  header.setUint16(22, channelCount, true);
  header.setUint32(24, sampleRateHz, true);
  header.setUint32(28, sampleRateHz * channelCount * sampleBits / 8, true);
  header.setUint16(32, channelCount * sampleBits / 8, true);
  header.setUint16(34, sampleBits, true);
  // "data" chunk: the PCM payload follows immediately.
  putTag(36, "data");
  header.setUint32(40, payloadBytes, true);
  const wav = new Uint8Array(header.buffer);
  wav.set(pcm, headerBytes);
  return wav;
};
|
|
5384
|
+
/**
 * Computes the playback duration of a raw signed 16-bit PCM buffer.
 *
 * @param pcmByteLength Number of PCM bytes.
 * @param format Audio format; only `container: "raw"` with `encoding: "pcm_s16le"` is measured.
 * @returns Duration in milliseconds, rounded to the nearest integer. Returns 0 when the
 *   format is not raw pcm_s16le, when the byte rate is not a positive finite number
 *   (for example `channels` or `sampleRateHz` is missing, zero or negative), or when the
 *   result would not be a positive finite number.
 */
var computePcmDurationMs = (pcmByteLength, format) => {
  if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
    return 0;
  }
  // 2 bytes per sample for 16-bit PCM.
  const bytesPerSecond = format.sampleRateHz * format.channels * 2;
  // Reject NaN/Infinity/negative rates as well as zero so callers never see NaN
  // (which would serialize to null in JSON metadata) or a negative duration.
  if (!Number.isFinite(bytesPerSecond) || bytesPerSecond <= 0) {
    return 0;
  }
  const durationMs = Math.round(pcmByteLength / bytesPerSecond * 1000);
  return Number.isFinite(durationMs) && durationMs > 0 ? durationMs : 0;
};
|
|
5394
|
+
/**
 * Creates an in-memory recording store that keeps at most one artifact per
 * (sessionId, channel) pair; a later `put` for the same pair replaces the earlier one.
 *
 * @returns An object with async `get`, `list` and `put` methods. `put` returns the stored
 *   artifact with a `memory://recording/<sessionId>/<channel>.wav` `recordingUrl` added.
 */
var createVoiceMemoryRecordingStore = () => {
  const artifactsByKey = new Map();
  const toKey = (sessionId, channel) => `${sessionId}::${channel}`;
  const get = async (sessionId, channel) => {
    return artifactsByKey.get(toKey(sessionId, channel));
  };
  const list = async (sessionId) => {
    const matches = [];
    for (const entry of artifactsByKey.values()) {
      if (entry.sessionId === sessionId) {
        matches.push(entry);
      }
    }
    return matches;
  };
  const put = async (artifact) => {
    const saved = {
      ...artifact,
      recordingUrl: `memory://recording/${artifact.sessionId}/${artifact.channel}.wav`
    };
    artifactsByKey.set(toKey(artifact.sessionId, artifact.channel), saved);
    return saved;
  };
  return { get, list, put };
};
|
|
5410
|
+
|
|
5340
5411
|
// src/session.ts
|
|
5341
5412
|
var DEFAULT_RECONNECT_TIMEOUT = 30000;
|
|
5342
5413
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
|
|
@@ -5579,6 +5650,39 @@ var createVoiceSession = (options) => {
|
|
|
5579
5650
|
const currentTurnAudio = [];
|
|
5580
5651
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
5581
5652
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
5653
|
+
// Recording capture state. Recording is off unless `options.recording` is set.
const recordingConfig = options.recording;
// Channels to capture; both are captured when the config names none.
const recordingChannels = new Set(recordingConfig?.channels ?? ["assistant", "user"]);
// Per-channel cap on buffered bytes (defaults to 50 MiB).
const recordingMaxBytes = recordingConfig?.maxBytesPerChannel ?? 50 * 1024 * 1024;
const recordingBuffers = { assistant: [], user: [] };
const recordingByteTotals = { assistant: 0, user: 0 };
// Format of the most recently captured chunk for each channel.
const recordingFormats = {};
// Flipped once persistence starts; later chunks are ignored.
let recordingPersisted = false;
/**
 * Buffers a copy of one audio chunk for the given channel.
 *
 * The chunk is dropped when recording is disabled or already persisted, when
 * the channel is not selected, when the format is not raw pcm_s16le, or when
 * the channel has reached its byte cap. A chunk that would cross the cap is
 * truncated to the remaining room.
 */
const captureRecordingChunk = (channel, bytes, format) => {
  const captureEnabled = Boolean(recordingConfig) && !recordingPersisted && recordingChannels.has(channel);
  if (!captureEnabled) {
    return;
  }
  const isRawPcm16 = format.container === "raw" && format.encoding === "pcm_s16le";
  if (!isRawPcm16) {
    return;
  }
  const bytesSoFar = recordingByteTotals[channel];
  if (bytesSoFar >= recordingMaxBytes) {
    return;
  }
  const room = recordingMaxBytes - bytesSoFar;
  let accepted;
  if (bytes.byteLength <= room) {
    accepted = bytes;
  } else {
    accepted = bytes.subarray(0, room);
  }
  // Copy so later mutation of the caller's buffer cannot alter the recording.
  recordingBuffers[channel].push(new Uint8Array(accepted));
  recordingByteTotals[channel] += accepted.byteLength;
  recordingFormats[channel] = format;
};
|
|
5582
5686
|
const pruneTurnAudio = () => {
|
|
5583
5687
|
const replayWindowMs = sttFallback?.replayWindowMs ?? DEFAULT_FALLBACK_REPLAY_MS;
|
|
5584
5688
|
const cutoffAt = Date.now() - replayWindowMs;
|
|
@@ -5757,8 +5861,83 @@ var createVoiceSession = (options) => {
|
|
|
5757
5861
|
});
|
|
5758
5862
|
}
|
|
5759
5863
|
};
|
|
5864
|
+
/**
 * Writes each captured channel to the configured recording store, at most once.
 *
 * The `recordingPersisted` latch is set before any await, so repeated calls
 * return immediately. For each selected channel with buffered audio, the chunks
 * are concatenated, stored, and a "recording.ready" trace is appended. A failure
 * in either the store or the trace append is logged as a warning under the same
 * message and does not propagate. The channel's buffers are cleared either way.
 */
const persistRecordings = async () => {
  if (recordingPersisted || !recordingConfig) {
    return;
  }
  recordingPersisted = true;
  for (const channel of ["assistant", "user"]) {
    const pendingChunks = recordingBuffers[channel];
    const channelFormat = recordingFormats[channel];
    const nothingToStore = !recordingChannels.has(channel) || pendingChunks.length === 0 || !channelFormat;
    if (nothingToStore) {
      continue;
    }
    const sizeBytes = recordingByteTotals[channel];
    const pcm = new Uint8Array(sizeBytes);
    // Lay the chunks end to end, threading the write position through reduce.
    pendingChunks.reduce((writeAt, piece) => {
      pcm.set(piece, writeAt);
      return writeAt + piece.byteLength;
    }, 0);
    try {
      const artifact = await recordingConfig.store.put({
        audioBytes: pcm,
        capturedAt: Date.now(),
        channel,
        durationMs: computePcmDurationMs(sizeBytes, channelFormat),
        format: channelFormat,
        sessionId: options.id
      });
      await appendTrace({
        payload: {
          channel,
          durationMs: artifact.durationMs,
          recordingUrl: artifact.recordingUrl,
          sessionId: options.id,
          sizeBytes: pcm.byteLength
        },
        type: "recording.ready"
      });
    } catch (error) {
      logger.warn("voice recording persist failed", {
        channel,
        error: toError(error).message,
        sessionId: options.id
      });
    } finally {
      // Release the buffered audio whether or not the store accepted it.
      recordingBuffers[channel] = [];
      recordingByteTotals[channel] = 0;
    }
  }
};
|
|
5917
|
+
/**
 * Cancels the TTS turn currently in flight, if there is one.
 *
 * The active turn id is cleared before the adapter is asked to cancel. Adapters
 * whose session fails the `ttsAdapterSessionCanCancel` check are left alone. A
 * rejected or throwing `cancel` is logged as a warning rather than propagated.
 *
 * @param reason Passed through to the adapter's `cancel` and to the warning log.
 */
const cancelActiveTTS = async (reason) => {
  const speakingSession = ttsSession;
  const interruptedTurnId = activeTTSTurnId;
  const nothingToCancel = interruptedTurnId === undefined || !speakingSession;
  if (nothingToCancel) {
    return;
  }
  activeTTSTurnId = undefined;
  if (ttsAdapterSessionCanCancel(speakingSession)) {
    try {
      await speakingSession.cancel(reason);
    } catch (error) {
      logger.warn("voice tts adapter cancel failed", {
        error: toError(error).message,
        reason,
        sessionId: options.id,
        turnId: interruptedTurnId
      });
    }
  }
};
|
|
5760
5938
|
const sendAssistantAudio = async (chunk, input) => {
|
|
5761
5939
|
const normalizedChunk = chunk instanceof Uint8Array ? new Uint8Array(chunk) : chunk instanceof ArrayBuffer ? new Uint8Array(chunk.slice(0)) : new Uint8Array(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength));
|
|
5940
|
+
captureRecordingChunk("assistant", normalizedChunk, input.format);
|
|
5762
5941
|
await send({
|
|
5763
5942
|
chunkBase64: encodeBase64(normalizedChunk),
|
|
5764
5943
|
format: input.format,
|
|
@@ -5855,6 +6034,7 @@ var createVoiceSession = (options) => {
|
|
|
5855
6034
|
});
|
|
5856
6035
|
await closeTTSSession("failed");
|
|
5857
6036
|
await closeAdapter("failed");
|
|
6037
|
+
await persistRecordings();
|
|
5858
6038
|
speechDetected = false;
|
|
5859
6039
|
rewindFallbackTurnAudio();
|
|
5860
6040
|
await options.route.onError?.({
|
|
@@ -5923,6 +6103,7 @@ var createVoiceSession = (options) => {
|
|
|
5923
6103
|
});
|
|
5924
6104
|
await closeTTSSession("complete");
|
|
5925
6105
|
await closeAdapter("complete");
|
|
6106
|
+
await persistRecordings();
|
|
5926
6107
|
speechDetected = false;
|
|
5927
6108
|
rewindFallbackTurnAudio();
|
|
5928
6109
|
if (disposition === "transferred" && input.target) {
|
|
@@ -7064,7 +7245,14 @@ var createVoiceSession = (options) => {
|
|
|
7064
7245
|
if (shouldStoreAudio) {
|
|
7065
7246
|
pushTurnAudio(conditionedAudio);
|
|
7066
7247
|
}
|
|
7248
|
+
if (recordingConfig?.userInputFormat) {
|
|
7249
|
+
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
7250
|
+
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
7251
|
+
}
|
|
7067
7252
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
7253
|
+
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
7254
|
+
cancelActiveTTS("barge-in");
|
|
7255
|
+
}
|
|
7068
7256
|
speechDetected = true;
|
|
7069
7257
|
clearSilenceTimer();
|
|
7070
7258
|
} else if (speechDetected) {
|
|
@@ -7097,6 +7285,7 @@ var createVoiceSession = (options) => {
|
|
|
7097
7285
|
clearSilenceTimer();
|
|
7098
7286
|
await closeTTSSession(reason);
|
|
7099
7287
|
await closeAdapter(reason);
|
|
7288
|
+
await persistRecordings();
|
|
7100
7289
|
await Promise.resolve(socket.close(1000, reason));
|
|
7101
7290
|
if (session.call?.endedAt && session.call.disposition === "closed") {
|
|
7102
7291
|
await appendTrace({
|
|
@@ -13154,8 +13343,9 @@ var runTTSAdapterFixture = async (adapter, fixture, options = {}) => {
|
|
|
13154
13343
|
sessionId: `tts-benchmark:${fixture.id}`,
|
|
13155
13344
|
...openOptions ?? {}
|
|
13156
13345
|
});
|
|
13346
|
+
const sessionOn = session.on;
|
|
13157
13347
|
const unsubscribers = [
|
|
13158
|
-
|
|
13348
|
+
sessionOn("audio", ({ chunk, format, receivedAt }) => {
|
|
13159
13349
|
const normalizedChunk = chunk instanceof Uint8Array ? chunk : chunk instanceof ArrayBuffer ? new Uint8Array(chunk) : new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
13160
13350
|
audioChunkCount += 1;
|
|
13161
13351
|
totalAudioBytes += normalizedChunk.byteLength;
|
|
@@ -13175,10 +13365,10 @@ var runTTSAdapterFixture = async (adapter, fixture, options = {}) => {
|
|
|
13175
13365
|
}, options.interruptAfterFirstAudioMs);
|
|
13176
13366
|
}
|
|
13177
13367
|
}),
|
|
13178
|
-
|
|
13368
|
+
sessionOn("error", () => {
|
|
13179
13369
|
errorCount += 1;
|
|
13180
13370
|
}),
|
|
13181
|
-
|
|
13371
|
+
sessionOn("close", () => {
|
|
13182
13372
|
closeCount += 1;
|
|
13183
13373
|
closed = true;
|
|
13184
13374
|
closedAt = Date.now();
|
package/dist/trace.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { S3Client, S3Options } from "bun";
|
|
2
|
-
export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
|
|
2
|
+
export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
|
|
3
3
|
export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
|
|
4
4
|
at: number;
|
|
5
5
|
id?: string;
|
package/dist/types.d.ts
CHANGED
|
@@ -166,8 +166,12 @@ export type TTSSessionEventMap = {
|
|
|
166
166
|
/** Handle to one open text-to-speech adapter session. */
export type TTSAdapterSession = {
  /**
   * Registers a handler for a session event.
   *
   * @returns A function; callers keep it as an unsubscribe handle.
   */
  on: <K extends keyof TTSSessionEventMap>(
    event: K,
    handler: (payload: TTSSessionEventMap[K]) => void | Promise<void>
  ) => () => void;
  /** Sends text to the adapter. */
  send: (text: string) => Promise<void>;
  /**
   * Optional. Adapters that can stop in-flight output implement this; the voice
   * session calls it (for example with reason "barge-in") only when it is present.
   */
  cancel?: (reason?: string) => Promise<void>;
  /** Closes the session, optionally stating why. */
  close: (reason?: string) => Promise<void>;
};
|
|
172
|
+
/**
 * Type guard that narrows a TTS adapter session to one whose optional `cancel`
 * method is defined, so it can be called without a further check.
 */
export declare const ttsAdapterSessionCanCancel: (
  session: TTSAdapterSession
) => session is TTSAdapterSession & Required<Pick<TTSAdapterSession, "cancel">>;
|
|
171
175
|
export type TTSAdapterOpenOptions = {
|
|
172
176
|
sessionId: string;
|
|
173
177
|
lexicon?: VoiceLexiconEntry[];
|
|
@@ -697,6 +701,12 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
697
701
|
profileSwitchGuard?: VoicePluginProfileSwitchGuardConfig<TContext, TSession, TResult>;
|
|
698
702
|
trace?: VoiceTraceEventStore;
|
|
699
703
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
704
|
+
/** Opt-in audio recording settings for a voice session. */
export type VoiceSessionRecordingConfig = {
  /** Destination for recordings; the session stores one artifact per channel. */
  store: import("./recordingStore").VoiceRecordingStore;
  /** Channels to record. The session records both when this is omitted. */
  channels?: readonly ("assistant" | "user")[];
  /**
   * Cap on buffered audio bytes per channel. The session defaults to 50 MiB
   * and drops audio past the cap.
   */
  maxBytesPerChannel?: number;
  /**
   * Format of inbound user audio. The session captures the user channel only
   * when this is set, and records only raw `pcm_s16le` audio.
   */
  userInputFormat?: AudioFormat;
};
|
|
700
710
|
export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
701
711
|
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
702
712
|
id: string;
|
|
@@ -711,6 +721,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
711
721
|
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
712
722
|
store: VoiceSessionStore<TSession>;
|
|
713
723
|
trace?: VoiceTraceEventStore;
|
|
724
|
+
recording?: VoiceSessionRecordingConfig;
|
|
714
725
|
reconnect: Required<VoiceReconnectConfig>;
|
|
715
726
|
phraseHints?: VoicePhraseHint[];
|
|
716
727
|
sessionMetadata?: Record<string, unknown>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@absolutejs/voice",
|
|
3
|
-
"version": "0.0.22-beta.
|
|
3
|
+
"version": "0.0.22-beta.477",
|
|
4
4
|
"description": "Voice primitives and Elysia plugin for AbsoluteJS",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -160,7 +160,7 @@
|
|
|
160
160
|
"bench:stt": "bun run ./scripts/benchmark-stt.ts all",
|
|
161
161
|
"bench:assemblyai:sessions": "bun run ./scripts/benchmark-session.ts assemblyai",
|
|
162
162
|
"bench:openai:sessions": "bun run ./scripts/benchmark-session.ts openai",
|
|
163
|
-
"build": "bun run ./scripts/build-htmx-bootstrap-asset.ts && rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute --external @absolutejs/media && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
163
|
+
"build": "bun run ./scripts/build-htmx-bootstrap-asset.ts && rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts ./src/testing/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute --external @absolutejs/ai --external @absolutejs/media && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
164
164
|
"format": "prettier --write \"./**/*.{js,jsx,ts,tsx,json,md}\"",
|
|
165
165
|
"lint": "eslint ./src",
|
|
166
166
|
"release": "bun run format && bun run build && bun publish",
|
|
@@ -229,12 +229,16 @@
|
|
|
229
229
|
},
|
|
230
230
|
"peerDependencies": {
|
|
231
231
|
"@absolutejs/absolute": ">=0.19.0-beta.646",
|
|
232
|
+
"@absolutejs/ai": ">=0.0.5",
|
|
232
233
|
"@angular/core": ">=21.0.0",
|
|
233
234
|
"elysia": ">=1.4.18",
|
|
234
235
|
"react": ">=19.0.0",
|
|
235
236
|
"vue": ">=3.5.0"
|
|
236
237
|
},
|
|
237
238
|
"peerDependenciesMeta": {
|
|
239
|
+
"@absolutejs/ai": {
|
|
240
|
+
"optional": true
|
|
241
|
+
},
|
|
238
242
|
"@angular/core": {
|
|
239
243
|
"optional": true
|
|
240
244
|
},
|
|
@@ -250,6 +254,7 @@
|
|
|
250
254
|
},
|
|
251
255
|
"devDependencies": {
|
|
252
256
|
"@absolutejs/absolute": "0.19.0-beta.646",
|
|
257
|
+
"@absolutejs/ai": "0.0.5",
|
|
253
258
|
"@angular/core": "^21.0.0",
|
|
254
259
|
"@types/bun": "1.3.9",
|
|
255
260
|
"@types/react": "19.2.0",
|