@absolutejs/voice 0.0.22-beta.478 → 0.0.22-beta.479
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/amdDetector.d.ts +25 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +102 -0
- package/dist/testing/index.js +71 -0
- package/dist/types.d.ts +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Transcript, VoiceSessionHandle, VoiceSessionRecord } from "./types";
|
|
2
|
+
/**
 * Snapshot handed to a {@link VoiceAMDDetector} on every evaluation tick.
 *
 * All elapsed values are wall-clock milliseconds measured at the moment the
 * tick runs.
 */
export type VoiceAMDDetectorInput<
  TContext = unknown,
  TSession extends VoiceSessionRecord = VoiceSessionRecord,
  TResult = unknown
> = {
  /** Handle of the session being evaluated. */
  api: VoiceSessionHandle<TContext, TSession, TResult>;
  /** Level of the most recently received audio chunk; `undefined` until audio has arrived. */
  audioLevel: number | undefined;
  /**
   * Time since the first audio chunk at or above the speech threshold.
   * `0` while no such chunk has been seen.
   */
  elapsedSinceFirstAudioMs: number;
  /**
   * Time since the most recent turn commit was started.
   * `0` while no commit has been attempted.
   */
  elapsedSinceLastTurnCommitMs: number;
  /** Partial text of the turn currently in progress. */
  partialTranscript: string;
  /** Session record read for this tick. */
  session: TSession;
  /** Session transcripts followed by the transcripts of the in-progress turn. */
  transcripts: Transcript[];
};
|
|
11
|
+
/**
 * Positive answering-machine verdict. A detector returns one of these to have
 * the session marked as voicemail; returning `undefined` means "no decision yet".
 */
export type VoiceAMDVerdict = {
  /** Free-form details that the session passes along when it marks the call as voicemail. */
  metadata?: Record<string, unknown>;
  /**
   * Short label describing why the detector fired.
   * NOTE(review): the session-side AMD loop in this release forwards only
   * `metadata`; confirm whether `reason` is meant to be consumed elsewhere.
   */
  reason?: string;
};
|
|
15
|
+
/**
 * Pluggable answering-machine detector that a voice session polls on a timer.
 */
export type VoiceAMDDetector<
  TContext = unknown,
  TSession extends VoiceSessionRecord = VoiceSessionRecord,
  TResult = unknown
> = {
  /**
   * Inspects the current session snapshot. Return (or resolve to) a verdict to
   * flag the call as voicemail, or `undefined` to keep listening. May be
   * synchronous or asynchronous.
   */
  evaluate: (
    input: VoiceAMDDetectorInput<TContext, TSession, TResult>
  ) => Promise<VoiceAMDVerdict | undefined> | VoiceAMDVerdict | undefined;
  /** Polling period in milliseconds; the session falls back to 1000 when omitted. */
  intervalMs?: number;
};
|
|
19
|
+
/** Tuning knobs for {@link createMonologueAMDDetector}. Every field is optional. */
export type MonologueAMDDetectorOptions = {
  /** Polling period reported on the detector, in milliseconds. Defaults to 1000. */
  intervalMs?: number;
  /** Elapsed time, in milliseconds, after which a verdict is produced. Defaults to 8000. */
  minMonologueMs?: number;
  /** Value placed in the verdict's `reason`. Defaults to "monologue-suspected-voicemail". */
  reason?: string;
  /** When true (the default), nothing is reported until some audio has been detected. */
  requireFirstAudio?: boolean;
};
|
|
25
|
+
/**
 * Creates a detector that reports a voicemail verdict once the measured
 * "monologue" time reaches `minMonologueMs`: time since first audio while the
 * session has no turns, otherwise time since the last turn commit.
 */
export declare const createMonologueAMDDetector: <
  TContext = unknown,
  TSession extends VoiceSessionRecord = VoiceSessionRecord,
  TResult = unknown
>(
  options?: MonologueAMDDetectorOptions
) => VoiceAMDDetector<TContext, TSession, TResult>;
|
package/dist/index.d.ts
CHANGED
|
@@ -71,6 +71,8 @@ export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, crea
|
|
|
71
71
|
export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool, } from "./agent";
|
|
72
72
|
export { createAIVoiceModel } from "./aiVoiceModel";
|
|
73
73
|
export type { CreateAIVoiceModelOptions } from "./aiVoiceModel";
|
|
74
|
+
export { createMonologueAMDDetector } from "./amdDetector";
|
|
75
|
+
export type { MonologueAMDDetectorOptions, VoiceAMDDetector, VoiceAMDDetectorInput, VoiceAMDVerdict, } from "./amdDetector";
|
|
74
76
|
export { createVoiceRAGTool } from "./ragTool";
|
|
75
77
|
export type { VoiceRAGCollectionLike, VoiceRAGQueryResult, VoiceRAGSearchInput, VoiceRAGToolArgs, VoiceRAGToolOptions, VoiceRAGToolResult, } from "./ragTool";
|
|
76
78
|
export { createVoiceApiRequestTool, createVoiceDTMFTool, createVoiceEndCallTool, createVoiceTransferCallTool, createVoiceVoicemailDetectionTool, } from "./agentTools";
|
package/dist/index.js
CHANGED
|
@@ -3682,6 +3682,66 @@ var createVoiceSession = (options) => {
|
|
|
3682
3682
|
const currentTurnAudio = [];
|
|
3683
3683
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
3684
3684
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
3685
|
+
// --- Answering-machine detection (AMD) state for this session ---
// Optional detector supplied by the caller; AMD is inert when it is absent.
const amdDetector = options.amd;
// Handle of the polling interval, or null while no polling is scheduled.
let amdEvaluationTimer = null;
// Latched once a verdict has been accepted so the session is marked at most once.
let amdFired = false;
// Timestamps (ms since epoch) and last audio level; undefined until first observed.
let amdFirstAudioAt = undefined;
let amdLastTurnCommitAt = undefined;
let amdLastAudioLevel = undefined;
|
|
3691
|
+
// Cancels the periodic AMD evaluation if one is scheduled; a no-op otherwise.
const clearAmdEvaluationTimer = () => {
  if (!amdEvaluationTimer) {
    return;
  }
  clearInterval(amdEvaluationTimer);
  amdEvaluationTimer = null;
};
|
|
3697
|
+
/**
 * Runs a single AMD evaluation tick.
 *
 * Reads the current session, asks the configured detector for a verdict and,
 * on the first positive verdict, latches `amdFired`, stops the polling timer
 * and marks the session as voicemail.
 *
 * This function is invoked fire-and-forget from a timer, so it must never
 * reject: every awaited step is guarded and failures are logged or skipped.
 *
 * NOTE(review): only `verdict.metadata` is forwarded to `markVoicemail`;
 * `verdict.reason` is currently unused here — confirm whether it should be.
 */
const evaluateAmd = async () => {
  if (!amdDetector || amdFired) {
    return;
  }
  let snapshot;
  try {
    snapshot = await readSession();
  } catch {
    // Best effort: if the session cannot be read, skip this tick and retry on the next one.
    return;
  }
  const now = Date.now();
  let verdict;
  try {
    verdict = await amdDetector.evaluate({
      api,
      audioLevel: amdLastAudioLevel,
      // Both elapsed values report 0 until their reference timestamp exists.
      elapsedSinceFirstAudioMs: amdFirstAudioAt === undefined ? 0 : now - amdFirstAudioAt,
      elapsedSinceLastTurnCommitMs: amdLastTurnCommitAt === undefined ? 0 : now - amdLastTurnCommitAt,
      partialTranscript: snapshot.currentTurn.partialText,
      session: snapshot,
      transcripts: [
        ...snapshot.transcripts,
        ...snapshot.currentTurn.transcripts
      ]
    });
  } catch (error) {
    // A throwing or rejecting detector would otherwise surface as an unhandled
    // promise rejection, because nothing awaits the timer callback.
    logger.warn("voice amd evaluate failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return;
  }
  // Re-check the latch: another tick may have fired while this one was awaiting.
  if (!verdict || amdFired) {
    return;
  }
  amdFired = true;
  clearAmdEvaluationTimer();
  try {
    await api.markVoicemail({
      metadata: verdict.metadata
    });
  } catch (error) {
    logger.warn("voice amd markVoicemail failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  }
};
|
|
3736
|
+
// Begins polling the AMD detector. Does nothing when no detector is configured,
// when polling is already running, or when a verdict has already fired.
const startAmdEvaluationTimer = () => {
  const pollingOrDone = Boolean(amdEvaluationTimer) || amdFired;
  if (!amdDetector || pollingOrDone) {
    return;
  }
  // The detector may omit its interval; poll once per second in that case.
  const periodMs = amdDetector.intervalMs ?? 1000;
  const tick = () => {
    evaluateAmd();
  };
  amdEvaluationTimer = setInterval(tick, periodMs);
};
|
|
3685
3745
|
const callSilenceTimeoutMs = options.callSilenceTimeoutMs && options.callSilenceTimeoutMs > 0 ? options.callSilenceTimeoutMs : undefined;
|
|
3686
3746
|
let callSilenceWatchdog = null;
|
|
3687
3747
|
let callSilenceFired = false;
|
|
@@ -4089,6 +4149,8 @@ var createVoiceSession = (options) => {
|
|
|
4089
4149
|
recoverable: false,
|
|
4090
4150
|
type: "error"
|
|
4091
4151
|
});
|
|
4152
|
+
clearCallSilenceWatchdog();
|
|
4153
|
+
clearAmdEvaluationTimer();
|
|
4092
4154
|
await closeTTSSession("failed");
|
|
4093
4155
|
await closeAdapter("failed");
|
|
4094
4156
|
await persistRecordings();
|
|
@@ -4158,6 +4220,8 @@ var createVoiceSession = (options) => {
|
|
|
4158
4220
|
sessionId: options.id,
|
|
4159
4221
|
type: "complete"
|
|
4160
4222
|
});
|
|
4223
|
+
clearCallSilenceWatchdog();
|
|
4224
|
+
clearAmdEvaluationTimer();
|
|
4161
4225
|
await closeTTSSession("complete");
|
|
4162
4226
|
await closeAdapter("complete");
|
|
4163
4227
|
await persistRecordings();
|
|
@@ -5015,6 +5079,7 @@ var createVoiceSession = (options) => {
|
|
|
5015
5079
|
};
|
|
5016
5080
|
const commitTurnInternal = async (reason = "manual") => {
|
|
5017
5081
|
clearSilenceTimer();
|
|
5082
|
+
amdLastTurnCommitAt = Date.now();
|
|
5018
5083
|
const session = await readSession();
|
|
5019
5084
|
if (session.status === "completed" || session.status === "failed") {
|
|
5020
5085
|
return;
|
|
@@ -5262,6 +5327,7 @@ var createVoiceSession = (options) => {
|
|
|
5262
5327
|
await ensureAdapter();
|
|
5263
5328
|
warmTTSSession();
|
|
5264
5329
|
kickCallSilenceWatchdog();
|
|
5330
|
+
startAmdEvaluationTimer();
|
|
5265
5331
|
};
|
|
5266
5332
|
const disconnectInternal = async (event) => {
|
|
5267
5333
|
clearSilenceTimer();
|
|
@@ -5309,7 +5375,11 @@ var createVoiceSession = (options) => {
|
|
|
5309
5375
|
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
5310
5376
|
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
5311
5377
|
}
|
|
5378
|
+
amdLastAudioLevel = audioLevel;
|
|
5312
5379
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
5380
|
+
if (amdFirstAudioAt === undefined) {
|
|
5381
|
+
amdFirstAudioAt = Date.now();
|
|
5382
|
+
}
|
|
5313
5383
|
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
5314
5384
|
cancelActiveTTS("barge-in");
|
|
5315
5385
|
}
|
|
@@ -5342,6 +5412,7 @@ var createVoiceSession = (options) => {
|
|
|
5342
5412
|
});
|
|
5343
5413
|
clearSilenceTimer();
|
|
5344
5414
|
clearCallSilenceWatchdog();
|
|
5415
|
+
clearAmdEvaluationTimer();
|
|
5345
5416
|
await closeTTSSession(reason);
|
|
5346
5417
|
await closeAdapter(reason);
|
|
5347
5418
|
await persistRecordings();
|
|
@@ -34864,6 +34935,36 @@ var createAIVoiceModel = (options) => ({
|
|
|
34864
34935
|
return output;
|
|
34865
34936
|
}
|
|
34866
34937
|
});
|
|
34938
|
+
// src/amdDetector.ts
|
|
34939
|
+
var createMonologueAMDDetector = (options = {}) => {
|
|
34940
|
+
const minMonologueMs = options.minMonologueMs ?? 8000;
|
|
34941
|
+
const reason = options.reason ?? "monologue-suspected-voicemail";
|
|
34942
|
+
const requireFirstAudio = options.requireFirstAudio ?? true;
|
|
34943
|
+
return {
|
|
34944
|
+
evaluate: ({
|
|
34945
|
+
elapsedSinceFirstAudioMs,
|
|
34946
|
+
elapsedSinceLastTurnCommitMs,
|
|
34947
|
+
session
|
|
34948
|
+
}) => {
|
|
34949
|
+
if (requireFirstAudio && elapsedSinceFirstAudioMs <= 0) {
|
|
34950
|
+
return;
|
|
34951
|
+
}
|
|
34952
|
+
const noTurnsYet = session.turns.length === 0;
|
|
34953
|
+
const monologueElapsed = noTurnsYet ? elapsedSinceFirstAudioMs : elapsedSinceLastTurnCommitMs;
|
|
34954
|
+
if (monologueElapsed < minMonologueMs) {
|
|
34955
|
+
return;
|
|
34956
|
+
}
|
|
34957
|
+
return {
|
|
34958
|
+
metadata: {
|
|
34959
|
+
detector: "monologue",
|
|
34960
|
+
monologueMs: monologueElapsed
|
|
34961
|
+
},
|
|
34962
|
+
reason
|
|
34963
|
+
};
|
|
34964
|
+
},
|
|
34965
|
+
intervalMs: options.intervalMs ?? 1000
|
|
34966
|
+
};
|
|
34967
|
+
};
|
|
34867
34968
|
// src/ragTool.ts
|
|
34868
34969
|
var DEFAULT_TOOL_NAME = "searchKnowledgeBase";
|
|
34869
34970
|
var DEFAULT_DESCRIPTION = "Search the knowledge base and return short grounded citations. Use this whenever the caller asks a question that may be answered by indexed reference material.";
|
|
@@ -46032,6 +46133,7 @@ export {
|
|
|
46032
46133
|
createPhraseHintCorrectionHandler,
|
|
46033
46134
|
createOpenAIVoiceTTS,
|
|
46034
46135
|
createOpenAIVoiceAssistantModel,
|
|
46136
|
+
createMonologueAMDDetector,
|
|
46035
46137
|
createMemoryVoiceTelnyxWebhookEventStore,
|
|
46036
46138
|
createMemoryVoiceTelephonyWebhookIdempotencyStore,
|
|
46037
46139
|
createMemoryVoicePlivoWebhookNonceStore,
|
package/dist/testing/index.js
CHANGED
|
@@ -5650,6 +5650,66 @@ var createVoiceSession = (options) => {
|
|
|
5650
5650
|
const currentTurnAudio = [];
|
|
5651
5651
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
5652
5652
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
5653
|
+
// --- Answering-machine detection (AMD) state for this session ---
// Optional detector supplied by the caller; AMD is inert when it is absent.
const amdDetector = options.amd;
// Handle of the polling interval, or null while no polling is scheduled.
let amdEvaluationTimer = null;
// Latched once a verdict has been accepted so the session is marked at most once.
let amdFired = false;
// Timestamps (ms since epoch) and last audio level; undefined until first observed.
let amdFirstAudioAt = undefined;
let amdLastTurnCommitAt = undefined;
let amdLastAudioLevel = undefined;
|
|
5659
|
+
// Cancels the periodic AMD evaluation if one is scheduled; a no-op otherwise.
const clearAmdEvaluationTimer = () => {
  if (!amdEvaluationTimer) {
    return;
  }
  clearInterval(amdEvaluationTimer);
  amdEvaluationTimer = null;
};
|
|
5665
|
+
/**
 * Runs a single AMD evaluation tick.
 *
 * Reads the current session, asks the configured detector for a verdict and,
 * on the first positive verdict, latches `amdFired`, stops the polling timer
 * and marks the session as voicemail.
 *
 * This function is invoked fire-and-forget from a timer, so it must never
 * reject: every awaited step is guarded and failures are logged or skipped.
 *
 * NOTE(review): only `verdict.metadata` is forwarded to `markVoicemail`;
 * `verdict.reason` is currently unused here — confirm whether it should be.
 */
const evaluateAmd = async () => {
  if (!amdDetector || amdFired) {
    return;
  }
  let snapshot;
  try {
    snapshot = await readSession();
  } catch {
    // Best effort: if the session cannot be read, skip this tick and retry on the next one.
    return;
  }
  const now = Date.now();
  let verdict;
  try {
    verdict = await amdDetector.evaluate({
      api,
      audioLevel: amdLastAudioLevel,
      // Both elapsed values report 0 until their reference timestamp exists.
      elapsedSinceFirstAudioMs: amdFirstAudioAt === undefined ? 0 : now - amdFirstAudioAt,
      elapsedSinceLastTurnCommitMs: amdLastTurnCommitAt === undefined ? 0 : now - amdLastTurnCommitAt,
      partialTranscript: snapshot.currentTurn.partialText,
      session: snapshot,
      transcripts: [
        ...snapshot.transcripts,
        ...snapshot.currentTurn.transcripts
      ]
    });
  } catch (error) {
    // A throwing or rejecting detector would otherwise surface as an unhandled
    // promise rejection, because nothing awaits the timer callback.
    logger.warn("voice amd evaluate failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return;
  }
  // Re-check the latch: another tick may have fired while this one was awaiting.
  if (!verdict || amdFired) {
    return;
  }
  amdFired = true;
  clearAmdEvaluationTimer();
  try {
    await api.markVoicemail({
      metadata: verdict.metadata
    });
  } catch (error) {
    logger.warn("voice amd markVoicemail failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  }
};
|
|
5704
|
+
// Begins polling the AMD detector. Does nothing when no detector is configured,
// when polling is already running, or when a verdict has already fired.
const startAmdEvaluationTimer = () => {
  const pollingOrDone = Boolean(amdEvaluationTimer) || amdFired;
  if (!amdDetector || pollingOrDone) {
    return;
  }
  // The detector may omit its interval; poll once per second in that case.
  const periodMs = amdDetector.intervalMs ?? 1000;
  const tick = () => {
    evaluateAmd();
  };
  amdEvaluationTimer = setInterval(tick, periodMs);
};
|
|
5653
5713
|
const callSilenceTimeoutMs = options.callSilenceTimeoutMs && options.callSilenceTimeoutMs > 0 ? options.callSilenceTimeoutMs : undefined;
|
|
5654
5714
|
let callSilenceWatchdog = null;
|
|
5655
5715
|
let callSilenceFired = false;
|
|
@@ -6057,6 +6117,8 @@ var createVoiceSession = (options) => {
|
|
|
6057
6117
|
recoverable: false,
|
|
6058
6118
|
type: "error"
|
|
6059
6119
|
});
|
|
6120
|
+
clearCallSilenceWatchdog();
|
|
6121
|
+
clearAmdEvaluationTimer();
|
|
6060
6122
|
await closeTTSSession("failed");
|
|
6061
6123
|
await closeAdapter("failed");
|
|
6062
6124
|
await persistRecordings();
|
|
@@ -6126,6 +6188,8 @@ var createVoiceSession = (options) => {
|
|
|
6126
6188
|
sessionId: options.id,
|
|
6127
6189
|
type: "complete"
|
|
6128
6190
|
});
|
|
6191
|
+
clearCallSilenceWatchdog();
|
|
6192
|
+
clearAmdEvaluationTimer();
|
|
6129
6193
|
await closeTTSSession("complete");
|
|
6130
6194
|
await closeAdapter("complete");
|
|
6131
6195
|
await persistRecordings();
|
|
@@ -6983,6 +7047,7 @@ var createVoiceSession = (options) => {
|
|
|
6983
7047
|
};
|
|
6984
7048
|
const commitTurnInternal = async (reason = "manual") => {
|
|
6985
7049
|
clearSilenceTimer();
|
|
7050
|
+
amdLastTurnCommitAt = Date.now();
|
|
6986
7051
|
const session = await readSession();
|
|
6987
7052
|
if (session.status === "completed" || session.status === "failed") {
|
|
6988
7053
|
return;
|
|
@@ -7230,6 +7295,7 @@ var createVoiceSession = (options) => {
|
|
|
7230
7295
|
await ensureAdapter();
|
|
7231
7296
|
warmTTSSession();
|
|
7232
7297
|
kickCallSilenceWatchdog();
|
|
7298
|
+
startAmdEvaluationTimer();
|
|
7233
7299
|
};
|
|
7234
7300
|
const disconnectInternal = async (event) => {
|
|
7235
7301
|
clearSilenceTimer();
|
|
@@ -7277,7 +7343,11 @@ var createVoiceSession = (options) => {
|
|
|
7277
7343
|
const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
|
|
7278
7344
|
captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
|
|
7279
7345
|
}
|
|
7346
|
+
amdLastAudioLevel = audioLevel;
|
|
7280
7347
|
if (audioLevel >= turnDetection.speechThreshold) {
|
|
7348
|
+
if (amdFirstAudioAt === undefined) {
|
|
7349
|
+
amdFirstAudioAt = Date.now();
|
|
7350
|
+
}
|
|
7281
7351
|
if (!speechDetected && activeTTSTurnId !== undefined) {
|
|
7282
7352
|
cancelActiveTTS("barge-in");
|
|
7283
7353
|
}
|
|
@@ -7310,6 +7380,7 @@ var createVoiceSession = (options) => {
|
|
|
7310
7380
|
});
|
|
7311
7381
|
clearSilenceTimer();
|
|
7312
7382
|
clearCallSilenceWatchdog();
|
|
7383
|
+
clearAmdEvaluationTimer();
|
|
7313
7384
|
await closeTTSSession(reason);
|
|
7314
7385
|
await closeAdapter(reason);
|
|
7315
7386
|
await persistRecordings();
|
package/dist/types.d.ts
CHANGED
|
@@ -725,6 +725,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
725
725
|
trace?: VoiceTraceEventStore;
|
|
726
726
|
recording?: VoiceSessionRecordingConfig;
|
|
727
727
|
callSilenceTimeoutMs?: number;
|
|
728
|
+
amd?: import("./amdDetector").VoiceAMDDetector<TContext, TSession, TResult>;
|
|
728
729
|
reconnect: Required<VoiceReconnectConfig>;
|
|
729
730
|
phraseHints?: VoicePhraseHint[];
|
|
730
731
|
sessionMetadata?: Record<string, unknown>;
|