@absolutejs/voice 0.0.22-beta.478 → 0.0.22-beta.479

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ import type { Transcript, VoiceSessionHandle, VoiceSessionRecord } from "./types";
2
/**
 * Data handed to an answering-machine-detection (AMD) detector on each evaluation.
 *
 * As populated by the session's periodic AMD evaluation in the bundled runtime:
 * - `api`: the session handle, passed through unchanged.
 * - `audioLevel`: the most recent audio level the session recorded; `undefined`
 *   until an audio level has been recorded.
 * - `elapsedSinceFirstAudioMs`: milliseconds since the first audio level at or
 *   above the turn-detection speech threshold; `0` while no such audio was seen.
 * - `elapsedSinceLastTurnCommitMs`: milliseconds since the most recent turn
 *   commit was started; `0` while no commit has happened.
 * - `partialTranscript`: the current turn's partial text from the session snapshot.
 * - `session`: the session snapshot read for this evaluation.
 * - `transcripts`: the snapshot's transcripts followed by the current turn's transcripts.
 */
+ export type VoiceAMDDetectorInput<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
3
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
4
+ audioLevel: number | undefined;
5
+ elapsedSinceFirstAudioMs: number;
6
+ elapsedSinceLastTurnCommitMs: number;
7
+ partialTranscript: string;
8
+ session: TSession;
9
+ transcripts: Transcript[];
10
+ };
11
/**
 * Positive result of an AMD detector: returning one makes the session mark the
 * call as voicemail and stop further AMD evaluations.
 *
 * - `metadata`: forwarded as the `metadata` of the session's `markVoicemail` call.
 * - `reason`: label describing why the detector fired.
 *   NOTE(review): the bundled session evaluation shown in this release forwards
 *   only `metadata`; `reason` does not appear to be read there — confirm.
 */
+ export type VoiceAMDVerdict = {
12
+ metadata?: Record<string, unknown>;
13
+ reason?: string;
14
+ };
15
/**
 * Pluggable answering-machine detector, supplied to a voice session through its
 * `amd` option.
 *
 * - `evaluate`: called periodically with the current input; may answer
 *   synchronously or with a promise. Return a verdict to flag voicemail, or
 *   `undefined` to keep listening.
 * - `intervalMs`: time between evaluations; the session falls back to 1000 ms
 *   when omitted.
 */
+ export type VoiceAMDDetector<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
16
+ evaluate: (input: VoiceAMDDetectorInput<TContext, TSession, TResult>) => Promise<VoiceAMDVerdict | undefined> | VoiceAMDVerdict | undefined;
17
+ intervalMs?: number;
18
+ };
19
/**
 * Settings for `createMonologueAMDDetector`. Defaults are those applied by the
 * bundled implementation.
 *
 * - `intervalMs`: evaluation interval reported by the detector (default 1000).
 * - `minMonologueMs`: elapsed time that must be reached before a verdict is
 *   returned (default 8000).
 * - `reason`: value placed in the verdict's `reason`
 *   (default "monologue-suspected-voicemail").
 * - `requireFirstAudio`: when true (the default), no verdict is returned while
 *   `elapsedSinceFirstAudioMs` is 0 or less.
 */
+ export type MonologueAMDDetectorOptions = {
20
+ intervalMs?: number;
21
+ minMonologueMs?: number;
22
+ reason?: string;
23
+ requireFirstAudio?: boolean;
24
+ };
25
/**
 * Creates a detector that returns a voicemail verdict once the measured
 * monologue time reaches `minMonologueMs`. While the session has no turns the
 * time since first audio is measured; afterwards the time since the last turn
 * commit is measured.
 *
 * @param options Optional tuning; every field has a default.
 * @returns A detector suitable for a voice session's `amd` option.
 */
+ export declare const createMonologueAMDDetector: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options?: MonologueAMDDetectorOptions) => VoiceAMDDetector<TContext, TSession, TResult>;
package/dist/index.d.ts CHANGED
@@ -71,6 +71,8 @@ export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, crea
71
71
  export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool, } from "./agent";
72
72
  export { createAIVoiceModel } from "./aiVoiceModel";
73
73
  export type { CreateAIVoiceModelOptions } from "./aiVoiceModel";
74
+ export { createMonologueAMDDetector } from "./amdDetector";
75
+ export type { MonologueAMDDetectorOptions, VoiceAMDDetector, VoiceAMDDetectorInput, VoiceAMDVerdict, } from "./amdDetector";
74
76
  export { createVoiceRAGTool } from "./ragTool";
75
77
  export type { VoiceRAGCollectionLike, VoiceRAGQueryResult, VoiceRAGSearchInput, VoiceRAGToolArgs, VoiceRAGToolOptions, VoiceRAGToolResult, } from "./ragTool";
76
78
  export { createVoiceApiRequestTool, createVoiceDTMFTool, createVoiceEndCallTool, createVoiceTransferCallTool, createVoiceVoicemailDetectionTool, } from "./agentTools";
package/dist/index.js CHANGED
@@ -3682,6 +3682,66 @@ var createVoiceSession = (options) => {
3682
3682
  const currentTurnAudio = [];
3683
3683
  let fallbackAttemptsForCurrentTurn = 0;
3684
3684
  let fallbackReplayAudioMsForCurrentTurn = 0;
3685
+ const amdDetector = options.amd;
3686
+ let amdEvaluationTimer = null;
3687
+ let amdFired = false;
3688
+ let amdFirstAudioAt;
3689
+ let amdLastTurnCommitAt;
3690
+ let amdLastAudioLevel;
3691
// Cancels the periodic AMD evaluation when one is scheduled and drops the
// stored handle; does nothing when no timer is active.
const clearAmdEvaluationTimer = () => {
  if (!amdEvaluationTimer) {
    return;
  }
  const activeTimer = amdEvaluationTimer;
  amdEvaluationTimer = null;
  clearInterval(activeTimer);
};
3697
/**
 * Runs one answering-machine-detection pass: reads the current session, asks
 * the configured detector for a verdict and, on the first verdict, stops the
 * evaluation timer and marks the call as voicemail.
 *
 * Does nothing when no detector is configured or a verdict already fired.
 * Detector failures are logged and swallowed: the interval callback invokes
 * this function without awaiting it, so a throw or rejection from
 * `amdDetector.evaluate` would otherwise become an unhandled promise rejection
 * on every tick.
 */
const evaluateAmd = async () => {
  if (!amdDetector || amdFired) {
    return;
  }
  let snapshot;
  try {
    snapshot = await readSession();
  } catch {
    // Best effort: skip this tick when the session cannot be read.
    return;
  }
  const now = Date.now();
  let verdict;
  try {
    // `await` accepts both synchronous and promise-returning detectors.
    verdict = await amdDetector.evaluate({
      api,
      audioLevel: amdLastAudioLevel,
      // Both elapsed values report 0 until their reference timestamp exists.
      elapsedSinceFirstAudioMs: amdFirstAudioAt === undefined ? 0 : now - amdFirstAudioAt,
      elapsedSinceLastTurnCommitMs: amdLastTurnCommitAt === undefined ? 0 : now - amdLastTurnCommitAt,
      partialTranscript: snapshot.currentTurn.partialText,
      session: snapshot,
      transcripts: [
        ...snapshot.transcripts,
        ...snapshot.currentTurn.transcripts
      ]
    });
  } catch (error) {
    logger.warn("voice amd evaluate failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return;
  }
  // Re-check amdFired: an overlapping evaluation may have fired while this one awaited.
  if (!verdict || amdFired) {
    return;
  }
  amdFired = true;
  clearAmdEvaluationTimer();
  try {
    // Only the verdict's metadata is forwarded to markVoicemail.
    await api.markVoicemail({
      metadata: verdict.metadata
    });
  } catch (error) {
    logger.warn("voice amd markVoicemail failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  }
};
3736
// Schedules the recurring AMD evaluation. No-op when no detector is configured,
// when a timer is already running, or once a verdict has fired. The period is
// the detector's intervalMs, defaulting to 1000 ms.
const startAmdEvaluationTimer = () => {
  const alreadyHandled = Boolean(amdEvaluationTimer) || amdFired;
  if (!amdDetector || alreadyHandled) {
    return;
  }
  const runEvaluation = () => {
    evaluateAmd();
  };
  amdEvaluationTimer = setInterval(runEvaluation, amdDetector.intervalMs ?? 1000);
};
3685
3745
  const callSilenceTimeoutMs = options.callSilenceTimeoutMs && options.callSilenceTimeoutMs > 0 ? options.callSilenceTimeoutMs : undefined;
3686
3746
  let callSilenceWatchdog = null;
3687
3747
  let callSilenceFired = false;
@@ -4089,6 +4149,8 @@ var createVoiceSession = (options) => {
4089
4149
  recoverable: false,
4090
4150
  type: "error"
4091
4151
  });
4152
+ clearCallSilenceWatchdog();
4153
+ clearAmdEvaluationTimer();
4092
4154
  await closeTTSSession("failed");
4093
4155
  await closeAdapter("failed");
4094
4156
  await persistRecordings();
@@ -4158,6 +4220,8 @@ var createVoiceSession = (options) => {
4158
4220
  sessionId: options.id,
4159
4221
  type: "complete"
4160
4222
  });
4223
+ clearCallSilenceWatchdog();
4224
+ clearAmdEvaluationTimer();
4161
4225
  await closeTTSSession("complete");
4162
4226
  await closeAdapter("complete");
4163
4227
  await persistRecordings();
@@ -5015,6 +5079,7 @@ var createVoiceSession = (options) => {
5015
5079
  };
5016
5080
  const commitTurnInternal = async (reason = "manual") => {
5017
5081
  clearSilenceTimer();
5082
+ amdLastTurnCommitAt = Date.now();
5018
5083
  const session = await readSession();
5019
5084
  if (session.status === "completed" || session.status === "failed") {
5020
5085
  return;
@@ -5262,6 +5327,7 @@ var createVoiceSession = (options) => {
5262
5327
  await ensureAdapter();
5263
5328
  warmTTSSession();
5264
5329
  kickCallSilenceWatchdog();
5330
+ startAmdEvaluationTimer();
5265
5331
  };
5266
5332
  const disconnectInternal = async (event) => {
5267
5333
  clearSilenceTimer();
@@ -5309,7 +5375,11 @@ var createVoiceSession = (options) => {
5309
5375
  const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
5310
5376
  captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
5311
5377
  }
5378
+ amdLastAudioLevel = audioLevel;
5312
5379
  if (audioLevel >= turnDetection.speechThreshold) {
5380
+ if (amdFirstAudioAt === undefined) {
5381
+ amdFirstAudioAt = Date.now();
5382
+ }
5313
5383
  if (!speechDetected && activeTTSTurnId !== undefined) {
5314
5384
  cancelActiveTTS("barge-in");
5315
5385
  }
@@ -5342,6 +5412,7 @@ var createVoiceSession = (options) => {
5342
5412
  });
5343
5413
  clearSilenceTimer();
5344
5414
  clearCallSilenceWatchdog();
5415
+ clearAmdEvaluationTimer();
5345
5416
  await closeTTSSession(reason);
5346
5417
  await closeAdapter(reason);
5347
5418
  await persistRecordings();
@@ -34864,6 +34935,36 @@ var createAIVoiceModel = (options) => ({
34864
34935
  return output;
34865
34936
  }
34866
34937
  });
34938
+ // src/amdDetector.ts
34939
+ var createMonologueAMDDetector = (options = {}) => {
34940
+ const minMonologueMs = options.minMonologueMs ?? 8000;
34941
+ const reason = options.reason ?? "monologue-suspected-voicemail";
34942
+ const requireFirstAudio = options.requireFirstAudio ?? true;
34943
+ return {
34944
+ evaluate: ({
34945
+ elapsedSinceFirstAudioMs,
34946
+ elapsedSinceLastTurnCommitMs,
34947
+ session
34948
+ }) => {
34949
+ if (requireFirstAudio && elapsedSinceFirstAudioMs <= 0) {
34950
+ return;
34951
+ }
34952
+ const noTurnsYet = session.turns.length === 0;
34953
+ const monologueElapsed = noTurnsYet ? elapsedSinceFirstAudioMs : elapsedSinceLastTurnCommitMs;
34954
+ if (monologueElapsed < minMonologueMs) {
34955
+ return;
34956
+ }
34957
+ return {
34958
+ metadata: {
34959
+ detector: "monologue",
34960
+ monologueMs: monologueElapsed
34961
+ },
34962
+ reason
34963
+ };
34964
+ },
34965
+ intervalMs: options.intervalMs ?? 1000
34966
+ };
34967
+ };
34867
34968
  // src/ragTool.ts
34868
34969
  var DEFAULT_TOOL_NAME = "searchKnowledgeBase";
34869
34970
  var DEFAULT_DESCRIPTION = "Search the knowledge base and return short grounded citations. Use this whenever the caller asks a question that may be answered by indexed reference material.";
@@ -46032,6 +46133,7 @@ export {
46032
46133
  createPhraseHintCorrectionHandler,
46033
46134
  createOpenAIVoiceTTS,
46034
46135
  createOpenAIVoiceAssistantModel,
46136
+ createMonologueAMDDetector,
46035
46137
  createMemoryVoiceTelnyxWebhookEventStore,
46036
46138
  createMemoryVoiceTelephonyWebhookIdempotencyStore,
46037
46139
  createMemoryVoicePlivoWebhookNonceStore,
@@ -5650,6 +5650,66 @@ var createVoiceSession = (options) => {
5650
5650
  const currentTurnAudio = [];
5651
5651
  let fallbackAttemptsForCurrentTurn = 0;
5652
5652
  let fallbackReplayAudioMsForCurrentTurn = 0;
5653
+ const amdDetector = options.amd;
5654
+ let amdEvaluationTimer = null;
5655
+ let amdFired = false;
5656
+ let amdFirstAudioAt;
5657
+ let amdLastTurnCommitAt;
5658
+ let amdLastAudioLevel;
5659
// Cancels the periodic AMD evaluation when one is scheduled and drops the
// stored handle; does nothing when no timer is active.
const clearAmdEvaluationTimer = () => {
  if (!amdEvaluationTimer) {
    return;
  }
  const activeTimer = amdEvaluationTimer;
  amdEvaluationTimer = null;
  clearInterval(activeTimer);
};
5665
/**
 * Runs one answering-machine-detection pass: reads the current session, asks
 * the configured detector for a verdict and, on the first verdict, stops the
 * evaluation timer and marks the call as voicemail.
 *
 * Does nothing when no detector is configured or a verdict already fired.
 * Detector failures are logged and swallowed: the interval callback invokes
 * this function without awaiting it, so a throw or rejection from
 * `amdDetector.evaluate` would otherwise become an unhandled promise rejection
 * on every tick.
 */
const evaluateAmd = async () => {
  if (!amdDetector || amdFired) {
    return;
  }
  let snapshot;
  try {
    snapshot = await readSession();
  } catch {
    // Best effort: skip this tick when the session cannot be read.
    return;
  }
  const now = Date.now();
  let verdict;
  try {
    // `await` accepts both synchronous and promise-returning detectors.
    verdict = await amdDetector.evaluate({
      api,
      audioLevel: amdLastAudioLevel,
      // Both elapsed values report 0 until their reference timestamp exists.
      elapsedSinceFirstAudioMs: amdFirstAudioAt === undefined ? 0 : now - amdFirstAudioAt,
      elapsedSinceLastTurnCommitMs: amdLastTurnCommitAt === undefined ? 0 : now - amdLastTurnCommitAt,
      partialTranscript: snapshot.currentTurn.partialText,
      session: snapshot,
      transcripts: [
        ...snapshot.transcripts,
        ...snapshot.currentTurn.transcripts
      ]
    });
  } catch (error) {
    logger.warn("voice amd evaluate failed", {
      error: toError(error).message,
      sessionId: options.id
    });
    return;
  }
  // Re-check amdFired: an overlapping evaluation may have fired while this one awaited.
  if (!verdict || amdFired) {
    return;
  }
  amdFired = true;
  clearAmdEvaluationTimer();
  try {
    // Only the verdict's metadata is forwarded to markVoicemail.
    await api.markVoicemail({
      metadata: verdict.metadata
    });
  } catch (error) {
    logger.warn("voice amd markVoicemail failed", {
      error: toError(error).message,
      sessionId: options.id
    });
  }
};
5704
// Schedules the recurring AMD evaluation. No-op when no detector is configured,
// when a timer is already running, or once a verdict has fired. The period is
// the detector's intervalMs, defaulting to 1000 ms.
const startAmdEvaluationTimer = () => {
  const alreadyHandled = Boolean(amdEvaluationTimer) || amdFired;
  if (!amdDetector || alreadyHandled) {
    return;
  }
  const runEvaluation = () => {
    evaluateAmd();
  };
  amdEvaluationTimer = setInterval(runEvaluation, amdDetector.intervalMs ?? 1000);
};
5653
5713
  const callSilenceTimeoutMs = options.callSilenceTimeoutMs && options.callSilenceTimeoutMs > 0 ? options.callSilenceTimeoutMs : undefined;
5654
5714
  let callSilenceWatchdog = null;
5655
5715
  let callSilenceFired = false;
@@ -6057,6 +6117,8 @@ var createVoiceSession = (options) => {
6057
6117
  recoverable: false,
6058
6118
  type: "error"
6059
6119
  });
6120
+ clearCallSilenceWatchdog();
6121
+ clearAmdEvaluationTimer();
6060
6122
  await closeTTSSession("failed");
6061
6123
  await closeAdapter("failed");
6062
6124
  await persistRecordings();
@@ -6126,6 +6188,8 @@ var createVoiceSession = (options) => {
6126
6188
  sessionId: options.id,
6127
6189
  type: "complete"
6128
6190
  });
6191
+ clearCallSilenceWatchdog();
6192
+ clearAmdEvaluationTimer();
6129
6193
  await closeTTSSession("complete");
6130
6194
  await closeAdapter("complete");
6131
6195
  await persistRecordings();
@@ -6983,6 +7047,7 @@ var createVoiceSession = (options) => {
6983
7047
  };
6984
7048
  const commitTurnInternal = async (reason = "manual") => {
6985
7049
  clearSilenceTimer();
7050
+ amdLastTurnCommitAt = Date.now();
6986
7051
  const session = await readSession();
6987
7052
  if (session.status === "completed" || session.status === "failed") {
6988
7053
  return;
@@ -7230,6 +7295,7 @@ var createVoiceSession = (options) => {
7230
7295
  await ensureAdapter();
7231
7296
  warmTTSSession();
7232
7297
  kickCallSilenceWatchdog();
7298
+ startAmdEvaluationTimer();
7233
7299
  };
7234
7300
  const disconnectInternal = async (event) => {
7235
7301
  clearSilenceTimer();
@@ -7277,7 +7343,11 @@ var createVoiceSession = (options) => {
7277
7343
  const userBytes = conditionedAudio instanceof Uint8Array ? conditionedAudio : conditionedAudio instanceof ArrayBuffer ? new Uint8Array(conditionedAudio) : new Uint8Array(conditionedAudio.buffer, conditionedAudio.byteOffset, conditionedAudio.byteLength);
7278
7344
  captureRecordingChunk("user", userBytes, recordingConfig.userInputFormat);
7279
7345
  }
7346
+ amdLastAudioLevel = audioLevel;
7280
7347
  if (audioLevel >= turnDetection.speechThreshold) {
7348
+ if (amdFirstAudioAt === undefined) {
7349
+ amdFirstAudioAt = Date.now();
7350
+ }
7281
7351
  if (!speechDetected && activeTTSTurnId !== undefined) {
7282
7352
  cancelActiveTTS("barge-in");
7283
7353
  }
@@ -7310,6 +7380,7 @@ var createVoiceSession = (options) => {
7310
7380
  });
7311
7381
  clearSilenceTimer();
7312
7382
  clearCallSilenceWatchdog();
7383
+ clearAmdEvaluationTimer();
7313
7384
  await closeTTSSession(reason);
7314
7385
  await closeAdapter(reason);
7315
7386
  await persistRecordings();
package/dist/types.d.ts CHANGED
@@ -725,6 +725,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
725
725
  trace?: VoiceTraceEventStore;
726
726
  recording?: VoiceSessionRecordingConfig;
727
727
  callSilenceTimeoutMs?: number;
728
+ amd?: import("./amdDetector").VoiceAMDDetector<TContext, TSession, TResult>;
728
729
  reconnect: Required<VoiceReconnectConfig>;
729
730
  phraseHints?: VoicePhraseHint[];
730
731
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.478",
3
+ "version": "0.0.22-beta.479",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",