@absolutejs/voice 0.0.22-beta.562 → 0.0.22-beta.564
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/types.d.ts +14 -0
- package/dist/index.js +44 -3
- package/dist/telephony/twilio.d.ts +8 -0
- package/dist/testing/index.js +44 -3
- package/package.json +1 -1
package/dist/core/types.d.ts
CHANGED
|
@@ -913,6 +913,20 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
913
913
|
* Set `fillerPhrases: []` (or omit) to disable. Reasonable defaults if
|
|
914
914
|
* you enable: `["Hmm.", "Got it.", "Right.", "Mm-hm.", "Let me think.", "Okay."]`.
|
|
915
915
|
*/
|
|
916
|
+
/**
|
|
917
|
+
* Minimum word count in an STT partial transcript before speech-gated
|
|
918
|
+
* barge-in cancels the in-flight assistant TTS. Default 1 (any non-empty
|
|
919
|
+
* partial triggers barge-in — backwards-compatible).
|
|
920
|
+
*
|
|
921
|
+
* Set to 2 (or higher) on phone routes where the caller's brief
|
|
922
|
+
* acknowledgements ("yeah", "uh-huh", "you", "am i") would otherwise
|
|
923
|
+
* cut the bot off mid-question. Each extra word added typically delays
|
|
924
|
+
* barge-in by ~100-200ms (one extra STT partial cycle) — cheap compared
|
|
925
|
+
* to losing the bot's response.
|
|
926
|
+
*
|
|
927
|
+
* Word splitting is whitespace-based. Punctuation is left attached.
|
|
928
|
+
*/
|
|
929
|
+
bargeInMinPartialWords?: number;
|
|
916
930
|
fillerPhrases?: ReadonlyArray<string>;
|
|
917
931
|
/** Milliseconds after turn-commit before the filler fires. Default 250ms — short enough to feel instant, long enough to skip if the LLM is very fast. */
|
|
918
932
|
fillerDelayMs?: number;
|
package/dist/index.js
CHANGED
|
@@ -3852,7 +3852,7 @@ var createVoiceSession = (options) => {
|
|
|
3852
3852
|
};
|
|
3853
3853
|
const appendTurnLatencyStage = async (input) => appendTrace({
|
|
3854
3854
|
at: input.at,
|
|
3855
|
-
payload: { stage: input.stage },
|
|
3855
|
+
payload: { stage: input.stage, ...input.metadata ?? {} },
|
|
3856
3856
|
session: input.session,
|
|
3857
3857
|
turnId: input.turnId,
|
|
3858
3858
|
type: "turn_latency.stage"
|
|
@@ -3875,6 +3875,7 @@ var createVoiceSession = (options) => {
|
|
|
3875
3875
|
let fillerToken = 0;
|
|
3876
3876
|
const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
|
|
3877
3877
|
const fillerDelayMs = options.fillerDelayMs ?? 250;
|
|
3878
|
+
const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
|
|
3878
3879
|
const fillerFor = options.fillerFor;
|
|
3879
3880
|
const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
|
|
3880
3881
|
const currentTurnAudio = [];
|
|
@@ -4261,6 +4262,11 @@ var createVoiceSession = (options) => {
|
|
|
4261
4262
|
return;
|
|
4262
4263
|
}
|
|
4263
4264
|
activeTTSTurnId = undefined;
|
|
4265
|
+
appendTurnLatencyStage({
|
|
4266
|
+
metadata: { reason },
|
|
4267
|
+
stage: "tts_canceled",
|
|
4268
|
+
turnId: cancelledTurnId
|
|
4269
|
+
}).catch(() => {});
|
|
4264
4270
|
Promise.resolve(socket.clear?.()).catch(() => {});
|
|
4265
4271
|
if (!ttsAdapterSessionCanCancel(activeSession)) {
|
|
4266
4272
|
return;
|
|
@@ -4919,8 +4925,33 @@ var createVoiceSession = (options) => {
|
|
|
4919
4925
|
};
|
|
4920
4926
|
};
|
|
4921
4927
|
const handlePartial = async (transcript) => {
|
|
4922
|
-
if (activeTTSTurnId !== undefined
|
|
4923
|
-
|
|
4928
|
+
if (activeTTSTurnId !== undefined) {
|
|
4929
|
+
const triggeringText = transcript.text.trim();
|
|
4930
|
+
if (triggeringText) {
|
|
4931
|
+
const wordCount = triggeringText.split(/\s+/).length;
|
|
4932
|
+
if (wordCount >= bargeInMinPartialWords) {
|
|
4933
|
+
appendTurnLatencyStage({
|
|
4934
|
+
metadata: {
|
|
4935
|
+
partial: triggeringText.slice(0, 200),
|
|
4936
|
+
source: "stt_partial",
|
|
4937
|
+
wordCount
|
|
4938
|
+
},
|
|
4939
|
+
stage: "barge_in",
|
|
4940
|
+
turnId: activeTTSTurnId
|
|
4941
|
+
}).catch(() => {});
|
|
4942
|
+
cancelActiveTTS("barge-in");
|
|
4943
|
+
} else {
|
|
4944
|
+
appendTurnLatencyStage({
|
|
4945
|
+
metadata: {
|
|
4946
|
+
partial: triggeringText.slice(0, 200),
|
|
4947
|
+
reason: "below_min_words",
|
|
4948
|
+
wordCount
|
|
4949
|
+
},
|
|
4950
|
+
stage: "barge_in_suppressed",
|
|
4951
|
+
turnId: activeTTSTurnId
|
|
4952
|
+
}).catch(() => {});
|
|
4953
|
+
}
|
|
4954
|
+
}
|
|
4924
4955
|
}
|
|
4925
4956
|
const session = await writeSession((session2) => {
|
|
4926
4957
|
const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
|
|
@@ -5327,13 +5358,17 @@ var createVoiceSession = (options) => {
|
|
|
5327
5358
|
if (myToken !== fillerToken || activeTTSTurnId === turn.id)
|
|
5328
5359
|
return;
|
|
5329
5360
|
let phrase = null;
|
|
5361
|
+
let source = "static";
|
|
5330
5362
|
if (fillerForPromise) {
|
|
5331
5363
|
phrase = await fillerForPromise;
|
|
5364
|
+
if (phrase)
|
|
5365
|
+
source = "fillerFor";
|
|
5332
5366
|
if (myToken !== fillerToken || activeTTSTurnId === turn.id)
|
|
5333
5367
|
return;
|
|
5334
5368
|
}
|
|
5335
5369
|
if (!phrase && fillerPhrases.length > 0) {
|
|
5336
5370
|
phrase = fillerPhrases[Math.floor(Math.random() * fillerPhrases.length)] ?? null;
|
|
5371
|
+
source = "static";
|
|
5337
5372
|
}
|
|
5338
5373
|
if (!phrase)
|
|
5339
5374
|
return;
|
|
@@ -5341,6 +5376,11 @@ var createVoiceSession = (options) => {
|
|
|
5341
5376
|
if (!adapterSession)
|
|
5342
5377
|
return;
|
|
5343
5378
|
fillerActive = true;
|
|
5379
|
+
appendTurnLatencyStage({
|
|
5380
|
+
metadata: { phrase, source },
|
|
5381
|
+
stage: "filler_sent",
|
|
5382
|
+
turnId: turn.id
|
|
5383
|
+
}).catch(() => {});
|
|
5344
5384
|
try {
|
|
5345
5385
|
await adapterSession.send(phrase);
|
|
5346
5386
|
} catch {
|
|
@@ -24742,6 +24782,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
|
|
|
24742
24782
|
...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
|
|
24743
24783
|
...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
|
|
24744
24784
|
...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
|
|
24785
|
+
...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
|
|
24745
24786
|
...options.fillerFor ? { fillerFor: options.fillerFor } : {},
|
|
24746
24787
|
...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
|
|
24747
24788
|
...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
|
package/dist/telephony/twilio.d.ts
CHANGED
|
@@ -132,6 +132,14 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
|
|
|
132
132
|
fillerPhrases?: ReadonlyArray<string>;
|
|
133
133
|
/** Milliseconds after turn-commit before the filler fires. Default 250ms. */
|
|
134
134
|
fillerDelayMs?: number;
|
|
135
|
+
/**
|
|
136
|
+
* Minimum word count in an STT partial transcript before barge-in
|
|
137
|
+
* cancels in-flight assistant TTS. Default 1 (any partial).
|
|
138
|
+
* Recommended 2 on phone routes — single-word partials ("you", "am i")
|
|
139
|
+
* cut the bot off mid-question per live-test 2026-05-27. See
|
|
140
|
+
* CreateVoiceSessionOptions for full semantics.
|
|
141
|
+
*/
|
|
142
|
+
bargeInMinPartialWords?: number;
|
|
135
143
|
/**
|
|
136
144
|
* Content-aware filler (Latency Theater). Called in parallel with the
|
|
137
145
|
* main LLM turn; if it resolves within `fillerForTimeoutMs` the runtime
|
package/dist/testing/index.js
CHANGED
|
@@ -5723,7 +5723,7 @@ var createVoiceSession = (options) => {
|
|
|
5723
5723
|
};
|
|
5724
5724
|
const appendTurnLatencyStage = async (input) => appendTrace({
|
|
5725
5725
|
at: input.at,
|
|
5726
|
-
payload: { stage: input.stage },
|
|
5726
|
+
payload: { stage: input.stage, ...input.metadata ?? {} },
|
|
5727
5727
|
session: input.session,
|
|
5728
5728
|
turnId: input.turnId,
|
|
5729
5729
|
type: "turn_latency.stage"
|
|
@@ -5746,6 +5746,7 @@ var createVoiceSession = (options) => {
|
|
|
5746
5746
|
let fillerToken = 0;
|
|
5747
5747
|
const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
|
|
5748
5748
|
const fillerDelayMs = options.fillerDelayMs ?? 250;
|
|
5749
|
+
const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
|
|
5749
5750
|
const fillerFor = options.fillerFor;
|
|
5750
5751
|
const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
|
|
5751
5752
|
const currentTurnAudio = [];
|
|
@@ -6132,6 +6133,11 @@ var createVoiceSession = (options) => {
|
|
|
6132
6133
|
return;
|
|
6133
6134
|
}
|
|
6134
6135
|
activeTTSTurnId = undefined;
|
|
6136
|
+
appendTurnLatencyStage({
|
|
6137
|
+
metadata: { reason },
|
|
6138
|
+
stage: "tts_canceled",
|
|
6139
|
+
turnId: cancelledTurnId
|
|
6140
|
+
}).catch(() => {});
|
|
6135
6141
|
Promise.resolve(socket.clear?.()).catch(() => {});
|
|
6136
6142
|
if (!ttsAdapterSessionCanCancel(activeSession)) {
|
|
6137
6143
|
return;
|
|
@@ -6790,8 +6796,33 @@ var createVoiceSession = (options) => {
|
|
|
6790
6796
|
};
|
|
6791
6797
|
};
|
|
6792
6798
|
const handlePartial = async (transcript) => {
|
|
6793
|
-
if (activeTTSTurnId !== undefined
|
|
6794
|
-
|
|
6799
|
+
if (activeTTSTurnId !== undefined) {
|
|
6800
|
+
const triggeringText = transcript.text.trim();
|
|
6801
|
+
if (triggeringText) {
|
|
6802
|
+
const wordCount = triggeringText.split(/\s+/).length;
|
|
6803
|
+
if (wordCount >= bargeInMinPartialWords) {
|
|
6804
|
+
appendTurnLatencyStage({
|
|
6805
|
+
metadata: {
|
|
6806
|
+
partial: triggeringText.slice(0, 200),
|
|
6807
|
+
source: "stt_partial",
|
|
6808
|
+
wordCount
|
|
6809
|
+
},
|
|
6810
|
+
stage: "barge_in",
|
|
6811
|
+
turnId: activeTTSTurnId
|
|
6812
|
+
}).catch(() => {});
|
|
6813
|
+
cancelActiveTTS("barge-in");
|
|
6814
|
+
} else {
|
|
6815
|
+
appendTurnLatencyStage({
|
|
6816
|
+
metadata: {
|
|
6817
|
+
partial: triggeringText.slice(0, 200),
|
|
6818
|
+
reason: "below_min_words",
|
|
6819
|
+
wordCount
|
|
6820
|
+
},
|
|
6821
|
+
stage: "barge_in_suppressed",
|
|
6822
|
+
turnId: activeTTSTurnId
|
|
6823
|
+
}).catch(() => {});
|
|
6824
|
+
}
|
|
6825
|
+
}
|
|
6795
6826
|
}
|
|
6796
6827
|
const session = await writeSession((session2) => {
|
|
6797
6828
|
const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
|
|
@@ -7198,13 +7229,17 @@ var createVoiceSession = (options) => {
|
|
|
7198
7229
|
if (myToken !== fillerToken || activeTTSTurnId === turn.id)
|
|
7199
7230
|
return;
|
|
7200
7231
|
let phrase = null;
|
|
7232
|
+
let source = "static";
|
|
7201
7233
|
if (fillerForPromise) {
|
|
7202
7234
|
phrase = await fillerForPromise;
|
|
7235
|
+
if (phrase)
|
|
7236
|
+
source = "fillerFor";
|
|
7203
7237
|
if (myToken !== fillerToken || activeTTSTurnId === turn.id)
|
|
7204
7238
|
return;
|
|
7205
7239
|
}
|
|
7206
7240
|
if (!phrase && fillerPhrases.length > 0) {
|
|
7207
7241
|
phrase = fillerPhrases[Math.floor(Math.random() * fillerPhrases.length)] ?? null;
|
|
7242
|
+
source = "static";
|
|
7208
7243
|
}
|
|
7209
7244
|
if (!phrase)
|
|
7210
7245
|
return;
|
|
@@ -7212,6 +7247,11 @@ var createVoiceSession = (options) => {
|
|
|
7212
7247
|
if (!adapterSession)
|
|
7213
7248
|
return;
|
|
7214
7249
|
fillerActive = true;
|
|
7250
|
+
appendTurnLatencyStage({
|
|
7251
|
+
metadata: { phrase, source },
|
|
7252
|
+
stage: "filler_sent",
|
|
7253
|
+
turnId: turn.id
|
|
7254
|
+
}).catch(() => {});
|
|
7215
7255
|
try {
|
|
7216
7256
|
await adapterSession.send(phrase);
|
|
7217
7257
|
} catch {
|
|
@@ -13332,6 +13372,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
|
|
|
13332
13372
|
...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
|
|
13333
13373
|
...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
|
|
13334
13374
|
...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
|
|
13375
|
+
...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
|
|
13335
13376
|
...options.fillerFor ? { fillerFor: options.fillerFor } : {},
|
|
13336
13377
|
...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
|
|
13337
13378
|
...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
|