@absolutejs/voice 0.0.22-beta.563 → 0.0.22-beta.564

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -913,6 +913,20 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
913
913
  * Set `fillerPhrases: []` (or omit) to disable. Reasonable defaults if
914
914
  * you enable: `["Hmm.", "Got it.", "Right.", "Mm-hm.", "Let me think.", "Okay."]`.
915
915
  */
916
+ /**
917
+ * Minimum word count in an STT partial transcript before speech-gated
918
+ * barge-in cancels the in-flight assistant TTS. Default 1 (any non-empty
919
+ * partial triggers barge-in — backwards-compatible). Values below 1 are clamped to 1.
920
+ *
921
+ * Set to 2 (or higher) on phone routes where the caller's brief
922
+ * acknowledgements ("yeah", "uh-huh", "you", "am i") would otherwise
923
+ * cut the bot off mid-question. Each extra word added typically delays
924
+ * barge-in by ~100-200ms (one extra STT partial cycle) — cheap compared
925
+ * to losing the bot's response.
926
+ *
927
+ * Word splitting is whitespace-based. Punctuation is left attached, so "uh-huh" counts as one word and "am i" as two (suppressing "am i" needs a setting of 3).
928
+ */
929
+ bargeInMinPartialWords?: number;
916
930
  fillerPhrases?: ReadonlyArray<string>;
917
931
  /** Milliseconds after turn-commit before the filler fires. Default 250ms — short enough to feel instant, long enough to skip if the LLM is very fast. */
918
932
  fillerDelayMs?: number;
package/dist/index.js CHANGED
@@ -3875,6 +3875,7 @@ var createVoiceSession = (options) => {
3875
3875
  let fillerToken = 0;
3876
3876
  const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
3877
3877
  const fillerDelayMs = options.fillerDelayMs ?? 250;
3878
+ const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
3878
3879
  const fillerFor = options.fillerFor;
3879
3880
  const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
3880
3881
  const currentTurnAudio = [];
@@ -4924,17 +4925,33 @@ var createVoiceSession = (options) => {
4924
4925
  };
4925
4926
  };
4926
4927
  const handlePartial = async (transcript) => {
4927
- if (activeTTSTurnId !== undefined && transcript.text.trim()) {
4928
+ if (activeTTSTurnId !== undefined) {
4928
4929
  const triggeringText = transcript.text.trim();
4929
- appendTurnLatencyStage({
4930
- metadata: {
4931
- partial: triggeringText.slice(0, 200),
4932
- source: "stt_partial"
4933
- },
4934
- stage: "barge_in",
4935
- turnId: activeTTSTurnId
4936
- }).catch(() => {});
4937
- cancelActiveTTS("barge-in");
4930
+ if (triggeringText) {
4931
+ const wordCount = triggeringText.split(/\s+/).length;
4932
+ if (wordCount >= bargeInMinPartialWords) {
4933
+ appendTurnLatencyStage({
4934
+ metadata: {
4935
+ partial: triggeringText.slice(0, 200),
4936
+ source: "stt_partial",
4937
+ wordCount
4938
+ },
4939
+ stage: "barge_in",
4940
+ turnId: activeTTSTurnId
4941
+ }).catch(() => {});
4942
+ cancelActiveTTS("barge-in");
4943
+ } else {
4944
+ appendTurnLatencyStage({
4945
+ metadata: {
4946
+ partial: triggeringText.slice(0, 200),
4947
+ reason: "below_min_words",
4948
+ wordCount
4949
+ },
4950
+ stage: "barge_in_suppressed",
4951
+ turnId: activeTTSTurnId
4952
+ }).catch(() => {});
4953
+ }
4954
+ }
4938
4955
  }
4939
4956
  const session = await writeSession((session2) => {
4940
4957
  const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
@@ -24765,6 +24782,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
24765
24782
  ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
24766
24783
  ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
24767
24784
  ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
24785
+ ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
24768
24786
  ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
24769
24787
  ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
24770
24788
  ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
@@ -132,6 +132,14 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
132
132
  fillerPhrases?: ReadonlyArray<string>;
133
133
  /** Milliseconds after turn-commit before the filler fires. Default 250ms. */
134
134
  fillerDelayMs?: number;
135
+ /**
136
+ * Minimum word count in an STT partial transcript before barge-in
137
+ * cancels in-flight assistant TTS. Default 1 (any partial).
138
+ * Recommended 2 on phone routes — brief partials ("you"; "am i" is two words, needs 3)
139
+ * cut the bot off mid-question per live-test 2026-05-27. See
140
+ * CreateVoiceSessionOptions for full semantics.
141
+ */
142
+ bargeInMinPartialWords?: number;
135
143
  /**
136
144
  * Content-aware filler (Latency Theater). Called in parallel with the
137
145
  * main LLM turn; if it resolves within `fillerForTimeoutMs` the runtime
@@ -5746,6 +5746,7 @@ var createVoiceSession = (options) => {
5746
5746
  let fillerToken = 0;
5747
5747
  const fillerPhrases = (options.fillerPhrases ?? []).filter((p) => typeof p === "string" && p.trim().length > 0);
5748
5748
  const fillerDelayMs = options.fillerDelayMs ?? 250;
5749
+ const bargeInMinPartialWords = Math.max(1, options.bargeInMinPartialWords ?? 1);
5749
5750
  const fillerFor = options.fillerFor;
5750
5751
  const fillerForTimeoutMs = options.fillerForTimeoutMs ?? 600;
5751
5752
  const currentTurnAudio = [];
@@ -6795,17 +6796,33 @@ var createVoiceSession = (options) => {
6795
6796
  };
6796
6797
  };
6797
6798
  const handlePartial = async (transcript) => {
6798
- if (activeTTSTurnId !== undefined && transcript.text.trim()) {
6799
+ if (activeTTSTurnId !== undefined) {
6799
6800
  const triggeringText = transcript.text.trim();
6800
- appendTurnLatencyStage({
6801
- metadata: {
6802
- partial: triggeringText.slice(0, 200),
6803
- source: "stt_partial"
6804
- },
6805
- stage: "barge_in",
6806
- turnId: activeTTSTurnId
6807
- }).catch(() => {});
6808
- cancelActiveTTS("barge-in");
6801
+ if (triggeringText) {
6802
+ const wordCount = triggeringText.split(/\s+/).length;
6803
+ if (wordCount >= bargeInMinPartialWords) {
6804
+ appendTurnLatencyStage({
6805
+ metadata: {
6806
+ partial: triggeringText.slice(0, 200),
6807
+ source: "stt_partial",
6808
+ wordCount
6809
+ },
6810
+ stage: "barge_in",
6811
+ turnId: activeTTSTurnId
6812
+ }).catch(() => {});
6813
+ cancelActiveTTS("barge-in");
6814
+ } else {
6815
+ appendTurnLatencyStage({
6816
+ metadata: {
6817
+ partial: triggeringText.slice(0, 200),
6818
+ reason: "below_min_words",
6819
+ wordCount
6820
+ },
6821
+ stage: "barge_in_suppressed",
6822
+ turnId: activeTTSTurnId
6823
+ }).catch(() => {});
6824
+ }
6825
+ }
6809
6826
  }
6810
6827
  const session = await writeSession((session2) => {
6811
6828
  const nextPartialStartedAt = transcript.startedAtMs ?? session2.currentTurn.partialStartedAt;
@@ -13355,6 +13372,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
13355
13372
  ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
13356
13373
  ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
13357
13374
  ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
13375
+ ...options.bargeInMinPartialWords !== undefined ? { bargeInMinPartialWords: options.bargeInMinPartialWords } : {},
13358
13376
  ...options.fillerFor ? { fillerFor: options.fillerFor } : {},
13359
13377
  ...options.fillerForTimeoutMs !== undefined ? { fillerForTimeoutMs: options.fillerForTimeoutMs } : {},
13360
13378
  ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.563",
3
+ "version": "0.0.22-beta.564",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",