@absolutejs/voice 0.0.22-beta.553 → 0.0.22-beta.555

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,14 @@ export type OpenAIVoiceAssistantModelOptions = {
13
13
  model?: string;
14
14
  onUsage?: (usage: Record<string, unknown>) => Promise<void> | void;
15
15
  temperature?: number;
16
+ /**
17
+ * Hard cap on a single /responses call (request + stream). Aborts via AbortController if
18
+ * the stream doesn't complete in time. Default 60s — generous for typical
19
+ * gpt-4.1-mini conversational turns (1-3s) but catches infinite hangs
20
+ * (server-side stream stalls observed on rare complex inputs). On timeout
21
+ * the agent loop falls through to default-silent-turn-ack.
22
+ */
23
+ timeoutMs?: number;
16
24
  };
17
25
  export type AnthropicVoiceAssistantModelOptions = {
18
26
  apiKey: string;
@@ -916,6 +916,13 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
916
916
  fillerPhrases?: ReadonlyArray<string>;
917
917
  /** Milliseconds after turn-commit before the filler fires. Default 250ms — short enough to feel instant, long enough to skip if the LLM is very fast. */
918
918
  fillerDelayMs?: number;
919
+ /**
920
+ * Default spoken ack if the turn produces no speech (e.g. the model returns ONLY tool calls, or the model call timed out) and the
921
+ * turn isn't ending. Without this, the caller hears total silence after
922
+ * their turn and assumes the line dropped. Default is "Sorry, one moment."
923
+ * Set to "" (or whitespace only) to opt out entirely. Only spoken when `tts` is configured.
924
+ */
925
+ defaultSilentTurnAck?: string;
919
926
  assistantMode?: import("./assistantMode").VoiceAssistantMode;
920
927
  modalities?: ReadonlyArray<"audio" | "text">;
921
928
  prosody?: VoiceTTSProsody;
package/dist/index.js CHANGED
@@ -5303,17 +5303,41 @@ var createVoiceSession = (options) => {
5303
5303
  });
5304
5304
  }, fillerDelayMs);
5305
5305
  }
5306
- const committedOutput = await options.route.onTurn({
5307
- api,
5308
- context: options.context,
5309
- liveOps: liveOpsControl ? {
5310
- control: liveOpsControl,
5311
- injectedInstruction
5312
- } : undefined,
5313
- onTextDelta: ttsStreamer?.push,
5314
- session,
5315
- turn
5316
- });
5306
+ let committedOutput;
5307
+ const onTurnStartedAt = Date.now();
5308
+ try {
5309
+ committedOutput = await options.route.onTurn({
5310
+ api,
5311
+ context: options.context,
5312
+ liveOps: liveOpsControl ? {
5313
+ control: liveOpsControl,
5314
+ injectedInstruction
5315
+ } : undefined,
5316
+ onTextDelta: ttsStreamer?.push,
5317
+ session,
5318
+ turn
5319
+ });
5320
+ } catch (error) {
5321
+ const message = toError(error).message;
5322
+ logger.warn("voice route.onTurn failed", {
5323
+ elapsedMs: Date.now() - onTurnStartedAt,
5324
+ error: message,
5325
+ sessionId: options.id,
5326
+ turnId: turn.id
5327
+ });
5328
+ console.error(`[voice] onTurn failed for session ${options.id} turn ${turn.id} after ${Date.now() - onTurnStartedAt}ms:`, message);
5329
+ await appendTrace({
5330
+ payload: {
5331
+ elapsedMs: Date.now() - onTurnStartedAt,
5332
+ error: message,
5333
+ stage: "route.onTurn"
5334
+ },
5335
+ session,
5336
+ turnId: turn.id,
5337
+ type: "session.error"
5338
+ });
5339
+ committedOutput = undefined;
5340
+ }
5317
5341
  const output = {
5318
5342
  assistantText: committedOutput?.assistantText,
5319
5343
  citations: committedOutput?.citations,
@@ -5464,6 +5488,53 @@ var createVoiceSession = (options) => {
5464
5488
  });
5465
5489
  }
5466
5490
  }
5491
+ const audioWasSent = Boolean(streamResult?.streamed) || Boolean(output?.assistantText?.trim());
5492
+ const turnIsEnding = Boolean(output?.complete) || Boolean(output?.transfer) || Boolean(output?.escalate) || Boolean(output?.voicemail) || Boolean(output?.noAnswer);
5493
+ if (!audioWasSent && !turnIsEnding) {
5494
+ const fallback = typeof options.defaultSilentTurnAck === "string" ? options.defaultSilentTurnAck : "Sorry, one moment.";
5495
+ if (fallback.trim() && options.tts) {
5496
+ try {
5497
+ const activeTTSSession = await ensureTTSSession();
5498
+ if (activeTTSSession) {
5499
+ fillerToken += 1;
5500
+ if (fillerTimer) {
5501
+ clearTimeout(fillerTimer);
5502
+ fillerTimer = null;
5503
+ }
5504
+ if (fillerActive) {
5505
+ await cancelActiveTTS("filler-superseded").catch(() => {});
5506
+ fillerActive = false;
5507
+ }
5508
+ activeTTSTurnId = turn.id;
5509
+ await activeTTSSession.send(fallback);
5510
+ await appendTrace({
5511
+ payload: {
5512
+ assistantMode: resolveVoiceAssistantMode(options),
5513
+ fallback: true,
5514
+ realtimeConfigured: Boolean(options.realtime),
5515
+ reason: "model-returned-no-text",
5516
+ text: fallback,
5517
+ ttsConfigured: Boolean(options.tts)
5518
+ },
5519
+ session,
5520
+ turnId: turn.id,
5521
+ type: "turn.assistant"
5522
+ });
5523
+ if (options.costAccountant) {
5524
+ options.costAccountant.recordTTS({
5525
+ characters: fallback.length
5526
+ });
5527
+ }
5528
+ }
5529
+ } catch (error) {
5530
+ logger.warn("voice default-silent-turn-ack fallback send failed", {
5531
+ error: toError(error).message,
5532
+ sessionId: options.id,
5533
+ turnId: turn.id
5534
+ });
5535
+ }
5536
+ }
5537
+ }
5467
5538
  if (output?.result !== undefined) {
5468
5539
  await writeSession((currentSession) => {
5469
5540
  setTurnResult(currentSession, turn.id, {
@@ -24608,6 +24679,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
24608
24679
  ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
24609
24680
  ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
24610
24681
  ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
24682
+ ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
24611
24683
  trace: options.trace,
24612
24684
  tts: options.tts,
24613
24685
  turnDetection
@@ -45058,41 +45130,61 @@ var createOpenAIVoiceAssistantModel = (options) => {
45058
45130
  const fetchImpl = options.fetch ?? globalThis.fetch;
45059
45131
  const baseUrl = options.baseUrl ?? "https://api.openai.com/v1";
45060
45132
  const model = options.model ?? "gpt-4.1-mini";
45133
+ const timeoutMs = options.timeoutMs ?? 60000;
45061
45134
  return {
45062
45135
  generate: async (input) => {
45063
- const response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
45064
- body: JSON.stringify({
45065
- input: messagesToOpenAIInput(input.messages),
45066
- instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
45136
+ const ac = new AbortController;
45137
+ const timer = setTimeout(() => {
45138
+ ac.abort(new Error(`OpenAI /responses timed out after ${timeoutMs}ms (no completion event received)`));
45139
+ }, timeoutMs);
45140
+ let response;
45141
+ try {
45142
+ response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
45143
+ body: JSON.stringify({
45144
+ input: messagesToOpenAIInput(input.messages),
45145
+ instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
45067
45146
 
45068
45147
  `),
45069
- max_output_tokens: options.maxOutputTokens,
45070
- model,
45071
- stream: true,
45072
- temperature: options.temperature,
45073
- tool_choice: input.tools.length ? "auto" : "none",
45074
- tools: input.tools.map((tool) => ({
45075
- description: tool.description,
45076
- name: tool.name,
45077
- parameters: tool.parameters ?? {
45078
- additionalProperties: true,
45079
- type: "object"
45080
- },
45081
- strict: false,
45082
- type: "function"
45083
- }))
45084
- }),
45085
- headers: {
45086
- accept: "text/event-stream",
45087
- authorization: `Bearer ${options.apiKey}`,
45088
- "content-type": "application/json"
45089
- },
45090
- method: "POST"
45091
- });
45148
+ max_output_tokens: options.maxOutputTokens,
45149
+ model,
45150
+ stream: true,
45151
+ temperature: options.temperature,
45152
+ tool_choice: input.tools.length ? "auto" : "none",
45153
+ tools: input.tools.map((tool) => ({
45154
+ description: tool.description,
45155
+ name: tool.name,
45156
+ parameters: tool.parameters ?? {
45157
+ additionalProperties: true,
45158
+ type: "object"
45159
+ },
45160
+ strict: false,
45161
+ type: "function"
45162
+ }))
45163
+ }),
45164
+ headers: {
45165
+ accept: "text/event-stream",
45166
+ authorization: `Bearer ${options.apiKey}`,
45167
+ "content-type": "application/json"
45168
+ },
45169
+ method: "POST",
45170
+ signal: ac.signal
45171
+ });
45172
+ } catch (error) {
45173
+ clearTimeout(timer);
45174
+ throw error;
45175
+ }
45092
45176
  if (!response.ok) {
45177
+ clearTimeout(timer);
45093
45178
  throw createHTTPError("OpenAI", response);
45094
45179
  }
45095
- const { assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta);
45180
+ let assistantText;
45181
+ let toolCalls;
45182
+ let usage;
45183
+ try {
45184
+ ({ assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta));
45185
+ } finally {
45186
+ clearTimeout(timer);
45187
+ }
45096
45188
  if (usage) {
45097
45189
  await options.onUsage?.(usage);
45098
45190
  }
@@ -132,6 +132,13 @@ export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends
132
132
  fillerPhrases?: ReadonlyArray<string>;
133
133
  /** Milliseconds after turn-commit before the filler fires. Default 250ms. */
134
134
  fillerDelayMs?: number;
135
+ /**
136
+ * Default spoken ack if the turn produces no speech (e.g. the model returns ONLY tool calls) and
137
+ * the turn isn't ending. Without this, the caller hears silence and
138
+ * assumes the line dropped. Default "Sorry, one moment." — set to ""
139
+ * to opt out. See CreateVoiceSessionOptions for full semantics.
140
+ */
141
+ defaultSilentTurnAck?: string;
135
142
  };
136
143
  export type TwilioMediaStreamBridge = {
137
144
  close: (reason?: string) => Promise<void>;
@@ -4600,41 +4600,61 @@ var createOpenAIVoiceAssistantModel = (options) => {
4600
4600
  const fetchImpl = options.fetch ?? globalThis.fetch;
4601
4601
  const baseUrl = options.baseUrl ?? "https://api.openai.com/v1";
4602
4602
  const model = options.model ?? "gpt-4.1-mini";
4603
+ const timeoutMs = options.timeoutMs ?? 60000;
4603
4604
  return {
4604
4605
  generate: async (input) => {
4605
- const response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
4606
- body: JSON.stringify({
4607
- input: messagesToOpenAIInput(input.messages),
4608
- instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
4606
+ const ac = new AbortController;
4607
+ const timer = setTimeout(() => {
4608
+ ac.abort(new Error(`OpenAI /responses timed out after ${timeoutMs}ms (no completion event received)`));
4609
+ }, timeoutMs);
4610
+ let response;
4611
+ try {
4612
+ response = await fetchImpl(`${baseUrl.replace(/\/$/, "")}/responses`, {
4613
+ body: JSON.stringify({
4614
+ input: messagesToOpenAIInput(input.messages),
4615
+ instructions: [input.system, VOICE_SYSTEM_INSTRUCTIONS].filter(Boolean).join(`
4609
4616
 
4610
4617
  `),
4611
- max_output_tokens: options.maxOutputTokens,
4612
- model,
4613
- stream: true,
4614
- temperature: options.temperature,
4615
- tool_choice: input.tools.length ? "auto" : "none",
4616
- tools: input.tools.map((tool) => ({
4617
- description: tool.description,
4618
- name: tool.name,
4619
- parameters: tool.parameters ?? {
4620
- additionalProperties: true,
4621
- type: "object"
4622
- },
4623
- strict: false,
4624
- type: "function"
4625
- }))
4626
- }),
4627
- headers: {
4628
- accept: "text/event-stream",
4629
- authorization: `Bearer ${options.apiKey}`,
4630
- "content-type": "application/json"
4631
- },
4632
- method: "POST"
4633
- });
4618
+ max_output_tokens: options.maxOutputTokens,
4619
+ model,
4620
+ stream: true,
4621
+ temperature: options.temperature,
4622
+ tool_choice: input.tools.length ? "auto" : "none",
4623
+ tools: input.tools.map((tool) => ({
4624
+ description: tool.description,
4625
+ name: tool.name,
4626
+ parameters: tool.parameters ?? {
4627
+ additionalProperties: true,
4628
+ type: "object"
4629
+ },
4630
+ strict: false,
4631
+ type: "function"
4632
+ }))
4633
+ }),
4634
+ headers: {
4635
+ accept: "text/event-stream",
4636
+ authorization: `Bearer ${options.apiKey}`,
4637
+ "content-type": "application/json"
4638
+ },
4639
+ method: "POST",
4640
+ signal: ac.signal
4641
+ });
4642
+ } catch (error) {
4643
+ clearTimeout(timer);
4644
+ throw error;
4645
+ }
4634
4646
  if (!response.ok) {
4647
+ clearTimeout(timer);
4635
4648
  throw createHTTPError("OpenAI", response);
4636
4649
  }
4637
- const { assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta);
4650
+ let assistantText;
4651
+ let toolCalls;
4652
+ let usage;
4653
+ try {
4654
+ ({ assistantText, toolCalls, usage } = await consumeOpenAIResponsesStream(response, input.onTextDelta));
4655
+ } finally {
4656
+ clearTimeout(timer);
4657
+ }
4638
4658
  if (usage) {
4639
4659
  await options.onUsage?.(usage);
4640
4660
  }
@@ -7120,17 +7140,41 @@ var createVoiceSession = (options) => {
7120
7140
  });
7121
7141
  }, fillerDelayMs);
7122
7142
  }
7123
- const committedOutput = await options.route.onTurn({
7124
- api,
7125
- context: options.context,
7126
- liveOps: liveOpsControl ? {
7127
- control: liveOpsControl,
7128
- injectedInstruction
7129
- } : undefined,
7130
- onTextDelta: ttsStreamer?.push,
7131
- session,
7132
- turn
7133
- });
7143
+ let committedOutput;
7144
+ const onTurnStartedAt = Date.now();
7145
+ try {
7146
+ committedOutput = await options.route.onTurn({
7147
+ api,
7148
+ context: options.context,
7149
+ liveOps: liveOpsControl ? {
7150
+ control: liveOpsControl,
7151
+ injectedInstruction
7152
+ } : undefined,
7153
+ onTextDelta: ttsStreamer?.push,
7154
+ session,
7155
+ turn
7156
+ });
7157
+ } catch (error) {
7158
+ const message = toError(error).message;
7159
+ logger.warn("voice route.onTurn failed", {
7160
+ elapsedMs: Date.now() - onTurnStartedAt,
7161
+ error: message,
7162
+ sessionId: options.id,
7163
+ turnId: turn.id
7164
+ });
7165
+ console.error(`[voice] onTurn failed for session ${options.id} turn ${turn.id} after ${Date.now() - onTurnStartedAt}ms:`, message);
7166
+ await appendTrace({
7167
+ payload: {
7168
+ elapsedMs: Date.now() - onTurnStartedAt,
7169
+ error: message,
7170
+ stage: "route.onTurn"
7171
+ },
7172
+ session,
7173
+ turnId: turn.id,
7174
+ type: "session.error"
7175
+ });
7176
+ committedOutput = undefined;
7177
+ }
7134
7178
  const output = {
7135
7179
  assistantText: committedOutput?.assistantText,
7136
7180
  citations: committedOutput?.citations,
@@ -7281,6 +7325,53 @@ var createVoiceSession = (options) => {
7281
7325
  });
7282
7326
  }
7283
7327
  }
7328
+ const audioWasSent = Boolean(streamResult?.streamed) || Boolean(output?.assistantText?.trim());
7329
+ const turnIsEnding = Boolean(output?.complete) || Boolean(output?.transfer) || Boolean(output?.escalate) || Boolean(output?.voicemail) || Boolean(output?.noAnswer);
7330
+ if (!audioWasSent && !turnIsEnding) {
7331
+ const fallback = typeof options.defaultSilentTurnAck === "string" ? options.defaultSilentTurnAck : "Sorry, one moment.";
7332
+ if (fallback.trim() && options.tts) {
7333
+ try {
7334
+ const activeTTSSession = await ensureTTSSession();
7335
+ if (activeTTSSession) {
7336
+ fillerToken += 1;
7337
+ if (fillerTimer) {
7338
+ clearTimeout(fillerTimer);
7339
+ fillerTimer = null;
7340
+ }
7341
+ if (fillerActive) {
7342
+ await cancelActiveTTS("filler-superseded").catch(() => {});
7343
+ fillerActive = false;
7344
+ }
7345
+ activeTTSTurnId = turn.id;
7346
+ await activeTTSSession.send(fallback);
7347
+ await appendTrace({
7348
+ payload: {
7349
+ assistantMode: resolveVoiceAssistantMode(options),
7350
+ fallback: true,
7351
+ realtimeConfigured: Boolean(options.realtime),
7352
+ reason: "model-returned-no-text",
7353
+ text: fallback,
7354
+ ttsConfigured: Boolean(options.tts)
7355
+ },
7356
+ session,
7357
+ turnId: turn.id,
7358
+ type: "turn.assistant"
7359
+ });
7360
+ if (options.costAccountant) {
7361
+ options.costAccountant.recordTTS({
7362
+ characters: fallback.length
7363
+ });
7364
+ }
7365
+ }
7366
+ } catch (error) {
7367
+ logger.warn("voice default-silent-turn-ack fallback send failed", {
7368
+ error: toError(error).message,
7369
+ sessionId: options.id,
7370
+ turnId: turn.id
7371
+ });
7372
+ }
7373
+ }
7374
+ }
7284
7375
  if (output?.result !== undefined) {
7285
7376
  await writeSession((currentSession) => {
7286
7377
  setTurnResult(currentSession, turn.id, {
@@ -13144,6 +13235,7 @@ var createTwilioMediaStreamBridge = (socket, options) => {
13144
13235
  ...options.semanticTurnDetector ? { semanticTurnDetector: options.semanticTurnDetector } : {},
13145
13236
  ...options.fillerPhrases ? { fillerPhrases: options.fillerPhrases } : {},
13146
13237
  ...options.fillerDelayMs !== undefined ? { fillerDelayMs: options.fillerDelayMs } : {},
13238
+ ...options.defaultSilentTurnAck !== undefined ? { defaultSilentTurnAck: options.defaultSilentTurnAck } : {},
13147
13239
  trace: options.trace,
13148
13240
  tts: options.tts,
13149
13241
  turnDetection
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.553",
3
+ "version": "0.0.22-beta.555",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",