@absolutejs/voice 0.0.22-beta.483 → 0.0.22-beta.484

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ import type { CreateVoiceSessionOptions, VoiceSessionRecord } from "./types";
2
+ export type VoiceAssistantMode = "cascade" | "s2s"; // The two assistant modes; "s2s" is presumably speech-to-speech — TODO confirm.
3
+ export type VoiceSemanticVADConfig = { // Semantic-VAD settings object; every field is optional.
4
+ createResponseAutomatically?: boolean;
5
+ eagerness?: "auto" | "high" | "low" | "medium"; // Restricted to these four literal levels.
6
+ silenceDurationMs?: number; // presumably a duration in milliseconds, judging by the name — TODO confirm
7
+ };
8
+ export type VoiceAssistantModality = "audio" | "text"; // Element type of the `modalities` lists used by the descriptor and its options.
9
+ export declare const resolveVoiceAssistantMode: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: Pick<CreateVoiceSessionOptions<TContext, TSession, TResult>, "realtime" | "stt" | "tts"> & { // Takes only the realtime/stt/tts slice of the session options, plus the optional override below.
10
+ assistantMode?: VoiceAssistantMode; // Optional explicit mode supplied by the caller.
11
+ }) => VoiceAssistantMode; // Always returns a concrete mode, never undefined.
12
+ export type VoiceAssistantModeDescriptor = { // Return shape of describeVoiceAssistantMode: adapter-presence flags plus the resolved mode and modalities.
13
+ hasRealtime: boolean;
14
+ hasSTT: boolean;
15
+ hasTTS: boolean;
16
+ mode: VoiceAssistantMode;
17
+ modalities: VoiceAssistantModality[]; // Mutable array here, whereas the corresponding input option is a ReadonlyArray.
18
+ };
19
+ export declare const describeVoiceAssistantMode: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: Pick<CreateVoiceSessionOptions<TContext, TSession, TResult>, "realtime" | "stt" | "tts"> & { // Same realtime/stt/tts slice of the session options as resolveVoiceAssistantMode accepts.
20
+ assistantMode?: VoiceAssistantMode; // Optional explicit mode supplied by the caller.
21
+ modalities?: ReadonlyArray<VoiceAssistantModality>; // Optional; accepted read-only.
22
+ }) => VoiceAssistantModeDescriptor; // Returns the flags, mode and modalities as one descriptor object.
package/dist/index.d.ts CHANGED
@@ -77,6 +77,8 @@ export { DEFAULT_VOICE_REDACTION_PATTERNS, createVoiceTranscriptRedactor, redact
77
77
  export type { CreateVoiceTranscriptRedactorOptions, VoiceRedactionPattern, VoiceTranscriptRedactor, } from "./redaction";
78
78
  export { DEFAULT_VOICE_PRICE_BOOK, createVoiceCostAccountant, } from "./costAccounting";
79
79
  export type { CreateVoiceCostAccountantOptions, VoiceCostAccountant, VoiceCostBreakdown, VoiceCostLLMRecord, VoiceCostSTTRecord, VoiceCostTTSRecord, VoiceCostTelephonyRecord, VoicePriceBook, VoiceProviderRates, } from "./costAccounting";
80
+ export { describeVoiceAssistantMode, resolveVoiceAssistantMode, } from "./assistantMode";
81
+ export type { VoiceAssistantMode, VoiceAssistantModality, VoiceAssistantModeDescriptor, VoiceSemanticVADConfig, } from "./assistantMode";
80
82
  export { createPunctuationSemanticTurnDetector, createRegexSemanticTurnDetector, } from "./semanticTurn";
81
83
  export type { CreatePunctuationSemanticTurnDetectorOptions, CreateRegexSemanticTurnDetectorOptions, VoiceSemanticTurnDetector, VoiceSemanticTurnInput, VoiceSemanticTurnVerdict, } from "./semanticTurn";
82
84
  export { createMonologueAMDDetector } from "./amdDetector";
package/dist/index.js CHANGED
@@ -3440,6 +3440,28 @@ var createVoiceMemoryRecordingStore = () => {
3440
3440
  };
3441
3441
  };
3442
3442
 
3443
+ // src/assistantMode.ts
3444
+ var resolveVoiceAssistantMode = (options) => { // Chooses the assistant mode from the given options, by precedence.
3445
+ if (options.assistantMode) { // A truthy explicit assistantMode wins and is returned as-is, without validation.
3446
+ return options.assistantMode;
3447
+ }
3448
+ if (options.realtime) { // Otherwise a truthy `realtime` option selects "s2s".
3449
+ return "s2s";
3450
+ }
3451
+ return "cascade"; // Fallback when neither assistantMode nor realtime is set.
3452
+ };
3453
+ var describeVoiceAssistantMode = (options) => { // Builds a descriptor object summarising the mode, modalities and configured adapters.
3454
+ const mode = resolveVoiceAssistantMode(options);
3455
+ const modalities = options.modalities ? Array.from(new Set(options.modalities)) : ["audio"]; // De-duplicates supplied modalities via a Set; defaults to ["audio"] when the option is falsy. NOTE(review): an empty array is truthy, so it yields [] rather than the default — confirm intended.
3456
+ return {
3457
+ hasRealtime: Boolean(options.realtime), // Each flag reports only whether the option is truthy.
3458
+ hasSTT: Boolean(options.stt),
3459
+ hasTTS: Boolean(options.tts),
3460
+ modalities,
3461
+ mode
3462
+ };
3463
+ };
3464
+
3443
3465
  // src/session.ts
3444
3466
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
3445
3467
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
@@ -4822,13 +4844,20 @@ var createVoiceSession = (options) => {
4822
4844
  if (!inputAdapter) {
4823
4845
  throw new Error("Voice session requires either an stt or realtime adapter.");
4824
4846
  }
4825
- const openedSession = await inputAdapter.open({
4826
- format: options.realtime ? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT : DEFAULT_FORMAT,
4847
+ const openedSession = await (options.realtime ? options.realtime.open({
4848
+ format: options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT,
4827
4849
  languageStrategy: options.languageStrategy,
4828
4850
  lexicon,
4851
+ modalities: options.modalities,
4829
4852
  phraseHints,
4830
4853
  sessionId: options.id
4831
- });
4854
+ }) : inputAdapter.open({
4855
+ format: DEFAULT_FORMAT,
4856
+ languageStrategy: options.languageStrategy,
4857
+ lexicon,
4858
+ phraseHints,
4859
+ sessionId: options.id
4860
+ }));
4832
4861
  const generation = ++adapterGenerationCounter;
4833
4862
  sttSession = openedSession;
4834
4863
  activeAdapterGeneration = generation;
@@ -4998,9 +5027,10 @@ var createVoiceSession = (options) => {
4998
5027
  });
4999
5028
  await appendTrace({
5000
5029
  payload: {
5030
+ assistantMode: resolveVoiceAssistantMode(options),
5031
+ realtimeConfigured: Boolean(options.realtime),
5001
5032
  text: output.assistantText,
5002
- ttsConfigured: Boolean(options.tts),
5003
- realtimeConfigured: Boolean(options.realtime)
5033
+ ttsConfigured: Boolean(options.tts)
5004
5034
  },
5005
5035
  session,
5006
5036
  turnId: turn.id,
@@ -46034,6 +46064,7 @@ export {
46034
46064
  resolveVoiceDiagnosticsTraceFilter,
46035
46065
  resolveVoiceAuditTrailFilter,
46036
46066
  resolveVoiceAuditDeliveryFilter,
46067
+ resolveVoiceAssistantMode,
46037
46068
  resolveVoiceAssistantMemoryNamespace,
46038
46069
  resolveTurnDetectionConfig,
46039
46070
  resolveLatestVoiceCallDebuggerSessionId,
@@ -46228,6 +46259,7 @@ export {
46228
46259
  evaluateVoiceAgentSquadContractEvidence,
46229
46260
  encodeTwilioMulawBase64,
46230
46261
  encodePcmAsWav,
46262
+ describeVoiceAssistantMode,
46231
46263
  deliverVoiceTraceEventsToSinks,
46232
46264
  deliverVoiceObservabilityExport,
46233
46265
  deliverVoiceMonitorIssueNotifications,
@@ -5408,6 +5408,28 @@ var createVoiceMemoryRecordingStore = () => {
5408
5408
  };
5409
5409
  };
5410
5410
 
5411
+ // src/assistantMode.ts
5412
+ var resolveVoiceAssistantMode = (options) => { // Chooses the assistant mode from the given options, by precedence.
5413
+ if (options.assistantMode) { // A truthy explicit assistantMode wins and is returned as-is, without validation.
5414
+ return options.assistantMode;
5415
+ }
5416
+ if (options.realtime) { // Otherwise a truthy `realtime` option selects "s2s".
5417
+ return "s2s";
5418
+ }
5419
+ return "cascade"; // Fallback when neither assistantMode nor realtime is set.
5420
+ };
5421
+ var describeVoiceAssistantMode = (options) => { // Builds a descriptor object summarising the mode, modalities and configured adapters.
5422
+ const mode = resolveVoiceAssistantMode(options);
5423
+ const modalities = options.modalities ? Array.from(new Set(options.modalities)) : ["audio"]; // De-duplicates supplied modalities via a Set; defaults to ["audio"] when the option is falsy. NOTE(review): an empty array is truthy, so it yields [] rather than the default — confirm intended.
5424
+ return {
5425
+ hasRealtime: Boolean(options.realtime), // Each flag reports only whether the option is truthy.
5426
+ hasSTT: Boolean(options.stt),
5427
+ hasTTS: Boolean(options.tts),
5428
+ modalities,
5429
+ mode
5430
+ };
5431
+ };
5432
+
5411
5433
  // src/session.ts
5412
5434
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
5413
5435
  var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
@@ -6790,13 +6812,20 @@ var createVoiceSession = (options) => {
6790
6812
  if (!inputAdapter) {
6791
6813
  throw new Error("Voice session requires either an stt or realtime adapter.");
6792
6814
  }
6793
- const openedSession = await inputAdapter.open({
6794
- format: options.realtime ? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT : DEFAULT_FORMAT,
6815
+ const openedSession = await (options.realtime ? options.realtime.open({
6816
+ format: options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT,
6795
6817
  languageStrategy: options.languageStrategy,
6796
6818
  lexicon,
6819
+ modalities: options.modalities,
6797
6820
  phraseHints,
6798
6821
  sessionId: options.id
6799
- });
6822
+ }) : inputAdapter.open({
6823
+ format: DEFAULT_FORMAT,
6824
+ languageStrategy: options.languageStrategy,
6825
+ lexicon,
6826
+ phraseHints,
6827
+ sessionId: options.id
6828
+ }));
6800
6829
  const generation = ++adapterGenerationCounter;
6801
6830
  sttSession = openedSession;
6802
6831
  activeAdapterGeneration = generation;
@@ -6966,9 +6995,10 @@ var createVoiceSession = (options) => {
6966
6995
  });
6967
6996
  await appendTrace({
6968
6997
  payload: {
6998
+ assistantMode: resolveVoiceAssistantMode(options),
6999
+ realtimeConfigured: Boolean(options.realtime),
6969
7000
  text: output.assistantText,
6970
- ttsConfigured: Boolean(options.tts),
6971
- realtimeConfigured: Boolean(options.realtime)
7001
+ ttsConfigured: Boolean(options.tts)
6972
7002
  },
6973
7003
  session,
6974
7004
  turnId: turn.id,
package/dist/types.d.ts CHANGED
@@ -194,7 +194,10 @@ export type RealtimeAdapterOpenOptions = {
194
194
  format: AudioFormat;
195
195
  languageStrategy?: VoiceLanguageStrategy;
196
196
  lexicon?: VoiceLexiconEntry[];
197
+ modalities?: ReadonlyArray<"audio" | "text">;
197
198
  phraseHints?: VoicePhraseHint[];
199
+ promptCacheKey?: string;
200
+ semanticVAD?: import("./assistantMode").VoiceSemanticVADConfig;
198
201
  signal?: AbortSignal;
199
202
  };
200
203
  export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
@@ -732,6 +735,8 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
732
735
  };
733
736
  redact?: import("./redaction").VoiceTranscriptRedactor;
734
737
  semanticTurnDetector?: import("./semanticTurn").VoiceSemanticTurnDetector;
738
+ assistantMode?: import("./assistantMode").VoiceAssistantMode;
739
+ modalities?: ReadonlyArray<"audio" | "text">;
735
740
  reconnect: Required<VoiceReconnectConfig>;
736
741
  phraseHints?: VoicePhraseHint[];
737
742
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.483",
3
+ "version": "0.0.22-beta.484",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",