@absolutejs/voice 0.0.22-beta.483 → 0.0.22-beta.485

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ import type { CreateVoiceSessionOptions, VoiceSessionRecord } from "./types";
2
+ export type VoiceAssistantMode = "cascade" | "s2s";
3
+ export type VoiceSemanticVADConfig = {
4
+ createResponseAutomatically?: boolean;
5
+ eagerness?: "auto" | "high" | "low" | "medium";
6
+ silenceDurationMs?: number;
7
+ };
8
+ export type VoiceAssistantModality = "audio" | "text";
9
+ export declare const resolveVoiceAssistantMode: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: Pick<CreateVoiceSessionOptions<TContext, TSession, TResult>, "realtime" | "stt" | "tts"> & {
10
+ assistantMode?: VoiceAssistantMode;
11
+ }) => VoiceAssistantMode;
12
+ export type VoiceAssistantModeDescriptor = {
13
+ hasRealtime: boolean;
14
+ hasSTT: boolean;
15
+ hasTTS: boolean;
16
+ mode: VoiceAssistantMode;
17
+ modalities: VoiceAssistantModality[];
18
+ };
19
+ export declare const describeVoiceAssistantMode: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(options: Pick<CreateVoiceSessionOptions<TContext, TSession, TResult>, "realtime" | "stt" | "tts"> & {
20
+ assistantMode?: VoiceAssistantMode;
21
+ modalities?: ReadonlyArray<VoiceAssistantModality>;
22
+ }) => VoiceAssistantModeDescriptor;
package/dist/index.d.ts CHANGED
@@ -77,7 +77,11 @@ export { DEFAULT_VOICE_REDACTION_PATTERNS, createVoiceTranscriptRedactor, redact
77
77
  export type { CreateVoiceTranscriptRedactorOptions, VoiceRedactionPattern, VoiceTranscriptRedactor, } from "./redaction";
78
78
  export { DEFAULT_VOICE_PRICE_BOOK, createVoiceCostAccountant, } from "./costAccounting";
79
79
  export type { CreateVoiceCostAccountantOptions, VoiceCostAccountant, VoiceCostBreakdown, VoiceCostLLMRecord, VoiceCostSTTRecord, VoiceCostTTSRecord, VoiceCostTelephonyRecord, VoicePriceBook, VoiceProviderRates, } from "./costAccounting";
80
+ export { describeVoiceAssistantMode, resolveVoiceAssistantMode, } from "./assistantMode";
81
+ export type { VoiceAssistantMode, VoiceAssistantModality, VoiceAssistantModeDescriptor, VoiceSemanticVADConfig, } from "./assistantMode";
80
82
  export { createPunctuationSemanticTurnDetector, createRegexSemanticTurnDetector, } from "./semanticTurn";
83
+ export { VOICE_WEBHOOK_SIGNATURE_HEADER, VOICE_WEBHOOK_TIMESTAMP_HEADER, extractVoiceWebhookSignatureFromHeaders, signVoiceWebhookBody, verifyVoiceWebhookSignature, } from "./webhookVerification";
84
+ export type { VoiceWebhookVerificationInput, VoiceWebhookVerificationReason, VoiceWebhookVerificationResult, } from "./webhookVerification";
81
85
  export type { CreatePunctuationSemanticTurnDetectorOptions, CreateRegexSemanticTurnDetectorOptions, VoiceSemanticTurnDetector, VoiceSemanticTurnInput, VoiceSemanticTurnVerdict, } from "./semanticTurn";
82
86
  export { createMonologueAMDDetector } from "./amdDetector";
83
87
  export type { MonologueAMDDetectorOptions, VoiceAMDDetector, VoiceAMDDetectorInput, VoiceAMDVerdict, } from "./amdDetector";
package/dist/index.js CHANGED
@@ -3440,6 +3440,28 @@ var createVoiceMemoryRecordingStore = () => {
3440
3440
  };
3441
3441
  };
3442
3442
 
3443
+ // src/assistantMode.ts
3444
/**
 * Decides which assistant mode a voice session runs in.
 *
 * An explicitly supplied (truthy) `assistantMode` is returned unchanged.
 * Without one, the mode is "s2s" when a realtime adapter is configured and
 * "cascade" otherwise.
 *
 * @param options Session options; only `assistantMode` and `realtime` are read.
 * @returns The explicit mode when given, else "s2s" or "cascade".
 */
var resolveVoiceAssistantMode = (options) => {
  // The explicit override wins; the realtime adapter is only consulted as a fallback.
  return options.assistantMode || (options.realtime ? "s2s" : "cascade");
};
3453
/**
 * Summarises how a voice session is configured.
 *
 * @param options Session options; reads `assistantMode`, `realtime`, `stt`,
 *   `tts` and the optional `modalities` list.
 * @returns A descriptor holding the resolved mode, one boolean per configured
 *   adapter, and the de-duplicated modalities (`["audio"]` when none were given).
 */
var describeVoiceAssistantMode = (options) => {
  const mode = resolveVoiceAssistantMode(options);
  let modalities;
  if (options.modalities) {
    // A Set drops repeated entries while keeping first-seen order.
    modalities = [...new Set(options.modalities)];
  } else {
    modalities = ["audio"];
  }
  const descriptor = {
    hasRealtime: Boolean(options.realtime),
    hasSTT: Boolean(options.stt),
    hasTTS: Boolean(options.tts),
    modalities,
    mode
  };
  return descriptor;
};
3464
+
3443
3465
  // src/session.ts
3444
3466
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
3445
3467
  var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
@@ -4822,13 +4844,20 @@ var createVoiceSession = (options) => {
4822
4844
  if (!inputAdapter) {
4823
4845
  throw new Error("Voice session requires either an stt or realtime adapter.");
4824
4846
  }
4825
- const openedSession = await inputAdapter.open({
4826
- format: options.realtime ? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT : DEFAULT_FORMAT,
4847
+ const openedSession = await (options.realtime ? options.realtime.open({
4848
+ format: options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT,
4827
4849
  languageStrategy: options.languageStrategy,
4828
4850
  lexicon,
4851
+ modalities: options.modalities,
4829
4852
  phraseHints,
4830
4853
  sessionId: options.id
4831
- });
4854
+ }) : inputAdapter.open({
4855
+ format: DEFAULT_FORMAT,
4856
+ languageStrategy: options.languageStrategy,
4857
+ lexicon,
4858
+ phraseHints,
4859
+ sessionId: options.id
4860
+ }));
4832
4861
  const generation = ++adapterGenerationCounter;
4833
4862
  sttSession = openedSession;
4834
4863
  activeAdapterGeneration = generation;
@@ -4998,9 +5027,10 @@ var createVoiceSession = (options) => {
4998
5027
  });
4999
5028
  await appendTrace({
5000
5029
  payload: {
5030
+ assistantMode: resolveVoiceAssistantMode(options),
5031
+ realtimeConfigured: Boolean(options.realtime),
5001
5032
  text: output.assistantText,
5002
- ttsConfigured: Boolean(options.tts),
5003
- realtimeConfigured: Boolean(options.realtime)
5033
+ ttsConfigured: Boolean(options.tts)
5004
5034
  },
5005
5035
  session,
5006
5036
  turnId: turn.id,
@@ -35374,6 +35404,77 @@ var createRegexSemanticTurnDetector = (options) => {
35374
35404
  }
35375
35405
  };
35376
35406
  };
35407
+ // src/webhookVerification.ts
35408
+ var VOICE_WEBHOOK_SIGNATURE_HEADER = "x-absolutejs-signature";
35409
+ var VOICE_WEBHOOK_TIMESTAMP_HEADER = "x-absolutejs-timestamp";
35410
/**
 * Encodes a byte sequence as a lowercase hexadecimal string.
 *
 * @param bytes Iterable or array-like of byte values (e.g. a `Uint8Array`).
 * @returns Two hex digits per byte, concatenated in order.
 */
var toHex6 = (bytes) => {
  const pairs = Array.from(bytes).map((byte) => byte.toString(16).padStart(2, "0"));
  return pairs.join("");
};
35411
/**
 * Compares two strings without stopping at the first differing character.
 *
 * Strings of different length are rejected immediately; otherwise every
 * UTF-16 code unit is XOR-ed and the differences are OR-ed into one
 * accumulator, so the loop always walks the full length.
 *
 * @param left First string.
 * @param right Second string.
 * @returns `true` only when both strings have identical code units.
 */
var timingSafeEqual3 = (left, right) => {
  const length = left.length;
  if (length !== right.length) {
    return false;
  }
  let difference = 0;
  let position = 0;
  while (position < length) {
    difference |= left.charCodeAt(position) ^ right.charCodeAt(position);
    position += 1;
  }
  return difference === 0;
};
35421
/**
 * Computes the HMAC-SHA-256 signature of a webhook delivery.
 *
 * The signed message is `<timestamp>.<body>` and the key is the UTF-8
 * encoding of the shared secret.
 *
 * @param input Object with `secret`, `timestamp` and `body`.
 * @returns Promise resolving to `sha256=` followed by the lowercase hex digest.
 */
var computeSignature = async (input) => {
  const textEncoder = new TextEncoder();
  const algorithm = { hash: "SHA-256", name: "HMAC" };
  const secretBytes = textEncoder.encode(input.secret);
  // The key is non-extractable and usable for signing only.
  const hmacKey = await crypto.subtle.importKey("raw", secretBytes, algorithm, false, ["sign"]);
  const message = textEncoder.encode(`${input.timestamp}.${input.body}`);
  const digest = await crypto.subtle.sign("HMAC", hmacKey, message);
  const hex = toHex6(new Uint8Array(digest));
  return `sha256=${hex}`;
};
35428
/**
 * Signs a webhook body for delivery.
 *
 * @param input Object with `body`, `secret` and `timestamp`.
 * @returns Promise resolving to the signature string produced by `computeSignature`.
 */
var signVoiceWebhookBody = async (input) => {
  return computeSignature(input);
};
35429
/**
 * Verifies the signature and freshness of an incoming webhook delivery.
 *
 * Checks run in this order and the first failure is returned:
 * missing secret, missing signature, signature not prefixed with `sha256=`,
 * missing timestamp, unusable or stale timestamp, signature mismatch.
 *
 * The timestamp is parsed with `Number()` and compared against `now`
 * (default `Date.now()`), so it is interpreted as epoch milliseconds.
 * `toleranceMs` defaults to five minutes; negative values are clamped to 0,
 * and a tolerance of 0 skips the freshness comparison entirely.
 *
 * A `NaN` tolerance (for example `Number(undefined)` from an unset setting)
 * falls back to the default window, and a `NaN` `now` is reported as a stale
 * timestamp. Previously either value made the freshness comparison evaluate
 * to false, which silently disabled replay protection.
 *
 * @param input Object with `body`, `secret`, `signature`, `timestamp` and the
 *   optional `now` / `toleranceMs` overrides.
 * @returns Promise resolving to `{ ok: true }` or `{ ok: false, reason }`.
 */
var verifyVoiceWebhookSignature = async (input) => {
  const defaultToleranceMs = 5 * 60 * 1000;
  if (!input.secret) {
    return { ok: false, reason: "missing-secret" };
  }
  if (!input.signature) {
    return { ok: false, reason: "missing-signature" };
  }
  if (!input.signature.startsWith("sha256=")) {
    return { ok: false, reason: "unsupported-algorithm" };
  }
  if (!input.timestamp) {
    return { ok: false, reason: "missing-timestamp" };
  }
  const timestampMs = Number(input.timestamp);
  const requestedToleranceMs = input.toleranceMs ?? defaultToleranceMs;
  // NaN cannot express a window; keep the default instead of disabling the check.
  const toleranceMs = Number.isNaN(requestedToleranceMs) ? defaultToleranceMs : Math.max(0, requestedToleranceMs);
  const ageMs = Math.abs((input.now ?? Date.now()) - timestampMs);
  // `!(age <= tolerance)` equals `age > tolerance` for real numbers but also rejects NaN.
  if (!Number.isFinite(timestampMs) || toleranceMs > 0 && !(ageMs <= toleranceMs)) {
    return { ok: false, reason: "stale-timestamp" };
  }
  const expected = await computeSignature({
    body: input.body,
    secret: input.secret,
    timestamp: input.timestamp
  });
  if (!timingSafeEqual3(expected, input.signature)) {
    return { ok: false, reason: "signature-mismatch" };
  }
  return { ok: true };
};
35457
/**
 * Reads the webhook signature and timestamp headers from a request.
 *
 * Accepts either a `Headers` instance or a plain header record. For a plain
 * record the header name is matched case-insensitively, the first matching
 * key wins, and an array value contributes its first element.
 *
 * @param headers `Headers` object or record of header name to value(s).
 * @returns `{ signature, timestamp }`, each `null` when the header is absent.
 */
var extractVoiceWebhookSignatureFromHeaders = (headers) => {
  const readHeader = (name) => {
    if (headers instanceof Headers) {
      return headers.get(name);
    }
    const wanted = name.toLowerCase();
    const match = Object.entries(headers).find(([key]) => key.toLowerCase() === wanted);
    if (!match) {
      return null;
    }
    const value = match[1];
    const first = Array.isArray(value) ? value[0] : value;
    return first ?? null;
  };
  const signature = readHeader(VOICE_WEBHOOK_SIGNATURE_HEADER);
  const timestamp = readHeader(VOICE_WEBHOOK_TIMESTAMP_HEADER);
  return { signature, timestamp };
};
35377
35478
  // src/amdDetector.ts
35378
35479
  var createMonologueAMDDetector = (options = {}) => {
35379
35480
  const minMonologueMs = options.minMonologueMs ?? 8000;
@@ -41921,7 +42022,7 @@ var createVoiceMemoryStore = () => {
41921
42022
  };
41922
42023
  // src/opsWebhook.ts
41923
42024
  import { Elysia as Elysia66 } from "elysia";
41924
- var toHex6 = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
42025
+ var toHex7 = (bytes) => Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
41925
42026
  var signVoiceOpsWebhookBody = async (input) => {
41926
42027
  const encoder2 = new TextEncoder;
41927
42028
  const key = await crypto.subtle.importKey("raw", encoder2.encode(input.secret), {
@@ -41929,9 +42030,9 @@ var signVoiceOpsWebhookBody = async (input) => {
41929
42030
  name: "HMAC"
41930
42031
  }, false, ["sign"]);
41931
42032
  const signature = await crypto.subtle.sign("HMAC", key, encoder2.encode(`${input.timestamp}.${input.body}`));
41932
- return `sha256=${toHex6(new Uint8Array(signature))}`;
42033
+ return `sha256=${toHex7(new Uint8Array(signature))}`;
41933
42034
  };
41934
- var timingSafeEqual3 = (left, right) => {
42035
+ var timingSafeEqual4 = (left, right) => {
41935
42036
  const encoder2 = new TextEncoder;
41936
42037
  const leftBytes = encoder2.encode(left);
41937
42038
  const rightBytes = encoder2.encode(right);
@@ -42038,7 +42139,7 @@ var verifyVoiceOpsWebhookSignature = async (input) => {
42038
42139
  secret: input.secret,
42039
42140
  timestamp: input.timestamp
42040
42141
  });
42041
- if (!timingSafeEqual3(expected, input.signature)) {
42142
+ if (!timingSafeEqual4(expected, input.signature)) {
42042
42143
  return {
42043
42144
  ok: false,
42044
42145
  reason: "invalid-signature"
@@ -45947,6 +46048,7 @@ export {
45947
46048
  voiceGuardrailPolicyPresets,
45948
46049
  voiceComplianceRedactionDefaults,
45949
46050
  voice,
46051
+ verifyVoiceWebhookSignature,
45950
46052
  verifyVoiceTwilioWebhookSignature,
45951
46053
  verifyVoiceTelnyxWebhookSignature,
45952
46054
  verifyVoicePlivoWebhookSignature,
@@ -45990,6 +46092,7 @@ export {
45990
46092
  summarizeVoiceAssistantRuns,
45991
46093
  summarizeVoiceAssistantHealth,
45992
46094
  startVoiceOpsTask,
46095
+ signVoiceWebhookBody,
45993
46096
  signVoiceTwilioWebhook,
45994
46097
  signVoicePlivoWebhook,
45995
46098
  shapeTelephonyAssistantText,
@@ -46034,6 +46137,7 @@ export {
46034
46137
  resolveVoiceDiagnosticsTraceFilter,
46035
46138
  resolveVoiceAuditTrailFilter,
46036
46139
  resolveVoiceAuditDeliveryFilter,
46140
+ resolveVoiceAssistantMode,
46037
46141
  resolveVoiceAssistantMemoryNamespace,
46038
46142
  resolveTurnDetectionConfig,
46039
46143
  resolveLatestVoiceCallDebuggerSessionId,
@@ -46188,6 +46292,7 @@ export {
46188
46292
  filterVoiceAuditEvents,
46189
46293
  fetchVoiceProofTarget,
46190
46294
  failVoiceOpsTask,
46295
+ extractVoiceWebhookSignatureFromHeaders,
46191
46296
  extractVoiceMediaPipelineIssueEntries,
46192
46297
  exportVoiceTrace,
46193
46298
  exportVoiceAuditTrail,
@@ -46228,6 +46333,7 @@ export {
46228
46333
  evaluateVoiceAgentSquadContractEvidence,
46229
46334
  encodeTwilioMulawBase64,
46230
46335
  encodePcmAsWav,
46336
+ describeVoiceAssistantMode,
46231
46337
  deliverVoiceTraceEventsToSinks,
46232
46338
  deliverVoiceObservabilityExport,
46233
46339
  deliverVoiceMonitorIssueNotifications,
@@ -46733,6 +46839,8 @@ export {
46733
46839
  appendVoiceProviderRouterTraceEvent,
46734
46840
  appendVoiceIOProviderRouterTraceEvent,
46735
46841
  acknowledgeVoiceMonitorIssue,
46842
+ VOICE_WEBHOOK_TIMESTAMP_HEADER,
46843
+ VOICE_WEBHOOK_SIGNATURE_HEADER,
46736
46844
  VOICE_LIVE_OPS_ACTIONS,
46737
46845
  TURN_PROFILE_DEFAULTS,
46738
46846
  DEFAULT_VOICE_REDACTION_PATTERNS,
@@ -5408,6 +5408,28 @@ var createVoiceMemoryRecordingStore = () => {
5408
5408
  };
5409
5409
  };
5410
5410
 
5411
+ // src/assistantMode.ts
5412
/**
 * Decides which assistant mode a voice session runs in.
 *
 * An explicitly supplied (truthy) `assistantMode` is returned unchanged.
 * Without one, the mode is "s2s" when a realtime adapter is configured and
 * "cascade" otherwise.
 *
 * @param options Session options; only `assistantMode` and `realtime` are read.
 * @returns The explicit mode when given, else "s2s" or "cascade".
 */
var resolveVoiceAssistantMode = (options) => {
  // The explicit override wins; the realtime adapter is only consulted as a fallback.
  return options.assistantMode || (options.realtime ? "s2s" : "cascade");
};
5421
/**
 * Summarises how a voice session is configured.
 *
 * @param options Session options; reads `assistantMode`, `realtime`, `stt`,
 *   `tts` and the optional `modalities` list.
 * @returns A descriptor holding the resolved mode, one boolean per configured
 *   adapter, and the de-duplicated modalities (`["audio"]` when none were given).
 */
var describeVoiceAssistantMode = (options) => {
  const mode = resolveVoiceAssistantMode(options);
  let modalities;
  if (options.modalities) {
    // A Set drops repeated entries while keeping first-seen order.
    modalities = [...new Set(options.modalities)];
  } else {
    modalities = ["audio"];
  }
  const descriptor = {
    hasRealtime: Boolean(options.realtime),
    hasSTT: Boolean(options.stt),
    hasTTS: Boolean(options.tts),
    modalities,
    mode
  };
  return descriptor;
};
5432
+
5411
5433
  // src/session.ts
5412
5434
  var DEFAULT_RECONNECT_TIMEOUT = 30000;
5413
5435
  var DEFAULT_MAX_RECONNECT_ATTEMPTS2 = 10;
@@ -6790,13 +6812,20 @@ var createVoiceSession = (options) => {
6790
6812
  if (!inputAdapter) {
6791
6813
  throw new Error("Voice session requires either an stt or realtime adapter.");
6792
6814
  }
6793
- const openedSession = await inputAdapter.open({
6794
- format: options.realtime ? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT : DEFAULT_FORMAT,
6815
+ const openedSession = await (options.realtime ? options.realtime.open({
6816
+ format: options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT,
6795
6817
  languageStrategy: options.languageStrategy,
6796
6818
  lexicon,
6819
+ modalities: options.modalities,
6797
6820
  phraseHints,
6798
6821
  sessionId: options.id
6799
- });
6822
+ }) : inputAdapter.open({
6823
+ format: DEFAULT_FORMAT,
6824
+ languageStrategy: options.languageStrategy,
6825
+ lexicon,
6826
+ phraseHints,
6827
+ sessionId: options.id
6828
+ }));
6800
6829
  const generation = ++adapterGenerationCounter;
6801
6830
  sttSession = openedSession;
6802
6831
  activeAdapterGeneration = generation;
@@ -6966,9 +6995,10 @@ var createVoiceSession = (options) => {
6966
6995
  });
6967
6996
  await appendTrace({
6968
6997
  payload: {
6998
+ assistantMode: resolveVoiceAssistantMode(options),
6999
+ realtimeConfigured: Boolean(options.realtime),
6969
7000
  text: output.assistantText,
6970
- ttsConfigured: Boolean(options.tts),
6971
- realtimeConfigured: Boolean(options.realtime)
7001
+ ttsConfigured: Boolean(options.tts)
6972
7002
  },
6973
7003
  session,
6974
7004
  turnId: turn.id,
package/dist/types.d.ts CHANGED
@@ -194,7 +194,10 @@ export type RealtimeAdapterOpenOptions = {
194
194
  format: AudioFormat;
195
195
  languageStrategy?: VoiceLanguageStrategy;
196
196
  lexicon?: VoiceLexiconEntry[];
197
+ modalities?: ReadonlyArray<"audio" | "text">;
197
198
  phraseHints?: VoicePhraseHint[];
199
+ promptCacheKey?: string;
200
+ semanticVAD?: import("./assistantMode").VoiceSemanticVADConfig;
198
201
  signal?: AbortSignal;
199
202
  };
200
203
  export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
@@ -732,6 +735,8 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
732
735
  };
733
736
  redact?: import("./redaction").VoiceTranscriptRedactor;
734
737
  semanticTurnDetector?: import("./semanticTurn").VoiceSemanticTurnDetector;
738
+ assistantMode?: import("./assistantMode").VoiceAssistantMode;
739
+ modalities?: ReadonlyArray<"audio" | "text">;
735
740
  reconnect: Required<VoiceReconnectConfig>;
736
741
  phraseHints?: VoicePhraseHint[];
737
742
  sessionMetadata?: Record<string, unknown>;
@@ -0,0 +1,27 @@
1
+ export type VoiceWebhookVerificationReason = "missing-secret" | "missing-signature" | "missing-timestamp" | "signature-mismatch" | "stale-timestamp" | "unsupported-algorithm";
2
+ export type VoiceWebhookVerificationResult = {
3
+ ok: true;
4
+ } | {
5
+ ok: false;
6
+ reason: VoiceWebhookVerificationReason;
7
+ };
8
+ export type VoiceWebhookVerificationInput = {
9
+ body: string;
10
+ now?: number;
11
+ secret?: string;
12
+ signature?: string | null;
13
+ timestamp?: string | null;
14
+ toleranceMs?: number;
15
+ };
16
+ export declare const VOICE_WEBHOOK_SIGNATURE_HEADER = "x-absolutejs-signature";
17
+ export declare const VOICE_WEBHOOK_TIMESTAMP_HEADER = "x-absolutejs-timestamp";
18
+ export declare const signVoiceWebhookBody: (input: {
19
+ body: string;
20
+ secret: string;
21
+ timestamp: string;
22
+ }) => Promise<string>;
23
+ export declare const verifyVoiceWebhookSignature: (input: VoiceWebhookVerificationInput) => Promise<VoiceWebhookVerificationResult>;
24
+ export declare const extractVoiceWebhookSignatureFromHeaders: (headers: Headers | Record<string, string | string[] | undefined>) => {
25
+ signature: string | null;
26
+ timestamp: string | null;
27
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.483",
3
+ "version": "0.0.22-beta.485",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",