@absolutejs/voice 0.0.22-beta.617 → 0.0.22-beta.619

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -531,10 +531,18 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
531
531
  transcriptStabilityMs: number;
532
532
  }>;
533
533
  };
534
/**
 * Token usage reported for LLM generation, in a provider-neutral shape.
 *
 * Every field is optional: a field is left out (or `undefined`) when the
 * provider response did not report it.
 */
export type VoiceLLMUsage = {
  /** Provider label, e.g. the adapter name that produced the usage. */
  provider?: string;
  /** Model identifier the usage was reported for. */
  model?: string;
  /** Count of input (prompt) tokens. */
  inputTokens?: number;
  /** Count of input tokens reported as served from a cache. */
  cachedInputTokens?: number;
  /** Count of output (completion) tokens. */
  outputTokens?: number;
};
534
541
  export type VoiceRouteResult<TResult = unknown> = {
535
542
  complete?: boolean;
536
543
  result?: TResult;
537
544
  assistantText?: string;
545
+ usage?: VoiceLLMUsage;
538
546
  citations?: ReadonlyArray<VoiceTurnCitation>;
539
547
  transfer?: {
540
548
  metadata?: Record<string, unknown>;
package/dist/index.js CHANGED
@@ -5898,6 +5898,9 @@ var createVoiceSession = (options) => {
5898
5898
  });
5899
5899
  committedOutput = undefined;
5900
5900
  }
5901
+ if (options.costAccountant && committedOutput?.usage) {
5902
+ options.costAccountant.recordLLM(committedOutput.usage);
5903
+ }
5901
5904
  const output = {
5902
5905
  assistantText: committedOutput?.assistantText,
5903
5906
  citations: committedOutput?.citations,
@@ -7612,6 +7615,20 @@ var createVoiceRAGTool = (collection, options = {}) => {
7612
7615
  };
7613
7616
 
7614
7617
  // src/core/agent.ts
7618
/**
 * Folds one usage record into a running total.
 *
 * - If either side is missing, the other side is returned as-is (same
 *   object reference, no copy).
 * - Token counters are added field by field; a counter stays `undefined`
 *   only when neither side reported it, otherwise a missing side counts as 0.
 * - `model` and `provider` come from `next` when it has them, else from `acc`.
 *
 * @param acc Running total so far, or a falsy value when nothing was accumulated yet.
 * @param next Usage to add, or a falsy value when there is nothing to add.
 * @returns The combined usage, or whichever input exists when only one does.
 */
var addVoiceUsage = (acc, next) => {
  if (!acc || !next) {
    // Nothing to merge: hand back whichever side exists (or `acc` when `next` is missing).
    return next ? next : acc;
  }
  const addCounts = (left, right) => {
    if (left === undefined && right === undefined) {
      // Keep "never reported" distinguishable from an explicit zero.
      return undefined;
    }
    return (left ?? 0) + (right ?? 0);
  };
  return {
    cachedInputTokens: addCounts(acc.cachedInputTokens, next.cachedInputTokens),
    inputTokens: addCounts(acc.inputTokens, next.inputTokens),
    model: next.model ?? acc.model,
    outputTokens: addCounts(acc.outputTokens, next.outputTokens),
    provider: next.provider ?? acc.provider
  };
};
7615
7632
  var normalizeText3 = (value) => typeof value === "string" ? value.trim() : "";
7616
7633
  var toErrorMessage3 = (error) => error instanceof Error ? error.message : String(error);
7617
7634
  var createHistoryMessages = (session, turn) => {
@@ -7830,6 +7847,7 @@ var createVoiceAgent = (options) => {
7830
7847
  `) || undefined;
7831
7848
  stamp("agent.system-resolved", { systemChars: system?.length ?? 0 });
7832
7849
  let output = {};
7850
+ let turnUsage;
7833
7851
  for (let round = 0;round <= maxToolRounds; round += 1) {
7834
7852
  const modelStartedAt = Date.now();
7835
7853
  stamp(`agent.round${round}.generate-start`);
@@ -7848,6 +7866,7 @@ var createVoiceAgent = (options) => {
7848
7866
  })),
7849
7867
  turn: input.turn
7850
7868
  });
7869
+ turnUsage = addVoiceUsage(turnUsage, output.usage);
7851
7870
  stamp(`agent.round${round}.generate-done`, {
7852
7871
  ms: Date.now() - modelStartedAt,
7853
7872
  textChars: output.assistantText?.length ?? 0,
@@ -8111,6 +8130,7 @@ var createVoiceAgent = (options) => {
8111
8130
  result: output.result,
8112
8131
  toolResults,
8113
8132
  transfer: output.transfer,
8133
+ ...turnUsage ? { usage: turnUsage } : {},
8114
8134
  voicemail: output.voicemail
8115
8135
  };
8116
8136
  };
@@ -39579,6 +39599,7 @@ var voice = (config) => {
39579
39599
  const monitorBindings = new Map;
39580
39600
  const runtime = {
39581
39601
  activeSessions: new Map,
39602
+ pendingSessions: new Map,
39582
39603
  logger: resolveLogger(config.logger),
39583
39604
  profileSwitchGuardAutoSwitchCounts: new Map,
39584
39605
  profileSwitchGuardedSessions: new Set,
@@ -39738,6 +39759,31 @@ var voice = (config) => {
39738
39759
  turnDetection: sessionOptions.turnDetection
39739
39760
  });
39740
39761
  };
39762
/**
 * Creates a managed session for the socket, registers it, and connects it.
 *
 * The session is stored in `runtime.activeSessions` and passed to
 * `registerMonitorSession` before `connect` is awaited, so it is visible
 * to lookups while the connection is still being established.
 *
 * @param ws Socket the session is created for and connected through.
 * @param sessionId Key used for the active-session map and the monitor registration.
 * @param scenarioId Forwarded unchanged to `createManagedSession`.
 * @returns The session once `connect` has resolved.
 */
const createAndConnectSession = async (ws, sessionId, scenarioId) => {
  const managed = await createManagedSession(ws, sessionId, scenarioId);
  // Publish first, connect second — same order the inline handler code used.
  runtime.activeSessions.set(sessionId, managed);
  registerMonitorSession(sessionId, managed);
  await managed.connect(buildSocketAdapter(ws, sessionId));
  return managed;
};
39770
/**
 * Returns the session for `sessionId`, creating it at most once at a time.
 *
 * Lookup order:
 * 1. an entry in `runtime.activeSessions`;
 * 2. a creation promise already stored in `runtime.pendingSessions`;
 * 3. otherwise a new `createAndConnectSession` call, whose promise is kept
 *    in `runtime.pendingSessions` until it settles.
 *
 * @param ws Socket passed to `createAndConnectSession` when a new session is needed.
 * @param sessionId Key used for both the active and pending maps.
 * @param scenarioId Forwarded unchanged to `createAndConnectSession`.
 * @returns The existing, in-flight, or newly created session.
 */
const ensureManagedSession = async (ws, sessionId, scenarioId) => {
  // `||` keeps the original short-circuit: the pending map is only read when no active entry exists.
  const known = runtime.activeSessions.get(sessionId) || runtime.pendingSessions.get(sessionId);
  if (known) {
    return known;
  }
  const pending = createAndConnectSession(ws, sessionId, scenarioId);
  // Stored synchronously, before any await, so callers arriving meanwhile share this promise.
  runtime.pendingSessions.set(sessionId, pending);
  try {
    return await pending;
  } finally {
    // Cleared on success and on failure alike.
    runtime.pendingSessions.delete(sessionId);
  }
};
39741
39787
  const mountSurface = (app, value, factory) => {
39742
39788
  if (value === undefined || value === false) {
39743
39789
  return app;
@@ -39950,13 +39996,7 @@ var voice = (config) => {
39950
39996
  if (!audio) {
39951
39997
  return;
39952
39998
  }
39953
- const session = current ?? await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
39954
- if (!current) {
39955
- const typedSession = session;
39956
- runtime.activeSessions.set(sessionState.sessionId, typedSession);
39957
- registerMonitorSession(sessionState.sessionId, typedSession);
39958
- await session.connect(buildSocketAdapter(ws, sessionState.sessionId));
39959
- }
39999
+ const session = current ?? await ensureManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
39960
40000
  await session.receiveAudio(audio);
39961
40001
  },
39962
40002
  open: async (ws) => {
@@ -39967,11 +40007,7 @@ var voice = (config) => {
39967
40007
  runtime.activeSessions.delete(sessionState.sessionId);
39968
40008
  deregisterMonitorSession(sessionState.sessionId, "superseded");
39969
40009
  }
39970
- const session = await createManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
39971
- const typedSession = session;
39972
- runtime.activeSessions.set(sessionState.sessionId, typedSession);
39973
- registerMonitorSession(sessionState.sessionId, typedSession);
39974
- await session.connect(buildSocketAdapter(ws, sessionState.sessionId));
40010
+ await ensureManagedSession(ws, sessionState.sessionId, sessionState.scenarioId ?? undefined);
39975
40011
  }
39976
40012
  }).use(htmxRoutes()).use(surfaceRoutes());
39977
40013
  };
@@ -41796,6 +41832,13 @@ var shouldRetainVoiceRecording = (policy) => policy.retain.recordings;
41796
41832
  var shouldRetainVoiceTranscript = (policy) => policy.retain.transcriptText;
41797
41833
  // src/core/costAccounting.ts
41798
41834
  var DEFAULT_VOICE_PRICE_BOOK = {
41835
+ "anthropic:claude-haiku-4-5-20251001": {
41836
+ llm: {
41837
+ cachedInputPerMillionTokensUsd: 0.1,
41838
+ inputPerMillionTokensUsd: 1,
41839
+ outputPerMillionTokensUsd: 5
41840
+ }
41841
+ },
41799
41842
  "anthropic:claude-opus-4-5": {
41800
41843
  llm: {
41801
41844
  cachedInputPerMillionTokensUsd: 1.5,
@@ -41803,6 +41846,13 @@ var DEFAULT_VOICE_PRICE_BOOK = {
41803
41846
  outputPerMillionTokensUsd: 75
41804
41847
  }
41805
41848
  },
41849
+ "anthropic:claude-opus-4-6": {
41850
+ llm: {
41851
+ cachedInputPerMillionTokensUsd: 0.5,
41852
+ inputPerMillionTokensUsd: 5,
41853
+ outputPerMillionTokensUsd: 25
41854
+ }
41855
+ },
41806
41856
  "anthropic:claude-sonnet-4-5": {
41807
41857
  llm: {
41808
41858
  cachedInputPerMillionTokensUsd: 0.3,
@@ -41810,11 +41860,25 @@ var DEFAULT_VOICE_PRICE_BOOK = {
41810
41860
  outputPerMillionTokensUsd: 15
41811
41861
  }
41812
41862
  },
41863
+ "anthropic:claude-sonnet-4-6": {
41864
+ llm: {
41865
+ cachedInputPerMillionTokensUsd: 0.3,
41866
+ inputPerMillionTokensUsd: 3,
41867
+ outputPerMillionTokensUsd: 15
41868
+ }
41869
+ },
41813
41870
  "assemblyai:streaming": { stt: { perSecondUsd: 0.00018 } },
41814
41871
  "azure:tts-neural": { tts: { perMillionCharactersUsd: 16 } },
41815
41872
  "cartesia:sonic-2": { tts: { perMillionCharactersUsd: 65 } },
41816
41873
  "deepgram:nova-3": { stt: { perSecondUsd: 0.000077 } },
41817
41874
  "elevenlabs:flash-v2-5": { tts: { perMillionCharactersUsd: 50 } },
41875
+ "openai:gpt-4.1": {
41876
+ llm: {
41877
+ cachedInputPerMillionTokensUsd: 0.5,
41878
+ inputPerMillionTokensUsd: 2,
41879
+ outputPerMillionTokensUsd: 8
41880
+ }
41881
+ },
41818
41882
  "openai:gpt-4o-mini": {
41819
41883
  llm: {
41820
41884
  cachedInputPerMillionTokensUsd: 0.075,
@@ -45262,6 +45326,16 @@ var createVoiceFileRecordingStore = (options) => {
45262
45326
  return { get, list, put };
45263
45327
  };
45264
45328
  // src/core/modelAdapters.ts
45329
/**
 * Normalizes a provider-specific token-usage payload into the shared usage shape.
 *
 * Each counter is taken from the first candidate field holding a finite number:
 * - input:  `input_tokens`, `prompt_tokens`, `promptTokenCount`
 * - output: `output_tokens`, `completion_tokens`, `candidatesTokenCount`
 * - cached: `cache_read_input_tokens`, `input_tokens_details.cached_tokens`,
 *           `cachedContentTokenCount`, `prompt_tokens_details.cached_tokens`
 *
 * @param provider Provider label copied onto the result unchanged.
 * @param model Model identifier copied onto the result unchanged.
 * @param raw Raw usage object from the provider response; a falsy value means no usage.
 * @returns `undefined` when `raw` is falsy; otherwise an object whose token
 *   fields are finite numbers, or `undefined` where nothing usable was found.
 */
var normalizeVoiceUsage = (provider, model, raw) => {
  if (!raw)
    return;
  // Rejects NaN/Infinity and non-numbers so bad payloads never reach the totals.
  const toNum = (value) => typeof value === "number" && Number.isFinite(value) ? value : undefined;
  const toRecord = (value) => typeof value === "object" && value !== null ? value : undefined;
  const details = toRecord(raw.input_tokens_details);
  // Payloads that use `prompt_tokens` naming nest the cached count here rather
  // than under `input_tokens_details`; without this lookup it is dropped.
  const promptDetails = toRecord(raw.prompt_tokens_details);
  const inputTokens = toNum(raw.input_tokens) ?? toNum(raw.prompt_tokens) ?? toNum(raw.promptTokenCount);
  const outputTokens = toNum(raw.output_tokens) ?? toNum(raw.completion_tokens) ?? toNum(raw.candidatesTokenCount);
  // The new fallback goes last so every previously recognised payload resolves exactly as before.
  const cachedInputTokens = toNum(raw.cache_read_input_tokens) ?? toNum(details?.cached_tokens) ?? toNum(raw.cachedContentTokenCount) ?? toNum(promptDetails?.cached_tokens);
  return { cachedInputTokens, inputTokens, model, outputTokens, provider };
};
45265
45339
  var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
45266
45340
  var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
45267
45341
  switch (preset) {
@@ -46122,7 +46196,8 @@ var createOpenAIVoiceAssistantModel = (options) => {
46122
46196
  }
46123
46197
  return {
46124
46198
  ...assistantText ? { assistantText } : {},
46125
- ...toolCalls.length ? { toolCalls } : {}
46199
+ ...toolCalls.length ? { toolCalls } : {},
46200
+ ...usage ? { usage: normalizeVoiceUsage("openai", options.model, usage) } : {}
46126
46201
  };
46127
46202
  }
46128
46203
  };
@@ -46205,7 +46280,8 @@ var createAnthropicVoiceAssistantModel = (options) => {
46205
46280
  }
46206
46281
  return {
46207
46282
  ...assistantText ? { assistantText } : {},
46208
- ...toolCalls.length ? { toolCalls } : {}
46283
+ ...toolCalls.length ? { toolCalls } : {},
46284
+ ...usage ? { usage: normalizeVoiceUsage("anthropic", options.model, usage) } : {}
46209
46285
  };
46210
46286
  }
46211
46287
  };
@@ -46311,7 +46387,8 @@ var createGeminiVoiceAssistantModel = (options) => {
46311
46387
  }
46312
46388
  return {
46313
46389
  ...assistantText ? { assistantText } : {},
46314
- ...toolCalls.length ? { toolCalls } : {}
46390
+ ...toolCalls.length ? { toolCalls } : {},
46391
+ ...usage ? { usage: normalizeVoiceUsage("google", options.model, usage) } : {}
46315
46392
  };
46316
46393
  }
46317
46394
  };
@@ -4285,6 +4285,16 @@ var hardenFetch = (baseFetch = globalThis.fetch) => Object.assign(async (input,
4285
4285
  }, { preconnect: baseFetch.preconnect.bind(baseFetch) });
4286
4286
 
4287
4287
  // src/core/modelAdapters.ts
4288
/**
 * Normalizes a provider-specific token-usage payload into the shared usage shape.
 *
 * Each counter is taken from the first candidate field holding a finite number:
 * - input:  `input_tokens`, `prompt_tokens`, `promptTokenCount`
 * - output: `output_tokens`, `completion_tokens`, `candidatesTokenCount`
 * - cached: `cache_read_input_tokens`, `input_tokens_details.cached_tokens`,
 *           `cachedContentTokenCount`, `prompt_tokens_details.cached_tokens`
 *
 * @param provider Provider label copied onto the result unchanged.
 * @param model Model identifier copied onto the result unchanged.
 * @param raw Raw usage object from the provider response; a falsy value means no usage.
 * @returns `undefined` when `raw` is falsy; otherwise an object whose token
 *   fields are finite numbers, or `undefined` where nothing usable was found.
 */
var normalizeVoiceUsage = (provider, model, raw) => {
  if (!raw)
    return;
  // Rejects NaN/Infinity and non-numbers so bad payloads never reach the totals.
  const toNum = (value) => typeof value === "number" && Number.isFinite(value) ? value : undefined;
  const toRecord = (value) => typeof value === "object" && value !== null ? value : undefined;
  const details = toRecord(raw.input_tokens_details);
  // Payloads that use `prompt_tokens` naming nest the cached count here rather
  // than under `input_tokens_details`; without this lookup it is dropped.
  const promptDetails = toRecord(raw.prompt_tokens_details);
  const inputTokens = toNum(raw.input_tokens) ?? toNum(raw.prompt_tokens) ?? toNum(raw.promptTokenCount);
  const outputTokens = toNum(raw.output_tokens) ?? toNum(raw.completion_tokens) ?? toNum(raw.candidatesTokenCount);
  // The new fallback goes last so every previously recognised payload resolves exactly as before.
  const cachedInputTokens = toNum(raw.cache_read_input_tokens) ?? toNum(details?.cached_tokens) ?? toNum(raw.cachedContentTokenCount) ?? toNum(promptDetails?.cached_tokens);
  return { cachedInputTokens, inputTokens, model, outputTokens, provider };
};
4288
4298
  var isVoiceProviderRoutingPolicyPreset = (value) => value === "balanced" || value === "cost-cap" || value === "cost-first" || value === "latency-first" || value === "quality-first";
4289
4299
  var resolveVoiceProviderRoutingPolicyPreset = (preset, options = {}) => {
4290
4300
  switch (preset) {
@@ -5145,7 +5155,8 @@ var createOpenAIVoiceAssistantModel = (options) => {
5145
5155
  }
5146
5156
  return {
5147
5157
  ...assistantText ? { assistantText } : {},
5148
- ...toolCalls.length ? { toolCalls } : {}
5158
+ ...toolCalls.length ? { toolCalls } : {},
5159
+ ...usage ? { usage: normalizeVoiceUsage("openai", options.model, usage) } : {}
5149
5160
  };
5150
5161
  }
5151
5162
  };
@@ -5228,7 +5239,8 @@ var createAnthropicVoiceAssistantModel = (options) => {
5228
5239
  }
5229
5240
  return {
5230
5241
  ...assistantText ? { assistantText } : {},
5231
- ...toolCalls.length ? { toolCalls } : {}
5242
+ ...toolCalls.length ? { toolCalls } : {},
5243
+ ...usage ? { usage: normalizeVoiceUsage("anthropic", options.model, usage) } : {}
5232
5244
  };
5233
5245
  }
5234
5246
  };
@@ -5334,7 +5346,8 @@ var createGeminiVoiceAssistantModel = (options) => {
5334
5346
  }
5335
5347
  return {
5336
5348
  ...assistantText ? { assistantText } : {},
5337
- ...toolCalls.length ? { toolCalls } : {}
5349
+ ...toolCalls.length ? { toolCalls } : {},
5350
+ ...usage ? { usage: normalizeVoiceUsage("google", options.model, usage) } : {}
5338
5351
  };
5339
5352
  }
5340
5353
  };
@@ -8199,6 +8212,9 @@ var createVoiceSession = (options) => {
8199
8212
  });
8200
8213
  committedOutput = undefined;
8201
8214
  }
8215
+ if (options.costAccountant && committedOutput?.usage) {
8216
+ options.costAccountant.recordLLM(committedOutput.usage);
8217
+ }
8202
8218
  const output = {
8203
8219
  assistantText: committedOutput?.assistantText,
8204
8220
  citations: committedOutput?.citations,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.617",
3
+ "version": "0.0.22-beta.619",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",