@absolutejs/voice 0.0.22-beta.479 → 0.0.22-beta.480

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,14 @@
1
- import type { AIProviderConfig } from "@absolutejs/ai";
1
+ import type { AIProviderConfig, AIUsage } from "@absolutejs/ai";
2
2
  import type { VoiceAgentModel } from "./agent";
3
3
  import type { VoiceSessionRecord } from "./types";
4
4
  export type CreateAIVoiceModelOptions = {
5
5
  model: string;
6
+ onUsage?: (usage: AIUsage & {
7
+ model: string;
8
+ provider?: string;
9
+ }) => void;
6
10
  provider: AIProviderConfig;
11
+ providerName?: string;
7
12
  signal?: AbortSignal;
8
13
  systemPrompt?: string;
9
14
  };
@@ -0,0 +1,76 @@
1
+ export type VoiceProviderRates = {
2
+ llm?: {
3
+ cachedInputPerMillionTokensUsd?: number;
4
+ inputPerMillionTokensUsd: number;
5
+ outputPerMillionTokensUsd: number;
6
+ };
7
+ stt?: {
8
+ perSecondUsd: number;
9
+ };
10
+ telephony?: {
11
+ perMinuteUsd: number;
12
+ };
13
+ tts?: {
14
+ perMillionCharactersUsd?: number;
15
+ perSecondUsd?: number;
16
+ };
17
+ };
18
+ export type VoicePriceBook = Record<string, VoiceProviderRates>;
19
+ export declare const DEFAULT_VOICE_PRICE_BOOK: VoicePriceBook;
20
+ export type VoiceCostLLMRecord = {
21
+ cachedInputTokens?: number;
22
+ inputTokens?: number;
23
+ model?: string;
24
+ outputTokens?: number;
25
+ provider?: string;
26
+ };
27
+ export type VoiceCostTTSRecord = {
28
+ audioMs?: number;
29
+ characters?: number;
30
+ provider?: string;
31
+ voice?: string;
32
+ };
33
+ export type VoiceCostSTTRecord = {
34
+ audioMs: number;
35
+ model?: string;
36
+ provider?: string;
37
+ };
38
+ export type VoiceCostTelephonyRecord = {
39
+ minutes: number;
40
+ provider?: string;
41
+ };
42
+ export type VoiceCostBreakdown = {
43
+ llm: {
44
+ cachedInputTokens: number;
45
+ inputTokens: number;
46
+ outputTokens: number;
47
+ usd: number;
48
+ };
49
+ sessionId?: string;
50
+ stt: {
51
+ audioMs: number;
52
+ usd: number;
53
+ };
54
+ telephony: {
55
+ minutes: number;
56
+ usd: number;
57
+ };
58
+ totalUsd: number;
59
+ tts: {
60
+ audioMs: number;
61
+ characters: number;
62
+ usd: number;
63
+ };
64
+ };
65
+ export type VoiceCostAccountant = {
66
+ recordLLM: (usage: VoiceCostLLMRecord) => void;
67
+ recordSTT: (input: VoiceCostSTTRecord) => void;
68
+ recordTTS: (input: VoiceCostTTSRecord) => void;
69
+ recordTelephony: (input: VoiceCostTelephonyRecord) => void;
70
+ snapshot: () => VoiceCostBreakdown;
71
+ };
72
+ export type CreateVoiceCostAccountantOptions = {
73
+ priceBook?: VoicePriceBook;
74
+ sessionId?: string;
75
+ };
76
+ export declare const createVoiceCostAccountant: (options?: CreateVoiceCostAccountantOptions) => VoiceCostAccountant;
package/dist/index.d.ts CHANGED
@@ -71,6 +71,8 @@ export { createVoiceSessionListRoutes, createVoiceSessionReplayHTMLHandler, crea
71
71
  export { createVoiceAgent, createVoiceAgentSquad, createVoiceAgentTool, } from "./agent";
72
72
  export { createAIVoiceModel } from "./aiVoiceModel";
73
73
  export type { CreateAIVoiceModelOptions } from "./aiVoiceModel";
74
+ export { DEFAULT_VOICE_PRICE_BOOK, createVoiceCostAccountant, } from "./costAccounting";
75
+ export type { CreateVoiceCostAccountantOptions, VoiceCostAccountant, VoiceCostBreakdown, VoiceCostLLMRecord, VoiceCostSTTRecord, VoiceCostTTSRecord, VoiceCostTelephonyRecord, VoicePriceBook, VoiceProviderRates, } from "./costAccounting";
74
76
  export { createMonologueAMDDetector } from "./amdDetector";
75
77
  export type { MonologueAMDDetectorOptions, VoiceAMDDetector, VoiceAMDDetectorInput, VoiceAMDVerdict, } from "./amdDetector";
76
78
  export { createVoiceRAGTool } from "./ragTool";
package/dist/index.js CHANGED
@@ -4030,6 +4030,31 @@ var createVoiceSession = (options) => {
4030
4030
  }
4031
4031
  }
4032
4032
  };
4033
+ const finalizeCostReport = async (session) => {
4034
+ if (!options.costAccountant) {
4035
+ return;
4036
+ }
4037
+ const lifecycle = session.call;
4038
+ if (lifecycle?.startedAt && lifecycle.endedAt) {
4039
+ const durationMs = Math.max(0, lifecycle.endedAt - lifecycle.startedAt);
4040
+ const minutes = durationMs / 60000;
4041
+ if (minutes > 0) {
4042
+ options.costAccountant.recordTelephony({
4043
+ minutes,
4044
+ provider: options.costTelephony?.provider
4045
+ });
4046
+ }
4047
+ }
4048
+ const breakdown = options.costAccountant.snapshot();
4049
+ await appendTrace({
4050
+ payload: {
4051
+ ...breakdown,
4052
+ sessionId: options.id
4053
+ },
4054
+ session,
4055
+ type: "cost.ready"
4056
+ });
4057
+ };
4033
4058
  const cancelActiveTTS = async (reason) => {
4034
4059
  const activeSession = ttsSession;
4035
4060
  const cancelledTurnId = activeTTSTurnId;
@@ -4977,6 +5002,11 @@ var createVoiceSession = (options) => {
4977
5002
  turnId: turn.id
4978
5003
  });
4979
5004
  await activeTTSSession.send(output.assistantText);
5005
+ if (options.costAccountant) {
5006
+ options.costAccountant.recordTTS({
5007
+ characters: output.assistantText.length
5008
+ });
5009
+ }
4980
5010
  await appendTurnLatencyStage({
4981
5011
  session,
4982
5012
  stage: "tts_send_completed",
@@ -5145,6 +5175,11 @@ var createVoiceSession = (options) => {
5145
5175
  primaryAudioMs: getBufferedAudioDurationMs(currentTurnAudio.map((audio) => audio.chunk)),
5146
5176
  primaryPassCostUnit: options.costTelemetry?.primaryPassCostUnit
5147
5177
  });
5178
+ if (options.costAccountant && costEstimate.totalBillableAudioMs > 0) {
5179
+ options.costAccountant.recordSTT({
5180
+ audioMs: costEstimate.totalBillableAudioMs
5181
+ });
5182
+ }
5148
5183
  const turn = {
5149
5184
  committedAt: Date.now(),
5150
5185
  id: createId(),
@@ -5416,6 +5451,7 @@ var createVoiceSession = (options) => {
5416
5451
  await closeTTSSession(reason);
5417
5452
  await closeAdapter(reason);
5418
5453
  await persistRecordings();
5454
+ await finalizeCostReport(session);
5419
5455
  await Promise.resolve(socket.close(1000, reason));
5420
5456
  if (session.call?.endedAt && session.call.disposition === disposition) {
5421
5457
  await appendTrace({
@@ -34924,6 +34960,12 @@ var createAIVoiceModel = (options) => ({
34924
34960
  id: chunk.id,
34925
34961
  name: chunk.name
34926
34962
  });
34963
+ } else if (chunk.type === "done" && chunk.usage && options.onUsage) {
34964
+ options.onUsage({
34965
+ ...chunk.usage,
34966
+ model: options.model,
34967
+ provider: options.providerName
34968
+ });
34927
34969
  }
34928
34970
  }
34929
34971
  const output = {
@@ -34935,6 +34977,149 @@ var createAIVoiceModel = (options) => ({
34935
34977
  return output;
34936
34978
  }
34937
34979
  });
34980
+ // src/costAccounting.ts
34981
+ var DEFAULT_VOICE_PRICE_BOOK = {
34982
+ "anthropic:claude-opus-4-5": {
34983
+ llm: {
34984
+ cachedInputPerMillionTokensUsd: 1.5,
34985
+ inputPerMillionTokensUsd: 15,
34986
+ outputPerMillionTokensUsd: 75
34987
+ }
34988
+ },
34989
+ "anthropic:claude-sonnet-4-5": {
34990
+ llm: {
34991
+ cachedInputPerMillionTokensUsd: 0.3,
34992
+ inputPerMillionTokensUsd: 3,
34993
+ outputPerMillionTokensUsd: 15
34994
+ }
34995
+ },
34996
+ "assemblyai:streaming": { stt: { perSecondUsd: 0.00018 } },
34997
+ "azure:tts-neural": { tts: { perMillionCharactersUsd: 16 } },
34998
+ "cartesia:sonic-2": { tts: { perMillionCharactersUsd: 65 } },
34999
+ "deepgram:nova-3": { stt: { perSecondUsd: 0.000077 } },
35000
+ "elevenlabs:flash-v2-5": { tts: { perMillionCharactersUsd: 50 } },
35001
+ "openai:gpt-4o-mini": {
35002
+ llm: {
35003
+ cachedInputPerMillionTokensUsd: 0.075,
35004
+ inputPerMillionTokensUsd: 0.15,
35005
+ outputPerMillionTokensUsd: 0.6
35006
+ }
35007
+ },
35008
+ "openai:gpt-4o-realtime": {
35009
+ llm: {
35010
+ cachedInputPerMillionTokensUsd: 2.5,
35011
+ inputPerMillionTokensUsd: 5,
35012
+ outputPerMillionTokensUsd: 20
35013
+ }
35014
+ },
35015
+ "openai:whisper-1": { stt: { perSecondUsd: 0.0001 } },
35016
+ telnyx: { telephony: { perMinuteUsd: 0.007 } },
35017
+ twilio: { telephony: { perMinuteUsd: 0.014 } }
35018
+ };
35019
+ var resolveProviderKey = (provider, model) => {
35020
+ if (provider && model) {
35021
+ return `${provider.toLowerCase()}:${model.toLowerCase()}`;
35022
+ }
35023
+ if (provider) {
35024
+ return provider.toLowerCase();
35025
+ }
35026
+ return;
35027
+ };
35028
+ var lookupRates = (priceBook, provider, model) => {
35029
+ const exactKey = resolveProviderKey(provider, model);
35030
+ if (exactKey && priceBook[exactKey]) {
35031
+ return priceBook[exactKey];
35032
+ }
35033
+ const providerKey = provider?.toLowerCase();
35034
+ if (providerKey && priceBook[providerKey]) {
35035
+ return priceBook[providerKey];
35036
+ }
35037
+ return;
35038
+ };
35039
+ var createVoiceCostAccountant = (options = {}) => {
35040
+ const priceBook = options.priceBook ?? DEFAULT_VOICE_PRICE_BOOK;
35041
+ let llmInput = 0;
35042
+ let llmCachedInput = 0;
35043
+ let llmOutput = 0;
35044
+ let llmUsd = 0;
35045
+ let ttsCharacters = 0;
35046
+ let ttsAudioMs = 0;
35047
+ let ttsUsd = 0;
35048
+ let sttAudioMs = 0;
35049
+ let sttUsd = 0;
35050
+ let telephonyMinutes = 0;
35051
+ let telephonyUsd = 0;
35052
+ return {
35053
+ recordLLM: (usage) => {
35054
+ const input = usage.inputTokens ?? 0;
35055
+ const cached = usage.cachedInputTokens ?? 0;
35056
+ const output = usage.outputTokens ?? 0;
35057
+ llmInput += input;
35058
+ llmCachedInput += cached;
35059
+ llmOutput += output;
35060
+ const rates = lookupRates(priceBook, usage.provider, usage.model)?.llm;
35061
+ if (!rates) {
35062
+ return;
35063
+ }
35064
+ const cachedRate = rates.cachedInputPerMillionTokensUsd ?? rates.inputPerMillionTokensUsd;
35065
+ llmUsd += Math.max(0, input - cached) * rates.inputPerMillionTokensUsd / 1e6 + cached * cachedRate / 1e6 + output * rates.outputPerMillionTokensUsd / 1e6;
35066
+ },
35067
+ recordSTT: (input) => {
35068
+ sttAudioMs += Math.max(0, input.audioMs);
35069
+ const rates = lookupRates(priceBook, input.provider, input.model)?.stt;
35070
+ if (!rates) {
35071
+ return;
35072
+ }
35073
+ sttUsd += Math.max(0, input.audioMs) / 1000 * rates.perSecondUsd;
35074
+ },
35075
+ recordTTS: (input) => {
35076
+ const chars = input.characters ?? 0;
35077
+ const audioMs = input.audioMs ?? 0;
35078
+ ttsCharacters += chars;
35079
+ ttsAudioMs += audioMs;
35080
+ const rates = lookupRates(priceBook, input.provider, input.voice)?.tts;
35081
+ if (!rates) {
35082
+ return;
35083
+ }
35084
+ if (rates.perMillionCharactersUsd !== undefined && chars > 0) {
35085
+ ttsUsd += chars * rates.perMillionCharactersUsd / 1e6;
35086
+ } else if (rates.perSecondUsd !== undefined && audioMs > 0) {
35087
+ ttsUsd += audioMs / 1000 * rates.perSecondUsd;
35088
+ }
35089
+ },
35090
+ recordTelephony: (input) => {
35091
+ telephonyMinutes += Math.max(0, input.minutes);
35092
+ const rates = lookupRates(priceBook, input.provider)?.telephony;
35093
+ if (!rates) {
35094
+ return;
35095
+ }
35096
+ telephonyUsd += Math.max(0, input.minutes) * rates.perMinuteUsd;
35097
+ },
35098
+ snapshot: () => ({
35099
+ llm: {
35100
+ cachedInputTokens: llmCachedInput,
35101
+ inputTokens: llmInput,
35102
+ outputTokens: llmOutput,
35103
+ usd: Math.round(llmUsd * 1e6) / 1e6
35104
+ },
35105
+ sessionId: options.sessionId,
35106
+ stt: {
35107
+ audioMs: sttAudioMs,
35108
+ usd: Math.round(sttUsd * 1e6) / 1e6
35109
+ },
35110
+ telephony: {
35111
+ minutes: telephonyMinutes,
35112
+ usd: Math.round(telephonyUsd * 1e6) / 1e6
35113
+ },
35114
+ totalUsd: Math.round((llmUsd + ttsUsd + sttUsd + telephonyUsd) * 1e6) / 1e6,
35115
+ tts: {
35116
+ audioMs: ttsAudioMs,
35117
+ characters: ttsCharacters,
35118
+ usd: Math.round(ttsUsd * 1e6) / 1e6
35119
+ }
35120
+ })
35121
+ };
35122
+ };
34938
35123
  // src/amdDetector.ts
34939
35124
  var createMonologueAMDDetector = (options = {}) => {
34940
35125
  const minMonologueMs = options.minMonologueMs ?? 8000;
@@ -46076,6 +46261,7 @@ export {
46076
46261
  createVoiceDeliveryRuntime,
46077
46262
  createVoiceDataControlRoutes,
46078
46263
  createVoiceDTMFTool,
46264
+ createVoiceCostAccountant,
46079
46265
  createVoiceCompetitiveCoverageRoutes,
46080
46266
  createVoiceCampaignWorkerLoop,
46081
46267
  createVoiceCampaignWorker,
@@ -46290,5 +46476,6 @@ export {
46290
46476
  VOICE_LIVE_OPS_ACTIONS,
46291
46477
  TURN_PROFILE_DEFAULTS,
46292
46478
  DEFAULT_VOICE_PROOF_TREND_PROFILE_DEFINITIONS,
46293
- DEFAULT_VOICE_PROOF_TRENDS_MAX_AGE_MS
46479
+ DEFAULT_VOICE_PROOF_TRENDS_MAX_AGE_MS,
46480
+ DEFAULT_VOICE_PRICE_BOOK
46294
46481
  };
@@ -5998,6 +5998,31 @@ var createVoiceSession = (options) => {
5998
5998
  }
5999
5999
  }
6000
6000
  };
6001
+ const finalizeCostReport = async (session) => {
6002
+ if (!options.costAccountant) {
6003
+ return;
6004
+ }
6005
+ const lifecycle = session.call;
6006
+ if (lifecycle?.startedAt && lifecycle.endedAt) {
6007
+ const durationMs = Math.max(0, lifecycle.endedAt - lifecycle.startedAt);
6008
+ const minutes = durationMs / 60000;
6009
+ if (minutes > 0) {
6010
+ options.costAccountant.recordTelephony({
6011
+ minutes,
6012
+ provider: options.costTelephony?.provider
6013
+ });
6014
+ }
6015
+ }
6016
+ const breakdown = options.costAccountant.snapshot();
6017
+ await appendTrace({
6018
+ payload: {
6019
+ ...breakdown,
6020
+ sessionId: options.id
6021
+ },
6022
+ session,
6023
+ type: "cost.ready"
6024
+ });
6025
+ };
6001
6026
  const cancelActiveTTS = async (reason) => {
6002
6027
  const activeSession = ttsSession;
6003
6028
  const cancelledTurnId = activeTTSTurnId;
@@ -6945,6 +6970,11 @@ var createVoiceSession = (options) => {
6945
6970
  turnId: turn.id
6946
6971
  });
6947
6972
  await activeTTSSession.send(output.assistantText);
6973
+ if (options.costAccountant) {
6974
+ options.costAccountant.recordTTS({
6975
+ characters: output.assistantText.length
6976
+ });
6977
+ }
6948
6978
  await appendTurnLatencyStage({
6949
6979
  session,
6950
6980
  stage: "tts_send_completed",
@@ -7113,6 +7143,11 @@ var createVoiceSession = (options) => {
7113
7143
  primaryAudioMs: getBufferedAudioDurationMs(currentTurnAudio.map((audio) => audio.chunk)),
7114
7144
  primaryPassCostUnit: options.costTelemetry?.primaryPassCostUnit
7115
7145
  });
7146
+ if (options.costAccountant && costEstimate.totalBillableAudioMs > 0) {
7147
+ options.costAccountant.recordSTT({
7148
+ audioMs: costEstimate.totalBillableAudioMs
7149
+ });
7150
+ }
7116
7151
  const turn = {
7117
7152
  committedAt: Date.now(),
7118
7153
  id: createId(),
@@ -7384,6 +7419,7 @@ var createVoiceSession = (options) => {
7384
7419
  await closeTTSSession(reason);
7385
7420
  await closeAdapter(reason);
7386
7421
  await persistRecordings();
7422
+ await finalizeCostReport(session);
7387
7423
  await Promise.resolve(socket.close(1000, reason));
7388
7424
  if (session.call?.endedAt && session.call.disposition === disposition) {
7389
7425
  await appendTrace({
package/dist/trace.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { S3Client, S3Options } from "bun";
2
- export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
2
+ export type VoiceTraceEventType = "assistant.guardrail" | "assistant.memory" | "assistant.run" | "agent.context" | "agent.handoff" | "agent.model" | "agent.result" | "agent.tool" | "call.handoff" | "call.lifecycle" | "client.barge_in" | "client.browser_media" | "client.live_latency" | "client.reconnect" | "client.telephony_media" | "cost.ready" | "operator.action" | "provider.decision" | "recording.ready" | "session.error" | "turn.assistant" | "turn.committed" | "turn.cost" | "turn_latency.stage" | "turn.transcript" | "workflow.contract";
3
3
  export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
4
4
  at: number;
5
5
  id?: string;
package/dist/types.d.ts CHANGED
@@ -726,6 +726,10 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
726
726
  recording?: VoiceSessionRecordingConfig;
727
727
  callSilenceTimeoutMs?: number;
728
728
  amd?: import("./amdDetector").VoiceAMDDetector<TContext, TSession, TResult>;
729
+ costAccountant?: import("./costAccounting").VoiceCostAccountant;
730
+ costTelephony?: {
731
+ provider?: string;
732
+ };
729
733
  reconnect: Required<VoiceReconnectConfig>;
730
734
  phraseHints?: VoicePhraseHint[];
731
735
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.479",
3
+ "version": "0.0.22-beta.480",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",