@absolutejs/voice 0.0.22-beta.495 → 0.0.22-beta.497

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.d.ts CHANGED
@@ -3,7 +3,19 @@ import type { VoiceTraceEventStore } from "./trace";
3
3
  import type { VoiceToolRuntime } from "./toolRuntime";
4
4
  import { type VoiceAuditEventStore, type VoiceAuditLogger } from "./audit";
5
5
  export type VoiceAgentMessageRole = "assistant" | "system" | "tool" | "user";
6
+ export type VoiceAgentImageMediaType = "image/gif" | "image/jpeg" | "image/png" | "image/webp";
7
+ export type VoiceAgentMessageAttachment = {
8
+ data: string;
9
+ kind: "image";
10
+ mediaType: VoiceAgentImageMediaType;
11
+ } | {
12
+ data: string;
13
+ kind: "document";
14
+ mediaType: "application/pdf";
15
+ name?: string;
16
+ };
6
17
  export type VoiceAgentMessage = {
18
+ attachments?: VoiceAgentMessageAttachment[];
7
19
  content: string;
8
20
  metadata?: Record<string, unknown>;
9
21
  name?: string;
package/dist/index.js CHANGED
@@ -3702,6 +3702,7 @@ var createVoiceSession = (options) => {
3702
3702
  let activeAdapterGeneration = 0;
3703
3703
  let activeTTSTurnId;
3704
3704
  const currentTurnAudio = [];
3705
+ const pendingUserAttachments = [];
3705
3706
  let fallbackAttemptsForCurrentTurn = 0;
3706
3707
  let fallbackReplayAudioMsForCurrentTurn = 0;
3707
3708
  const amdDetector = options.amd;
@@ -4915,6 +4916,7 @@ var createVoiceSession = (options) => {
4915
4916
  ttsSessionPromise = (async () => {
4916
4917
  const openedSession = await ttsAdapter.open({
4917
4918
  lexicon,
4919
+ prosody: options.prosody,
4918
4920
  sessionId: options.id
4919
4921
  });
4920
4922
  ttsSession = openedSession;
@@ -5226,7 +5228,9 @@ var createVoiceSession = (options) => {
5226
5228
  audioMs: costEstimate.totalBillableAudioMs
5227
5229
  });
5228
5230
  }
5231
+ const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
5229
5232
  const turn = {
5233
+ attachments: drainedAttachments,
5230
5234
  committedAt: Date.now(),
5231
5235
  id: createId(),
5232
5236
  text: finalText,
@@ -5520,6 +5524,9 @@ var createVoiceSession = (options) => {
5520
5524
  };
5521
5525
  const api = {
5522
5526
  id: options.id,
5527
+ attachUserMedia: async (attachment) => {
5528
+ pendingUserAttachments.push(attachment);
5529
+ },
5523
5530
  close: async (reason) => {
5524
5531
  await runSerial("api.close", async () => {
5525
5532
  const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
@@ -8559,8 +8566,10 @@ var createHistoryMessages = (session, turn) => {
8559
8566
  if (previousTurn.id === turn.id) {
8560
8567
  continue;
8561
8568
  }
8562
- if (previousTurn.text.trim()) {
8569
+ const previousAttachments = previousTurn.attachments && previousTurn.attachments.length > 0 ? previousTurn.attachments : undefined;
8570
+ if (previousTurn.text.trim() || previousAttachments) {
8563
8571
  messages.push({
8572
+ attachments: previousAttachments,
8564
8573
  content: previousTurn.text,
8565
8574
  role: "user"
8566
8575
  });
@@ -8572,7 +8581,9 @@ var createHistoryMessages = (session, turn) => {
8572
8581
  });
8573
8582
  }
8574
8583
  }
8584
+ const currentAttachments = turn.attachments && turn.attachments.length > 0 ? turn.attachments : undefined;
8575
8585
  messages.push({
8586
+ attachments: currentAttachments,
8576
8587
  content: turn.text,
8577
8588
  role: "user"
8578
8589
  });
@@ -34968,6 +34979,36 @@ var toProviderMessages = (messages) => {
34968
34979
  out.push({ content: message.content, role: "user" });
34969
34980
  continue;
34970
34981
  }
34982
+ if (message.role === "user" && message.attachments && message.attachments.length > 0) {
34983
+ const blocks = [];
34984
+ if (message.content) {
34985
+ blocks.push({ content: message.content, type: "text" });
34986
+ }
34987
+ for (const attachment of message.attachments) {
34988
+ if (attachment.kind === "image") {
34989
+ blocks.push({
34990
+ source: {
34991
+ data: attachment.data,
34992
+ media_type: attachment.mediaType,
34993
+ type: "base64"
34994
+ },
34995
+ type: "image"
34996
+ });
34997
+ } else if (attachment.kind === "document") {
34998
+ blocks.push({
34999
+ name: attachment.name,
35000
+ source: {
35001
+ data: attachment.data,
35002
+ media_type: attachment.mediaType,
35003
+ type: "base64"
35004
+ },
35005
+ type: "document"
35006
+ });
35007
+ }
35008
+ }
35009
+ out.push({ content: blocks, role: "user" });
35010
+ continue;
35011
+ }
34971
35012
  out.push({ content: message.content, role: message.role });
34972
35013
  }
34973
35014
  return out;
@@ -36748,6 +36789,7 @@ var toHandoffExpectation = (event) => ({
36748
36789
  targetAgentId: getPayloadString2(event, "targetAgentId")
36749
36790
  });
36750
36791
  var createContractApi = (session) => ({
36792
+ attachUserMedia: async () => {},
36751
36793
  close: async () => {},
36752
36794
  commitTurn: async () => {},
36753
36795
  complete: async () => {},
@@ -5670,6 +5670,7 @@ var createVoiceSession = (options) => {
5670
5670
  let activeAdapterGeneration = 0;
5671
5671
  let activeTTSTurnId;
5672
5672
  const currentTurnAudio = [];
5673
+ const pendingUserAttachments = [];
5673
5674
  let fallbackAttemptsForCurrentTurn = 0;
5674
5675
  let fallbackReplayAudioMsForCurrentTurn = 0;
5675
5676
  const amdDetector = options.amd;
@@ -6883,6 +6884,7 @@ var createVoiceSession = (options) => {
6883
6884
  ttsSessionPromise = (async () => {
6884
6885
  const openedSession = await ttsAdapter.open({
6885
6886
  lexicon,
6887
+ prosody: options.prosody,
6886
6888
  sessionId: options.id
6887
6889
  });
6888
6890
  ttsSession = openedSession;
@@ -7194,7 +7196,9 @@ var createVoiceSession = (options) => {
7194
7196
  audioMs: costEstimate.totalBillableAudioMs
7195
7197
  });
7196
7198
  }
7199
+ const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
7197
7200
  const turn = {
7201
+ attachments: drainedAttachments,
7198
7202
  committedAt: Date.now(),
7199
7203
  id: createId(),
7200
7204
  text: finalText,
@@ -7488,6 +7492,9 @@ var createVoiceSession = (options) => {
7488
7492
  };
7489
7493
  const api = {
7490
7494
  id: options.id,
7495
+ attachUserMedia: async (attachment) => {
7496
+ pendingUserAttachments.push(attachment);
7497
+ },
7491
7498
  close: async (reason) => {
7492
7499
  await runSerial("api.close", async () => {
7493
7500
  const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
package/dist/types.d.ts CHANGED
@@ -49,12 +49,18 @@ export type VoiceLexiconEntry = {
49
49
  metadata?: Record<string, unknown>;
50
50
  pronunciation?: string;
51
51
  };
52
+ export type VoiceTranscriptSentiment = {
53
+ label: "negative" | "neutral" | "positive" | (string & {});
54
+ metadata?: Record<string, unknown>;
55
+ score?: number;
56
+ };
52
57
  export type Transcript = {
53
58
  id: string;
54
59
  text: string;
55
60
  isFinal: boolean;
56
61
  confidence?: number;
57
62
  language?: string;
63
+ sentiment?: VoiceTranscriptSentiment;
58
64
  speaker?: string | number;
59
65
  startedAtMs?: number;
60
66
  endedAtMs?: number;
@@ -172,9 +178,16 @@ export type TTSAdapterSession = {
172
178
  export declare const ttsAdapterSessionCanCancel: (session: TTSAdapterSession) => session is TTSAdapterSession & {
173
179
  cancel: (reason?: string) => Promise<void>;
174
180
  };
181
+ export type VoiceTTSProsody = {
182
+ emphasis?: number;
183
+ pitch?: number;
184
+ speed?: number;
185
+ style?: string;
186
+ };
175
187
  export type TTSAdapterOpenOptions = {
176
188
  sessionId: string;
177
189
  lexicon?: VoiceLexiconEntry[];
190
+ prosody?: VoiceTTSProsody;
178
191
  signal?: AbortSignal;
179
192
  };
180
193
  export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
@@ -220,6 +233,7 @@ export type VoiceTurnRecord<TResult = unknown> = {
220
233
  quality?: VoiceTranscriptQuality;
221
234
  transcripts: Transcript[];
222
235
  assistantText?: string;
236
+ attachments?: import("./agent").VoiceAgentMessageAttachment[];
223
237
  committedAt: number;
224
238
  result?: TResult;
225
239
  };
@@ -436,6 +450,7 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
436
450
  id: string;
437
451
  connect: (socket: VoiceSocket) => Promise<void>;
438
452
  receiveAudio: (audio: AudioChunk) => Promise<void>;
453
+ attachUserMedia: (attachment: import("./agent").VoiceAgentMessageAttachment) => Promise<void>;
439
454
  commitTurn: (reason?: VoiceEndOfTurnEvent["reason"]) => Promise<void>;
440
455
  disconnect: (event?: VoiceCloseEvent) => Promise<void>;
441
456
  complete: (result?: TResult) => Promise<void>;
@@ -737,6 +752,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
737
752
  semanticTurnDetector?: import("./semanticTurn").VoiceSemanticTurnDetector;
738
753
  assistantMode?: import("./assistantMode").VoiceAssistantMode;
739
754
  modalities?: ReadonlyArray<"audio" | "text">;
755
+ prosody?: VoiceTTSProsody;
740
756
  reconnect: Required<VoiceReconnectConfig>;
741
757
  phraseHints?: VoicePhraseHint[];
742
758
  sessionMetadata?: Record<string, unknown>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.495",
3
+ "version": "0.0.22-beta.497",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",