@absolutejs/voice 0.0.22-beta.495 → 0.0.22-beta.496

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/agent.d.ts CHANGED
@@ -3,7 +3,19 @@ import type { VoiceTraceEventStore } from "./trace";
3
3
  import type { VoiceToolRuntime } from "./toolRuntime";
4
4
  import { type VoiceAuditEventStore, type VoiceAuditLogger } from "./audit";
5
5
  export type VoiceAgentMessageRole = "assistant" | "system" | "tool" | "user";
6
+ export type VoiceAgentImageMediaType = "image/gif" | "image/jpeg" | "image/png" | "image/webp";
7
+ export type VoiceAgentMessageAttachment = {
8
+ data: string;
9
+ kind: "image";
10
+ mediaType: VoiceAgentImageMediaType;
11
+ } | {
12
+ data: string;
13
+ kind: "document";
14
+ mediaType: "application/pdf";
15
+ name?: string;
16
+ };
6
17
  export type VoiceAgentMessage = {
18
+ attachments?: VoiceAgentMessageAttachment[];
7
19
  content: string;
8
20
  metadata?: Record<string, unknown>;
9
21
  name?: string;
package/dist/index.js CHANGED
@@ -3702,6 +3702,7 @@ var createVoiceSession = (options) => {
3702
3702
  let activeAdapterGeneration = 0;
3703
3703
  let activeTTSTurnId;
3704
3704
  const currentTurnAudio = [];
3705
+ const pendingUserAttachments = [];
3705
3706
  let fallbackAttemptsForCurrentTurn = 0;
3706
3707
  let fallbackReplayAudioMsForCurrentTurn = 0;
3707
3708
  const amdDetector = options.amd;
@@ -5226,7 +5227,9 @@ var createVoiceSession = (options) => {
5226
5227
  audioMs: costEstimate.totalBillableAudioMs
5227
5228
  });
5228
5229
  }
5230
+ const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
5229
5231
  const turn = {
5232
+ attachments: drainedAttachments,
5230
5233
  committedAt: Date.now(),
5231
5234
  id: createId(),
5232
5235
  text: finalText,
@@ -5520,6 +5523,9 @@ var createVoiceSession = (options) => {
5520
5523
  };
5521
5524
  const api = {
5522
5525
  id: options.id,
5526
+ attachUserMedia: async (attachment) => {
5527
+ pendingUserAttachments.push(attachment);
5528
+ },
5523
5529
  close: async (reason) => {
5524
5530
  await runSerial("api.close", async () => {
5525
5531
  const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
@@ -8559,8 +8565,10 @@ var createHistoryMessages = (session, turn) => {
8559
8565
  if (previousTurn.id === turn.id) {
8560
8566
  continue;
8561
8567
  }
8562
- if (previousTurn.text.trim()) {
8568
+ const previousAttachments = previousTurn.attachments && previousTurn.attachments.length > 0 ? previousTurn.attachments : undefined;
8569
+ if (previousTurn.text.trim() || previousAttachments) {
8563
8570
  messages.push({
8571
+ attachments: previousAttachments,
8564
8572
  content: previousTurn.text,
8565
8573
  role: "user"
8566
8574
  });
@@ -8572,7 +8580,9 @@ var createHistoryMessages = (session, turn) => {
8572
8580
  });
8573
8581
  }
8574
8582
  }
8583
+ const currentAttachments = turn.attachments && turn.attachments.length > 0 ? turn.attachments : undefined;
8575
8584
  messages.push({
8585
+ attachments: currentAttachments,
8576
8586
  content: turn.text,
8577
8587
  role: "user"
8578
8588
  });
@@ -34968,6 +34978,36 @@ var toProviderMessages = (messages) => {
34968
34978
  out.push({ content: message.content, role: "user" });
34969
34979
  continue;
34970
34980
  }
34981
+ if (message.role === "user" && message.attachments && message.attachments.length > 0) {
34982
+ const blocks = [];
34983
+ if (message.content) {
34984
+ blocks.push({ content: message.content, type: "text" });
34985
+ }
34986
+ for (const attachment of message.attachments) {
34987
+ if (attachment.kind === "image") {
34988
+ blocks.push({
34989
+ source: {
34990
+ data: attachment.data,
34991
+ media_type: attachment.mediaType,
34992
+ type: "base64"
34993
+ },
34994
+ type: "image"
34995
+ });
34996
+ } else if (attachment.kind === "document") {
34997
+ blocks.push({
34998
+ name: attachment.name,
34999
+ source: {
35000
+ data: attachment.data,
35001
+ media_type: attachment.mediaType,
35002
+ type: "base64"
35003
+ },
35004
+ type: "document"
35005
+ });
35006
+ }
35007
+ }
35008
+ out.push({ content: blocks, role: "user" });
35009
+ continue;
35010
+ }
34971
35011
  out.push({ content: message.content, role: message.role });
34972
35012
  }
34973
35013
  return out;
@@ -36748,6 +36788,7 @@ var toHandoffExpectation = (event) => ({
36748
36788
  targetAgentId: getPayloadString2(event, "targetAgentId")
36749
36789
  });
36750
36790
  var createContractApi = (session) => ({
36791
+ attachUserMedia: async () => {},
36751
36792
  close: async () => {},
36752
36793
  commitTurn: async () => {},
36753
36794
  complete: async () => {},
@@ -5670,6 +5670,7 @@ var createVoiceSession = (options) => {
5670
5670
  let activeAdapterGeneration = 0;
5671
5671
  let activeTTSTurnId;
5672
5672
  const currentTurnAudio = [];
5673
+ const pendingUserAttachments = [];
5673
5674
  let fallbackAttemptsForCurrentTurn = 0;
5674
5675
  let fallbackReplayAudioMsForCurrentTurn = 0;
5675
5676
  const amdDetector = options.amd;
@@ -7194,7 +7195,9 @@ var createVoiceSession = (options) => {
7194
7195
  audioMs: costEstimate.totalBillableAudioMs
7195
7196
  });
7196
7197
  }
7198
+ const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
7197
7199
  const turn = {
7200
+ attachments: drainedAttachments,
7198
7201
  committedAt: Date.now(),
7199
7202
  id: createId(),
7200
7203
  text: finalText,
@@ -7488,6 +7491,9 @@ var createVoiceSession = (options) => {
7488
7491
  };
7489
7492
  const api = {
7490
7493
  id: options.id,
7494
+ attachUserMedia: async (attachment) => {
7495
+ pendingUserAttachments.push(attachment);
7496
+ },
7491
7497
  close: async (reason) => {
7492
7498
  await runSerial("api.close", async () => {
7493
7499
  const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
package/dist/types.d.ts CHANGED
@@ -220,6 +220,7 @@ export type VoiceTurnRecord<TResult = unknown> = {
220
220
  quality?: VoiceTranscriptQuality;
221
221
  transcripts: Transcript[];
222
222
  assistantText?: string;
223
+ attachments?: import("./agent").VoiceAgentMessageAttachment[];
223
224
  committedAt: number;
224
225
  result?: TResult;
225
226
  };
@@ -436,6 +437,7 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
436
437
  id: string;
437
438
  connect: (socket: VoiceSocket) => Promise<void>;
438
439
  receiveAudio: (audio: AudioChunk) => Promise<void>;
440
+ attachUserMedia: (attachment: import("./agent").VoiceAgentMessageAttachment) => Promise<void>;
439
441
  commitTurn: (reason?: VoiceEndOfTurnEvent["reason"]) => Promise<void>;
440
442
  disconnect: (event?: VoiceCloseEvent) => Promise<void>;
441
443
  complete: (result?: TResult) => Promise<void>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.495",
3
+ "version": "0.0.22-beta.496",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",