@absolutejs/voice 0.0.22-beta.495 → 0.0.22-beta.497
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +12 -0
- package/dist/index.js +43 -1
- package/dist/testing/index.js +7 -0
- package/dist/types.d.ts +16 -0
- package/package.json +1 -1
package/dist/agent.d.ts
CHANGED
|
@@ -3,7 +3,19 @@ import type { VoiceTraceEventStore } from "./trace";
|
|
|
3
3
|
import type { VoiceToolRuntime } from "./toolRuntime";
|
|
4
4
|
import { type VoiceAuditEventStore, type VoiceAuditLogger } from "./audit";
|
|
5
5
|
export type VoiceAgentMessageRole = "assistant" | "system" | "tool" | "user";
|
|
6
|
+
export type VoiceAgentImageMediaType = "image/gif" | "image/jpeg" | "image/png" | "image/webp";
|
|
7
|
+
export type VoiceAgentMessageAttachment = {
|
|
8
|
+
data: string;
|
|
9
|
+
kind: "image";
|
|
10
|
+
mediaType: VoiceAgentImageMediaType;
|
|
11
|
+
} | {
|
|
12
|
+
data: string;
|
|
13
|
+
kind: "document";
|
|
14
|
+
mediaType: "application/pdf";
|
|
15
|
+
name?: string;
|
|
16
|
+
};
|
|
6
17
|
export type VoiceAgentMessage = {
|
|
18
|
+
attachments?: VoiceAgentMessageAttachment[];
|
|
7
19
|
content: string;
|
|
8
20
|
metadata?: Record<string, unknown>;
|
|
9
21
|
name?: string;
|
package/dist/index.js
CHANGED
|
@@ -3702,6 +3702,7 @@ var createVoiceSession = (options) => {
|
|
|
3702
3702
|
let activeAdapterGeneration = 0;
|
|
3703
3703
|
let activeTTSTurnId;
|
|
3704
3704
|
const currentTurnAudio = [];
|
|
3705
|
+
const pendingUserAttachments = [];
|
|
3705
3706
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
3706
3707
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
3707
3708
|
const amdDetector = options.amd;
|
|
@@ -4915,6 +4916,7 @@ var createVoiceSession = (options) => {
|
|
|
4915
4916
|
ttsSessionPromise = (async () => {
|
|
4916
4917
|
const openedSession = await ttsAdapter.open({
|
|
4917
4918
|
lexicon,
|
|
4919
|
+
prosody: options.prosody,
|
|
4918
4920
|
sessionId: options.id
|
|
4919
4921
|
});
|
|
4920
4922
|
ttsSession = openedSession;
|
|
@@ -5226,7 +5228,9 @@ var createVoiceSession = (options) => {
|
|
|
5226
5228
|
audioMs: costEstimate.totalBillableAudioMs
|
|
5227
5229
|
});
|
|
5228
5230
|
}
|
|
5231
|
+
const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
|
|
5229
5232
|
const turn = {
|
|
5233
|
+
attachments: drainedAttachments,
|
|
5230
5234
|
committedAt: Date.now(),
|
|
5231
5235
|
id: createId(),
|
|
5232
5236
|
text: finalText,
|
|
@@ -5520,6 +5524,9 @@ var createVoiceSession = (options) => {
|
|
|
5520
5524
|
};
|
|
5521
5525
|
const api = {
|
|
5522
5526
|
id: options.id,
|
|
5527
|
+
attachUserMedia: async (attachment) => {
|
|
5528
|
+
pendingUserAttachments.push(attachment);
|
|
5529
|
+
},
|
|
5523
5530
|
close: async (reason) => {
|
|
5524
5531
|
await runSerial("api.close", async () => {
|
|
5525
5532
|
const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
|
|
@@ -8559,8 +8566,10 @@ var createHistoryMessages = (session, turn) => {
|
|
|
8559
8566
|
if (previousTurn.id === turn.id) {
|
|
8560
8567
|
continue;
|
|
8561
8568
|
}
|
|
8562
|
-
|
|
8569
|
+
const previousAttachments = previousTurn.attachments && previousTurn.attachments.length > 0 ? previousTurn.attachments : undefined;
|
|
8570
|
+
if (previousTurn.text.trim() || previousAttachments) {
|
|
8563
8571
|
messages.push({
|
|
8572
|
+
attachments: previousAttachments,
|
|
8564
8573
|
content: previousTurn.text,
|
|
8565
8574
|
role: "user"
|
|
8566
8575
|
});
|
|
@@ -8572,7 +8581,9 @@ var createHistoryMessages = (session, turn) => {
|
|
|
8572
8581
|
});
|
|
8573
8582
|
}
|
|
8574
8583
|
}
|
|
8584
|
+
const currentAttachments = turn.attachments && turn.attachments.length > 0 ? turn.attachments : undefined;
|
|
8575
8585
|
messages.push({
|
|
8586
|
+
attachments: currentAttachments,
|
|
8576
8587
|
content: turn.text,
|
|
8577
8588
|
role: "user"
|
|
8578
8589
|
});
|
|
@@ -34968,6 +34979,36 @@ var toProviderMessages = (messages) => {
|
|
|
34968
34979
|
out.push({ content: message.content, role: "user" });
|
|
34969
34980
|
continue;
|
|
34970
34981
|
}
|
|
34982
|
+
if (message.role === "user" && message.attachments && message.attachments.length > 0) {
|
|
34983
|
+
const blocks = [];
|
|
34984
|
+
if (message.content) {
|
|
34985
|
+
blocks.push({ content: message.content, type: "text" });
|
|
34986
|
+
}
|
|
34987
|
+
for (const attachment of message.attachments) {
|
|
34988
|
+
if (attachment.kind === "image") {
|
|
34989
|
+
blocks.push({
|
|
34990
|
+
source: {
|
|
34991
|
+
data: attachment.data,
|
|
34992
|
+
media_type: attachment.mediaType,
|
|
34993
|
+
type: "base64"
|
|
34994
|
+
},
|
|
34995
|
+
type: "image"
|
|
34996
|
+
});
|
|
34997
|
+
} else if (attachment.kind === "document") {
|
|
34998
|
+
blocks.push({
|
|
34999
|
+
name: attachment.name,
|
|
35000
|
+
source: {
|
|
35001
|
+
data: attachment.data,
|
|
35002
|
+
media_type: attachment.mediaType,
|
|
35003
|
+
type: "base64"
|
|
35004
|
+
},
|
|
35005
|
+
type: "document"
|
|
35006
|
+
});
|
|
35007
|
+
}
|
|
35008
|
+
}
|
|
35009
|
+
out.push({ content: blocks, role: "user" });
|
|
35010
|
+
continue;
|
|
35011
|
+
}
|
|
34971
35012
|
out.push({ content: message.content, role: message.role });
|
|
34972
35013
|
}
|
|
34973
35014
|
return out;
|
|
@@ -36748,6 +36789,7 @@ var toHandoffExpectation = (event) => ({
|
|
|
36748
36789
|
targetAgentId: getPayloadString2(event, "targetAgentId")
|
|
36749
36790
|
});
|
|
36750
36791
|
var createContractApi = (session) => ({
|
|
36792
|
+
attachUserMedia: async () => {},
|
|
36751
36793
|
close: async () => {},
|
|
36752
36794
|
commitTurn: async () => {},
|
|
36753
36795
|
complete: async () => {},
|
package/dist/testing/index.js
CHANGED
|
@@ -5670,6 +5670,7 @@ var createVoiceSession = (options) => {
|
|
|
5670
5670
|
let activeAdapterGeneration = 0;
|
|
5671
5671
|
let activeTTSTurnId;
|
|
5672
5672
|
const currentTurnAudio = [];
|
|
5673
|
+
const pendingUserAttachments = [];
|
|
5673
5674
|
let fallbackAttemptsForCurrentTurn = 0;
|
|
5674
5675
|
let fallbackReplayAudioMsForCurrentTurn = 0;
|
|
5675
5676
|
const amdDetector = options.amd;
|
|
@@ -6883,6 +6884,7 @@ var createVoiceSession = (options) => {
|
|
|
6883
6884
|
ttsSessionPromise = (async () => {
|
|
6884
6885
|
const openedSession = await ttsAdapter.open({
|
|
6885
6886
|
lexicon,
|
|
6887
|
+
prosody: options.prosody,
|
|
6886
6888
|
sessionId: options.id
|
|
6887
6889
|
});
|
|
6888
6890
|
ttsSession = openedSession;
|
|
@@ -7194,7 +7196,9 @@ var createVoiceSession = (options) => {
|
|
|
7194
7196
|
audioMs: costEstimate.totalBillableAudioMs
|
|
7195
7197
|
});
|
|
7196
7198
|
}
|
|
7199
|
+
const drainedAttachments = pendingUserAttachments.length > 0 ? pendingUserAttachments.splice(0, pendingUserAttachments.length) : undefined;
|
|
7197
7200
|
const turn = {
|
|
7201
|
+
attachments: drainedAttachments,
|
|
7198
7202
|
committedAt: Date.now(),
|
|
7199
7203
|
id: createId(),
|
|
7200
7204
|
text: finalText,
|
|
@@ -7488,6 +7492,9 @@ var createVoiceSession = (options) => {
|
|
|
7488
7492
|
};
|
|
7489
7493
|
const api = {
|
|
7490
7494
|
id: options.id,
|
|
7495
|
+
attachUserMedia: async (attachment) => {
|
|
7496
|
+
pendingUserAttachments.push(attachment);
|
|
7497
|
+
},
|
|
7491
7498
|
close: async (reason) => {
|
|
7492
7499
|
await runSerial("api.close", async () => {
|
|
7493
7500
|
const disposition = reason === "silence-timeout" ? "silence-timeout" : "closed";
|
package/dist/types.d.ts
CHANGED
|
@@ -49,12 +49,18 @@ export type VoiceLexiconEntry = {
|
|
|
49
49
|
metadata?: Record<string, unknown>;
|
|
50
50
|
pronunciation?: string;
|
|
51
51
|
};
|
|
52
|
+
export type VoiceTranscriptSentiment = {
|
|
53
|
+
label: "negative" | "neutral" | "positive" | (string & {});
|
|
54
|
+
metadata?: Record<string, unknown>;
|
|
55
|
+
score?: number;
|
|
56
|
+
};
|
|
52
57
|
export type Transcript = {
|
|
53
58
|
id: string;
|
|
54
59
|
text: string;
|
|
55
60
|
isFinal: boolean;
|
|
56
61
|
confidence?: number;
|
|
57
62
|
language?: string;
|
|
63
|
+
sentiment?: VoiceTranscriptSentiment;
|
|
58
64
|
speaker?: string | number;
|
|
59
65
|
startedAtMs?: number;
|
|
60
66
|
endedAtMs?: number;
|
|
@@ -172,9 +178,16 @@ export type TTSAdapterSession = {
|
|
|
172
178
|
export declare const ttsAdapterSessionCanCancel: (session: TTSAdapterSession) => session is TTSAdapterSession & {
|
|
173
179
|
cancel: (reason?: string) => Promise<void>;
|
|
174
180
|
};
|
|
181
|
+
export type VoiceTTSProsody = {
|
|
182
|
+
emphasis?: number;
|
|
183
|
+
pitch?: number;
|
|
184
|
+
speed?: number;
|
|
185
|
+
style?: string;
|
|
186
|
+
};
|
|
175
187
|
export type TTSAdapterOpenOptions = {
|
|
176
188
|
sessionId: string;
|
|
177
189
|
lexicon?: VoiceLexiconEntry[];
|
|
190
|
+
prosody?: VoiceTTSProsody;
|
|
178
191
|
signal?: AbortSignal;
|
|
179
192
|
};
|
|
180
193
|
export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
|
|
@@ -220,6 +233,7 @@ export type VoiceTurnRecord<TResult = unknown> = {
|
|
|
220
233
|
quality?: VoiceTranscriptQuality;
|
|
221
234
|
transcripts: Transcript[];
|
|
222
235
|
assistantText?: string;
|
|
236
|
+
attachments?: import("./agent").VoiceAgentMessageAttachment[];
|
|
223
237
|
committedAt: number;
|
|
224
238
|
result?: TResult;
|
|
225
239
|
};
|
|
@@ -436,6 +450,7 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
|
|
|
436
450
|
id: string;
|
|
437
451
|
connect: (socket: VoiceSocket) => Promise<void>;
|
|
438
452
|
receiveAudio: (audio: AudioChunk) => Promise<void>;
|
|
453
|
+
attachUserMedia: (attachment: import("./agent").VoiceAgentMessageAttachment) => Promise<void>;
|
|
439
454
|
commitTurn: (reason?: VoiceEndOfTurnEvent["reason"]) => Promise<void>;
|
|
440
455
|
disconnect: (event?: VoiceCloseEvent) => Promise<void>;
|
|
441
456
|
complete: (result?: TResult) => Promise<void>;
|
|
@@ -737,6 +752,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
737
752
|
semanticTurnDetector?: import("./semanticTurn").VoiceSemanticTurnDetector;
|
|
738
753
|
assistantMode?: import("./assistantMode").VoiceAssistantMode;
|
|
739
754
|
modalities?: ReadonlyArray<"audio" | "text">;
|
|
755
|
+
prosody?: VoiceTTSProsody;
|
|
740
756
|
reconnect: Required<VoiceReconnectConfig>;
|
|
741
757
|
phraseHints?: VoicePhraseHint[];
|
|
742
758
|
sessionMetadata?: Record<string, unknown>;
|