perso-interactive-sdk-web 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -49
- package/dist/client/index.cjs +1 -1
- package/dist/client/index.d.ts +280 -80
- package/dist/client/index.iife.js +1 -1
- package/dist/client/index.js +1 -1
- package/dist/server/index.cjs +1 -1
- package/dist/server/index.d.ts +73 -2
- package/dist/server/index.js +1 -1
- package/package.json +93 -93
package/dist/client/index.d.ts
CHANGED
|
@@ -1,3 +1,45 @@
|
|
|
1
|
+
interface Chat {
|
|
2
|
+
text: string;
|
|
3
|
+
isUser: boolean;
|
|
4
|
+
timestamp: Date;
|
|
5
|
+
}
|
|
6
|
+
declare enum ChatState {
|
|
7
|
+
RECORDING = "RECORDING",
|
|
8
|
+
LLM = "LLM",
|
|
9
|
+
ANALYZING = "ANALYZING",
|
|
10
|
+
SPEAKING = "SPEAKING",
|
|
11
|
+
TTS = "TTS"
|
|
12
|
+
}
|
|
13
|
+
declare class ChatTool<TArg = any, TResult extends object = object> {
|
|
14
|
+
name: string;
|
|
15
|
+
description: string;
|
|
16
|
+
parameters: object;
|
|
17
|
+
call: (arg: TArg) => TResult | Promise<TResult>;
|
|
18
|
+
executeOnly: boolean;
|
|
19
|
+
constructor(name: string, description: string, parameters: object, call: (arg: TArg) => TResult | Promise<TResult>, executeOnly?: boolean);
|
|
20
|
+
}
|
|
21
|
+
type LLMStreamChunk = {
|
|
22
|
+
type: 'message';
|
|
23
|
+
chunks: string[];
|
|
24
|
+
message: string;
|
|
25
|
+
finish: boolean;
|
|
26
|
+
} | ({
|
|
27
|
+
type: 'tool_call';
|
|
28
|
+
tool_calls: Array<object>;
|
|
29
|
+
} & Record<string, unknown>) | ({
|
|
30
|
+
type: 'tool_result';
|
|
31
|
+
tool_call_id: string;
|
|
32
|
+
result: object;
|
|
33
|
+
} & Record<string, unknown>) | {
|
|
34
|
+
type: 'error';
|
|
35
|
+
error: Error;
|
|
36
|
+
};
|
|
37
|
+
interface ProcessLLMOptions {
|
|
38
|
+
message: string;
|
|
39
|
+
tools?: Array<ChatTool>;
|
|
40
|
+
signal?: AbortSignal;
|
|
41
|
+
}
|
|
42
|
+
|
|
1
43
|
/**
|
|
2
44
|
* High-level controller around a WebRTC PeerConnection that proxies Perso's
|
|
3
45
|
* real-time APIs through convenience helpers.
|
|
@@ -8,7 +50,7 @@ declare class Perso extends EventTarget {
|
|
|
8
50
|
dc: RTCDataChannel;
|
|
9
51
|
streams: Array<MediaStream>;
|
|
10
52
|
pingTime: number;
|
|
11
|
-
pingIntervalId:
|
|
53
|
+
pingIntervalId: ReturnType<typeof setInterval> | null;
|
|
12
54
|
/**
|
|
13
55
|
* Hooks a peer/data channel pair to status/ping listeners so consumers can
|
|
14
56
|
* interact with the remote Perso session through a single object.
|
|
@@ -17,17 +59,22 @@ declare class Perso extends EventTarget {
|
|
|
17
59
|
*/
|
|
18
60
|
constructor(pc: RTCPeerConnection, dc: RTCDataChannel);
|
|
19
61
|
/**
|
|
20
|
-
*
|
|
21
|
-
*
|
|
62
|
+
* Negotiates WebRTC connectivity and waits until the first remote stream is ready.
|
|
63
|
+
*
|
|
64
|
+
* When an optional `stream` is provided (legacy bidirectional mode), the stream's
|
|
65
|
+
* tracks are added to the peer connection so the server can receive client audio.
|
|
66
|
+
* Without a stream the audio transceiver is set to receive-only.
|
|
67
|
+
*
|
|
22
68
|
* @param apiServer Perso API server URL.
|
|
23
69
|
* @param sessionId Session identifier created via `createSessionId`.
|
|
24
|
-
* @param stream Local camera/mic stream shared with the agent.
|
|
25
70
|
* @param width Desired avatar canvas width.
|
|
26
71
|
* @param height Desired avatar canvas height.
|
|
27
|
-
* @
|
|
72
|
+
* @param stream Optional local media stream for bidirectional audio (legacy mode).
|
|
73
|
+
* @returns Ready-to-use `Perso` instance, or `null` when the session has no STF capability.
|
|
74
|
+
* @throws ApiError When session event or WebRTC negotiation fails.
|
|
28
75
|
* @throws Timeout When remote streams fail to arrive in time.
|
|
29
76
|
*/
|
|
30
|
-
static create(apiServer: string, sessionId: string,
|
|
77
|
+
static create(apiServer: string, sessionId: string, width: number, height: number, stream?: MediaStream): Promise<Perso | null>;
|
|
31
78
|
/**
|
|
32
79
|
* Configures a browser `RTCPeerConnection` with the ICE servers provided by
|
|
33
80
|
* the Perso API.
|
|
@@ -74,6 +121,26 @@ declare class Perso extends EventTarget {
|
|
|
74
121
|
* @param message Text to synthesize and animate.
|
|
75
122
|
*/
|
|
76
123
|
ttstf(message: string): void;
|
|
124
|
+
private static readonly BACKPRESSURE_THRESHOLD;
|
|
125
|
+
/**
|
|
126
|
+
* Sends a file to the remote peer via a dedicated WebRTC data channel.
|
|
127
|
+
* The file is chunked and transmitted in binary format. Applies
|
|
128
|
+
* backpressure when the channel's buffer exceeds 512 KB to avoid
|
|
129
|
+
* SCTP overflow on large files.
|
|
130
|
+
* @param file The file blob to send.
|
|
131
|
+
* @param chunksize Size of each chunk in bytes (default: 65536).
|
|
132
|
+
* @returns Promise resolving to the file reference string from the server.
|
|
133
|
+
*/
|
|
134
|
+
sendFile(file: Blob, chunksize?: number): Promise<string>;
|
|
135
|
+
/**
|
|
136
|
+
* Sends an audio file for Speech-to-Face (STF) processing.
|
|
137
|
+
* The avatar will lip-sync to the provided audio.
|
|
138
|
+
* @param file Audio file blob (mp3 or wav).
|
|
139
|
+
* @param format Audio format ('mp3' or 'wav').
|
|
140
|
+
* @param message Optional text message associated with the audio.
|
|
141
|
+
* @returns Promise resolving to the file reference string.
|
|
142
|
+
*/
|
|
143
|
+
stf(file: Blob, format: string, message: string): Promise<string>;
|
|
77
144
|
/**
|
|
78
145
|
* Signals the remote agent to start buffering microphone audio.
|
|
79
146
|
*/
|
|
@@ -118,7 +185,8 @@ declare class Perso extends EventTarget {
|
|
|
118
185
|
* @param callback Handler invoked with the parsed payload.
|
|
119
186
|
* @returns Function that removes the listener.
|
|
120
187
|
*/
|
|
121
|
-
setMessageCallback(type: string, callback: (data:
|
|
188
|
+
setMessageCallback<T = any>(type: string, callback: (data: T) => void): () => void;
|
|
189
|
+
tts(base64: string, resample?: boolean): Promise<Blob>;
|
|
122
190
|
/**
|
|
123
191
|
* Tears down the PeerConnection due to remote/network failure and emits a
|
|
124
192
|
* timeout status so the UI can inform users.
|
|
@@ -131,36 +199,6 @@ declare class Perso extends EventTarget {
|
|
|
131
199
|
closeSelf(): void;
|
|
132
200
|
}
|
|
133
201
|
|
|
134
|
-
/**
|
|
135
|
-
* Represents a single entry shown in the chat log UI.
|
|
136
|
-
*/
|
|
137
|
-
interface Chat {
|
|
138
|
-
text: string;
|
|
139
|
-
isUser: boolean;
|
|
140
|
-
timestamp: Date;
|
|
141
|
-
}
|
|
142
|
-
/**
|
|
143
|
-
* Discrete states that describe where the conversation currently is
|
|
144
|
-
* (recording, running the LLM, analyzing text, speaking back, etc.).
|
|
145
|
-
*/
|
|
146
|
-
declare enum ChatState {
|
|
147
|
-
RECORDING = "RECORDING",
|
|
148
|
-
LLM = "LLM",
|
|
149
|
-
ANALYZING = "ANALYZING",
|
|
150
|
-
SPEAKING = "SPEAKING"
|
|
151
|
-
}
|
|
152
|
-
/**
|
|
153
|
-
* Container describing a callable tool (local client helper or remote MCP)
|
|
154
|
-
* that the LLM runtime can invoke during conversations.
|
|
155
|
-
*/
|
|
156
|
-
declare class ChatTool {
|
|
157
|
-
name: string;
|
|
158
|
-
description: string;
|
|
159
|
-
parameters: object;
|
|
160
|
-
call: (arg: any) => object | Promise<object>;
|
|
161
|
-
executeOnly: boolean;
|
|
162
|
-
constructor(name: string, description: string, parameters: object, call: (arg: any) => object | Promise<object>, executeOnly?: boolean);
|
|
163
|
-
}
|
|
164
202
|
/**
|
|
165
203
|
* Manages a full Perso chat session including UI state, LLM orchestration,
|
|
166
204
|
* microphone handling, and speech synthesis triggers.
|
|
@@ -168,8 +206,7 @@ declare class ChatTool {
|
|
|
168
206
|
declare class Session {
|
|
169
207
|
apiServer: string;
|
|
170
208
|
sessionId: string;
|
|
171
|
-
|
|
172
|
-
perso: Perso;
|
|
209
|
+
perso: Perso | null;
|
|
173
210
|
clientTools: Array<ChatTool>;
|
|
174
211
|
private chatStatesHandler;
|
|
175
212
|
private chatLogHandler;
|
|
@@ -180,17 +217,29 @@ declare class Session {
|
|
|
180
217
|
private stfTimeoutStartTime;
|
|
181
218
|
private messageHistory;
|
|
182
219
|
private chatLog;
|
|
220
|
+
private llmProcessor;
|
|
183
221
|
private chatStateMap;
|
|
184
222
|
private emojiRegex;
|
|
223
|
+
private sttRecorder;
|
|
224
|
+
private sttTimeoutHandle;
|
|
225
|
+
private sttTimeoutAudioFile;
|
|
226
|
+
private heartbeatIntervalId;
|
|
227
|
+
private readonly legacyVoiceChatMode;
|
|
228
|
+
private readonly stream;
|
|
185
229
|
/**
|
|
186
230
|
* Sets up message listeners and chat-state trackers for a Perso session.
|
|
187
231
|
* @param apiServer Perso API server URL.
|
|
188
232
|
* @param sessionId Id of the session negotiated with the backend.
|
|
189
|
-
* @param stream Local audio stream shared with the session.
|
|
190
233
|
* @param perso Underlying Perso WebRTC controller.
|
|
191
234
|
* @param clientTools Tools exposed to the LLM for function calling.
|
|
192
|
-
|
|
193
|
-
|
|
235
|
+
* @param options Optional configuration.
|
|
236
|
+
* @param options.stream Local audio stream for legacy bidirectional mode.
|
|
237
|
+
* @param options.legacyVoiceChatMode Whether legacy voice chat mode is enabled.
|
|
238
|
+
*/
|
|
239
|
+
constructor(apiServer: string, sessionId: string, perso: Perso | null, clientTools: Array<ChatTool>, options?: {
|
|
240
|
+
stream?: MediaStream;
|
|
241
|
+
legacyVoiceChatMode?: boolean;
|
|
242
|
+
});
|
|
194
243
|
private llmJob;
|
|
195
244
|
/**
|
|
196
245
|
* Sends a user utterance through Perso's internal LLM and speaks the result
|
|
@@ -202,32 +251,67 @@ declare class Session {
|
|
|
202
251
|
* - Maintains `messageHistory` for subsequent LLM calls.
|
|
203
252
|
*/
|
|
204
253
|
processChat(message: string): Promise<void>;
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
* @param message Assistant response generated externally.
|
|
209
|
-
* @remarks
|
|
210
|
-
* - Does not mutate `messageHistory`.
|
|
211
|
-
* - Does not emit chat-log updates.
|
|
212
|
-
* - Does not toggle the `LLM` chat state.
|
|
213
|
-
*/
|
|
254
|
+
processLLM(options: ProcessLLMOptions): AsyncGenerator<LLMStreamChunk>;
|
|
255
|
+
getMessageHistory(): ReadonlyArray<object>;
|
|
256
|
+
/** @deprecated Use processTTSTF() with explicit history management instead. */
|
|
214
257
|
processCustomChat(message: string): void;
|
|
215
258
|
/**
|
|
216
259
|
* Sends an assistant message to the LLM history and triggers TTSTF playback.
|
|
217
260
|
* @param message Assistant output that should be spoken immediately.
|
|
218
261
|
*/
|
|
219
262
|
processTTSTF(message: string): void;
|
|
263
|
+
transcribeAudio(audio: Blob | File, language?: string): Promise<string>;
|
|
264
|
+
processSTF(file: Blob, format: string, message: string): Promise<string>;
|
|
265
|
+
processTTS(message: string, options?: {
|
|
266
|
+
resample?: boolean;
|
|
267
|
+
}): Promise<Blob | undefined>;
|
|
220
268
|
/**
|
|
221
269
|
* Triggers the recording state and instructs Perso to buffer microphone
|
|
222
270
|
* audio for speech-to-text.
|
|
271
|
+
*
|
|
272
|
+
* In legacy mode this sends a `record-start` DataChannel message to the
|
|
273
|
+
* server which begins buffering the bidirectional audio stream.
|
|
274
|
+
*
|
|
223
275
|
* @returns Result of `perso.recordStart()`.
|
|
276
|
+
* @deprecated Use startProcessSTT() instead. Legacy voice chat mode will be removed in a future version.
|
|
224
277
|
*/
|
|
225
278
|
startVoiceChat(): void;
|
|
226
279
|
/**
|
|
227
280
|
* Stops the microphone capture, transitions the UI to analyzing, and sends
|
|
228
281
|
* the buffered audio to STT.
|
|
282
|
+
*
|
|
283
|
+
* In legacy mode this sends a `record-end-stt` DataChannel message. The
|
|
284
|
+
* server responds with a `"stt"` message which is handled by the
|
|
285
|
+
* `setMessageCallback("stt")` listener in the constructor, triggering
|
|
286
|
+
* `processChat` automatically.
|
|
287
|
+
*
|
|
288
|
+
* @deprecated Use stopProcessSTT() instead. Legacy voice chat mode will be removed in a future version.
|
|
229
289
|
*/
|
|
230
290
|
stopVoiceChat(): void;
|
|
291
|
+
/**
|
|
292
|
+
* Starts recording audio for STT processing.
|
|
293
|
+
* Uses Web Audio API internally to capture microphone input and encode to WAV format.
|
|
294
|
+
* @param timeout Optional timeout in milliseconds to automatically stop recording.
|
|
295
|
+
* @throws Error if already recording or if microphone access is denied.
|
|
296
|
+
*/
|
|
297
|
+
startProcessSTT(timeout?: number): Promise<void>;
|
|
298
|
+
/**
|
|
299
|
+
* Audio file from the last STT recording (per the property name), or `null` when none is available.
|
|
300
|
+
*/
|
|
301
|
+
lastRecordedAudioFile: File | null;
|
|
302
|
+
/**
|
|
303
|
+
* Stops STT recording and sends the audio to the STT API for transcription.
|
|
304
|
+
* @param language Optional language code for STT (e.g., 'ko', 'en').
|
|
305
|
+
* @returns Promise resolving to the transcribed text.
|
|
306
|
+
* @throws STTError if the API call fails.
|
|
307
|
+
* @throws Error if not currently recording.
|
|
308
|
+
*/
|
|
309
|
+
stopProcessSTT(language?: string): Promise<string>;
|
|
310
|
+
/**
|
|
311
|
+
* Checks if STT recording is currently in progress or has audio pending processing.
|
|
312
|
+
* @returns True if recording is active or audio is pending from timeout.
|
|
313
|
+
*/
|
|
314
|
+
isSTTRecording(): boolean;
|
|
231
315
|
/**
|
|
232
316
|
* Resizes the avatar video canvas on the remote renderer.
|
|
233
317
|
* @param width Target width in CSS pixels.
|
|
@@ -244,16 +328,18 @@ declare class Session {
|
|
|
244
328
|
* @param element Target video element.
|
|
245
329
|
*/
|
|
246
330
|
setSrc(element: HTMLVideoElement): void;
|
|
247
|
-
/**
|
|
248
|
-
* Returns the local microphone stream associated with the session.
|
|
249
|
-
* @returns Local `MediaStream`.
|
|
250
|
-
*/
|
|
251
|
-
getLocalStream(): MediaStream;
|
|
252
331
|
/**
|
|
253
332
|
* Returns the first remote stream exposed by the Perso renderer.
|
|
254
333
|
* @returns Remote `MediaStream`.
|
|
255
334
|
*/
|
|
256
|
-
getRemoteStream(): MediaStream;
|
|
335
|
+
getRemoteStream(): MediaStream | undefined;
|
|
336
|
+
/**
|
|
337
|
+
* Returns the local microphone stream associated with the session.
|
|
338
|
+
* Only available in legacy voice chat mode.
|
|
339
|
+
* @returns Local `MediaStream` or `null` if not in legacy mode.
|
|
340
|
+
* @deprecated Legacy voice chat mode will be removed in a future version.
|
|
341
|
+
*/
|
|
342
|
+
getLocalStream(): MediaStream | null;
|
|
257
343
|
/**
|
|
258
344
|
* Gracefully closes the session and remote connection.
|
|
259
345
|
*/
|
|
@@ -261,6 +347,11 @@ declare class Session {
|
|
|
261
347
|
/**
|
|
262
348
|
* Subscribes to Perso status events and notifies the caller when the session
|
|
263
349
|
* closes (distinguishing manual/automatic closure).
|
|
350
|
+
*
|
|
351
|
+
* In non-WebRTC mode (perso is null), the callback is never invoked and a
|
|
352
|
+
* no-op unsubscribe is returned. Use `setErrorHandler` to detect session
|
|
353
|
+
* termination caused by heartbeat failure instead.
|
|
354
|
+
*
|
|
264
355
|
* @param callback Invoked with `true` when closed manually.
|
|
265
356
|
* @returns Function to unsubscribe the listener.
|
|
266
357
|
*/
|
|
@@ -294,17 +385,6 @@ declare class Session {
|
|
|
294
385
|
* @returns Session identifier assigned by the backend.
|
|
295
386
|
*/
|
|
296
387
|
getSessionId(): string;
|
|
297
|
-
/**
|
|
298
|
-
* Streams responses from the Perso LLM endpoint, handles tool calls, and
|
|
299
|
-
* updates chat history/state accordingly.
|
|
300
|
-
* @param message Optional user message array or string injected ahead of the
|
|
301
|
-
* pending history (null when recursively continuing after tool calls).
|
|
302
|
-
* @remarks
|
|
303
|
-
* - Accumulates `type: "message"` chunks until a non-message event arrives.
|
|
304
|
-
* - When tool calls are returned, executes client tools (and recursively calls
|
|
305
|
-
* itself if follow-up LLM output is required).
|
|
306
|
-
* - Adds every spoken assistant message to the chat log and messageHistory.
|
|
307
|
-
*/
|
|
308
388
|
private processChatInternal;
|
|
309
389
|
/**
|
|
310
390
|
* Looks up a tool definition by the function name provided in a tool_call.
|
|
@@ -313,11 +393,12 @@ declare class Session {
|
|
|
313
393
|
* @returns Matching `ChatTool` or null.
|
|
314
394
|
*/
|
|
315
395
|
private getChatTool;
|
|
316
|
-
private llmCancel;
|
|
317
396
|
/**
|
|
318
397
|
* Cancels any in-flight LLM stream by flipping the cancellation flag and
|
|
319
398
|
* awaiting the pending promise if necessary.
|
|
320
399
|
*/
|
|
400
|
+
private llmCancel;
|
|
401
|
+
private pipelineSuppressed;
|
|
321
402
|
private clearLLMJob;
|
|
322
403
|
/**
|
|
323
404
|
* Filters/sanitizes text and sends it to Perso's TTSTF endpoint while toggling
|
|
@@ -365,6 +446,8 @@ declare class Session {
|
|
|
365
446
|
* Gracefully closes the underlying Perso connection on behalf of the session.
|
|
366
447
|
*/
|
|
367
448
|
private close;
|
|
449
|
+
private startHeartbeat;
|
|
450
|
+
private stopHeartbeat;
|
|
368
451
|
/**
|
|
369
452
|
* Strips emoji characters that TTSTF may not render correctly.
|
|
370
453
|
* @param str Text to sanitize.
|
|
@@ -373,6 +456,109 @@ declare class Session {
|
|
|
373
456
|
private removeEmoji;
|
|
374
457
|
}
|
|
375
458
|
|
|
459
|
+
/**
|
|
460
|
+
* Callbacks that LlmProcessor uses to notify the host of side effects.
|
|
461
|
+
*/
|
|
462
|
+
interface LlmProcessorCallbacks {
|
|
463
|
+
onChatStateChange: (add: ChatState | null, remove: ChatState | null) => void;
|
|
464
|
+
onError: (error: Error) => void;
|
|
465
|
+
onChatLog: (message: string, isUser: boolean) => void;
|
|
466
|
+
onTTSTF: (message: string) => void;
|
|
467
|
+
}
|
|
468
|
+
/**
|
|
469
|
+
* Configuration for LlmProcessor construction.
|
|
470
|
+
*/
|
|
471
|
+
interface LlmProcessorConfig {
|
|
472
|
+
apiServer: string;
|
|
473
|
+
sessionId: string;
|
|
474
|
+
clientTools: Array<ChatTool>;
|
|
475
|
+
callbacks: LlmProcessorCallbacks;
|
|
476
|
+
}
|
|
477
|
+
/**
|
|
478
|
+
* Handles LLM streaming, SSE parsing, tool execution, and message history
|
|
479
|
+
* management as a standalone module.
|
|
480
|
+
*/
|
|
481
|
+
declare class LlmProcessor {
|
|
482
|
+
private config;
|
|
483
|
+
private messageHistory;
|
|
484
|
+
constructor(config: LlmProcessorConfig);
|
|
485
|
+
/**
|
|
486
|
+
* Streams LLM responses as an AsyncGenerator, yielding {@link LLMStreamChunk}
|
|
487
|
+
* discriminated by `type`: `message`, `tool_call`, `tool_result`, `error`.
|
|
488
|
+
*
|
|
489
|
+
* Consumers get pull-based control over the stream — backpressure,
|
|
490
|
+
* early exit via `break`, and `AbortSignal` cancellation are handled
|
|
491
|
+
* naturally by the generator protocol.
|
|
492
|
+
*
|
|
493
|
+
* **Yield strategy**: message-type SSE events within a single `reader.read()`
|
|
494
|
+
* are batched into one `message` chunk (accumulated `chunks[]` + `message`).
|
|
495
|
+
* Non-message events (`tool_call`, `tool`) flush pending message chunks first
|
|
496
|
+
* to preserve ordering.
|
|
497
|
+
*
|
|
498
|
+
* **Tool execution** happens internally — `tool_call` and `tool_result` chunks
|
|
499
|
+
* are yielded for observability. If tools require a follow-up LLM call,
|
|
500
|
+
* the generator loops transparently.
|
|
501
|
+
*
|
|
502
|
+
* @param options - Message, optional tool overrides, and optional AbortSignal.
|
|
503
|
+
* @yields {LLMStreamChunk} Streaming chunks. The final `message` chunk
|
|
504
|
+
* has `finish: true` and contains the complete `chunks[]` / `message`.
|
|
505
|
+
* @throws {Error} If `options.message` is empty.
|
|
506
|
+
* @throws {LLMError} Re-thrown when the initial fetch fails with a non-API error.
|
|
507
|
+
*/
|
|
508
|
+
processLLM(options: ProcessLLMOptions): AsyncGenerator<LLMStreamChunk>;
|
|
509
|
+
private parseSSEStream;
|
|
510
|
+
private executeToolCalls;
|
|
511
|
+
addToHistory(entry: object): void;
|
|
512
|
+
getHistory(): ReadonlyArray<object>;
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
interface WavRecorderOptions {
|
|
516
|
+
channels?: number;
|
|
517
|
+
targetSampleRate?: number;
|
|
518
|
+
}
|
|
519
|
+
/**
|
|
520
|
+
* Records audio from the microphone and produces WAV files using Web Audio API.
|
|
521
|
+
* Uses AudioWorklet (standard API) for cross-browser compatibility.
|
|
522
|
+
*
|
|
523
|
+
* Browser Support:
|
|
524
|
+
* - Chrome 66+
|
|
525
|
+
* - Firefox 76+
|
|
526
|
+
* - Safari 14.1+
|
|
527
|
+
* - iOS Safari 14.5+
|
|
528
|
+
* - Edge 79+
|
|
529
|
+
*/
|
|
530
|
+
declare class WavRecorder {
|
|
531
|
+
private audioContext;
|
|
532
|
+
private mediaStream;
|
|
533
|
+
private workletNode;
|
|
534
|
+
private sourceNode;
|
|
535
|
+
private audioChunks;
|
|
536
|
+
private isRecordingState;
|
|
537
|
+
private channels;
|
|
538
|
+
private targetSampleRate;
|
|
539
|
+
constructor(options?: WavRecorderOptions);
|
|
540
|
+
/**
|
|
541
|
+
* Starts recording audio from the microphone.
|
|
542
|
+
* Requests microphone permission via getUserMedia.
|
|
543
|
+
* @throws Error if already recording or if microphone access is denied.
|
|
544
|
+
*/
|
|
545
|
+
start(): Promise<void>;
|
|
546
|
+
/**
|
|
547
|
+
* Stops recording and returns the recorded audio as a WAV File.
|
|
548
|
+
* Uses bidirectional communication with AudioWorklet to ensure all audio data is captured.
|
|
549
|
+
* @returns Promise resolving to a File containing the recorded WAV audio.
|
|
550
|
+
* @throws Error if not currently recording.
|
|
551
|
+
*/
|
|
552
|
+
stop(): Promise<File>;
|
|
553
|
+
isRecording(): boolean;
|
|
554
|
+
}
|
|
555
|
+
/**
|
|
556
|
+
* Factory function to create a WavRecorder.
|
|
557
|
+
* @param options Optional configuration.
|
|
558
|
+
* @returns A new WavRecorder instance.
|
|
559
|
+
*/
|
|
560
|
+
declare function createWavRecorder(options?: WavRecorderOptions): WavRecorder;
|
|
561
|
+
|
|
376
562
|
/**
|
|
377
563
|
* Retrieves the list of available LLM providers from the API.
|
|
378
564
|
* @param apiServer Perso API server URL.
|
|
@@ -440,15 +626,13 @@ declare function getAllSettings(apiServer: string, apiKey: string): Promise<{
|
|
|
440
626
|
mcpServers: any;
|
|
441
627
|
}>;
|
|
442
628
|
/**
|
|
443
|
-
*
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
*
|
|
448
|
-
* @
|
|
449
|
-
*
|
|
450
|
-
* @param clientTools Client-side tools available for function calling.
|
|
451
|
-
* @returns Initialized Session.
|
|
629
|
+
* Creates a Session with REST-based STT/TTS (current mode).
|
|
630
|
+
*/
|
|
631
|
+
declare function createSession(apiServer: string, sessionId: string, width: number, height: number, clientTools: Array<ChatTool>): Promise<Session>;
|
|
632
|
+
/**
|
|
633
|
+
* Creates a Session with bidirectional WebRTC audio (legacy mode).
|
|
634
|
+
* @deprecated Legacy voice chat mode will be removed in a future version.
|
|
635
|
+
* Use the 5-argument overload with REST-based STT/TTS instead.
|
|
452
636
|
*/
|
|
453
637
|
declare function createSession(apiServer: string, sessionId: string, width: number, height: number, enableVoiceChat: boolean, clientTools: Array<ChatTool>): Promise<Session>;
|
|
454
638
|
/**
|
|
@@ -515,6 +699,22 @@ declare class LLMStreamingResponseError extends Error {
|
|
|
515
699
|
description: string;
|
|
516
700
|
constructor(description: string);
|
|
517
701
|
}
|
|
702
|
+
declare class STTError extends Error {
|
|
703
|
+
underlyingError: ApiError;
|
|
704
|
+
constructor(underlyingError: ApiError);
|
|
705
|
+
}
|
|
706
|
+
declare class TTSError extends Error {
|
|
707
|
+
underlyingError: ApiError | TTSDecodeError;
|
|
708
|
+
constructor(underlyingError: ApiError | TTSDecodeError);
|
|
709
|
+
}
|
|
710
|
+
declare class TTSDecodeError extends Error {
|
|
711
|
+
description: string;
|
|
712
|
+
constructor(description: string);
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
declare function getWavSampleRate(arrayBuffer: ArrayBuffer): number;
|
|
716
|
+
|
|
717
|
+
declare const TTS_TARGET_SAMPLE_RATE = 16000;
|
|
518
718
|
|
|
519
|
-
export { ApiError, ChatState, ChatTool, LLMError, LLMStreamingResponseError, Session, createSession, createSessionId, getAllSettings, getBackgroundImages, getDocuments, getLLMs, getMcpServers, getModelStyles, getPrompts, getSTTs, getSessionInfo, getTTSs };
|
|
520
|
-
export type { Chat };
|
|
719
|
+
export { ApiError, ChatState, ChatTool, LLMError, LLMStreamingResponseError, LlmProcessor, STTError, Session, TTSDecodeError, TTSError, TTS_TARGET_SAMPLE_RATE, WavRecorder, createSession, createSessionId, createWavRecorder, getAllSettings, getBackgroundImages, getDocuments, getLLMs, getMcpServers, getModelStyles, getPrompts, getSTTs, getSessionInfo, getTTSs, getWavSampleRate };
|
|
720
|
+
export type { Chat, LLMStreamChunk, LlmProcessorCallbacks, LlmProcessorConfig, ProcessLLMOptions, WavRecorderOptions };
|