@simfinity/constellation-client 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +40 -33
- package/dist/index.d.cts +18 -27
- package/dist/index.d.ts +18 -27
- package/dist/index.js +40 -33
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -37,20 +37,20 @@ var WebClient = class {
|
|
|
37
37
|
* be closed to release the context on server side. See endSession().
|
|
38
38
|
*
|
|
39
39
|
* @remarks
|
|
40
|
-
* A session MUST exist to connect the stream.
|
|
40
|
+
* A session MUST exist first in order to connect the stream next.
|
|
41
41
|
*
|
|
42
|
-
* @param
|
|
43
|
-
* @param
|
|
42
|
+
* @param audioEnabled whether this session can receive & produce audio as well as text
|
|
43
|
+
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
44
44
|
*
|
|
45
45
|
* @exception
|
|
46
46
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
47
|
-
*
|
|
48
|
-
* @example
|
|
49
|
-
* ```TypeScript
|
|
50
|
-
* await startSession("You are useful assistant", true)
|
|
51
|
-
* ```
|
|
52
47
|
*/
|
|
53
|
-
async startSession(
|
|
48
|
+
async startSession(audioEnabled, voiceName) {
|
|
49
|
+
const prepareBody = {
|
|
50
|
+
llmProvider: this.config.llm,
|
|
51
|
+
audioEnabled,
|
|
52
|
+
voiceName
|
|
53
|
+
};
|
|
54
54
|
const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
|
|
55
55
|
method: "POST",
|
|
56
56
|
headers: {
|
|
@@ -58,11 +58,7 @@ var WebClient = class {
|
|
|
58
58
|
"Content-Type": "application/json",
|
|
59
59
|
"Accept": "application/json"
|
|
60
60
|
},
|
|
61
|
-
body: JSON.stringify(
|
|
62
|
-
llmProvider: this.config.llm,
|
|
63
|
-
systemPrompt: instructions,
|
|
64
|
-
audio
|
|
65
|
-
})
|
|
61
|
+
body: JSON.stringify(prepareBody)
|
|
66
62
|
});
|
|
67
63
|
if (!response.ok) {
|
|
68
64
|
throw new Error(`Could not create a new chat session
|
|
@@ -116,7 +112,7 @@ var WebClient = class {
|
|
|
116
112
|
* This method not only opens a websocket connection with the server but also initiates a
|
|
117
113
|
* handshake, where the server explicitly acknowledges and accepts the client connection.
|
|
118
114
|
*
|
|
119
|
-
* @param audio for
|
|
115
|
+
* @param audio for an audio-enabled session, request this streaming connection to include audio events
|
|
120
116
|
* @param handlers callback functions to handle every possible communication events coming from the server
|
|
121
117
|
*
|
|
122
118
|
* @exception
|
|
@@ -125,7 +121,7 @@ var WebClient = class {
|
|
|
125
121
|
* @example
|
|
126
122
|
* ```TypeScript
|
|
127
123
|
* // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
|
|
128
|
-
* await
|
|
124
|
+
* await joinSession(true, {
|
|
129
125
|
* onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
|
|
130
126
|
* onAudioResponseStart: () => { console.log("The model is talking"); },
|
|
131
127
|
* onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
|
|
@@ -152,27 +148,30 @@ var WebClient = class {
|
|
|
152
148
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
153
149
|
};
|
|
154
150
|
ws.onmessage = async (event) => {
|
|
155
|
-
var _a, _b, _c, _d, _e, _f;
|
|
151
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
156
152
|
try {
|
|
157
153
|
const data = JSON.parse(event.data);
|
|
158
154
|
switch (data.type) {
|
|
155
|
+
case "session.configured":
|
|
156
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
157
|
+
break;
|
|
159
158
|
case "audio.response.start":
|
|
160
|
-
(
|
|
159
|
+
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
161
160
|
break;
|
|
162
161
|
case "audio.response.append":
|
|
163
|
-
(
|
|
162
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
|
|
164
163
|
break;
|
|
165
164
|
case "audio.response.done":
|
|
166
|
-
(
|
|
165
|
+
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
167
166
|
break;
|
|
168
167
|
case "transcript.input":
|
|
169
|
-
(
|
|
168
|
+
(_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
|
|
170
169
|
break;
|
|
171
170
|
case "transcript.response":
|
|
172
|
-
(
|
|
171
|
+
(_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
|
|
173
172
|
break;
|
|
174
173
|
case "technical.error":
|
|
175
|
-
(
|
|
174
|
+
(_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
|
|
176
175
|
break;
|
|
177
176
|
default:
|
|
178
177
|
break;
|
|
@@ -184,7 +183,7 @@ var WebClient = class {
|
|
|
184
183
|
this.ws = ws;
|
|
185
184
|
}
|
|
186
185
|
/**
|
|
187
|
-
*
|
|
186
|
+
* Once a session is joined: send a "system" update of the session settings.
|
|
188
187
|
* Allows to change some behavioral parameters like the temperature or system instructions.
|
|
189
188
|
* This does not trigger a model response.
|
|
190
189
|
*
|
|
@@ -192,16 +191,16 @@ var WebClient = class {
|
|
|
192
191
|
* With openai for example, this triggers (pseudo-code):
|
|
193
192
|
* webSocket.send({ type: "session.update", session: { ... }})
|
|
194
193
|
*
|
|
195
|
-
* @param settings
|
|
194
|
+
* @param settings new system settings to apply. Omitted values will remain unchanged
|
|
196
195
|
*
|
|
197
196
|
* @exception
|
|
198
197
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
199
198
|
*/
|
|
200
|
-
|
|
199
|
+
configureSession(settings) {
|
|
201
200
|
this.send("session.configure", settings);
|
|
202
201
|
}
|
|
203
202
|
/**
|
|
204
|
-
*
|
|
203
|
+
* Once a session is joined: send a text input message to the LLM. This will trigger a
|
|
205
204
|
* text response as well as an audio response if the session was opened with audio mode active.
|
|
206
205
|
*
|
|
207
206
|
* @remarks
|
|
@@ -218,7 +217,7 @@ var WebClient = class {
|
|
|
218
217
|
this.send("text.input.message", { text });
|
|
219
218
|
}
|
|
220
219
|
/**
|
|
221
|
-
*
|
|
220
|
+
* Once a session is joined: send a chunk of raw audio data to the LLM.
|
|
222
221
|
* Audio data chunks do not systematically & immediately trigger a model response:
|
|
223
222
|
* They get accumulated by the model to form a single input message, until:
|
|
224
223
|
* - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
|
|
@@ -237,8 +236,8 @@ var WebClient = class {
|
|
|
237
236
|
this.send("audio.input.append", { audioData: chunk });
|
|
238
237
|
}
|
|
239
238
|
/**
|
|
240
|
-
*
|
|
241
|
-
* the last model response. This effectively flushes the audio buffer and triggers a
|
|
239
|
+
* Once a session is joined: triggers the processing of the accumulated audio data since
|
|
240
|
+
* the last model response. This effectively flushes the audio buffer and triggers a model response.
|
|
242
241
|
*
|
|
243
242
|
* @remarks
|
|
244
243
|
* With openai for example, this triggers (pseudo-code):
|
|
@@ -276,14 +275,22 @@ var WebClient = class {
|
|
|
276
275
|
ws.onopen = () => {
|
|
277
276
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
278
277
|
ws.send(JSON.stringify({
|
|
279
|
-
type: "connection.
|
|
280
|
-
data: {
|
|
278
|
+
type: "connection.initiate",
|
|
279
|
+
data: {
|
|
280
|
+
subscription: eventSubs,
|
|
281
|
+
settings: {
|
|
282
|
+
audio: true,
|
|
283
|
+
voice: "alloy",
|
|
284
|
+
temperature: 0.8,
|
|
285
|
+
instructions: ""
|
|
286
|
+
}
|
|
287
|
+
}
|
|
281
288
|
}));
|
|
282
289
|
};
|
|
283
290
|
ws.onmessage = (event) => {
|
|
284
291
|
try {
|
|
285
292
|
const data = JSON.parse(event.data);
|
|
286
|
-
if (data.type === "connection.
|
|
293
|
+
if (data.type === "connection.initiated")
|
|
287
294
|
resolve(true);
|
|
288
295
|
else
|
|
289
296
|
reject(new Error(`Received unexpected event: ${data.type}`));
|
package/dist/index.d.cts
CHANGED
|
@@ -31,17 +31,13 @@ interface WebClientConfig {
|
|
|
31
31
|
}
|
|
32
32
|
/**
|
|
33
33
|
* System settings influencing the model behavior:
|
|
34
|
-
* @audio: to activate voice conversation
|
|
35
|
-
* @voice: depending on the LLM solution, this is the voice name to be used in audio
|
|
36
34
|
* @temperature: LLM creativity factor in 0-1 range
|
|
37
35
|
* @instructions: system instructions giving context, rules and directions to guide the LLM behavior
|
|
38
36
|
* @maxResponseToken: 1-4096 value, maximum number of token used for a single response. Undefined means unlimited.
|
|
39
37
|
*/
|
|
40
|
-
interface
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
temperature: number;
|
|
44
|
-
instructions: string;
|
|
38
|
+
interface SessionConfig {
|
|
39
|
+
temperature?: number;
|
|
40
|
+
instructions?: string;
|
|
45
41
|
maxResponseToken?: number;
|
|
46
42
|
}
|
|
47
43
|
/**
|
|
@@ -66,7 +62,7 @@ interface SessionSettings {
|
|
|
66
62
|
*/
|
|
67
63
|
interface EventHandlers {
|
|
68
64
|
onStreamClosed: (reason: string) => void;
|
|
69
|
-
onSessionConfigured?: (settings:
|
|
65
|
+
onSessionConfigured?: (settings: SessionConfig) => void;
|
|
70
66
|
onAudioResponseStart?: () => void;
|
|
71
67
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
72
68
|
onAudioResponseEnd?: () => void;
|
|
@@ -93,20 +89,15 @@ declare class WebClient {
|
|
|
93
89
|
* be closed to release the context on server side. See endSession().
|
|
94
90
|
*
|
|
95
91
|
* @remarks
|
|
96
|
-
* A session MUST exist to connect the stream.
|
|
92
|
+
* A session MUST exist first in order to connect the stream next.
|
|
97
93
|
*
|
|
98
|
-
* @param
|
|
99
|
-
* @param
|
|
94
|
+
* @param audioEnabled whether this session can receive & produce audio as well as text
|
|
95
|
+
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
100
96
|
*
|
|
101
97
|
* @exception
|
|
102
98
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
103
|
-
*
|
|
104
|
-
* @example
|
|
105
|
-
* ```TypeScript
|
|
106
|
-
* await startSession("You are useful assistant", true)
|
|
107
|
-
* ```
|
|
108
99
|
*/
|
|
109
|
-
startSession(
|
|
100
|
+
startSession(audioEnabled: boolean, voiceName?: string): Promise<void>;
|
|
110
101
|
/**
|
|
111
102
|
* Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
|
|
112
103
|
* If there is no active session, this method does nothing.
|
|
@@ -128,7 +119,7 @@ declare class WebClient {
|
|
|
128
119
|
* This method not only opens a websocket connection with the server but also initiates a
|
|
129
120
|
* handshake, where the server explicitly acknowledges and accepts the client connection.
|
|
130
121
|
*
|
|
131
|
-
* @param audio for
|
|
122
|
+
* @param audio for an audio-enabled session, request this streaming connection to include audio events
|
|
132
123
|
* @param handlers callback functions to handle every possible communication events coming from the server
|
|
133
124
|
*
|
|
134
125
|
* @exception
|
|
@@ -137,7 +128,7 @@ declare class WebClient {
|
|
|
137
128
|
* @example
|
|
138
129
|
* ```TypeScript
|
|
139
130
|
* // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
|
|
140
|
-
* await
|
|
131
|
+
* await joinSession(true, {
|
|
141
132
|
* onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
|
|
142
133
|
* onAudioResponseStart: () => { console.log("The model is talking"); },
|
|
143
134
|
* onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
|
|
@@ -147,7 +138,7 @@ declare class WebClient {
|
|
|
147
138
|
*/
|
|
148
139
|
joinSession(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
|
|
149
140
|
/**
|
|
150
|
-
*
|
|
141
|
+
* Once a session is joined: send a "system" update of the session settings.
|
|
151
142
|
* Allows to change some behavioral parameters like the temperature or system instructions.
|
|
152
143
|
* This does not trigger a model response.
|
|
153
144
|
*
|
|
@@ -155,14 +146,14 @@ declare class WebClient {
|
|
|
155
146
|
* With openai for example, this triggers (pseudo-code):
|
|
156
147
|
* webSocket.send({ type: "session.update", session: { ... }})
|
|
157
148
|
*
|
|
158
|
-
* @param settings
|
|
149
|
+
* @param settings new system settings to apply. Omitted values will remain unchanged
|
|
159
150
|
*
|
|
160
151
|
* @exception
|
|
161
152
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
162
153
|
*/
|
|
163
|
-
configureSession(settings:
|
|
154
|
+
configureSession(settings: SessionConfig): void;
|
|
164
155
|
/**
|
|
165
|
-
*
|
|
156
|
+
* Once a session is joined: send a text input message to the LLM. This will trigger a
|
|
166
157
|
* text response as well as an audio response if the session was opened with audio mode active.
|
|
167
158
|
*
|
|
168
159
|
* @remarks
|
|
@@ -177,7 +168,7 @@ declare class WebClient {
|
|
|
177
168
|
*/
|
|
178
169
|
sendText(text: string): void;
|
|
179
170
|
/**
|
|
180
|
-
*
|
|
171
|
+
* Once a session is joined: send a chunk of raw audio data to the LLM.
|
|
181
172
|
* Audio data chunks do not systematically & immediately trigger a model response:
|
|
182
173
|
* They get accumulated by the model to form a single input message, until:
|
|
183
174
|
* - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
|
|
@@ -194,8 +185,8 @@ declare class WebClient {
|
|
|
194
185
|
*/
|
|
195
186
|
sendAudioChunk(chunk: string): void;
|
|
196
187
|
/**
|
|
197
|
-
*
|
|
198
|
-
* the last model response. This effectively flushes the audio buffer and triggers a
|
|
188
|
+
* Once a session is joined: triggers the processing of the accumulated audio data since
|
|
189
|
+
* the last model response. This effectively flushes the audio buffer and triggers a model response.
|
|
199
190
|
*
|
|
200
191
|
* @remarks
|
|
201
192
|
* With openai for example, this triggers (pseudo-code):
|
|
@@ -215,4 +206,4 @@ declare class WebClient {
|
|
|
215
206
|
private send;
|
|
216
207
|
}
|
|
217
208
|
|
|
218
|
-
export { type EventHandlers, type LlmType, type
|
|
209
|
+
export { type EventHandlers, type LlmType, type SessionConfig, WebClient, type WebClientConfig };
|
package/dist/index.d.ts
CHANGED
|
@@ -31,17 +31,13 @@ interface WebClientConfig {
|
|
|
31
31
|
}
|
|
32
32
|
/**
|
|
33
33
|
* System settings influencing the model behavior:
|
|
34
|
-
* @audio: to activate voice conversation
|
|
35
|
-
* @voice: depending on the LLM solution, this is the voice name to be used in audio
|
|
36
34
|
* @temperature: LLM creativity factor in 0-1 range
|
|
37
35
|
* @instructions: system instructions giving context, rules and directions to guide the LLM behavior
|
|
38
36
|
* @maxResponseToken: 1-4096 value, maximum number of token used for a single response. Undefined means unlimited.
|
|
39
37
|
*/
|
|
40
|
-
interface
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
temperature: number;
|
|
44
|
-
instructions: string;
|
|
38
|
+
interface SessionConfig {
|
|
39
|
+
temperature?: number;
|
|
40
|
+
instructions?: string;
|
|
45
41
|
maxResponseToken?: number;
|
|
46
42
|
}
|
|
47
43
|
/**
|
|
@@ -66,7 +62,7 @@ interface SessionSettings {
|
|
|
66
62
|
*/
|
|
67
63
|
interface EventHandlers {
|
|
68
64
|
onStreamClosed: (reason: string) => void;
|
|
69
|
-
onSessionConfigured?: (settings:
|
|
65
|
+
onSessionConfigured?: (settings: SessionConfig) => void;
|
|
70
66
|
onAudioResponseStart?: () => void;
|
|
71
67
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
72
68
|
onAudioResponseEnd?: () => void;
|
|
@@ -93,20 +89,15 @@ declare class WebClient {
|
|
|
93
89
|
* be closed to release the context on server side. See endSession().
|
|
94
90
|
*
|
|
95
91
|
* @remarks
|
|
96
|
-
* A session MUST exist to connect the stream.
|
|
92
|
+
* A session MUST exist first in order to connect the stream next.
|
|
97
93
|
*
|
|
98
|
-
* @param
|
|
99
|
-
* @param
|
|
94
|
+
* @param audioEnabled whether this session can receive & produce audio as well as text
|
|
95
|
+
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
100
96
|
*
|
|
101
97
|
* @exception
|
|
102
98
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
103
|
-
*
|
|
104
|
-
* @example
|
|
105
|
-
* ```TypeScript
|
|
106
|
-
* await startSession("You are useful assistant", true)
|
|
107
|
-
* ```
|
|
108
99
|
*/
|
|
109
|
-
startSession(
|
|
100
|
+
startSession(audioEnabled: boolean, voiceName?: string): Promise<void>;
|
|
110
101
|
/**
|
|
111
102
|
* Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
|
|
112
103
|
* If there is no active session, this method does nothing.
|
|
@@ -128,7 +119,7 @@ declare class WebClient {
|
|
|
128
119
|
* This method not only opens a websocket connection with the server but also initiates a
|
|
129
120
|
* handshake, where the server explicitly acknowledges and accepts the client connection.
|
|
130
121
|
*
|
|
131
|
-
* @param audio for
|
|
122
|
+
* @param audio for an audio-enabled session, request this streaming connection to include audio events
|
|
132
123
|
* @param handlers callback functions to handle every possible communication events coming from the server
|
|
133
124
|
*
|
|
134
125
|
* @exception
|
|
@@ -137,7 +128,7 @@ declare class WebClient {
|
|
|
137
128
|
* @example
|
|
138
129
|
* ```TypeScript
|
|
139
130
|
* // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
|
|
140
|
-
* await
|
|
131
|
+
* await joinSession(true, {
|
|
141
132
|
* onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
|
|
142
133
|
* onAudioResponseStart: () => { console.log("The model is talking"); },
|
|
143
134
|
* onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
|
|
@@ -147,7 +138,7 @@ declare class WebClient {
|
|
|
147
138
|
*/
|
|
148
139
|
joinSession(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
|
|
149
140
|
/**
|
|
150
|
-
*
|
|
141
|
+
* Once a session is joined: send a "system" update of the session settings.
|
|
151
142
|
* Allows to change some behavioral parameters like the temperature or system instructions.
|
|
152
143
|
* This does not trigger a model response.
|
|
153
144
|
*
|
|
@@ -155,14 +146,14 @@ declare class WebClient {
|
|
|
155
146
|
* With openai for example, this triggers (pseudo-code):
|
|
156
147
|
* webSocket.send({ type: "session.update", session: { ... }})
|
|
157
148
|
*
|
|
158
|
-
* @param settings
|
|
149
|
+
* @param settings new system settings to apply. Omitted values will remain unchanged
|
|
159
150
|
*
|
|
160
151
|
* @exception
|
|
161
152
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
162
153
|
*/
|
|
163
|
-
configureSession(settings:
|
|
154
|
+
configureSession(settings: SessionConfig): void;
|
|
164
155
|
/**
|
|
165
|
-
*
|
|
156
|
+
* Once a session is joined: send a text input message to the LLM. This will trigger a
|
|
166
157
|
* text response as well as an audio response if the session was opened with audio mode active.
|
|
167
158
|
*
|
|
168
159
|
* @remarks
|
|
@@ -177,7 +168,7 @@ declare class WebClient {
|
|
|
177
168
|
*/
|
|
178
169
|
sendText(text: string): void;
|
|
179
170
|
/**
|
|
180
|
-
*
|
|
171
|
+
* Once a session is joined: send a chunk of raw audio data to the LLM.
|
|
181
172
|
* Audio data chunks do not systematically & immediately trigger a model response:
|
|
182
173
|
* They get accumulated by the model to form a single input message, until:
|
|
183
174
|
* - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
|
|
@@ -194,8 +185,8 @@ declare class WebClient {
|
|
|
194
185
|
*/
|
|
195
186
|
sendAudioChunk(chunk: string): void;
|
|
196
187
|
/**
|
|
197
|
-
*
|
|
198
|
-
* the last model response. This effectively flushes the audio buffer and triggers a
|
|
188
|
+
* Once a session is joined: triggers the processing of the accumulated audio data since
|
|
189
|
+
* the last model response. This effectively flushes the audio buffer and triggers a model response.
|
|
199
190
|
*
|
|
200
191
|
* @remarks
|
|
201
192
|
* With openai for example, this triggers (pseudo-code):
|
|
@@ -215,4 +206,4 @@ declare class WebClient {
|
|
|
215
206
|
private send;
|
|
216
207
|
}
|
|
217
208
|
|
|
218
|
-
export { type EventHandlers, type LlmType, type
|
|
209
|
+
export { type EventHandlers, type LlmType, type SessionConfig, WebClient, type WebClientConfig };
|
package/dist/index.js
CHANGED
|
@@ -11,20 +11,20 @@ var WebClient = class {
|
|
|
11
11
|
* be closed to release the context on server side. See endSession().
|
|
12
12
|
*
|
|
13
13
|
* @remarks
|
|
14
|
-
* A session MUST exist to connect the stream.
|
|
14
|
+
* A session MUST exist first in order to connect the stream next.
|
|
15
15
|
*
|
|
16
|
-
* @param
|
|
17
|
-
* @param
|
|
16
|
+
* @param audioEnabled whether this session can receive & produce audio as well as text
|
|
17
|
+
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
18
18
|
*
|
|
19
19
|
* @exception
|
|
20
20
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
21
|
-
*
|
|
22
|
-
* @example
|
|
23
|
-
* ```TypeScript
|
|
24
|
-
* await startSession("You are useful assistant", true)
|
|
25
|
-
* ```
|
|
26
21
|
*/
|
|
27
|
-
async startSession(
|
|
22
|
+
async startSession(audioEnabled, voiceName) {
|
|
23
|
+
const prepareBody = {
|
|
24
|
+
llmProvider: this.config.llm,
|
|
25
|
+
audioEnabled,
|
|
26
|
+
voiceName
|
|
27
|
+
};
|
|
28
28
|
const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
|
|
29
29
|
method: "POST",
|
|
30
30
|
headers: {
|
|
@@ -32,11 +32,7 @@ var WebClient = class {
|
|
|
32
32
|
"Content-Type": "application/json",
|
|
33
33
|
"Accept": "application/json"
|
|
34
34
|
},
|
|
35
|
-
body: JSON.stringify(
|
|
36
|
-
llmProvider: this.config.llm,
|
|
37
|
-
systemPrompt: instructions,
|
|
38
|
-
audio
|
|
39
|
-
})
|
|
35
|
+
body: JSON.stringify(prepareBody)
|
|
40
36
|
});
|
|
41
37
|
if (!response.ok) {
|
|
42
38
|
throw new Error(`Could not create a new chat session
|
|
@@ -90,7 +86,7 @@ var WebClient = class {
|
|
|
90
86
|
* This method not only opens a websocket connection with the server but also initiates a
|
|
91
87
|
* handshake, where the server explicitly acknowledges and accepts the client connection.
|
|
92
88
|
*
|
|
93
|
-
* @param audio for
|
|
89
|
+
* @param audio for an audio-enabled session, request this streaming connection to include audio events
|
|
94
90
|
* @param handlers callback functions to handle every possible communication events coming from the server
|
|
95
91
|
*
|
|
96
92
|
* @exception
|
|
@@ -99,7 +95,7 @@ var WebClient = class {
|
|
|
99
95
|
* @example
|
|
100
96
|
* ```TypeScript
|
|
101
97
|
* // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
|
|
102
|
-
* await
|
|
98
|
+
* await joinSession(true, {
|
|
103
99
|
* onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
|
|
104
100
|
* onAudioResponseStart: () => { console.log("The model is talking"); },
|
|
105
101
|
* onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
|
|
@@ -126,27 +122,30 @@ var WebClient = class {
|
|
|
126
122
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
127
123
|
};
|
|
128
124
|
ws.onmessage = async (event) => {
|
|
129
|
-
var _a, _b, _c, _d, _e, _f;
|
|
125
|
+
var _a, _b, _c, _d, _e, _f, _g;
|
|
130
126
|
try {
|
|
131
127
|
const data = JSON.parse(event.data);
|
|
132
128
|
switch (data.type) {
|
|
129
|
+
case "session.configured":
|
|
130
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
131
|
+
break;
|
|
133
132
|
case "audio.response.start":
|
|
134
|
-
(
|
|
133
|
+
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
135
134
|
break;
|
|
136
135
|
case "audio.response.append":
|
|
137
|
-
(
|
|
136
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
|
|
138
137
|
break;
|
|
139
138
|
case "audio.response.done":
|
|
140
|
-
(
|
|
139
|
+
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
141
140
|
break;
|
|
142
141
|
case "transcript.input":
|
|
143
|
-
(
|
|
142
|
+
(_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
|
|
144
143
|
break;
|
|
145
144
|
case "transcript.response":
|
|
146
|
-
(
|
|
145
|
+
(_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
|
|
147
146
|
break;
|
|
148
147
|
case "technical.error":
|
|
149
|
-
(
|
|
148
|
+
(_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
|
|
150
149
|
break;
|
|
151
150
|
default:
|
|
152
151
|
break;
|
|
@@ -158,7 +157,7 @@ var WebClient = class {
|
|
|
158
157
|
this.ws = ws;
|
|
159
158
|
}
|
|
160
159
|
/**
|
|
161
|
-
*
|
|
160
|
+
* Once a session is joined: send a "system" update of the session settings.
|
|
162
161
|
* Allows to change some behavioral parameters like the temperature or system instructions.
|
|
163
162
|
* This does not trigger a model response.
|
|
164
163
|
*
|
|
@@ -166,16 +165,16 @@ var WebClient = class {
|
|
|
166
165
|
* With openai for example, this triggers (pseudo-code):
|
|
167
166
|
* webSocket.send({ type: "session.update", session: { ... }})
|
|
168
167
|
*
|
|
169
|
-
* @param settings
|
|
168
|
+
* @param settings new system settings to apply. Omitted values will remain unchanged
|
|
170
169
|
*
|
|
171
170
|
* @exception
|
|
172
171
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
173
172
|
*/
|
|
174
|
-
|
|
173
|
+
configureSession(settings) {
|
|
175
174
|
this.send("session.configure", settings);
|
|
176
175
|
}
|
|
177
176
|
/**
|
|
178
|
-
*
|
|
177
|
+
* Once a session is joined: send a text input message to the LLM. This will trigger a
|
|
179
178
|
* text response as well as an audio response if the session was opened with audio mode active.
|
|
180
179
|
*
|
|
181
180
|
* @remarks
|
|
@@ -192,7 +191,7 @@ var WebClient = class {
|
|
|
192
191
|
this.send("text.input.message", { text });
|
|
193
192
|
}
|
|
194
193
|
/**
|
|
195
|
-
*
|
|
194
|
+
* Once a session is joined: send a chunk of raw audio data to the LLM.
|
|
196
195
|
* Audio data chunks do not systematically & immediately trigger a model response:
|
|
197
196
|
* They get accumulated by the model to form a single input message, until:
|
|
198
197
|
* - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
|
|
@@ -211,8 +210,8 @@ var WebClient = class {
|
|
|
211
210
|
this.send("audio.input.append", { audioData: chunk });
|
|
212
211
|
}
|
|
213
212
|
/**
|
|
214
|
-
*
|
|
215
|
-
* the last model response. This effectively flushes the audio buffer and triggers a
|
|
213
|
+
* Once a session is joined: triggers the processing of the accumulated audio data since
|
|
214
|
+
* the last model response. This effectively flushes the audio buffer and triggers a model response.
|
|
216
215
|
*
|
|
217
216
|
* @remarks
|
|
218
217
|
* With openai for example, this triggers (pseudo-code):
|
|
@@ -250,14 +249,22 @@ var WebClient = class {
|
|
|
250
249
|
ws.onopen = () => {
|
|
251
250
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
252
251
|
ws.send(JSON.stringify({
|
|
253
|
-
type: "connection.
|
|
254
|
-
data: {
|
|
252
|
+
type: "connection.initiate",
|
|
253
|
+
data: {
|
|
254
|
+
subscription: eventSubs,
|
|
255
|
+
settings: {
|
|
256
|
+
audio: true,
|
|
257
|
+
voice: "alloy",
|
|
258
|
+
temperature: 0.8,
|
|
259
|
+
instructions: ""
|
|
260
|
+
}
|
|
261
|
+
}
|
|
255
262
|
}));
|
|
256
263
|
};
|
|
257
264
|
ws.onmessage = (event) => {
|
|
258
265
|
try {
|
|
259
266
|
const data = JSON.parse(event.data);
|
|
260
|
-
if (data.type === "connection.
|
|
267
|
+
if (data.type === "connection.initiated")
|
|
261
268
|
resolve(true);
|
|
262
269
|
else
|
|
263
270
|
reject(new Error(`Received unexpected event: ${data.type}`));
|