@simfinity/constellation-client 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -0
- package/dist/index.cjs +264 -8
- package/dist/index.d.cts +185 -7
- package/dist/index.d.ts +185 -7
- package/dist/index.js +263 -7
- package/package.json +2 -6
package/README.md
ADDED

@@ -0,0 +1,125 @@
+# @simfinity/constellation-client
+
+## Installation
+
+```bash
+npm install @simfinity/constellation-client
+# or
+yarn add @simfinity/constellation-client
+```
+
+## Purpose & Usage
+This package is a code wrapper to integrate the Simfinity constellation server.
+The constellation server is a proxy managing streaming sessions with third-party LLMs.
+This package provides the programmatic functions covering the complete lifecycle of a streaming session:
+- Open/start a session
+- Callbacks to continuously send and receive streamed data over a persistent connection
+- Close/end the session
+
+### Server implementation insight
+The Constellation server is a chat-room & session manager: upon receiving a session-start request,
+it creates a persistent chat-room, initiates the persistent connection with the LLM and configures it accordingly
+(e.g. system instruction, temperature, audio, transcript subscription...).
+Clients may lose their connection with Constellation, but the chat-room will remain open on the server side,
+allowing the client to reconnect and resume the session.
+Clients MUST notify the server that a session has ended, so that Constellation can release the allocated resources.
+
+### Example
+Key steps in pseudo-code:
+```TypeScript
+const client = new WebClient({
+  sessionEndpoint: "https://simfinity.constellation.com",
+  streamingEndpoint: "wss://simfinity.constellation.com:30003",
+  key: "my-key",
+  llm: "openai",
+  model: "gpt-4o-realtime-preview-2024-12-17",
+});
+
+try {
+  /* ... */
+
+  // Start a chat session
+  await client.startSession("You are a useful assistant", true);
+  await client.connect(true, {
+    onStreamClosed: (reason: string) => {
+      console.log("Stream connection lost");
+    },
+    onAudioResponseStart: () => {
+      console.log("The model is talking");
+    },
+    onAudioResponseChunk: (audioChunk: string) => {
+      audioPlayer.enqueue(audioChunk); // audioPlayer: an app-provided player
+    },
+    onAudioResponseEnd: () => {
+      console.log("The model is done talking");
+    }
+  });
+
+  /* ... */
+
+  client.sendAudioChunk("{PCM16 Base64-encoded data}");
+  client.commitAudioChunksSent();
+
+  /* ... */
+}
+catch {
+  // handle/report errors
+}
+finally {
+  await client.endSession();
+}
+```
+
+### Types
+
+**Configuration**
+
+Configuration required to initiate a connection with the server.
+In the client, these values would typically be stored in secret stores & environment variables:
+```TypeScript
+export interface WebClientConfig {
+  sessionEndpoint: string;
+  streamingEndpoint: string;
+  key: string;
+  llm: LlmType;
+  model: string;
+}
+```
+
+**Event hooks**
+
+Callback functions to catch all the propagated server events. Except for the
+onStreamClosed event, assigning hooks is optional:
+non-observed events will be silently ignored & lost.
+```TypeScript
+export interface EventHandlers {
+  onStreamClosed: (reason: string) => void;
+  onAudioResponseStart?: () => void;
+  onAudioResponseChunk?: (audioChunk: string) => void;
+  onAudioResponseEnd?: () => void;
+  onTranscriptInput?: (transcript: string) => void;
+  onTranscriptResponse?: (transcript: string) => void;
+  onTechnicalError?: (error: string) => void;
+}
+```
+
+### Audio
+
+* The server accepts exclusively base64-encoded PCM16 audio & sends responses in the same format in return
+* The server implements VAD (voice activity detection), configured to treat a 1 s silence as a response trigger
+* Therefore, input audio data chunks can be streamed immediately, without buffering
+* Clients should however implement voice detection as well to reduce network consumption, e.g. (a sketch follows this file's diff):
+  * a 500 ms ring buffer continuously filled with audio input
+  * noise detection with a minimum threshold
+
+### Text & Transcript
+
+* The transcript input/response callbacks carry both the text exchanges and the transcriptions of audio exchanges
+* In an audio session, text and audio inputs will trigger:
+  * a mirrored input transcript through onTranscriptInput
+  * an audio response through onAudioResponseChunk
+  * a text transcript of the audio response through onTranscriptResponse
+* In a text-only session, a text input will trigger:
+  * a mirrored input transcript through onTranscriptInput
+  * a text response through the onTranscriptResponse callback
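Note: the README's Audio section above recommends client-side voice detection (ring buffer + noise threshold) but leaves it to the reader. Below is a minimal sketch of such a gate; the 24 kHz sample rate, the 500 ms window and the RMS threshold value are assumptions to tune for your setup, and `VoiceGate` is a hypothetical helper, not part of this package.

```TypeScript
// Minimal voice gate over PCM16 frames, following the README's suggestion.
// Assumptions: 24 kHz mono PCM16 input, ~500 ms window, arbitrary RMS floor.
const SAMPLE_RATE = 24000;
const WINDOW_MS = 500;
const RMS_THRESHOLD = 0.015; // hypothetical noise floor, normalized to [0, 1]

class VoiceGate {
  private window: number[] = []; // per-frame RMS values covering ~500 ms
  private framesPerWindow: number;

  constructor(frameSamples: number) {
    this.framesPerWindow = Math.ceil((SAMPLE_RATE * WINDOW_MS) / 1000 / frameSamples);
  }

  /** True when the last ~500 ms of audio contains voice-level energy. */
  shouldSend(frame: Int16Array): boolean {
    let sum = 0;
    for (let i = 0; i < frame.length; i++) {
      const s = frame[i] / 32768; // normalize PCM16 to [-1, 1]
      sum += s * s;
    }
    this.window.push(Math.sqrt(sum / frame.length));
    if (this.window.length > this.framesPerWindow) this.window.shift();
    return Math.max(...this.window) > RMS_THRESHOLD;
  }
}
```

A capture loop would then call `client.sendAudioChunk(...)` only while `shouldSend` returns true, leaving turn-taking to the server-side VAD.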
package/dist/index.cjs
CHANGED

@@ -20,17 +20,273 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
-
+  WebClient: () => WebClient_default
 });
 module.exports = __toCommonJS(index_exports);
 
-// src/
-var
-
-
-
+// src/WebClient.ts
+var WebClient = class {
+  constructor(config) {
+    this.ws = null;
+    this.sessionId = null;
+    this.config = config;
+  }
+  /**
+   * Start a persistent chat room on the server, allowing for re-connection
+   * when the streaming connection is lost. Once a session has been started it must
+   * be closed to release the context on the server side. See endSession().
+   *
+   * @remarks
+   * A session MUST exist to connect the stream.
+   *
+   * @param instructions to the model, added to its context. These are the "system" input instructions.
+   * @param audio whether to allow audio streaming or text-only
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * await startSession("You are a useful assistant", true)
+   * ```
+   */
+  async startSession(instructions = "", audio = false) {
+    const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
+      method: "POST",
+      headers: {
+        "Authorization": `Bearer ${this.config.key}`,
+        "Content-Type": "application/json",
+        "Accept": "application/json"
+      },
+      body: JSON.stringify({
+        llmProvider: this.config.llm,
+        systemPrompt: instructions,
+        audio
+      })
+    });
+    if (!response.ok) {
+      throw new Error(`Could not create a new chat session
+[${response.status}:${response.statusText}]`);
+    }
+    try {
+      const result = await response.json();
+      if (result) {
+        this.sessionId = result.sessionId;
+      }
+    } catch (error) {
+      throw new Error(`Failed to read the session-create response: ${error}`);
+    }
+  }
+  /**
+   * Close an opened, persistent chat room, effectively killing the streaming as well if still open.
+   * If there is no active session, this method does nothing.
+   *
+   * @remarks
+   * Not closing an opened session will not prevent starting a new one; however, this could
+   * starve the server's resources and affect service stability.
+   * Make sure to always close an opened session when finished.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  async endSession() {
+    if (!this.sessionId) {
+      return;
+    }
+    const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
+      method: "POST",
+      headers: {
+        "Authorization": `Bearer ${this.config.key}`,
+        "Content-Type": "application/json",
+        "Accept": "application/json"
+      },
+      body: JSON.stringify({ sessionId: this.sessionId })
+    });
+    if (!response.ok) {
+      throw new Error(`Could not close the chat session
+[${response.status}:${response.statusText}]`);
+    }
+    this.sessionId = "";
+  }
+  /**
+   * Following a successful startSession, open a streaming connection with the server.
+   * After a successful call to this connect() method, the client is ready to send & receive events.
+   *
+   * @remarks
+   * This method not only opens a websocket connection with the server but also initiates a
+   * handshake, where the server explicitly acknowledges and accepts the client connection.
+   *
+   * @param audio for a session that was created with audio capabilities, allows this stream to include audio
+   * @param handlers callback functions to handle every possible communication event coming from the server
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
+   * await connect(true, {
+   *   onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
+   *   onAudioResponseStart: () => { console.log("The model is talking"); },
+   *   onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
+   *   onAudioResponseEnd: () => { console.log("The model is done talking"); }
+   * })
+   * ```
+   */
+  async connect(audio = false, handlers) {
+    if (!this.sessionId) {
+      throw new Error("No open session");
+    }
+    const ws = new WebSocket(
+      `${this.config.streamingEndpoint}/web/${this.sessionId}`,
+      ["key", this.config.key]
+    );
+    if (!await this.serverHandShake(ws, audio)) {
+      ws.close();
+      throw new Error("Unable to establish the connection");
+    }
+    ws.onerror = (error) => {
+      handlers.onStreamClosed(`WebSocket error: ${error}`);
+    };
+    ws.onclose = (event) => {
+      handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
+    };
+    ws.onmessage = async (event) => {
+      var _a, _b, _c, _d, _e, _f;
+      try {
+        const data = JSON.parse(event.data);
+        switch (data.type) {
+          case "audio.response.start":
+            (_a = handlers.onAudioResponseStart) == null ? void 0 : _a.call(handlers);
+            break;
+          case "audio.response.append":
+            (_b = handlers.onAudioResponseChunk) == null ? void 0 : _b.call(handlers, data.data.audioData);
+            break;
+          case "audio.response.done":
+            (_c = handlers.onAudioResponseEnd) == null ? void 0 : _c.call(handlers);
+            break;
+          case "transcript.input":
+            (_d = handlers.onTranscriptInput) == null ? void 0 : _d.call(handlers, data.data.transcript);
+            break;
+          case "transcript.response":
+            (_e = handlers.onTranscriptResponse) == null ? void 0 : _e.call(handlers, data.data.transcript);
+            break;
+          case "technical.error":
+            (_f = handlers.onTechnicalError) == null ? void 0 : _f.call(handlers, data.data.error);
+            break;
+          default:
+            break;
+        }
+      } catch (error) {
+        console.error("Error processing message:", error, event.data);
+      }
+    };
+    this.ws = ws;
+  }
+  /**
+   * With an open streaming connection: send a text input message to the LLM. This will trigger a
+   * text response, as well as an audio response if the session was opened with audio mode active.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text", text: text }}})
+   * webSocket.send({ type: "response.create" })
+   *
+   * @param text input message
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendText(text) {
+    this.send("text.input.message", { text });
+  }
+  /**
+   * With an open streaming connection: send a chunk of raw audio data to the LLM.
+   * Audio data chunks do not systematically & immediately trigger a model response:
+   * they get accumulated by the model to form a single input message, until:
+   * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
+   * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
+   *
+   * @param chunk base64-encoded pcm16 audio data chunk
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendAudioChunk(chunk) {
+    this.send("audio.input.append", { audioData: chunk });
+  }
+  /**
+   * With an open streaming connection: triggers the processing of the audio data accumulated since
+   * the last model response. This effectively flushes the audio buffer and triggers a new model response.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.commit" })
+   *
+   * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
+   * configured to detect a ~1 second silence before automatically triggering a model response.
+   *
+   * Calling commitAudioChunksSent will always trigger a model response, even if no audio
+   * data was sent since the last response.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  commitAudioChunksSent() {
+    this.send("audio.input.commit");
+  }
+  // ================= Inner utils ================= //
+  async serverHandShake(ws, audio) {
+    return new Promise((resolve, reject) => {
+      const timer = setTimeout(() => {
+        cleanup();
+        reject(new Error("Handshake timeout"));
+      }, 5e3);
+      const cleanup = () => {
+        clearTimeout(timer);
+        ws.onopen = null;
+        ws.onerror = null;
+        ws.onmessage = null;
+      };
+      ws.onerror = (error) => {
+        cleanup();
+        reject(new Error(`WebSocket connection failed with ${error.type}`));
+      };
+      ws.onopen = () => {
+        const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
+        ws.send(JSON.stringify({
+          type: "connection.request",
+          data: { subscription: eventSubs }
+        }));
+      };
+      ws.onmessage = (event) => {
+        try {
+          const data = JSON.parse(event.data);
+          if (data.type === "connection.accepted")
+            resolve(true);
+          else
+            reject(new Error(`Received unexpected event: ${data.type}`));
+        } catch (e) {
+          reject(new Error(`An unexpected error occurred: ${e}`));
+        } finally {
+          cleanup();
+        }
+      };
+    });
+  }
+  send(type, data = null) {
+    if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
+      throw new Error("Stream is not opened");
+    }
+    this.ws.send(JSON.stringify({ type, ...data && { data } }));
+  }
+};
+var WebClient_default = WebClient;
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-
+  WebClient
 });
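Note: sendAudioChunk expects base64-encoded PCM16, while browser microphone APIs typically deliver Float32 samples. A minimal conversion sketch for a browser environment follows; `floatToBase64Pcm16` and `micBuffer` are hypothetical names, and `btoa` is assumed available (in Node you would use `Buffer.from(bytes).toString("base64")` instead).

```TypeScript
// Convert a Float32 microphone buffer (values in [-1, 1]) into the
// base64-encoded PCM16 payload that sendAudioChunk expects.
function floatToBase64Pcm16(samples: Float32Array): string {
  const pcm = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    // Clamp, then scale to the 16-bit signed range.
    const s = Math.max(-1, Math.min(1, samples[i]));
    pcm[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
  }
  // Base64-encode the little-endian bytes (browser path).
  const bytes = new Uint8Array(pcm.buffer);
  let binary = "";
  for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
  return btoa(binary);
}

// Usage: client.sendAudioChunk(floatToBase64Pcm16(micBuffer));
```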
package/dist/index.d.cts
CHANGED

@@ -1,8 +1,186 @@
-
+/**
+ * Available server-side LLM types
+ */
+type LlmType = "openai";
+/**
+ * Configuration required to initiate a connection with the stream server:
+ *
+ * @sessionEndpoint : REST base URL to the constellation API for managing sessions
+ * @streamingEndpoint : WebSocket endpoint to the constellation server
+ * @key : Simfinity API secret key granting access to the server API
+ * @llm : which LLM service to connect to
+ * @model : depends on the LLM service. This is the model name as defined by the LLM service
+ *
+ * @example
+ * ```TypeScript
+ * {
+ *   sessionEndpoint: "https://simfinity.constellation.com",
+ *   streamingEndpoint: "wss://simfinity.constellation.com:30003",
+ *   key: "some-secret-key",
+ *   llm: "openai",
+ *   model: "gpt-4o-realtime-preview-2024-12-17"
+ * }
+ * ```
+ */
+interface WebClientConfig {
+  sessionEndpoint: string;
+  streamingEndpoint: string;
+  key: string;
+  llm: LlmType;
+  model: string;
+}
+/**
+ * Callback functions to catch all the propagated server events.
+ *
+ * @onStreamClosed the streaming session (web socket) shut down
+ * @onAudioResponseStart the LLM service is about to respond with streaming audio data
+ * @onAudioResponseChunk a new chunk of response audio data was received
+ * @onAudioResponseEnd the model has finished responding. The audio response has been entirely streamed
+ * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
+ * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
+ * @onTechnicalError any technical issue encountered during the stream
+ *
+ * @remarks
+ * Un-assigned callbacks will not cause this client to throw when events are received from the server.
+ * However, the events and the information attached will be lost.
+ *
+ * The transcript events have a dual purpose:
+ * - In an audio exchange, they hold the text transcripts of the audio conversation
+ * - In a text exchange, they hold the actual text messages of the conversation
+ */
+interface EventHandlers {
+  onStreamClosed: (reason: string) => void;
+  onAudioResponseStart?: () => void;
+  onAudioResponseChunk?: (audioChunk: string) => void;
+  onAudioResponseEnd?: () => void;
+  onTranscriptInput?: (transcript: string) => void;
+  onTranscriptResponse?: (transcript: string) => void;
+  onTechnicalError?: (error: string) => void;
+}
+/**
+ * This class is a code wrapper to integrate the Simfinity constellation server.
+ * The constellation server is a proxy managing streaming sessions with third-party LLMs.
+ * This class manages the complete lifecycle of a streaming session:
+ * - Open/start session
+ * - Continuously sending and receiving streamed data over a persistent connection
+ * - Close/end session
+ */
+declare class WebClient {
+  private config;
+  private ws;
+  private sessionId;
+  constructor(config: WebClientConfig);
+  /**
+   * Start a persistent chat room on the server, allowing for re-connection
+   * when the streaming connection is lost. Once a session has been started it must
+   * be closed to release the context on the server side. See endSession().
+   *
+   * @remarks
+   * A session MUST exist to connect the stream.
+   *
+   * @param instructions to the model, added to its context. These are the "system" input instructions.
+   * @param audio whether to allow audio streaming or text-only
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * await startSession("You are a useful assistant", true)
+   * ```
+   */
+  startSession(instructions?: string, audio?: boolean): Promise<void>;
+  /**
+   * Close an opened, persistent chat room, effectively killing the streaming as well if still open.
+   * If there is no active session, this method does nothing.
+   *
+   * @remarks
+   * Not closing an opened session will not prevent starting a new one; however, this could
+   * starve the server's resources and affect service stability.
+   * Make sure to always close an opened session when finished.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  endSession(): Promise<void>;
+  /**
+   * Following a successful startSession, open a streaming connection with the server.
+   * After a successful call to this connect() method, the client is ready to send & receive events.
+   *
+   * @remarks
+   * This method not only opens a websocket connection with the server but also initiates a
+   * handshake, where the server explicitly acknowledges and accepts the client connection.
+   *
+   * @param audio for a session that was created with audio capabilities, allows this stream to include audio
+   * @param handlers callback functions to handle every possible communication event coming from the server
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
+   * await connect(true, {
+   *   onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
+   *   onAudioResponseStart: () => { console.log("The model is talking"); },
+   *   onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
+   *   onAudioResponseEnd: () => { console.log("The model is done talking"); }
+   * })
+   * ```
+   */
+  connect(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
+  /**
+   * With an open streaming connection: send a text input message to the LLM. This will trigger a
+   * text response, as well as an audio response if the session was opened with audio mode active.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text", text: text }}})
+   * webSocket.send({ type: "response.create" })
+   *
+   * @param text input message
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendText(text: string): void;
+  /**
+   * With an open streaming connection: send a chunk of raw audio data to the LLM.
+   * Audio data chunks do not systematically & immediately trigger a model response:
+   * they get accumulated by the model to form a single input message, until:
+   * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
+   * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
+   *
+   * @param chunk base64-encoded pcm16 audio data chunk
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendAudioChunk(chunk: string): void;
+  /**
+   * With an open streaming connection: triggers the processing of the audio data accumulated since
+   * the last model response. This effectively flushes the audio buffer and triggers a new model response.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.commit" })
+   *
+   * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
+   * configured to detect a ~1 second silence before automatically triggering a model response.
+   *
+   * Calling commitAudioChunksSent will always trigger a model response, even if no audio
+   * data was sent since the last response.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  commitAudioChunksSent(): void;
+  private serverHandShake;
+  private send;
+}
 
-type
-label: string;
-};
-declare function Dummy({ label }: DummyProps): react_jsx_runtime.JSX.Element;
-
-export { Dummy, type DummyProps };
+export { type EventHandlers, type LlmType, WebClient, type WebClientConfig };
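Note: since only onStreamClosed is mandatory in EventHandlers, a typed handlers object can start small and grow. A sketch that records the conversation as it streams follows; the `log` array and its shape are illustrative, not part of the package.

```TypeScript
import type { EventHandlers } from "@simfinity/constellation-client";

// Transcript events are dual-purpose: in an audio session they carry the
// transcriptions of speech; in a text-only session, the actual messages.
const log: { role: "user" | "assistant"; text: string }[] = [];

const handlers: EventHandlers = {
  onStreamClosed: (reason) => console.warn(`Stream closed: ${reason}`),
  onTranscriptInput: (t) => log.push({ role: "user", text: t }),
  onTranscriptResponse: (t) => log.push({ role: "assistant", text: t }),
  onTechnicalError: (e) => console.error(`Server-side error: ${e}`),
  // Audio hooks omitted: unobserved events are silently dropped.
};
```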
package/dist/index.d.ts
CHANGED

@@ -1,8 +1,186 @@
-
+/**
+ * Available server-side LLM types
+ */
+type LlmType = "openai";
+/**
+ * Configuration required to initiate a connection with the stream server:
+ *
+ * @sessionEndpoint : REST base URL to the constellation API for managing sessions
+ * @streamingEndpoint : WebSocket endpoint to the constellation server
+ * @key : Simfinity API secret key granting access to the server API
+ * @llm : which LLM service to connect to
+ * @model : depends on the LLM service. This is the model name as defined by the LLM service
+ *
+ * @example
+ * ```TypeScript
+ * {
+ *   sessionEndpoint: "https://simfinity.constellation.com",
+ *   streamingEndpoint: "wss://simfinity.constellation.com:30003",
+ *   key: "some-secret-key",
+ *   llm: "openai",
+ *   model: "gpt-4o-realtime-preview-2024-12-17"
+ * }
+ * ```
+ */
+interface WebClientConfig {
+  sessionEndpoint: string;
+  streamingEndpoint: string;
+  key: string;
+  llm: LlmType;
+  model: string;
+}
+/**
+ * Callback functions to catch all the propagated server events.
+ *
+ * @onStreamClosed the streaming session (web socket) shut down
+ * @onAudioResponseStart the LLM service is about to respond with streaming audio data
+ * @onAudioResponseChunk a new chunk of response audio data was received
+ * @onAudioResponseEnd the model has finished responding. The audio response has been entirely streamed
+ * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
+ * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
+ * @onTechnicalError any technical issue encountered during the stream
+ *
+ * @remarks
+ * Un-assigned callbacks will not cause this client to throw when events are received from the server.
+ * However, the events and the information attached will be lost.
+ *
+ * The transcript events have a dual purpose:
+ * - In an audio exchange, they hold the text transcripts of the audio conversation
+ * - In a text exchange, they hold the actual text messages of the conversation
+ */
+interface EventHandlers {
+  onStreamClosed: (reason: string) => void;
+  onAudioResponseStart?: () => void;
+  onAudioResponseChunk?: (audioChunk: string) => void;
+  onAudioResponseEnd?: () => void;
+  onTranscriptInput?: (transcript: string) => void;
+  onTranscriptResponse?: (transcript: string) => void;
+  onTechnicalError?: (error: string) => void;
+}
+/**
+ * This class is a code wrapper to integrate the Simfinity constellation server.
+ * The constellation server is a proxy managing streaming sessions with third-party LLMs.
+ * This class manages the complete lifecycle of a streaming session:
+ * - Open/start session
+ * - Continuously sending and receiving streamed data over a persistent connection
+ * - Close/end session
+ */
+declare class WebClient {
+  private config;
+  private ws;
+  private sessionId;
+  constructor(config: WebClientConfig);
+  /**
+   * Start a persistent chat room on the server, allowing for re-connection
+   * when the streaming connection is lost. Once a session has been started it must
+   * be closed to release the context on the server side. See endSession().
+   *
+   * @remarks
+   * A session MUST exist to connect the stream.
+   *
+   * @param instructions to the model, added to its context. These are the "system" input instructions.
+   * @param audio whether to allow audio streaming or text-only
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * await startSession("You are a useful assistant", true)
+   * ```
+   */
+  startSession(instructions?: string, audio?: boolean): Promise<void>;
+  /**
+   * Close an opened, persistent chat room, effectively killing the streaming as well if still open.
+   * If there is no active session, this method does nothing.
+   *
+   * @remarks
+   * Not closing an opened session will not prevent starting a new one; however, this could
+   * starve the server's resources and affect service stability.
+   * Make sure to always close an opened session when finished.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  endSession(): Promise<void>;
+  /**
+   * Following a successful startSession, open a streaming connection with the server.
+   * After a successful call to this connect() method, the client is ready to send & receive events.
+   *
+   * @remarks
+   * This method not only opens a websocket connection with the server but also initiates a
+   * handshake, where the server explicitly acknowledges and accepts the client connection.
+   *
+   * @param audio for a session that was created with audio capabilities, allows this stream to include audio
+   * @param handlers callback functions to handle every possible communication event coming from the server
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
+   * await connect(true, {
+   *   onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
+   *   onAudioResponseStart: () => { console.log("The model is talking"); },
+   *   onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
+   *   onAudioResponseEnd: () => { console.log("The model is done talking"); }
+   * })
+   * ```
+   */
+  connect(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
+  /**
+   * With an open streaming connection: send a text input message to the LLM. This will trigger a
+   * text response, as well as an audio response if the session was opened with audio mode active.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text", text: text }}})
+   * webSocket.send({ type: "response.create" })
+   *
+   * @param text input message
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendText(text: string): void;
+  /**
+   * With an open streaming connection: send a chunk of raw audio data to the LLM.
+   * Audio data chunks do not systematically & immediately trigger a model response:
+   * they get accumulated by the model to form a single input message, until:
+   * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
+   * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
+   *
+   * @param chunk base64-encoded pcm16 audio data chunk
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendAudioChunk(chunk: string): void;
+  /**
+   * With an open streaming connection: triggers the processing of the audio data accumulated since
+   * the last model response. This effectively flushes the audio buffer and triggers a new model response.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.commit" })
+   *
+   * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
+   * configured to detect a ~1 second silence before automatically triggering a model response.
+   *
+   * Calling commitAudioChunksSent will always trigger a model response, even if no audio
+   * data was sent since the last response.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  commitAudioChunksSent(): void;
+  private serverHandShake;
+  private send;
+}
 
-type
-label: string;
-};
-declare function Dummy({ label }: DummyProps): react_jsx_runtime.JSX.Element;
-
-export { Dummy, type DummyProps };
+export { type EventHandlers, type LlmType, WebClient, type WebClientConfig };
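Note: because the server keeps the chat-room (and its context) alive when the WebSocket drops, onStreamClosed is a natural place to attempt a reconnect. A sketch of one such policy follows; `connectWithRetry`, the one-second delay and the retry cap are assumptions, not part of the package.

```TypeScript
import { WebClient, type EventHandlers } from "@simfinity/constellation-client";

// Reconnect with a capped number of attempts; the session persists server-side,
// so connect() resumes the conversation where the stream dropped.
function connectWithRetry(
  client: WebClient,
  handlers: Omit<EventHandlers, "onStreamClosed">,
  attempts = 3
): Promise<void> {
  const tryConnect = async (left: number): Promise<void> => {
    await client.connect(true, {
      ...handlers,
      onStreamClosed: (reason) => {
        console.warn(`Stream closed (${reason}), ${left} retries left`);
        if (left > 0) setTimeout(() => tryConnect(left - 1).catch(console.error), 1000);
      },
    });
  };
  return tryConnect(attempts);
}
```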
package/dist/index.js
CHANGED

@@ -1,9 +1,265 @@
-// src/
-
-
-
-
-
+// src/WebClient.ts
+var WebClient = class {
+  constructor(config) {
+    this.ws = null;
+    this.sessionId = null;
+    this.config = config;
+  }
+  /**
+   * Start a persistent chat room on the server, allowing for re-connection
+   * when the streaming connection is lost. Once a session has been started it must
+   * be closed to release the context on the server side. See endSession().
+   *
+   * @remarks
+   * A session MUST exist to connect the stream.
+   *
+   * @param instructions to the model, added to its context. These are the "system" input instructions.
+   * @param audio whether to allow audio streaming or text-only
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * await startSession("You are a useful assistant", true)
+   * ```
+   */
+  async startSession(instructions = "", audio = false) {
+    const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
+      method: "POST",
+      headers: {
+        "Authorization": `Bearer ${this.config.key}`,
+        "Content-Type": "application/json",
+        "Accept": "application/json"
+      },
+      body: JSON.stringify({
+        llmProvider: this.config.llm,
+        systemPrompt: instructions,
+        audio
+      })
+    });
+    if (!response.ok) {
+      throw new Error(`Could not create a new chat session
+[${response.status}:${response.statusText}]`);
+    }
+    try {
+      const result = await response.json();
+      if (result) {
+        this.sessionId = result.sessionId;
+      }
+    } catch (error) {
+      throw new Error(`Failed to read the session-create response: ${error}`);
+    }
+  }
+  /**
+   * Close an opened, persistent chat room, effectively killing the streaming as well if still open.
+   * If there is no active session, this method does nothing.
+   *
+   * @remarks
+   * Not closing an opened session will not prevent starting a new one; however, this could
+   * starve the server's resources and affect service stability.
+   * Make sure to always close an opened session when finished.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  async endSession() {
+    if (!this.sessionId) {
+      return;
+    }
+    const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
+      method: "POST",
+      headers: {
+        "Authorization": `Bearer ${this.config.key}`,
+        "Content-Type": "application/json",
+        "Accept": "application/json"
+      },
+      body: JSON.stringify({ sessionId: this.sessionId })
+    });
+    if (!response.ok) {
+      throw new Error(`Could not close the chat session
+[${response.status}:${response.statusText}]`);
+    }
+    this.sessionId = "";
+  }
+  /**
+   * Following a successful startSession, open a streaming connection with the server.
+   * After a successful call to this connect() method, the client is ready to send & receive events.
+   *
+   * @remarks
+   * This method not only opens a websocket connection with the server but also initiates a
+   * handshake, where the server explicitly acknowledges and accepts the client connection.
+   *
+   * @param audio for a session that was created with audio capabilities, allows this stream to include audio
+   * @param handlers callback functions to handle every possible communication event coming from the server
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   *
+   * @example
+   * ```TypeScript
+   * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
+   * await connect(true, {
+   *   onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
+   *   onAudioResponseStart: () => { console.log("The model is talking"); },
+   *   onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
+   *   onAudioResponseEnd: () => { console.log("The model is done talking"); }
+   * })
+   * ```
+   */
+  async connect(audio = false, handlers) {
+    if (!this.sessionId) {
+      throw new Error("No open session");
+    }
+    const ws = new WebSocket(
+      `${this.config.streamingEndpoint}/web/${this.sessionId}`,
+      ["key", this.config.key]
+    );
+    if (!await this.serverHandShake(ws, audio)) {
+      ws.close();
+      throw new Error("Unable to establish the connection");
+    }
+    ws.onerror = (error) => {
+      handlers.onStreamClosed(`WebSocket error: ${error}`);
+    };
+    ws.onclose = (event) => {
+      handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
+    };
+    ws.onmessage = async (event) => {
+      var _a, _b, _c, _d, _e, _f;
+      try {
+        const data = JSON.parse(event.data);
+        switch (data.type) {
+          case "audio.response.start":
+            (_a = handlers.onAudioResponseStart) == null ? void 0 : _a.call(handlers);
+            break;
+          case "audio.response.append":
+            (_b = handlers.onAudioResponseChunk) == null ? void 0 : _b.call(handlers, data.data.audioData);
+            break;
+          case "audio.response.done":
+            (_c = handlers.onAudioResponseEnd) == null ? void 0 : _c.call(handlers);
+            break;
+          case "transcript.input":
+            (_d = handlers.onTranscriptInput) == null ? void 0 : _d.call(handlers, data.data.transcript);
+            break;
+          case "transcript.response":
+            (_e = handlers.onTranscriptResponse) == null ? void 0 : _e.call(handlers, data.data.transcript);
+            break;
+          case "technical.error":
+            (_f = handlers.onTechnicalError) == null ? void 0 : _f.call(handlers, data.data.error);
+            break;
+          default:
+            break;
+        }
+      } catch (error) {
+        console.error("Error processing message:", error, event.data);
+      }
+    };
+    this.ws = ws;
+  }
+  /**
+   * With an open streaming connection: send a text input message to the LLM. This will trigger a
+   * text response, as well as an audio response if the session was opened with audio mode active.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text", text: text }}})
+   * webSocket.send({ type: "response.create" })
+   *
+   * @param text input message
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendText(text) {
+    this.send("text.input.message", { text });
+  }
+  /**
+   * With an open streaming connection: send a chunk of raw audio data to the LLM.
+   * Audio data chunks do not systematically & immediately trigger a model response:
+   * they get accumulated by the model to form a single input message, until:
+   * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
+   * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
+   *
+   * @param chunk base64-encoded pcm16 audio data chunk
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  sendAudioChunk(chunk) {
+    this.send("audio.input.append", { audioData: chunk });
+  }
+  /**
+   * With an open streaming connection: triggers the processing of the audio data accumulated since
+   * the last model response. This effectively flushes the audio buffer and triggers a new model response.
+   *
+   * @remarks
+   * With openai for example, this triggers (pseudo-code):
+   * webSocket.send({ type: "input_audio_buffer.commit" })
+   *
+   * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
+   * configured to detect a ~1 second silence before automatically triggering a model response.
+   *
+   * Calling commitAudioChunksSent will always trigger a model response, even if no audio
+   * data was sent since the last response.
+   *
+   * @exception
+   * This method throws new Error(...) if unable to execute successfully for any reason.
+   */
+  commitAudioChunksSent() {
+    this.send("audio.input.commit");
+  }
+  // ================= Inner utils ================= //
+  async serverHandShake(ws, audio) {
+    return new Promise((resolve, reject) => {
+      const timer = setTimeout(() => {
+        cleanup();
+        reject(new Error("Handshake timeout"));
+      }, 5e3);
+      const cleanup = () => {
+        clearTimeout(timer);
+        ws.onopen = null;
+        ws.onerror = null;
+        ws.onmessage = null;
+      };
+      ws.onerror = (error) => {
+        cleanup();
+        reject(new Error(`WebSocket connection failed with ${error.type}`));
+      };
+      ws.onopen = () => {
+        const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
+        ws.send(JSON.stringify({
+          type: "connection.request",
+          data: { subscription: eventSubs }
+        }));
+      };
+      ws.onmessage = (event) => {
+        try {
+          const data = JSON.parse(event.data);
+          if (data.type === "connection.accepted")
+            resolve(true);
+          else
+            reject(new Error(`Received unexpected event: ${data.type}`));
+        } catch (e) {
+          reject(new Error(`An unexpected error occurred: ${e}`));
+        } finally {
+          cleanup();
+        }
+      };
+    });
+  }
+  send(type, data = null) {
+    if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
+      throw new Error("Stream is not opened");
+    }
+    this.ws.send(JSON.stringify({ type, ...data && { data } }));
+  }
+};
+var WebClient_default = WebClient;
 export {
-
+  WebClient_default as WebClient
 };
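Note: putting the ESM build to use, a minimal text-only session might look like the sketch below. The endpoints and key are placeholders, and top-level await assumes an ESM module context.

```TypeScript
import { WebClient } from "@simfinity/constellation-client";

const client = new WebClient({
  sessionEndpoint: "https://constellation.example.com",        // placeholder
  streamingEndpoint: "wss://constellation.example.com:30003",  // placeholder
  key: "my-key",                                               // placeholder
  llm: "openai",
  model: "gpt-4o-realtime-preview-2024-12-17",
});

try {
  await client.startSession("You are a useful assistant"); // audio defaults to false
  await client.connect(false, {
    onStreamClosed: (reason) => console.warn(`Closed: ${reason}`),
    onTranscriptResponse: (text) => console.log(`assistant> ${text}`),
  });
  client.sendText("Hello!");
} finally {
  await client.endSession(); // always release the server-side chat-room
}
```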
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@simfinity/constellation-client",
-  "version": "1.0.0",
+  "version": "1.0.2",
   "type": "module",
   "exports": {
     ".": {
@@ -9,7 +9,7 @@
     }
   },
   "main": "./dist/index.cjs",
-  "module": "./dist/index.
+  "module": "./dist/index.js",
   "types": "./dist/index.d.ts",
   "files": [
     "dist"
@@ -18,16 +18,12 @@
     "build": "tsup src/index.ts --format cjs,esm --dts"
   },
   "peerDependencies": {
-    "react": "^18.0.0 || ^19.0.0",
-    "react-dom": "^18.0.0 || ^19.0.0"
   },
   "author": "Simfinity",
   "license": "MIT",
   "dependencies": {
   },
   "devDependencies": {
-    "@types/react": "^19.2.11",
-    "@types/react-dom": "^19.2.3",
     "tsup": "^8.5.1",
     "typescript": "^5.9.3"
   }