@simfinity/constellation-client 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,125 @@
1
+ # @simfinity/constellation-client
2
+
3
+ ## Installation
4
+
5
+ ```bash
6
+ npm install @simfinity/constellation-client
7
+ # or
8
+ yarn add @simfinity/constellation-client
9
+ ```
10
+
11
+ ## Purpose & Usage
12
+ This package is a code wrapper to integrate the Simfinity constellation server.
13
+ The constellation server is a proxy managing streaming sessions with third party LLMs.
14
+ This package provides the programmatic functions covering the complete lifecycle of a streaming session:
15
+ - Open/start session
16
+ - Callbacks to continuously send and receive streamed data over a persistent connection
17
+ - Close/end session
18
+
19
+ ### Server implementation insight
20
+ The Constellation server is a chat-room & session manager: upon receiving a session-start request,
21
+ it creates a persistent chat-room, initiates the persistent connection with the LLM and configures it accordingly
22
+ (e.g. system instruction, temperature, audio, transcript subscription...).
23
+ Clients may lose connection with Constellation, but the chat-room will remain open on the server side;
24
+ this allows the client to reconnect and resume the session.
25
+ Clients MUST notify the server that a session has ended, so that Constellation can release allocated resources.
26
+
27
+ ### Example
28
+ Key steps in pseudo-code:
29
+ ```TypeScript
30
+ const client = new WebClient({
31
+ sessionEndpoint: "https://simfinity.constellation.com",
32
+ streamingEndpoint: "wss://simfinity.constellation.com:30003",
33
+ key: "my-key",
34
+ llm: "openai",
35
+ model: "gpt-4o-realtime-preview-2024-12-17",
36
+ });
37
+
38
+ try {
39
+ /* ... */
40
+
41
+ // Start a chat session
42
+ await startSession("You are a useful assistant", true);
43
+ await connect(true, {
44
+ onStreamClosed: (reason: string) => {
45
+ console.log("Stream connection lost");
46
+ },
47
+ onAudioResponseStart: () => {
48
+ console.log("The model is talking");
49
+ },
50
+ onAudioResponseChunk: (audioChunk: string) => {
51
+ audioPlayer.enqueue(audioChunk);
52
+ },
53
+ onAudioResponseEnd: () => {
54
+ console.log("The model is done talking");
55
+ }
56
+ });
57
+
58
+ /* ... */
59
+
60
+ sendAudioChunk("{PCM16 Base64-encoded data}");
61
+ commitAudioChunksSent();
62
+
63
+ /* ... */
64
+ }
65
+ catch {
66
+
67
+ }
68
+ finally {
69
+ await endSession();
70
+ }
71
+ ```
72
+
73
+ ### Types
74
+
75
+ **Configuration**
76
+
77
+ Configuration required to initiate a connection with the server:
78
+ In the client, values would typically be stored in secret stores & environment variables
79
+ ```TypeScript
80
+ export interface WebClientConfig {
81
+ sessionEndpoint: string;
82
+ streamingEndpoint: string;
83
+ key: string;
84
+ llm: LlmType;
85
+ model: string;
86
+ }
87
+ ```
88
+
89
+ **Event hooks**
90
+
91
+ Callback functions to catch all the propagated server events. Except for the
92
+ onStreamClosed event, assigning hooks is optional:
93
+ non-observed events will be silently ignored & lost.
94
+ ```TypeScript
95
+ export interface EventHandlers {
96
+ onStreamClosed: (reason: string) => void;
97
+ onAudioResponseStart?: () => void;
98
+ onAudioResponseChunk?: (audioChunk: string) => void;
99
+ onAudioResponseEnd?: () => void;
100
+ onTranscriptInput?: (transcript: string) => void;
101
+ onTranscriptResponse?: (transcript: string) => void;
102
+ onTechnicalError?: (error: string) => void;
103
+ }
104
+ ```
105
+
106
+ ### Audio
107
+
108
+ * The server expects exclusively base64-encoded PCM16 format & sends responses in the same format in return
109
+ * The server implements VAD - voice activity detection - configured to detect 1s silences as a response trigger
110
+ * Therefore, input audio data chunks can be streamed immediately without buffering
111
+ * Client should however implement voice detection as well to reduce network consumption
112
+ * 500ms ring buffer continuously filled with audio input
113
+ * Noise detection with minimum threshold
114
+ *
115
+
116
+ ### Text & Transcript
117
+
118
+ * The transcript inputs/responses callbacks carry both the text exchanges and transcription of audio exchanges
119
+ * In an audio session, text and audio inputs will trigger:
120
+ * a mirrored transcript text through onTranscriptInput
121
+ * an audio response through onAudioResponseChunk
122
+ * a text transcript of the audio response through onTranscriptResponse
123
+ * In a text-only session, a text input will trigger:
124
+ * a mirrored transcript text through onTranscriptInput
125
+ * a text response through the onTranscriptResponse callback
package/dist/index.cjs CHANGED
@@ -20,17 +20,273 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
- Dummy: () => Dummy_default
23
+ WebClient: () => WebClient_default
24
24
  });
25
25
  module.exports = __toCommonJS(index_exports);
26
26
 
27
- // src/Dummy.tsx
28
- var import_jsx_runtime = require("react/jsx-runtime");
29
- function Dummy({ label }) {
30
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { children: label });
31
- }
32
- var Dummy_default = Dummy;
27
+ // src/WebClient.ts
28
+ var WebClient = class {
29
+ constructor(config) {
30
+ this.ws = null;
31
+ this.sessionId = null;
32
+ this.config = config;
33
+ }
34
+ /**
35
+ * Start a persistent chat room on the server, allowing for re-connection,
36
+ * when the streaming connection is lost. Once a session was started it must
37
+ * be closed to release the context on server side. See endSession().
38
+ *
39
+ * @remarks
40
+ * A session MUST exist to connect the stream.
41
+ *
42
+ * @param instructions to the model, added to its context. This is the "system" input instructions.
43
+ * @param audio whether to allow audio streaming or text-only
44
+ *
45
+ * @exception
46
+ * This method throws new Error(...) if unable to execute successfully for any reason.
47
+ *
48
+ * @example
49
+ * ```TypeScript
50
+ * await startSession("You are useful assistant", true)
51
+ * ```
52
+ */
53
+ async startSession(instructions = "", audio = false) {
54
+ const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
55
+ method: "POST",
56
+ headers: {
57
+ "Authorization": `Bearer ${this.config.key}`,
58
+ "Content-Type": "application/json",
59
+ "Accept": "application/json"
60
+ },
61
+ body: JSON.stringify({
62
+ llmProvider: this.config.llm,
63
+ systemPrompt: instructions,
64
+ audio
65
+ })
66
+ });
67
+ if (!response.ok) {
68
+ throw new Error(`Could not create a new chat session
69
+ [${response.status}:${response.statusText}]`);
70
+ }
71
+ try {
72
+ const result = await response.json();
73
+ if (result) {
74
+ this.sessionId = result.sessionId;
75
+ }
76
+ } catch (error) {
77
+ throw `Failed to read session create response: ${error}`;
78
+ }
79
+ }
80
+ /**
81
+ * Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
82
+ * If there is no active session, this method does nothing.
83
+ *
84
+ * @remarks
85
+ * Not closing an opened session will not prevent from starting a new one, however this could
86
+ * starve the server resources and affect service stability.
87
+ * Make sure to always close an opened session when finished.
88
+ *
89
+ * @exception
90
+ * This method throws new Error(...) if unable to execute successfully for any reason.
91
+ */
92
+ async endSession() {
93
+ if (!this.sessionId) {
94
+ return;
95
+ }
96
+ const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
97
+ method: "POST",
98
+ headers: {
99
+ "Authorization": `Bearer ${this.config.key}`,
100
+ "Content-Type": "application/json",
101
+ "Accept": "application/json"
102
+ },
103
+ body: JSON.stringify({ sessionId: this.sessionId })
104
+ });
105
+ if (!response.ok) {
106
+ throw new Error(`Could not close the chat session
107
+ [${response.status}:${response.statusText}]`);
108
+ }
109
+ this.sessionId = "";
110
+ }
111
+ /**
112
+ * Following a successful startSession, open a streaming connection with the server.
113
+ * After a successful call to this connect() method, the client is ready to send & receive events.
114
+ *
115
+ * @remarks
116
+ * This method not only opens a websocket connection with the server but also initiates a
117
+ * handshake, where the server explicitly acknowledges and accepts the client connection.
118
+ *
119
+ * @param audio for a session that was created with audio capabilities, allows this streaming to include audio
120
+ * @param handlers callback functions to handle every possible communication events coming from the server
121
+ *
122
+ * @exception
123
+ * This method throws new Error(...) if unable to execute successfully for any reason.
124
+ *
125
+ * @example
126
+ * ```TypeScript
127
+ * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
128
+ * await connect(true, {
129
+ * onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
130
+ * onAudioResponseStart: () => { console.log("The model is talking"); },
131
+ * onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
132
+ * onAudioResponseEnd: () => { console.log("The model is done talking"); }
133
+ * })
134
+ * ```
135
+ */
136
+ async connect(audio = false, handlers) {
137
+ if (!this.sessionId) {
138
+ throw new Error("No open session");
139
+ }
140
+ const ws = new WebSocket(
141
+ `${this.config.streamingEndpoint}/web/${this.sessionId}`,
142
+ ["key", this.config.key]
143
+ );
144
+ if (!await this.serverHandShake(ws, audio)) {
145
+ ws.close();
146
+ throw new Error("Unable to establish the connection");
147
+ }
148
+ ws.onerror = (error) => {
149
+ handlers.onStreamClosed(`WebSocket error: ${error}`);
150
+ };
151
+ ws.onclose = (event) => {
152
+ handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
153
+ };
154
+ ws.onmessage = async (event) => {
155
+ var _a, _b, _c, _d, _e, _f;
156
+ try {
157
+ const data = JSON.parse(event.data);
158
+ switch (data.type) {
159
+ case "audio.response.start":
160
+ (_a = handlers.onAudioResponseStart) == null ? void 0 : _a.call(handlers);
161
+ break;
162
+ case "audio.response.append":
163
+ (_b = handlers.onAudioResponseChunk) == null ? void 0 : _b.call(handlers, data.data.audioData);
164
+ break;
165
+ case "audio.response.done":
166
+ (_c = handlers.onAudioResponseEnd) == null ? void 0 : _c.call(handlers);
167
+ break;
168
+ case "transcript.input":
169
+ (_d = handlers.onTranscriptInput) == null ? void 0 : _d.call(handlers, data.data.transcript);
170
+ break;
171
+ case "transcript.response":
172
+ (_e = handlers.onTranscriptResponse) == null ? void 0 : _e.call(handlers, data.data.transcript);
173
+ break;
174
+ case "technical.error":
175
+ (_f = handlers.onTechnicalError) == null ? void 0 : _f.call(handlers, data.data.error);
176
+ break;
177
+ default:
178
+ break;
179
+ }
180
+ } catch (error) {
181
+ console.error("Error processing message:", error, event.data);
182
+ }
183
+ };
184
+ this.ws = ws;
185
+ }
186
+ /**
187
+ * With an opened streaming connection: send a text input message to the LLM. This will trigger a
188
+ * text response as well as an audio response if the session was opened with audio mode active.
189
+ *
190
+ * @remarks
191
+ * With openai for example, this triggers (pseudo-code):
192
+ * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text": text: text }}})
193
+ * webSocket.send({ type: "response.create"})
194
+ *
195
+ * @param text input message
196
+ *
197
+ * @exception
198
+ * This method throws new Error(...) if unable to execute successfully for any reason.
199
+ */
200
+ sendText(text) {
201
+ this.send("text.input.message", { text });
202
+ }
203
+ /**
204
+ * With an opened streaming connection: send a chunk of raw audio data to the LLM.
205
+ * Audio data chunks do not systematically & immediately trigger a model response:
206
+ * They get accumulated by the model to form a single input message, until:
207
+ * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
208
+ * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
209
+ *
210
+ * @remarks
211
+ * With openai for example, this triggers (pseudo-code):
212
+ * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
213
+ *
214
+ * @param chunk base64-encoded pcm16 audio data chunk
215
+ *
216
+ * @exception
217
+ * This method throws new Error(...) if unable to execute successfully for any reason.
218
+ */
219
+ sendAudioChunk(chunk) {
220
+ this.send("audio.input.append", { audioData: chunk });
221
+ }
222
+ /**
223
+ * With an opened streaming connection: triggers the processing of the accumulated audio data since
224
+ * the last model response. This effectively flushes the audio buffer and triggers a new model response.
225
+ *
226
+ * @remarks
227
+ * With openai for example, this triggers (pseudo-code):
228
+ * webSocket.send({ type: "input_audio_buffer.commit" })
229
+ *
230
+ * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
231
+ * configured to detect a ~1 second silence, before automatically triggering a model response.
232
+ *
233
+ * Calling commitAudioChunksSent will always trigger a model response, even if no audio
234
+ * data was sent since the last response.
235
+ *
236
+ * @exception
237
+ * This method throws new Error(...) if unable to execute successfully for any reason.
238
+ */
239
+ commitAudioChunksSent() {
240
+ this.send("audio.input.commit");
241
+ }
242
+ // ================= Inner utils ================= //
243
+ async serverHandShake(ws, audio) {
244
+ return new Promise((resolve, reject) => {
245
+ const timer = setTimeout(() => {
246
+ cleanup();
247
+ reject(new Error("Handshake timeout"));
248
+ }, 5e3);
249
+ const cleanup = () => {
250
+ clearTimeout(timer);
251
+ ws.onopen = null;
252
+ ws.onerror = null;
253
+ ws.onmessage = null;
254
+ };
255
+ ws.onerror = (error) => {
256
+ cleanup();
257
+ reject(new Error(`WebSocket connection failed with ${error.type}`));
258
+ };
259
+ ws.onopen = () => {
260
+ const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
261
+ ws.send(JSON.stringify({
262
+ type: "connection.request",
263
+ data: { subscription: eventSubs }
264
+ }));
265
+ };
266
+ ws.onmessage = (event) => {
267
+ try {
268
+ const data = JSON.parse(event.data);
269
+ if (data.type === "connection.accepted")
270
+ resolve(true);
271
+ else
272
+ reject(new Error(`Received unexpected event: ${data.type}`));
273
+ } catch (e) {
274
+ reject(new Error(`An unexpected error occurred: ${e}`));
275
+ } finally {
276
+ cleanup();
277
+ }
278
+ };
279
+ });
280
+ }
281
+ send(type, data = null) {
282
+ if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
283
+ throw new Error("Stream is not opened");
284
+ }
285
+ this.ws.send(JSON.stringify({ type, ...data && { data } }));
286
+ }
287
+ };
288
+ var WebClient_default = WebClient;
33
289
  // Annotate the CommonJS export names for ESM import in node:
34
290
  0 && (module.exports = {
35
- Dummy
291
+ WebClient
36
292
  });
package/dist/index.d.cts CHANGED
@@ -1,8 +1,186 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
1
+ /**
2
+ * Available server-side LLM types
3
+ */
4
+ type LlmType = "openai";
5
+ /**
6
+ * Configuration required to initiate a connection with the stream server:
7
+ *
8
+ * @sessionEndpoint : REST base URL to the constellation API for managing sessions
9
+ * @streamingEndpoint : WebSocket endpoint to the constellation server
10
+ * @key : Simfinity API secret key granting access to the server API
11
+ * @llm : which LLM service to connect to
12
+ * @model : depends on the LLM service. This is the model name as defined by the LLM service
13
+ *
14
+ * @example
15
+ * ```TypeScript
16
+ * {
17
+ * sessionEndpoint: "https://simfinity.constellation.com",
18
+ * streamingEndpoint: "wss://simfinity.constellation.com:30003",
19
+ * key: "some-secret-key"
20
+ * llm: "openai",
21
+ * model: "gpt-4o-realtime-preview-2024-12-17"
22
+ * }
23
+ * ```
24
+ */
25
+ interface WebClientConfig {
26
+ sessionEndpoint: string;
27
+ streamingEndpoint: string;
28
+ key: string;
29
+ llm: LlmType;
30
+ model: string;
31
+ }
32
+ /**
33
+ * Callback functions to catch all the propagated server events.
34
+ *
35
+ * @onStreamClosed the streaming session (web socket) shut down
36
+ * @onAudioResponseStart the LLM service is about to respond with streaming audio data
37
+ * @onAudioResponseChunk a new chunk of response audio data was received
38
+ * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
39
+ * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
40
+ * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
41
+ * @onTechnicalError any technical issue encountered during the stream
42
+ *
43
+ * @remarks
44
+ * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
45
+ * However the events and information attached will be lost.
46
+ *
47
+ * The transcript events have dual purpose:
48
+ * - In an audio exchange, they hold the text transcripts of the audio conversation
49
+ * - In a text exchange, they hold the actual text messages of the conversation
50
+ */
51
+ interface EventHandlers {
52
+ onStreamClosed: (reason: string) => void;
53
+ onAudioResponseStart?: () => void;
54
+ onAudioResponseChunk?: (audioChunk: string) => void;
55
+ onAudioResponseEnd?: () => void;
56
+ onTranscriptInput?: (transcript: string) => void;
57
+ onTranscriptResponse?: (transcript: string) => void;
58
+ onTechnicalError?: (error: string) => void;
59
+ }
60
+ /**
61
+ * This class is a code wrapper to integrate the Simfinity constellation server.
62
+ * The constellation server is a proxy managing streaming sessions with third party LLMs.
63
+ * This class manages the complete lifecycle of a streaming session:
64
+ * - Open/start session
65
+ * - Continuously sending and receiving streamed data over persistent connection
66
+ * - Close/end session
67
+ */
68
+ declare class WebClient {
69
+ private config;
70
+ private ws;
71
+ private sessionId;
72
+ constructor(config: WebClientConfig);
73
+ /**
74
+ * Start a persistent chat room on the server, allowing for re-connection,
75
+ * when the streaming connection is lost. Once a session was started it must
76
+ * be closed to release the context on server side. See endSession().
77
+ *
78
+ * @remarks
79
+ * A session MUST exist to connect the stream.
80
+ *
81
+ * @param instructions to the model, added to its context. This is the "system" input instructions.
82
+ * @param audio whether to allow audio streaming or text-only
83
+ *
84
+ * @exception
85
+ * This method throws new Error(...) if unable to execute successfully for any reason.
86
+ *
87
+ * @example
88
+ * ```TypeScript
89
+ * await startSession("You are useful assistant", true)
90
+ * ```
91
+ */
92
+ startSession(instructions?: string, audio?: boolean): Promise<void>;
93
+ /**
94
+ * Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
95
+ * If there is no active session, this method does nothing.
96
+ *
97
+ * @remarks
98
+ * Not closing an opened session will not prevent from starting a new one, however this could
99
+ * starve the server resources and affect service stability.
100
+ * Make sure to always close an opened session when finished.
101
+ *
102
+ * @exception
103
+ * This method throws new Error(...) if unable to execute successfully for any reason.
104
+ */
105
+ endSession(): Promise<void>;
106
+ /**
107
+ * Following a successful startSession, open a streaming connection with the server.
108
+ * After a successful call to this connect() method, the client is ready to send & receive events.
109
+ *
110
+ * @remarks
111
+ * This method not only opens a websocket connection with the server but also initiates a
112
+ * handshake, where the server explicitly acknowledges and accepts the client connection.
113
+ *
114
+ * @param audio for a session that was created with audio capabilities, allows this streaming to include audio
115
+ * @param handlers callback functions to handle every possible communication events coming from the server
116
+ *
117
+ * @exception
118
+ * This method throws new Error(...) if unable to execute successfully for any reason.
119
+ *
120
+ * @example
121
+ * ```TypeScript
122
+ * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
123
+ * await connect(true, {
124
+ * onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
125
+ * onAudioResponseStart: () => { console.log("The model is talking"); },
126
+ * onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
127
+ * onAudioResponseEnd: () => { console.log("The model is done talking"); }
128
+ * })
129
+ * ```
130
+ */
131
+ connect(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
132
+ /**
133
+ * With an opened streaming connection: send a text input message to the LLM. This will trigger a
134
+ * text response as well as an audio response if the session was opened with audio mode active.
135
+ *
136
+ * @remarks
137
+ * With openai for example, this triggers (pseudo-code):
138
+ * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text": text: text }}})
139
+ * webSocket.send({ type: "response.create"})
140
+ *
141
+ * @param text input message
142
+ *
143
+ * @exception
144
+ * This method throws new Error(...) if unable to execute successfully for any reason.
145
+ */
146
+ sendText(text: string): void;
147
+ /**
148
+ * With an opened streaming connection: send a chunk of raw audio data to the LLM.
149
+ * Audio data chunks do not systematically & immediately trigger a model response:
150
+ * They get accumulated by the model to form a single input message, until:
151
+ * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
152
+ * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
153
+ *
154
+ * @remarks
155
+ * With openai for example, this triggers (pseudo-code):
156
+ * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
157
+ *
158
+ * @param chunk base64-encoded pcm16 audio data chunk
159
+ *
160
+ * @exception
161
+ * This method throws new Error(...) if unable to execute successfully for any reason.
162
+ */
163
+ sendAudioChunk(chunk: string): void;
164
+ /**
165
+ * With an opened streaming connection: triggers the processing of the accumulated audio data since
166
+ * the last model response. This effectively flushes the audio buffer and triggers a new model response.
167
+ *
168
+ * @remarks
169
+ * With openai for example, this triggers (pseudo-code):
170
+ * webSocket.send({ type: "input_audio_buffer.commit" })
171
+ *
172
+ * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
173
+ * configured to detect a ~1 second silence, before automatically triggering a model response.
174
+ *
175
+ * Calling commitAudioChunksSent will always trigger a model response, even if no audio
176
+ * data was sent since the last response.
177
+ *
178
+ * @exception
179
+ * This method throws new Error(...) if unable to execute successfully for any reason.
180
+ */
181
+ commitAudioChunksSent(): void;
182
+ private serverHandShake;
183
+ private send;
184
+ }
2
185
 
3
- type DummyProps = {
4
- label: string;
5
- };
6
- declare function Dummy({ label }: DummyProps): react_jsx_runtime.JSX.Element;
7
-
8
- export { Dummy, type DummyProps };
186
+ export { type EventHandlers, type LlmType, WebClient, type WebClientConfig };
package/dist/index.d.ts CHANGED
@@ -1,8 +1,186 @@
1
- import * as react_jsx_runtime from 'react/jsx-runtime';
1
+ /**
2
+ * Available server-side LLM types
3
+ */
4
+ type LlmType = "openai";
5
+ /**
6
+ * Configuration required to initiate a connection with the stream server:
7
+ *
8
+ * @sessionEndpoint : REST base URL to the constellation API for managing sessions
9
+ * @streamingEndpoint : WebSocket endpoint to the constellation server
10
+ * @key : Simfinity API secret key granting access to the server API
11
+ * @llm : which LLM service to connect to
12
+ * @model : depends on the LLM service. This is the model name as defined by the LLM service
13
+ *
14
+ * @example
15
+ * ```TypeScript
16
+ * {
17
+ * sessionEndpoint: "https://simfinity.constellation.com",
18
+ * streamingEndpoint: "wss://simfinity.constellation.com:30003",
19
+ * key: "some-secret-key"
20
+ * llm: "openai",
21
+ * model: "gpt-4o-realtime-preview-2024-12-17"
22
+ * }
23
+ * ```
24
+ */
25
+ interface WebClientConfig {
26
+ sessionEndpoint: string;
27
+ streamingEndpoint: string;
28
+ key: string;
29
+ llm: LlmType;
30
+ model: string;
31
+ }
32
+ /**
33
+ * Callback functions to catch all the propagated server events.
34
+ *
35
+ * @onStreamClosed the streaming session (web socket) shut down
36
+ * @onAudioResponseStart the LLM service is about to respond with streaming audio data
37
+ * @onAudioResponseChunk a new chunk of response audio data was received
38
+ * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
39
+ * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
40
+ * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
41
+ * @onTechnicalError any technical issue encountered during the stream
42
+ *
43
+ * @remarks
44
+ * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
45
+ * However the events and information attached will be lost.
46
+ *
47
+ * The transcript events have dual purpose:
48
+ * - In an audio exchange, they hold the text transcripts of the audio conversation
49
+ * - In a text exchange, they hold the actual text messages of the conversation
50
+ */
51
+ interface EventHandlers {
52
+ onStreamClosed: (reason: string) => void;
53
+ onAudioResponseStart?: () => void;
54
+ onAudioResponseChunk?: (audioChunk: string) => void;
55
+ onAudioResponseEnd?: () => void;
56
+ onTranscriptInput?: (transcript: string) => void;
57
+ onTranscriptResponse?: (transcript: string) => void;
58
+ onTechnicalError?: (error: string) => void;
59
+ }
60
+ /**
61
+ * This class is a code wrapper to integrate the Simfinity constellation server.
62
+ * The constellation server is a proxy managing streaming sessions with third party LLMs.
63
+ * This class manages the complete lifecycle of a streaming session:
64
+ * - Open/start session
65
+ * - Continuously sending and receiving streamed data over persistent connection
66
+ * - Close/end session
67
+ */
68
+ declare class WebClient {
69
+ private config;
70
+ private ws;
71
+ private sessionId;
72
+ constructor(config: WebClientConfig);
73
+ /**
74
+ * Start a persistent chat room on the server, allowing for re-connection,
75
+ * when the streaming connection is lost. Once a session was started it must
76
+ * be closed to release the context on server side. See endSession().
77
+ *
78
+ * @remarks
79
+ * A session MUST exist to connect the stream.
80
+ *
81
+ * @param instructions to the model, added to its context. This is the "system" input instructions.
82
+ * @param audio whether to allow audio streaming or text-only
83
+ *
84
+ * @exception
85
+ * This method throws new Error(...) if unable to execute successfully for any reason.
86
+ *
87
+ * @example
88
+ * ```TypeScript
89
+ * await startSession("You are useful assistant", true)
90
+ * ```
91
+ */
92
+ startSession(instructions?: string, audio?: boolean): Promise<void>;
93
+ /**
94
+ * Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
95
+ * If there is no active session, this method does nothing.
96
+ *
97
+ * @remarks
98
+ * Not closing an opened session will not prevent from starting a new one, however this could
99
+ * starve the server resources and affect service stability.
100
+ * Make sure to always close an opened session when finished.
101
+ *
102
+ * @exception
103
+ * This method throws new Error(...) if unable to execute successfully for any reason.
104
+ */
105
+ endSession(): Promise<void>;
106
+ /**
107
+ * Following a successful startSession, open a streaming connection with the server.
108
+ * After a successful call to this connect() method, the client is ready to send & receive events.
109
+ *
110
+ * @remarks
111
+ * This method not only opens a websocket connection with the server but also initiates a
112
+ * handshake, where the server explicitly acknowledges and accepts the client connection.
113
+ *
114
+ * @param audio for a session that was created with audio capabilities, allows this streaming to include audio
115
+ * @param handlers callback functions to handle every possible communication events coming from the server
116
+ *
117
+ * @exception
118
+ * This method throws new Error(...) if unable to execute successfully for any reason.
119
+ *
120
+ * @example
121
+ * ```TypeScript
122
+ * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
123
+ * await connect(true, {
124
+ * onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
125
+ * onAudioResponseStart: () => { console.log("The model is talking"); },
126
+ * onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
127
+ * onAudioResponseEnd: () => { console.log("The model is done talking"); }
128
+ * })
129
+ * ```
130
+ */
131
+ connect(audio: boolean | undefined, handlers: EventHandlers): Promise<void>;
132
+ /**
133
+ * With an opened streaming connection: send a text input message to the LLM. This will trigger a
134
+ * text response as well as an audio response if the session was opened with audio mode active.
135
+ *
136
+ * @remarks
137
+ * With openai for example, this triggers (pseudo-code):
138
+ * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text": text: text }}})
139
+ * webSocket.send({ type: "response.create"})
140
+ *
141
+ * @param text input message
142
+ *
143
+ * @exception
144
+ * This method throws new Error(...) if unable to execute successfully for any reason.
145
+ */
146
+ sendText(text: string): void;
147
+ /**
148
+ * With an opened streaming connection: send a chunk of raw audio data to the LLM.
149
+ * Audio data chunks do not systematically & immediately trigger a model response:
150
+ * They get accumulated by the model to form a single input message, until:
151
+ * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
152
+ * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
153
+ *
154
+ * @remarks
155
+ * With openai for example, this triggers (pseudo-code):
156
+ * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
157
+ *
158
+ * @param chunk base64-encoded pcm16 audio data chunk
159
+ *
160
+ * @exception
161
+ * This method throws new Error(...) if unable to execute successfully for any reason.
162
+ */
163
+ sendAudioChunk(chunk: string): void;
164
+ /**
165
+ * With an opened streaming connection: triggers the processing of the accumulated audio data since
166
+ * the last model response. This effectively flushes the audio buffer and triggers a new model response.
167
+ *
168
+ * @remarks
169
+ * With openai for example, this triggers (pseudo-code):
170
+ * webSocket.send({ type: "input_audio_buffer.commit" })
171
+ *
172
+ * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
173
+ * configured to detect a ~1 second silence, before automatically triggering a model response.
174
+ *
175
+ * Calling commitAudioChunksSent will always trigger a model response, even if no audio
176
+ * data was sent since the last response.
177
+ *
178
+ * @exception
179
+ * This method throws new Error(...) if unable to execute successfully for any reason.
180
+ */
181
+ commitAudioChunksSent(): void;
182
+ private serverHandShake;
183
+ private send;
184
+ }
2
185
 
3
- type DummyProps = {
4
- label: string;
5
- };
6
- declare function Dummy({ label }: DummyProps): react_jsx_runtime.JSX.Element;
7
-
8
- export { Dummy, type DummyProps };
186
+ export { type EventHandlers, type LlmType, WebClient, type WebClientConfig };
package/dist/index.js CHANGED
@@ -1,9 +1,265 @@
1
- // src/Dummy.tsx
2
- import { jsx } from "react/jsx-runtime";
3
- function Dummy({ label }) {
4
- return /* @__PURE__ */ jsx("div", { children: label });
5
- }
6
- var Dummy_default = Dummy;
1
+ // src/WebClient.ts
2
+ var WebClient = class {
3
+ constructor(config) {
4
+ this.ws = null;
5
+ this.sessionId = null;
6
+ this.config = config;
7
+ }
8
+ /**
9
+ * Start a persistent chat room on the server, allowing for re-connection,
10
+ * when the streaming connection is lost. Once a session was started it must
11
+ * be closed to release the context on server side. See endSession().
12
+ *
13
+ * @remarks
14
+ * A session MUST exist to connect the stream.
15
+ *
16
+ * @param instructions to the model, added to its context. This is the "system" input instructions.
17
+ * @param audio whether to allow audio streaming or text-only
18
+ *
19
+ * @exception
20
+ * This method throws new Error(...) if unable to execute successfully for any reason.
21
+ *
22
+ * @example
23
+ * ```TypeScript
24
+ * await startSession("You are useful assistant", true)
25
+ * ```
26
+ */
27
+ async startSession(instructions = "", audio = false) {
28
+ const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
29
+ method: "POST",
30
+ headers: {
31
+ "Authorization": `Bearer ${this.config.key}`,
32
+ "Content-Type": "application/json",
33
+ "Accept": "application/json"
34
+ },
35
+ body: JSON.stringify({
36
+ llmProvider: this.config.llm,
37
+ systemPrompt: instructions,
38
+ audio
39
+ })
40
+ });
41
+ if (!response.ok) {
42
+ throw new Error(`Could not create a new chat session
43
+ [${response.status}:${response.statusText}]`);
44
+ }
45
+ try {
46
+ const result = await response.json();
47
+ if (result) {
48
+ this.sessionId = result.sessionId;
49
+ }
50
+ } catch (error) {
51
+ throw `Failed to read session create response: ${error}`;
52
+ }
53
+ }
54
+ /**
55
+ * Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
56
+ * If there is no active session, this method does nothing.
57
+ *
58
+ * @remarks
59
+ * Not closing an opened session will not prevent from starting a new one, however this could
60
+ * starve the server resources and affect service stability.
61
+ * Make sure to always close an opened session when finished.
62
+ *
63
+ * @exception
64
+ * This method throws new Error(...) if unable to execute successfully for any reason.
65
+ */
66
+ async endSession() {
67
+ if (!this.sessionId) {
68
+ return;
69
+ }
70
+ const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
71
+ method: "POST",
72
+ headers: {
73
+ "Authorization": `Bearer ${this.config.key}`,
74
+ "Content-Type": "application/json",
75
+ "Accept": "application/json"
76
+ },
77
+ body: JSON.stringify({ sessionId: this.sessionId })
78
+ });
79
+ if (!response.ok) {
80
+ throw new Error(`Could not close the chat session
81
+ [${response.status}:${response.statusText}]`);
82
+ }
83
+ this.sessionId = "";
84
+ }
85
+ /**
86
+ * Following a successful startSession, open a streaming connection with the server.
87
+ * After a successful call to this connect() method, the client is ready to send & receive events.
88
+ *
89
+ * @remarks
90
+ * This method not only opens a websocket connection with the server but also initiates a
91
+ * handshake, where the server explicitly acknowledges and accepts the client connection.
92
+ *
93
+ * @param audio for a session that created with audio capabilities, allows this streaming to include audio
94
+ * @param handlers callback functions to handle every possible communication events coming from the server
95
+ *
96
+ * @exception
97
+ * This method throws new Error(...) if unable to execute successfully for any reason.
98
+ *
99
+ * @example
100
+ * ```TypeScript
101
+ * // Open an audio connection: in this example we choose to handle only audio data, and ignore text.
102
+ * await connect(true, {
103
+ * onStreamClosed: (reason: string) => { console.log("Stream connection lost"); },
104
+ * onAudioResponseStart: () => { console.log("The model is talking"); },
105
+ * onAudioResponseChunk: (audioChunk: string) => { audioPlayer.enqueue(audioChunk); },
106
+ * onAudioResponseEnd: () => { console.log("The model is done talking"); }
107
+ * })
108
+ * ```
109
+ */
110
+ async connect(audio = false, handlers) {
111
+ if (!this.sessionId) {
112
+ throw new Error("No open session");
113
+ }
114
+ const ws = new WebSocket(
115
+ `${this.config.streamingEndpoint}/web/${this.sessionId}`,
116
+ ["key", this.config.key]
117
+ );
118
+ if (!await this.serverHandShake(ws, audio)) {
119
+ ws.close();
120
+ throw new Error("Unable to establish the connection");
121
+ }
122
+ ws.onerror = (error) => {
123
+ handlers.onStreamClosed(`WebSocket error: ${error}`);
124
+ };
125
+ ws.onclose = (event) => {
126
+ handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
127
+ };
128
+ ws.onmessage = async (event) => {
129
+ var _a, _b, _c, _d, _e, _f;
130
+ try {
131
+ const data = JSON.parse(event.data);
132
+ switch (data.type) {
133
+ case "audio.response.start":
134
+ (_a = handlers.onAudioResponseStart) == null ? void 0 : _a.call(handlers);
135
+ break;
136
+ case "audio.response.append":
137
+ (_b = handlers.onAudioResponseChunk) == null ? void 0 : _b.call(handlers, data.data.audioData);
138
+ break;
139
+ case "audio.response.done":
140
+ (_c = handlers.onAudioResponseEnd) == null ? void 0 : _c.call(handlers);
141
+ break;
142
+ case "transcript.input":
143
+ (_d = handlers.onTranscriptInput) == null ? void 0 : _d.call(handlers, data.data.transcript);
144
+ break;
145
+ case "transcript.response":
146
+ (_e = handlers.onTranscriptResponse) == null ? void 0 : _e.call(handlers, data.data.transcript);
147
+ break;
148
+ case "technical.error":
149
+ (_f = handlers.onTechnicalError) == null ? void 0 : _f.call(handlers, data.data.error);
150
+ break;
151
+ default:
152
+ break;
153
+ }
154
+ } catch (error) {
155
+ console.error("Error processing message:", error, event.data);
156
+ }
157
+ };
158
+ this.ws = ws;
159
+ }
160
+ /**
161
+ * With an opened streaming connection: send a text input message to the LLM. This will trigger a
162
+ * text response as well as an audio response if the session was opened with audio mode active.
163
+ *
164
+ * @remarks
165
+ * With openai for example, this triggers (pseudo-code):
166
+ * webSocket.send({ type: "conversation.item.create", item: { content: { type: "input_text": text: text }}})
167
+ * webSocket.send({ type: "response.create"})
168
+ *
169
+ * @param text input message
170
+ *
171
+ * @exception
172
+ * This method throws new Error(...) if unable to execute successfully for any reason.
173
+ */
174
+ sendText(text) {
175
+ this.send("text.input.message", { text });
176
+ }
177
+ /**
178
+ * With an opened streaming connection: send a chunk of raw audio data to the LLM.
179
+ * Audio data chunks do not systematically & immediately trigger a model response:
180
+ * They get accumulated by the model to form a single input message, until:
181
+ * - commitAudioChunksSent is called, which flushes the accumulated audio and triggers a response
182
+ * - silence is detected for more than 1 second, which flushes the accumulated audio and triggers a response
183
+ *
184
+ * @remarks
185
+ * With openai for example, this triggers (pseudo-code):
186
+ * webSocket.send({ type: "input_audio_buffer.append", audio: "...audio data chunk..." })
187
+ *
188
+ * @param chunk base64-encoded pcm16 audio data chunk
189
+ *
190
+ * @exception
191
+ * This method throws new Error(...) if unable to execute successfully for any reason.
192
+ */
193
+ sendAudioChunk(chunk) {
194
+ this.send("audio.input.append", { audioData: chunk });
195
+ }
196
+ /**
197
+ * With an opened streaming connection: triggers the processing of the accumulated audio data since
198
+ * the last model response. This effectively flushes the audio buffer and triggers a new model response.
199
+ *
200
+ * @remarks
201
+ * With openai for example, this triggers (pseudo-code):
202
+ * webSocket.send({ type: "input_audio_buffer.commit" })
203
+ *
204
+ * Calling commitAudioChunksSent is optional because the constellation server uses server_vad,
205
+ * configured to detect a ~1 second silence, before automatically triggering a model response.
206
+ *
207
+ * Calling commitAudioChunksSent will always trigger a model response, even if no audio
208
+ * data was sent since the last response.
209
+ *
210
+ * @exception
211
+ * This method throws new Error(...) if unable to execute successfully for any reason.
212
+ */
213
+ commitAudioChunksSent() {
214
+ this.send("audio.input.commit");
215
+ }
216
+ // ================= Inner utils ================= //
217
+ async serverHandShake(ws, audio) {
218
+ return new Promise((resolve, reject) => {
219
+ const timer = setTimeout(() => {
220
+ cleanup();
221
+ reject(new Error("Handshake timeout"));
222
+ }, 5e3);
223
+ const cleanup = () => {
224
+ clearTimeout(timer);
225
+ ws.onopen = null;
226
+ ws.onerror = null;
227
+ ws.onmessage = null;
228
+ };
229
+ ws.onerror = (error) => {
230
+ cleanup();
231
+ reject(new Error(`WebSocket connection failed with ${error.type}`));
232
+ };
233
+ ws.onopen = () => {
234
+ const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
235
+ ws.send(JSON.stringify({
236
+ type: "connection.request",
237
+ data: { subscription: eventSubs }
238
+ }));
239
+ };
240
+ ws.onmessage = (event) => {
241
+ try {
242
+ const data = JSON.parse(event.data);
243
+ if (data.type === "connection.accepted")
244
+ resolve(true);
245
+ else
246
+ reject(new Error(`Received unexpected event: ${data.type}`));
247
+ } catch (e) {
248
+ reject(new Error(`An unexpected error occurred: ${e}`));
249
+ } finally {
250
+ cleanup();
251
+ }
252
+ };
253
+ });
254
+ }
255
+ send(type, data = null) {
256
+ if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
257
+ throw new Error("Stream is not opened");
258
+ }
259
+ this.ws.send(JSON.stringify({ type, ...data && { data } }));
260
+ }
261
+ };
262
+ var WebClient_default = WebClient;
7
263
  export {
8
- Dummy_default as Dummy
264
+ WebClient_default as WebClient
9
265
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simfinity/constellation-client",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -18,16 +18,12 @@
18
18
  "build": "tsup src/index.ts --format cjs,esm --dts"
19
19
  },
20
20
  "peerDependencies": {
21
- "react": "^18.0.0 || ^19.0.0",
22
- "react-dom": "^18.0.0 || ^19.0.0"
23
21
  },
24
22
  "author": "Simfinity",
25
23
  "license": "MIT",
26
24
  "dependencies": {
27
25
  },
28
26
  "devDependencies": {
29
- "@types/react": "^19.2.11",
30
- "@types/react-dom": "^19.2.3",
31
27
  "tsup": "^8.5.1",
32
28
  "typescript": "^5.9.3"
33
29
  }