@simfinity/constellation-client 1.0.19 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,32 +85,51 @@ export interface WebClientConfig {
85
85
  }
86
86
  ```
87
87
 
88
+ Model behaviour configuration: alters how the model reacts.
89
+ Omitted properties will remain unchanged in the model.
90
+ It is theoretically possible to change these settings both at session start and mid-session,
91
+ however some LLMs may not support mid-session updates, so it is advised to define them at session start.
92
+ ```TypeScript
93
+ export interface SessionConfig {
94
+ temperature?: number;
95
+ instructions?: string;
96
+ maxResponseToken?: number;
97
+ }
98
+ ```
99
+
88
100
  **Event hooks**
89
101
 
90
102
  Callback functions to catch all the propagated server events. Except for the
91
103
  onStreamClosed event, assigning hooks is optional:
92
104
  non-observed events will be silently ignored & lost.
105
+ For more details on when these events fire and how to integrate them, please refer
106
+ to in-code comments.
93
107
  ```TypeScript
94
108
  export interface EventHandlers {
95
109
  onStreamClosed: (reason: string) => void;
96
110
  onAudioResponseStart?: () => void;
97
111
  onAudioResponseChunk?: (audioChunk: string) => void;
98
112
  onAudioResponseEnd?: () => void;
99
- onTranscriptInput?: (transcript: string) => void;
100
- onTranscriptResponse?: (transcript: string) => void;
113
+ onTranscriptInput?: (text: string) => void;
114
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
115
+ onTranscriptResponse?: (text: string) => void;
116
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
101
117
  onTechnicalError?: (error: string) => void;
102
118
  }
103
119
  ```
104
120
 
105
121
  ### Audio
106
122
 
107
- * The server expect exclusively base64 encoded PCM16 format & sends responses of the same format in return
108
- * The server implements VAD - voice activation detection. Configured to detect 1s silences as a response trigger
109
- * Therefore, input audio data chunks can be streamed immediately without buffering
110
- * Client should however implement voice detection as well to reduce network consumption
111
- * 500ms ring buffer continuously filled with audio input
112
- * Noise detection with minimum threshold
113
- *
123
+ * The server expects exclusively base64 encoded PCM16, 16k hertz audio data & sends responses in the same format.
124
+ * The server implements VAD - voice activation detection. By default, detects 1s silences as a response trigger.
125
+ * Therefore, client input audio data chunks can be streamed immediately without buffering.
126
+ * Client should however implement voice detection as well to avoid continuously streaming silence audio data
127
+ * and thus reduce network consumption. Suggested high level approach:
128
+ - 500ms ring buffer continuously filled with audio input
129
+ - Noise detection with minimum threshold
130
+ - Confirm voice is detected with consistent sound for ~250ms
131
+ - Start streaming audio, beginning from 250ms in the past in the ring buffer
132
+
114
133
 
115
134
  ### Text & Transcript
116
135
 
@@ -119,6 +138,9 @@ export interface EventHandlers {
119
138
  * a mirrored transcript text through onTranscriptInput
120
139
  * an audio response through onAudioResponseChunk
121
140
  * a text transcript of the audio response through onTranscriptResponse
141
+ * onTranscriptInputPart and onTranscriptResponsePart are fired for each new piece of partial text available
122
142
  * In a text-only session, a text input will trigger:
123
143
  * a mirrored transcript text through onTranscriptInput
124
- * a text response through the onTranscriptResponse callback
144
+ * a text response through the onTranscriptResponse callback
145
+ * onTranscriptInputPart is expected to fire only once as the input is immediately received and echoed
146
+ * onTranscriptResponsePart is fired as soon as a new piece of partial text from the response is available
package/dist/index.cjs CHANGED
@@ -92,7 +92,7 @@ var WebClient = class {
92
92
  const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
93
93
  method: "POST",
94
94
  headers: {
95
- "Authorization": `Bearer ${this.config.key}`,
95
+ "Sim-Api-Key": `${this.config.key}`,
96
96
  "Content-Type": "application/json",
97
97
  "Accept": "application/json"
98
98
  },
@@ -148,30 +148,36 @@ var WebClient = class {
148
148
  handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
149
149
  };
150
150
  ws.onmessage = async (event) => {
151
- var _a, _b, _c, _d, _e, _f, _g;
151
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i;
152
152
  try {
153
- const data = JSON.parse(event.data);
154
- switch (data.type) {
153
+ const message = JSON.parse(event.data);
154
+ switch (message.type) {
155
155
  case "session.configured":
156
- (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
156
+ (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
157
157
  break;
158
158
  case "audio.response.start":
159
159
  (_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
160
160
  break;
161
161
  case "audio.response.append":
162
- (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
162
+ (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
163
163
  break;
164
164
  case "audio.response.done":
165
165
  (_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
166
166
  break;
167
+ case "transcript.input.part":
168
+ (_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
169
+ break;
167
170
  case "transcript.input":
168
- (_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
171
+ (_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
172
+ break;
173
+ case "transcript.response.part":
174
+ (_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
169
175
  break;
170
176
  case "transcript.response":
171
- (_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
177
+ (_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
172
178
  break;
173
179
  case "technical.error":
174
- (_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
180
+ (_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
175
181
  break;
176
182
  default:
177
183
  break;
@@ -276,15 +282,7 @@ var WebClient = class {
276
282
  const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
277
283
  ws.send(JSON.stringify({
278
284
  type: "connection.initiate",
279
- data: {
280
- subscription: eventSubs,
281
- settings: {
282
- audio: true,
283
- voice: "alloy",
284
- temperature: 0.8,
285
- instructions: ""
286
- }
287
- }
285
+ data: { subscription: eventSubs }
288
286
  }));
289
287
  };
290
288
  ws.onmessage = (event) => {
package/dist/index.d.cts CHANGED
@@ -27,7 +27,6 @@ interface WebClientConfig {
27
27
  streamingEndpoint: string;
28
28
  key: string;
29
29
  llm: LlmType;
30
- model: string;
31
30
  }
32
31
  /**
33
32
  * System settings influencing the model behavior:
@@ -40,17 +39,24 @@ interface SessionConfig {
40
39
  instructions?: string;
41
40
  maxResponseToken?: number;
42
41
  }
42
+ /**
43
+ * Snapshot of the current settings run by the model.
44
+ */
45
+ interface SessionSettings {
46
+ tools: any[];
47
+ audio: boolean;
48
+ voice: string;
49
+ vad: {
50
+ threshold: number;
51
+ silenceMs: number;
52
+ };
53
+ temperature: number;
54
+ instructions: string;
55
+ maxResponseToken: number;
56
+ }
43
57
  /**
44
58
  * Callback functions to catch all the propagated server events.
45
59
  *
46
- * @onStreamClosed the streaming session (web socket) shut down
47
- * @onSessionConfigured received in response to a session settings update from the client
48
- * @onAudioResponseStart the LLM service is about to respond with streaming audio data
49
- * @onAudioResponseChunk a new chunk of response audio data was received
50
- * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
51
- * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
52
- * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
53
- * @onTechnicalError any technical issue encountered during the stream
54
60
  *
55
61
  * @remarks
56
62
  * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
@@ -61,13 +67,71 @@ interface SessionConfig {
61
67
  * - In a text exchange, they hold the actual text messages of the conversation
62
68
  */
63
69
  interface EventHandlers {
70
+ /**
71
+ * @param reason provided by the server to explain stream closure.
72
+ */
64
73
  onStreamClosed: (reason: string) => void;
65
- onSessionConfigured?: (settings: SessionConfig) => void;
74
+ /**
75
+ * Acknowledgment by the server of a settings update,
76
+ * following a "session.configure" request through configureSession()
77
+ *
78
+ * @param settings the updated settings currently in effect in the model.
79
+ */
80
+ onSessionConfigured?: (settings: SessionSettings) => void;
81
+ /**
82
+ * Fired by the server when the Model is starting to stream an audio response.
83
+ */
66
84
  onAudioResponseStart?: () => void;
85
+ /**
86
+ * New chunk of audio data from the ongoing Model audio response.
87
+ *
88
+ * @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
89
+ */
67
90
  onAudioResponseChunk?: (audioChunk: string) => void;
91
+ /**
92
+ * Fired by the server when the Model is finished streaming an audio response.
93
+ */
68
94
  onAudioResponseEnd?: () => void;
69
- onTranscriptInput?: (transcript: string) => void;
70
- onTranscriptResponse?: (transcript: string) => void;
95
+ /**
96
+ * Fired after a client text or audio input.
97
+ * This event occurs when the full text input is finalised.
98
+ *
99
+ * @param text either a copy of the text input, or the transcript of the audio input.
100
+ */
101
+ onTranscriptInput?: (text: string) => void;
102
+ /**
103
+ * Fired after a client text or audio input.
104
+ * This event occurs as soon as a new section of text, part of the complete input, is available.
105
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
106
+ *
107
+ * @param text the next piece of text available, part of a whole input message.
108
+ * @param final is true for the last event containing the final piece of text to complete the input.
109
+ */
110
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
111
+ /**
112
+ * Fired when a model response is available, either text or audio.
113
+ * This event can happen in parallel with the onAudioResponseChunk events streaming
114
+ * the corresponding audio response and before onAudioResponseEnd is received.
115
+ *
116
+ * @param text either the model's text response in a text conversation, or the transcript of its audio
117
+ * response in a voice conversation.
118
+ */
119
+ onTranscriptResponse?: (text: string) => void;
120
+ /**
121
+ * Fired when a model response is available, either text or audio.
122
+ * This event occurs as soon as a new section of text, part of the complete response, is available.
123
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
124
+ *
125
+ * @param text the next piece of text available, part of a whole response message: either the model's text response
126
+ * in a text conversation, or the transcript of its audio response in a voice conversation.
127
+ * @param final is true for the last event containing the final piece of text to complete the response.
128
+ */
129
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
130
+ /**
131
+ * Fired when the server encountered an error of any kind, either functional or technical.
132
+ *
133
+ * @param error description of the error that occurred.
134
+ */
71
135
  onTechnicalError?: (error: string) => void;
72
136
  }
73
137
  /**
package/dist/index.d.ts CHANGED
@@ -27,7 +27,6 @@ interface WebClientConfig {
27
27
  streamingEndpoint: string;
28
28
  key: string;
29
29
  llm: LlmType;
30
- model: string;
31
30
  }
32
31
  /**
33
32
  * System settings influencing the model behavior:
@@ -40,17 +39,24 @@ interface SessionConfig {
40
39
  instructions?: string;
41
40
  maxResponseToken?: number;
42
41
  }
42
+ /**
43
+ * Snapshot of the current settings run by the model.
44
+ */
45
+ interface SessionSettings {
46
+ tools: any[];
47
+ audio: boolean;
48
+ voice: string;
49
+ vad: {
50
+ threshold: number;
51
+ silenceMs: number;
52
+ };
53
+ temperature: number;
54
+ instructions: string;
55
+ maxResponseToken: number;
56
+ }
43
57
  /**
44
58
  * Callback functions to catch all the propagated server events.
45
59
  *
46
- * @onStreamClosed the streaming session (web socket) shut down
47
- * @onSessionConfigured received in response to a session settings update from the client
48
- * @onAudioResponseStart the LLM service is about to respond with streaming audio data
49
- * @onAudioResponseChunk a new chunk of response audio data was received
50
- * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
51
- * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
52
- * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
53
- * @onTechnicalError any technical issue encountered during the stream
54
60
  *
55
61
  * @remarks
56
62
  * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
@@ -61,13 +67,71 @@ interface SessionConfig {
61
67
  * - In a text exchange, they hold the actual text messages of the conversation
62
68
  */
63
69
  interface EventHandlers {
70
+ /**
71
+ * @param reason provided by the server to explain stream closure.
72
+ */
64
73
  onStreamClosed: (reason: string) => void;
65
- onSessionConfigured?: (settings: SessionConfig) => void;
74
+ /**
75
+ * Acknowledgment by the server of a settings update,
76
+ * following a "session.configure" request through configureSession()
77
+ *
78
+ * @param settings the updated settings currently in effect in the model.
79
+ */
80
+ onSessionConfigured?: (settings: SessionSettings) => void;
81
+ /**
82
+ * Fired by the server when the Model is starting to stream an audio response.
83
+ */
66
84
  onAudioResponseStart?: () => void;
85
+ /**
86
+ * New chunk of audio data from the ongoing Model audio response.
87
+ *
88
+ * @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
89
+ */
67
90
  onAudioResponseChunk?: (audioChunk: string) => void;
91
+ /**
92
+ * Fired by the server when the Model is finished streaming an audio response.
93
+ */
68
94
  onAudioResponseEnd?: () => void;
69
- onTranscriptInput?: (transcript: string) => void;
70
- onTranscriptResponse?: (transcript: string) => void;
95
+ /**
96
+ * Fired after a client text or audio input.
97
+ * This event occurs when the full text input is finalised.
98
+ *
99
+ * @param text either a copy of the text input, or the transcript of the audio input.
100
+ */
101
+ onTranscriptInput?: (text: string) => void;
102
+ /**
103
+ * Fired after a client text or audio input.
104
+ * This event occurs as soon as a new section of text, part of the complete input, is available.
105
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
106
+ *
107
+ * @param text the next piece of text available, part of a whole input message.
108
+ * @param final is true for the last event containing the final piece of text to complete the input.
109
+ */
110
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
111
+ /**
112
+ * Fired when a model response is available, either text or audio.
113
+ * This event can happen in parallel with the onAudioResponseChunk events streaming
114
+ * the corresponding audio response and before onAudioResponseEnd is received.
115
+ *
116
+ * @param text either the model's text response in a text conversation, or the transcript of its audio
117
+ * response in a voice conversation.
118
+ */
119
+ onTranscriptResponse?: (text: string) => void;
120
+ /**
121
+ * Fired when a model response is available, either text or audio.
122
+ * This event occurs as soon as a new section of text, part of the complete response, is available.
123
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
124
+ *
125
+ * @param text the next piece of text available, part of a whole response message: either the model's text response
126
+ * in a text conversation, or the transcript of its audio response in a voice conversation.
127
+ * @param final is true for the last event containing the final piece of text to complete the response.
128
+ */
129
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
130
+ /**
131
+ * Fired when the server encountered an error of any kind, either functional or technical.
132
+ *
133
+ * @param error description of the error that occurred.
134
+ */
71
135
  onTechnicalError?: (error: string) => void;
72
136
  }
73
137
  /**
package/dist/index.js CHANGED
@@ -66,7 +66,7 @@ var WebClient = class {
66
66
  const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
67
67
  method: "POST",
68
68
  headers: {
69
- "Authorization": `Bearer ${this.config.key}`,
69
+ "Sim-Api-Key": `${this.config.key}`,
70
70
  "Content-Type": "application/json",
71
71
  "Accept": "application/json"
72
72
  },
@@ -122,30 +122,36 @@ var WebClient = class {
122
122
  handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
123
123
  };
124
124
  ws.onmessage = async (event) => {
125
- var _a, _b, _c, _d, _e, _f, _g;
125
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i;
126
126
  try {
127
- const data = JSON.parse(event.data);
128
- switch (data.type) {
127
+ const message = JSON.parse(event.data);
128
+ switch (message.type) {
129
129
  case "session.configured":
130
- (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
130
+ (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
131
131
  break;
132
132
  case "audio.response.start":
133
133
  (_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
134
134
  break;
135
135
  case "audio.response.append":
136
- (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
136
+ (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
137
137
  break;
138
138
  case "audio.response.done":
139
139
  (_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
140
140
  break;
141
+ case "transcript.input.part":
142
+ (_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
143
+ break;
141
144
  case "transcript.input":
142
- (_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
145
+ (_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
146
+ break;
147
+ case "transcript.response.part":
148
+ (_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
143
149
  break;
144
150
  case "transcript.response":
145
- (_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
151
+ (_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
146
152
  break;
147
153
  case "technical.error":
148
- (_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
154
+ (_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
149
155
  break;
150
156
  default:
151
157
  break;
@@ -250,15 +256,7 @@ var WebClient = class {
250
256
  const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
251
257
  ws.send(JSON.stringify({
252
258
  type: "connection.initiate",
253
- data: {
254
- subscription: eventSubs,
255
- settings: {
256
- audio: true,
257
- voice: "alloy",
258
- temperature: 0.8,
259
- instructions: ""
260
- }
261
- }
259
+ data: { subscription: eventSubs }
262
260
  }));
263
261
  };
264
262
  ws.onmessage = (event) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simfinity/constellation-client",
3
- "version": "1.0.19",
3
+ "version": "1.0.20",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {