@simfinity/constellation-client 1.0.19 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -85,32 +85,51 @@ export interface WebClientConfig {
85
85
  }
86
86
  ```
87
87
 
88
+ Model behaviour configuration: alters how the model reacts.
89
+ Omitted properties will remain unchanged in the model.
90
+ It is theoretically possible to change these settings both at session start and mid-session,
91
+ however some LLMs may not support mid-session updates, so it is advised to define them at session start.
92
+ ```TypeScript
93
+ export interface SessionConfig {
94
+ temperature?: number;
95
+ instructions?: string;
96
+ maxResponseToken?: number;
97
+ }
98
+ ```
99
+
88
100
  **Event hooks**
89
101
 
90
102
  Callback functions to catch all the propagated server events. Except for the
91
103
  onStreamClosed event, assigning hooks is optional:
92
104
  non-observed events will be silently ignored & lost.
105
+ For more details on when these events fire and how to integrate them, please refer
106
+ to in-code comments.
93
107
  ```TypeScript
94
108
  export interface EventHandlers {
95
109
  onStreamClosed: (reason: string) => void;
96
110
  onAudioResponseStart?: () => void;
97
111
  onAudioResponseChunk?: (audioChunk: string) => void;
98
112
  onAudioResponseEnd?: () => void;
99
- onTranscriptInput?: (transcript: string) => void;
100
- onTranscriptResponse?: (transcript: string) => void;
113
+ onTranscriptInput?: (text: string) => void;
114
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
115
+ onTranscriptResponse?: (text: string) => void;
116
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
101
117
  onTechnicalError?: (error: string) => void;
102
118
  }
103
119
  ```
104
120
 
105
121
  ### Audio
106
122
 
107
- * The server expect exclusively base64 encoded PCM16 format & sends responses of the same format in return
108
- * The server implements VAD - voice activation detection. Configured to detect 1s silences as a response trigger
109
- * Therefore, input audio data chunks can be streamed immediately without buffering
110
- * Client should however implement voice detection as well to reduce network consumption
111
- * 500ms ring buffer continuously filled with audio input
112
- * Noise detection with minimum threshold
113
- *
123
+ * The server expects exclusively base64 encoded PCM16, 16k hertz audio data & sends responses in the same format.
124
+ * The server implements VAD - voice activation detection. By default, detects 1s silences as a response trigger.
125
+ * Therefore, client input audio data chunks can be streamed immediately without buffering.
126
+ * Client should however implement voice detection as well to avoid continuously streaming silence audio data
127
+ * and thus reduce network consumption. Suggested high level approach:
128
+ - 500ms ring buffer continuously filled with audio input
129
+ - Noise detection with minimum threshold
130
+ - Confirm voice is detected with consistent sound for ~250ms
131
+ - Start streaming audio, beginning from 250ms in the past in the ring buffer
132
+
114
133
 
115
134
  ### Text & Transcript
116
135
 
@@ -119,6 +138,9 @@ export interface EventHandlers {
119
138
  * a mirrored transcript text through onTranscriptInput
120
139
  * an audio response through onAudioResponseChunk
121
140
  * a text transcript of the audio response through onTranscriptResponse
141
+ * onTranscriptInputPart and onTranscriptResponsePart are fired for each new piece of partial text available
122
142
  * In a text-only session, a text input will trigger:
123
143
  * a mirrored transcript text through onTranscriptInput
124
- * a text response through the onTranscriptResponse callback
144
+ * a text response through the onTranscriptResponse callback
145
+ * onTranscriptInputPart is expected to fire only once as the input is immediately received and echoed
146
+ * onTranscriptResponsePart is fired as soon as a new piece of partial text from the response is available
package/dist/index.cjs CHANGED
@@ -92,7 +92,7 @@ var WebClient = class {
92
92
  const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
93
93
  method: "POST",
94
94
  headers: {
95
- "Authorization": `Bearer ${this.config.key}`,
95
+ "Sim-Api-Key": `${this.config.key}`,
96
96
  "Content-Type": "application/json",
97
97
  "Accept": "application/json"
98
98
  },
@@ -148,30 +148,36 @@ var WebClient = class {
148
148
  handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
149
149
  };
150
150
  ws.onmessage = async (event) => {
151
- var _a, _b, _c, _d, _e, _f, _g;
151
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i;
152
152
  try {
153
- const data = JSON.parse(event.data);
154
- switch (data.type) {
153
+ const message = JSON.parse(event.data);
154
+ switch (message.type) {
155
155
  case "session.configured":
156
- (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
156
+ (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
157
157
  break;
158
158
  case "audio.response.start":
159
159
  (_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
160
160
  break;
161
161
  case "audio.response.append":
162
- (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
162
+ (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
163
163
  break;
164
164
  case "audio.response.done":
165
165
  (_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
166
166
  break;
167
+ case "transcript.input.part":
168
+ (_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
169
+ break;
167
170
  case "transcript.input":
168
- (_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
171
+ (_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
172
+ break;
173
+ case "transcript.response.part":
174
+ (_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
169
175
  break;
170
176
  case "transcript.response":
171
- (_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
177
+ (_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
172
178
  break;
173
179
  case "technical.error":
174
- (_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
180
+ (_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
175
181
  break;
176
182
  default:
177
183
  break;
@@ -276,15 +282,7 @@ var WebClient = class {
276
282
  const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
277
283
  ws.send(JSON.stringify({
278
284
  type: "connection.initiate",
279
- data: {
280
- subscription: eventSubs,
281
- settings: {
282
- audio: true,
283
- voice: "alloy",
284
- temperature: 0.8,
285
- instructions: ""
286
- }
287
- }
285
+ data: { subscription: eventSubs }
288
286
  }));
289
287
  };
290
288
  ws.onmessage = (event) => {
package/dist/index.d.cts CHANGED
@@ -27,7 +27,6 @@ interface WebClientConfig {
27
27
  streamingEndpoint: string;
28
28
  key: string;
29
29
  llm: LlmType;
30
- model: string;
31
30
  }
32
31
  /**
33
32
  * System settings influencing the model behavior:
@@ -40,17 +39,24 @@ interface SessionConfig {
40
39
  instructions?: string;
41
40
  maxResponseToken?: number;
42
41
  }
42
+ /**
43
+ * Snapshot of the current settings run by the model.
44
+ */
45
+ interface SessionSettings {
46
+ tools: any[];
47
+ audio: boolean;
48
+ voice: string;
49
+ vad: {
50
+ threshold: number;
51
+ silenceMs: number;
52
+ };
53
+ temperature: number;
54
+ instructions: string;
55
+ maxResponseToken: number;
56
+ }
43
57
  /**
44
58
  * Callback functions to catch all the propagated server events.
45
59
  *
46
- * @onStreamClosed the streaming session (web socket) shut down
47
- * @onSessionConfigured received in response to a session settings update from the client
48
- * @onAudioResponseStart the LLM service is about to respond with streaming audio data
49
- * @onAudioResponseChunk a new chunk of response audio data was received
50
- * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
51
- * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
52
- * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
53
- * @onTechnicalError any technical issue encountered during the stream
54
60
  *
55
61
  * @remarks
56
62
  * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
@@ -61,13 +67,71 @@ interface SessionConfig {
61
67
  * - In a text exchange, they hold the actual text messages of the conversation
62
68
  */
63
69
  interface EventHandlers {
70
+ /**
71
+ * @param reason provided by the server to explain stream closure.
72
+ */
64
73
  onStreamClosed: (reason: string) => void;
65
- onSessionConfigured?: (settings: SessionConfig) => void;
74
+ /**
75
+ * Acknowledgment by the server of a settings update,
76
+ * following a "session.configure" request through configureSession()
77
+ *
78
+ * @param settings the updated settings currently in effect in the model.
79
+ */
80
+ onSessionConfigured?: (settings: SessionSettings) => void;
81
+ /**
82
+ * Fired by the server when the Model is starting to stream an audio response.
83
+ */
66
84
  onAudioResponseStart?: () => void;
85
+ /**
86
+ * New chunk of audio data from the ongoing Model audio response.
87
+ *
88
+ * @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
89
+ */
67
90
  onAudioResponseChunk?: (audioChunk: string) => void;
91
+ /**
92
+ * Fired by the server when the Model is finished streaming an audio response.
93
+ */
68
94
  onAudioResponseEnd?: () => void;
69
- onTranscriptInput?: (transcript: string) => void;
70
- onTranscriptResponse?: (transcript: string) => void;
95
+ /**
96
+ * Fired after a client text or audio input.
97
+ * This event occurs when the full text input is finalised.
98
+ *
99
+ * @param text either a copy of the text input, or the transcript of the audio input.
100
+ */
101
+ onTranscriptInput?: (text: string) => void;
102
+ /**
103
+ * Fired after a client text or audio input.
104
+ * This event occurs as soon as a new section of text, part of the complete input, is available.
105
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
106
+ *
107
+ * @param text the next piece of text available, part of a whole input message.
108
+ * @param final is true for the last event containing the final piece of text to complete the input.
109
+ */
110
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
111
+ /**
112
+ * Fired when a model response is available, either text or audio.
113
+ * This event can happen in parallel with the onAudioResponseChunk events streaming
114
+ * the corresponding audio response and before onAudioResponseEnd is received.
115
+ *
116
+ * @param text either the model's text response in a text conversation, or the transcript of its audio
117
+ * response in a voice conversation.
118
+ */
119
+ onTranscriptResponse?: (text: string) => void;
120
+ /**
121
+ * Fired when a model response is available, either text or audio.
122
+ * This event occurs as soon as a new section of text, part of the complete response, is available.
123
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
124
+ *
125
+ * @param text the next piece of text available, part of a whole response message: either the model's text response
126
+ * in a text conversation, or the transcript of its audio response in a voice conversation.
127
+ * @param final is true for the last event containing the final piece of text to complete the response.
128
+ */
129
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
130
+ /**
131
+ * Fired when the server encountered an error of any kind, either functional or technical.
132
+ *
133
+ * @param error description of the error that occurred.
134
+ */
71
135
  onTechnicalError?: (error: string) => void;
72
136
  }
73
137
  /**
package/dist/index.d.ts CHANGED
@@ -27,7 +27,6 @@ interface WebClientConfig {
27
27
  streamingEndpoint: string;
28
28
  key: string;
29
29
  llm: LlmType;
30
- model: string;
31
30
  }
32
31
  /**
33
32
  * System settings influencing the model behavior:
@@ -40,17 +39,24 @@ interface SessionConfig {
40
39
  instructions?: string;
41
40
  maxResponseToken?: number;
42
41
  }
42
+ /**
43
+ * Snapshot of the current settings run by the model.
44
+ */
45
+ interface SessionSettings {
46
+ tools: any[];
47
+ audio: boolean;
48
+ voice: string;
49
+ vad: {
50
+ threshold: number;
51
+ silenceMs: number;
52
+ };
53
+ temperature: number;
54
+ instructions: string;
55
+ maxResponseToken: number;
56
+ }
43
57
  /**
44
58
  * Callback functions to catch all the propagated server events.
45
59
  *
46
- * @onStreamClosed the streaming session (web socket) shut down
47
- * @onSessionConfigured received in response to a session settings update from the client
48
- * @onAudioResponseStart the LLM service is about to respond with streaming audio data
49
- * @onAudioResponseChunk a new chunk of response audio data was received
50
- * @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
51
- * @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
52
- * @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
53
- * @onTechnicalError any technical issue encountered during the stream
54
60
  *
55
61
  * @remarks
56
62
  * Un-assigned callbacks will not cause exceptions by this client when events are received from the server
@@ -61,13 +67,71 @@ interface SessionConfig {
61
67
  * - In a text exchange, they hold the actual text messages of the conversation
62
68
  */
63
69
  interface EventHandlers {
70
+ /**
71
+ * @param reason provided by the server to explain stream closure.
72
+ */
64
73
  onStreamClosed: (reason: string) => void;
65
- onSessionConfigured?: (settings: SessionConfig) => void;
74
+ /**
75
+ * Acknowledgment by the server of a settings update,
76
+ * following a "session.configure" request through configureSession()
77
+ *
78
+ * @param settings the updated settings currently in effect in the model.
79
+ */
80
+ onSessionConfigured?: (settings: SessionSettings) => void;
81
+ /**
82
+ * Fired by the server when the Model is starting to stream an audio response.
83
+ */
66
84
  onAudioResponseStart?: () => void;
85
+ /**
86
+ * New chunk of audio data from the ongoing Model audio response.
87
+ *
88
+ * @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
89
+ */
67
90
  onAudioResponseChunk?: (audioChunk: string) => void;
91
+ /**
92
+ * Fired by the server when the Model is finished streaming an audio response.
93
+ */
68
94
  onAudioResponseEnd?: () => void;
69
- onTranscriptInput?: (transcript: string) => void;
70
- onTranscriptResponse?: (transcript: string) => void;
95
+ /**
96
+ * Fired after a client text or audio input.
97
+ * This event occurs when the full text input is finalised.
98
+ *
99
+ * @param text either a copy of the text input, or the transcript of the audio input.
100
+ */
101
+ onTranscriptInput?: (text: string) => void;
102
+ /**
103
+ * Fired after a client text or audio input.
104
+ * This event occurs as soon as a new section of text, part of the complete input, is available.
105
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
106
+ *
107
+ * @param text the next piece of text available, part of a whole input message.
108
+ * @param final is true for the last event containing the final piece of text to complete the input.
109
+ */
110
+ onTranscriptInputPart?: (text: string, final: boolean) => void;
111
+ /**
112
+ * Fired when a model response is available, either text or audio.
113
+ * This event can happen in parallel with the onAudioResponseChunk events streaming
114
+ * the corresponding audio response and before onAudioResponseEnd is received.
115
+ *
116
+ * @param text either the model's text response in a text conversation, or the transcript of its audio
117
+ * response in a voice conversation.
118
+ */
119
+ onTranscriptResponse?: (text: string) => void;
120
+ /**
121
+ * Fired when a model response is available, either text or audio.
122
+ * This event occurs as soon as a new section of text, part of the complete response, is available.
123
+ * The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
124
+ *
125
+ * @param text the next piece of text available, part of a whole response message: either the model's text response
126
+ * in a text conversation, or the transcript of its audio response in a voice conversation.
127
+ * @param final is true for the last event containing the final piece of text to complete the response.
128
+ */
129
+ onTranscriptResponsePart?: (text: string, final: boolean) => void;
130
+ /**
131
+ * Fired when the server encountered an error of any kind, either functional or technical.
132
+ *
133
+ * @param error description of the error that occurred.
134
+ */
71
135
  onTechnicalError?: (error: string) => void;
72
136
  }
73
137
  /**
package/dist/index.js CHANGED
@@ -66,7 +66,7 @@ var WebClient = class {
66
66
  const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
67
67
  method: "POST",
68
68
  headers: {
69
- "Authorization": `Bearer ${this.config.key}`,
69
+ "Sim-Api-Key": `${this.config.key}`,
70
70
  "Content-Type": "application/json",
71
71
  "Accept": "application/json"
72
72
  },
@@ -122,30 +122,36 @@ var WebClient = class {
122
122
  handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
123
123
  };
124
124
  ws.onmessage = async (event) => {
125
- var _a, _b, _c, _d, _e, _f, _g;
125
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i;
126
126
  try {
127
- const data = JSON.parse(event.data);
128
- switch (data.type) {
127
+ const message = JSON.parse(event.data);
128
+ switch (message.type) {
129
129
  case "session.configured":
130
- (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
130
+ (_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
131
131
  break;
132
132
  case "audio.response.start":
133
133
  (_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
134
134
  break;
135
135
  case "audio.response.append":
136
- (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, data.data.audioData);
136
+ (_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
137
137
  break;
138
138
  case "audio.response.done":
139
139
  (_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
140
140
  break;
141
+ case "transcript.input.part":
142
+ (_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
143
+ break;
141
144
  case "transcript.input":
142
- (_e = handlers.onTranscriptInput) == null ? void 0 : _e.call(handlers, data.data.transcript);
145
+ (_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
146
+ break;
147
+ case "transcript.response.part":
148
+ (_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
143
149
  break;
144
150
  case "transcript.response":
145
- (_f = handlers.onTranscriptResponse) == null ? void 0 : _f.call(handlers, data.data.transcript);
151
+ (_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
146
152
  break;
147
153
  case "technical.error":
148
- (_g = handlers.onTechnicalError) == null ? void 0 : _g.call(handlers, data.data.error);
154
+ (_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
149
155
  break;
150
156
  default:
151
157
  break;
@@ -250,15 +256,7 @@ var WebClient = class {
250
256
  const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
251
257
  ws.send(JSON.stringify({
252
258
  type: "connection.initiate",
253
- data: {
254
- subscription: eventSubs,
255
- settings: {
256
- audio: true,
257
- voice: "alloy",
258
- temperature: 0.8,
259
- instructions: ""
260
- }
261
- }
259
+ data: { subscription: eventSubs }
262
260
  }));
263
261
  };
264
262
  ws.onmessage = (event) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simfinity/constellation-client",
3
- "version": "1.0.19",
3
+ "version": "1.0.20",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {