@simfinity/constellation-client 1.0.19 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -10
- package/dist/index.cjs +25 -20
- package/dist/index.d.cts +81 -13
- package/dist/index.d.ts +81 -13
- package/dist/index.js +25 -20
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,32 +85,51 @@ export interface WebClientConfig {
|
|
|
85
85
|
}
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
+
Model behaviour configuration: will alter how the model reacts.
|
|
89
|
+
Omitted properties will remain unchanged in the model.
|
|
90
|
+
It is theoretically possible to change these settings both at session-starting time and mid-session,
|
|
91
|
+
however, some LLMs may not support mid-session updates, so it is advisable to define them at session start.
|
|
92
|
+
```TypeScript
|
|
93
|
+
export interface SessionConfig {
|
|
94
|
+
temperature?: number;
|
|
95
|
+
instructions?: string;
|
|
96
|
+
maxResponseToken?: number;
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
|
|
88
100
|
**Event hooks**
|
|
89
101
|
|
|
90
102
|
Callback functions to catch all the propagated server events. Except for the
|
|
91
103
|
onStreamClosed event, assigning hooks is optional:
|
|
92
104
|
non-observed events will be silently ignored & lost.
|
|
105
|
+
For more details on when these events fire and how to integrate them, please refer
|
|
106
|
+
to the in-code comments.
|
|
93
107
|
```TypeScript
|
|
94
108
|
export interface EventHandlers {
|
|
95
109
|
onStreamClosed: (reason: string) => void;
|
|
96
110
|
onAudioResponseStart?: () => void;
|
|
97
111
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
98
112
|
onAudioResponseEnd?: () => void;
|
|
99
|
-
onTranscriptInput?: (
|
|
100
|
-
|
|
113
|
+
onTranscriptInput?: (text: string) => void;
|
|
114
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
115
|
+
onTranscriptResponse?: (text: string) => void;
|
|
116
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
101
117
|
onTechnicalError?: (error: string) => void;
|
|
102
118
|
}
|
|
103
119
|
```
|
|
104
120
|
|
|
105
121
|
### Audio
|
|
106
122
|
|
|
107
|
-
* The server
|
|
108
|
-
* The server implements VAD - voice activation detection.
|
|
109
|
-
* Therefore, input audio data chunks can be streamed immediately without buffering
|
|
110
|
-
* Client should however implement voice detection as well to
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
123
|
+
* The server expects exclusively base64-encoded PCM16, 16 kHz audio data and sends responses in the same format in return.
|
|
124
|
+
* The server implements VAD (voice activity detection). By default, it treats 1s of silence as a response trigger.
|
|
125
|
+
* Therefore, client input audio data chunks can be streamed immediately without buffering.
|
|
126
|
+
* The client should, however, implement voice detection as well to avoid continuously streaming silent audio data
|
|
127
|
+
* and thus reduce network consumption. Suggested high level approach:
|
|
128
|
+
- 500ms ring buffer continuously filled with audio input
|
|
129
|
+
- Noise detection with minimum threshold
|
|
130
|
+
- Confirm voice is detected with consistent sound for ~250ms
|
|
131
|
+
- Start streaming audio, beginning from 250ms in the past in the ring buffer
|
|
132
|
+
|
|
114
133
|
|
|
115
134
|
### Text & Transcript
|
|
116
135
|
|
|
@@ -119,6 +138,9 @@ export interface EventHandlers {
|
|
|
119
138
|
* a mirrored transcript text through onTranscriptInput
|
|
120
139
|
* an audio response through onAudioResponseChunk
|
|
121
140
|
* a text transcript of the audio response through onTranscriptResponse
|
|
141
|
+
* onTranscriptInputPart and onTranscriptResponsePart are fired for each new piece of partial text available
|
|
122
142
|
* In a text-only session, a text input will trigger:
|
|
123
143
|
* a mirrored transcript text through onTranscriptInput
|
|
124
|
-
* a text response through the onTranscriptResponse callback
|
|
144
|
+
* a text response through the onTranscriptResponse callback
|
|
145
|
+
* onTranscriptInputPart is expected to fire only once as the input is immediately received and echoed
|
|
146
|
+
* onTranscriptResponsePart is fired as soon as a new piece of partial text from the response is available
|
package/dist/index.cjs
CHANGED
|
@@ -41,15 +41,22 @@ var WebClient = class {
|
|
|
41
41
|
*
|
|
42
42
|
* @param voiceEnabled whether this Model-session can receive & produce audio as well as text
|
|
43
43
|
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
44
|
+
* @param behaviour model behaviour parameters. This is optional: default settings
|
|
45
|
+
* will be used if omitted and can be changed mid-session with configureSession().
|
|
46
|
+
* WARNING: some LLMs may not support mid-session updates, thus it is
|
|
47
|
+
* advised and preferred to provide them here at startSession time.
|
|
44
48
|
*
|
|
45
49
|
* @exception
|
|
46
50
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
47
51
|
*/
|
|
48
|
-
async startSession(voiceEnabled, voiceName) {
|
|
52
|
+
async startSession(voiceEnabled, voiceName, behaviour) {
|
|
49
53
|
const prepareBody = {
|
|
50
54
|
llmProvider: this.config.llm,
|
|
51
55
|
audioEnabled: voiceEnabled,
|
|
52
|
-
voiceName
|
|
56
|
+
voiceName,
|
|
57
|
+
temperature: behaviour == null ? void 0 : behaviour.temperature,
|
|
58
|
+
instructions: behaviour == null ? void 0 : behaviour.instructions,
|
|
59
|
+
maxResponseToken: behaviour == null ? void 0 : behaviour.maxResponseToken
|
|
53
60
|
};
|
|
54
61
|
const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
|
|
55
62
|
method: "POST",
|
|
@@ -92,7 +99,7 @@ var WebClient = class {
|
|
|
92
99
|
const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
|
|
93
100
|
method: "POST",
|
|
94
101
|
headers: {
|
|
95
|
-
"
|
|
102
|
+
"Sim-Api-Key": `${this.config.key}`,
|
|
96
103
|
"Content-Type": "application/json",
|
|
97
104
|
"Accept": "application/json"
|
|
98
105
|
},
|
|
@@ -148,30 +155,36 @@ var WebClient = class {
|
|
|
148
155
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
149
156
|
};
|
|
150
157
|
ws.onmessage = async (event) => {
|
|
151
|
-
var _a, _b, _c, _d, _e, _f, _g;
|
|
158
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
152
159
|
try {
|
|
153
|
-
const
|
|
154
|
-
switch (
|
|
160
|
+
const message = JSON.parse(event.data);
|
|
161
|
+
switch (message.type) {
|
|
155
162
|
case "session.configured":
|
|
156
|
-
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
163
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
|
|
157
164
|
break;
|
|
158
165
|
case "audio.response.start":
|
|
159
166
|
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
160
167
|
break;
|
|
161
168
|
case "audio.response.append":
|
|
162
|
-
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers,
|
|
169
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
|
|
163
170
|
break;
|
|
164
171
|
case "audio.response.done":
|
|
165
172
|
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
166
173
|
break;
|
|
174
|
+
case "transcript.input.part":
|
|
175
|
+
(_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
|
|
176
|
+
break;
|
|
167
177
|
case "transcript.input":
|
|
168
|
-
(
|
|
178
|
+
(_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
|
|
179
|
+
break;
|
|
180
|
+
case "transcript.response.part":
|
|
181
|
+
(_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
|
|
169
182
|
break;
|
|
170
183
|
case "transcript.response":
|
|
171
|
-
(
|
|
184
|
+
(_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
|
|
172
185
|
break;
|
|
173
186
|
case "technical.error":
|
|
174
|
-
(
|
|
187
|
+
(_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
|
|
175
188
|
break;
|
|
176
189
|
default:
|
|
177
190
|
break;
|
|
@@ -276,15 +289,7 @@ var WebClient = class {
|
|
|
276
289
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
277
290
|
ws.send(JSON.stringify({
|
|
278
291
|
type: "connection.initiate",
|
|
279
|
-
data: {
|
|
280
|
-
subscription: eventSubs,
|
|
281
|
-
settings: {
|
|
282
|
-
audio: true,
|
|
283
|
-
voice: "alloy",
|
|
284
|
-
temperature: 0.8,
|
|
285
|
-
instructions: ""
|
|
286
|
-
}
|
|
287
|
-
}
|
|
292
|
+
data: { subscription: eventSubs }
|
|
288
293
|
}));
|
|
289
294
|
};
|
|
290
295
|
ws.onmessage = (event) => {
|
package/dist/index.d.cts
CHANGED
|
@@ -27,7 +27,6 @@ interface WebClientConfig {
|
|
|
27
27
|
streamingEndpoint: string;
|
|
28
28
|
key: string;
|
|
29
29
|
llm: LlmType;
|
|
30
|
-
model: string;
|
|
31
30
|
}
|
|
32
31
|
/**
|
|
33
32
|
* System settings influencing the model behavior:
|
|
@@ -40,17 +39,24 @@ interface SessionConfig {
|
|
|
40
39
|
instructions?: string;
|
|
41
40
|
maxResponseToken?: number;
|
|
42
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Snapshot of the current settings run by the model.
|
|
44
|
+
*/
|
|
45
|
+
interface SessionSettings {
|
|
46
|
+
tools: any[];
|
|
47
|
+
audio: boolean;
|
|
48
|
+
voice: string;
|
|
49
|
+
vad: {
|
|
50
|
+
threshold: number;
|
|
51
|
+
silenceMs: number;
|
|
52
|
+
};
|
|
53
|
+
temperature: number;
|
|
54
|
+
instructions: string;
|
|
55
|
+
maxResponseToken: number;
|
|
56
|
+
}
|
|
43
57
|
/**
|
|
44
58
|
* Callback functions to catch all the propagated server events.
|
|
45
59
|
*
|
|
46
|
-
* @onStreamClosed the streaming session (web socket) shut down
|
|
47
|
-
* @onSessionConfigured received in response to a session settings update from the client
|
|
48
|
-
* @onAudioResponseStart the LLM service is about to respond with streaming audio data
|
|
49
|
-
* @onAudioResponseChunk a new chunk of response audio data was received
|
|
50
|
-
* @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
|
|
51
|
-
* @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
|
|
52
|
-
* @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
|
|
53
|
-
* @onTechnicalError any technical issue encountered during the stream
|
|
54
60
|
*
|
|
55
61
|
* @remarks
|
|
56
62
|
* Un-assigned callbacks will not cause exceptions by this client when events are received from the server
|
|
@@ -61,13 +67,71 @@ interface SessionConfig {
|
|
|
61
67
|
* - In a text exchange, they hold the actual text messages of the conversation
|
|
62
68
|
*/
|
|
63
69
|
interface EventHandlers {
|
|
70
|
+
/**
|
|
71
|
+
* @param reason provided by the server to explain stream closure.
|
|
72
|
+
*/
|
|
64
73
|
onStreamClosed: (reason: string) => void;
|
|
65
|
-
|
|
74
|
+
/**
|
|
75
|
+
* Acknowledgment by the server of a settings update,
|
|
76
|
+
* following a "session.configure" request through configureSession()
|
|
77
|
+
*
|
|
78
|
+
* @param settings the updated settings currently in effect in the model.
|
|
79
|
+
*/
|
|
80
|
+
onSessionConfigured?: (settings: SessionSettings) => void;
|
|
81
|
+
/**
|
|
82
|
+
* Fired by the server when the Model is starting to stream an audio response.
|
|
83
|
+
*/
|
|
66
84
|
onAudioResponseStart?: () => void;
|
|
85
|
+
/**
|
|
86
|
+
* New chunk of audio data from the ongoing Model audio response.
|
|
87
|
+
*
|
|
88
|
+
* @param audioChunk base64-encoded PCM16 audio data, 24 kHz.
|
|
89
|
+
*/
|
|
67
90
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
91
|
+
/**
|
|
92
|
+
* Fired by the server when the Model is finished streaming an audio response.
|
|
93
|
+
*/
|
|
68
94
|
onAudioResponseEnd?: () => void;
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Fired after a client text or audio input.
|
|
97
|
+
* This event occurs when the full text input is finalised.
|
|
98
|
+
*
|
|
99
|
+
* @param text either a copy of the text input, or the transcript of the audio input.
|
|
100
|
+
*/
|
|
101
|
+
onTranscriptInput?: (text: string) => void;
|
|
102
|
+
/**
|
|
103
|
+
* Fired after a client text or audio input.
|
|
104
|
+
* This event occurs as soon as a new section of text, part of the complete input, is available.
|
|
105
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
|
|
106
|
+
*
|
|
107
|
+
* @param text the next piece of text available, part of a whole input message.
|
|
108
|
+
* @param final is true for the last event containing the final piece of text to complete the input.
|
|
109
|
+
*/
|
|
110
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
111
|
+
/**
|
|
112
|
+
* Fired when a model response is available, either text or audio.
|
|
113
|
+
* This event can happen in parallel with the onAudioResponseChunk events streaming
|
|
114
|
+
* the corresponding audio response and before onAudioResponseEnd is received.
|
|
115
|
+
*
|
|
116
|
+
* @param text either the model's text response in a text conversation, or the transcript of its audio
|
|
117
|
+
* response in a voice conversation.
|
|
118
|
+
*/
|
|
119
|
+
onTranscriptResponse?: (text: string) => void;
|
|
120
|
+
/**
|
|
121
|
+
* Fired when a model response is available, either text or audio.
|
|
122
|
+
* This event occurs as soon as a new section of text, part of the complete response, is available.
|
|
123
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
|
|
124
|
+
*
|
|
125
|
+
* @param text the next piece of text available, part of a whole response message: either the model's text response
|
|
126
|
+
* in a text conversation, or the transcript of its audio response in a voice conversation.
|
|
127
|
+
* @param final is true for the last event containing the final piece of text to complete the response.
|
|
128
|
+
*/
|
|
129
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
130
|
+
/**
|
|
131
|
+
* Fired when the server encountered an error of any kind, either functional or technical.
|
|
132
|
+
*
|
|
133
|
+
* @param error description of the error that occurred.
|
|
134
|
+
*/
|
|
71
135
|
onTechnicalError?: (error: string) => void;
|
|
72
136
|
}
|
|
73
137
|
/**
|
|
@@ -93,11 +157,15 @@ declare class WebClient {
|
|
|
93
157
|
*
|
|
94
158
|
* @param voiceEnabled whether this Model-session can receive & produce audio as well as text
|
|
95
159
|
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
160
|
+
* @param behaviour model behaviour parameters. This is optional: default settings
|
|
161
|
+
* will be used if omitted and can be changed mid-session with configureSession().
|
|
162
|
+
* WARNING: some LLMs may not support mid-session updates, thus it is
|
|
163
|
+
* advised and preferred to provide them here at startSession time.
|
|
96
164
|
*
|
|
97
165
|
* @exception
|
|
98
166
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
99
167
|
*/
|
|
100
|
-
startSession(voiceEnabled: boolean, voiceName?: string): Promise<void>;
|
|
168
|
+
startSession(voiceEnabled: boolean, voiceName?: string, behaviour?: SessionConfig): Promise<void>;
|
|
101
169
|
/**
|
|
102
170
|
* Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
|
|
103
171
|
* If there is no active session, this method does nothing.
|
package/dist/index.d.ts
CHANGED
|
@@ -27,7 +27,6 @@ interface WebClientConfig {
|
|
|
27
27
|
streamingEndpoint: string;
|
|
28
28
|
key: string;
|
|
29
29
|
llm: LlmType;
|
|
30
|
-
model: string;
|
|
31
30
|
}
|
|
32
31
|
/**
|
|
33
32
|
* System settings influencing the model behavior:
|
|
@@ -40,17 +39,24 @@ interface SessionConfig {
|
|
|
40
39
|
instructions?: string;
|
|
41
40
|
maxResponseToken?: number;
|
|
42
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Snapshot of the current settings run by the model.
|
|
44
|
+
*/
|
|
45
|
+
interface SessionSettings {
|
|
46
|
+
tools: any[];
|
|
47
|
+
audio: boolean;
|
|
48
|
+
voice: string;
|
|
49
|
+
vad: {
|
|
50
|
+
threshold: number;
|
|
51
|
+
silenceMs: number;
|
|
52
|
+
};
|
|
53
|
+
temperature: number;
|
|
54
|
+
instructions: string;
|
|
55
|
+
maxResponseToken: number;
|
|
56
|
+
}
|
|
43
57
|
/**
|
|
44
58
|
* Callback functions to catch all the propagated server events.
|
|
45
59
|
*
|
|
46
|
-
* @onStreamClosed the streaming session (web socket) shut down
|
|
47
|
-
* @onSessionConfigured received in response to a session settings update from the client
|
|
48
|
-
* @onAudioResponseStart the LLM service is about to respond with streaming audio data
|
|
49
|
-
* @onAudioResponseChunk a new chunk of response audio data was received
|
|
50
|
-
* @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
|
|
51
|
-
* @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
|
|
52
|
-
* @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
|
|
53
|
-
* @onTechnicalError any technical issue encountered during the stream
|
|
54
60
|
*
|
|
55
61
|
* @remarks
|
|
56
62
|
* Un-assigned callbacks will not cause exceptions by this client when events are received from the server
|
|
@@ -61,13 +67,71 @@ interface SessionConfig {
|
|
|
61
67
|
* - In a text exchange, they hold the actual text messages of the conversation
|
|
62
68
|
*/
|
|
63
69
|
interface EventHandlers {
|
|
70
|
+
/**
|
|
71
|
+
* @param reason provided by the server to explain stream closure.
|
|
72
|
+
*/
|
|
64
73
|
onStreamClosed: (reason: string) => void;
|
|
65
|
-
|
|
74
|
+
/**
|
|
75
|
+
* Acknowledgment by the server of a settings update,
|
|
76
|
+
* following a "session.configure" request through configureSession()
|
|
77
|
+
*
|
|
78
|
+
* @param settings the updated settings currently in effect in the model.
|
|
79
|
+
*/
|
|
80
|
+
onSessionConfigured?: (settings: SessionSettings) => void;
|
|
81
|
+
/**
|
|
82
|
+
* Fired by the server when the Model is starting to stream an audio response.
|
|
83
|
+
*/
|
|
66
84
|
onAudioResponseStart?: () => void;
|
|
85
|
+
/**
|
|
86
|
+
* New chunk of audio data from the ongoing Model audio response.
|
|
87
|
+
*
|
|
88
|
+
* @param audioChunk base64-encoded PCM16 audio data, 24 kHz.
|
|
89
|
+
*/
|
|
67
90
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
91
|
+
/**
|
|
92
|
+
* Fired by the server when the Model is finished streaming an audio response.
|
|
93
|
+
*/
|
|
68
94
|
onAudioResponseEnd?: () => void;
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Fired after a client text or audio input.
|
|
97
|
+
* This event occurs when the full text input is finalised.
|
|
98
|
+
*
|
|
99
|
+
* @param text either a copy of the text input, or the transcript of the audio input.
|
|
100
|
+
*/
|
|
101
|
+
onTranscriptInput?: (text: string) => void;
|
|
102
|
+
/**
|
|
103
|
+
* Fired after a client text or audio input.
|
|
104
|
+
* This event occurs as soon as a new section of text, part of the complete input, is available.
|
|
105
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
|
|
106
|
+
*
|
|
107
|
+
* @param text the next piece of text available, part of a whole input message.
|
|
108
|
+
* @param final is true for the last event containing the final piece of text to complete the input.
|
|
109
|
+
*/
|
|
110
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
111
|
+
/**
|
|
112
|
+
* Fired when a model response is available, either text or audio.
|
|
113
|
+
* This event can happen in parallel with the onAudioResponseChunk events streaming
|
|
114
|
+
* the corresponding audio response and before onAudioResponseEnd is received.
|
|
115
|
+
*
|
|
116
|
+
* @param text either the model's text response in a text conversation, or the transcript of its audio
|
|
117
|
+
* response in a voice conversation.
|
|
118
|
+
*/
|
|
119
|
+
onTranscriptResponse?: (text: string) => void;
|
|
120
|
+
/**
|
|
121
|
+
* Fired when a model response is available, either text or audio.
|
|
122
|
+
* This event occurs as soon as a new section of text, part of the complete response, is available.
|
|
123
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
|
|
124
|
+
*
|
|
125
|
+
* @param text the next piece of text available, part of a whole response message: either the model's text response
|
|
126
|
+
* in a text conversation, or the transcript of its audio response in a voice conversation.
|
|
127
|
+
* @param final is true for the last event containing the final piece of text to complete the response.
|
|
128
|
+
*/
|
|
129
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
130
|
+
/**
|
|
131
|
+
* Fired when the server encountered an error of any kind, either functional or technical.
|
|
132
|
+
*
|
|
133
|
+
* @param error description of the error that occurred.
|
|
134
|
+
*/
|
|
71
135
|
onTechnicalError?: (error: string) => void;
|
|
72
136
|
}
|
|
73
137
|
/**
|
|
@@ -93,11 +157,15 @@ declare class WebClient {
|
|
|
93
157
|
*
|
|
94
158
|
* @param voiceEnabled whether this Model-session can receive & produce audio as well as text
|
|
95
159
|
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
160
|
+
* @param behaviour model behaviour parameters. This is optional: default settings
|
|
161
|
+
* will be used if omitted and can be changed mid-session with configureSession().
|
|
162
|
+
* WARNING: some LLMs may not support mid-session updates, thus it is
|
|
163
|
+
* advised and preferred to provide them here at startSession time.
|
|
96
164
|
*
|
|
97
165
|
* @exception
|
|
98
166
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
99
167
|
*/
|
|
100
|
-
startSession(voiceEnabled: boolean, voiceName?: string): Promise<void>;
|
|
168
|
+
startSession(voiceEnabled: boolean, voiceName?: string, behaviour?: SessionConfig): Promise<void>;
|
|
101
169
|
/**
|
|
102
170
|
* Close an opened, persistent chat room, effectively killing the streaming as well if still opened.
|
|
103
171
|
* If there is no active session, this method does nothing.
|
package/dist/index.js
CHANGED
|
@@ -15,15 +15,22 @@ var WebClient = class {
|
|
|
15
15
|
*
|
|
16
16
|
* @param voiceEnabled whether this Model-session can receive & produce audio as well as text
|
|
17
17
|
* @param voiceName LLM specific voice name e.g. with OpenAI this could be 'alloy'
|
|
18
|
+
* @param behaviour model behaviour parameters. This is optional: default settings
|
|
19
|
+
* will be used if omitted and can be changed mid-session with configureSession().
|
|
20
|
+
* WARNING: some LLMs may not support mid-session updates, thus it is
|
|
21
|
+
* advised and preferred to provide them here at startSession time.
|
|
18
22
|
*
|
|
19
23
|
* @exception
|
|
20
24
|
* This method throws new Error(...) if unable to execute successfully for any reason.
|
|
21
25
|
*/
|
|
22
|
-
async startSession(voiceEnabled, voiceName) {
|
|
26
|
+
async startSession(voiceEnabled, voiceName, behaviour) {
|
|
23
27
|
const prepareBody = {
|
|
24
28
|
llmProvider: this.config.llm,
|
|
25
29
|
audioEnabled: voiceEnabled,
|
|
26
|
-
voiceName
|
|
30
|
+
voiceName,
|
|
31
|
+
temperature: behaviour == null ? void 0 : behaviour.temperature,
|
|
32
|
+
instructions: behaviour == null ? void 0 : behaviour.instructions,
|
|
33
|
+
maxResponseToken: behaviour == null ? void 0 : behaviour.maxResponseToken
|
|
27
34
|
};
|
|
28
35
|
const response = await fetch(`${this.config.sessionEndpoint}/prepare_session`, {
|
|
29
36
|
method: "POST",
|
|
@@ -66,7 +73,7 @@ var WebClient = class {
|
|
|
66
73
|
const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
|
|
67
74
|
method: "POST",
|
|
68
75
|
headers: {
|
|
69
|
-
"
|
|
76
|
+
"Sim-Api-Key": `${this.config.key}`,
|
|
70
77
|
"Content-Type": "application/json",
|
|
71
78
|
"Accept": "application/json"
|
|
72
79
|
},
|
|
@@ -122,30 +129,36 @@ var WebClient = class {
|
|
|
122
129
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
123
130
|
};
|
|
124
131
|
ws.onmessage = async (event) => {
|
|
125
|
-
var _a, _b, _c, _d, _e, _f, _g;
|
|
132
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
126
133
|
try {
|
|
127
|
-
const
|
|
128
|
-
switch (
|
|
134
|
+
const message = JSON.parse(event.data);
|
|
135
|
+
switch (message.type) {
|
|
129
136
|
case "session.configured":
|
|
130
|
-
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
137
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
|
|
131
138
|
break;
|
|
132
139
|
case "audio.response.start":
|
|
133
140
|
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
134
141
|
break;
|
|
135
142
|
case "audio.response.append":
|
|
136
|
-
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers,
|
|
143
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
|
|
137
144
|
break;
|
|
138
145
|
case "audio.response.done":
|
|
139
146
|
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
140
147
|
break;
|
|
148
|
+
case "transcript.input.part":
|
|
149
|
+
(_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
|
|
150
|
+
break;
|
|
141
151
|
case "transcript.input":
|
|
142
|
-
(
|
|
152
|
+
(_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
|
|
153
|
+
break;
|
|
154
|
+
case "transcript.response.part":
|
|
155
|
+
(_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
|
|
143
156
|
break;
|
|
144
157
|
case "transcript.response":
|
|
145
|
-
(
|
|
158
|
+
(_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
|
|
146
159
|
break;
|
|
147
160
|
case "technical.error":
|
|
148
|
-
(
|
|
161
|
+
(_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
|
|
149
162
|
break;
|
|
150
163
|
default:
|
|
151
164
|
break;
|
|
@@ -250,15 +263,7 @@ var WebClient = class {
|
|
|
250
263
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
251
264
|
ws.send(JSON.stringify({
|
|
252
265
|
type: "connection.initiate",
|
|
253
|
-
data: {
|
|
254
|
-
subscription: eventSubs,
|
|
255
|
-
settings: {
|
|
256
|
-
audio: true,
|
|
257
|
-
voice: "alloy",
|
|
258
|
-
temperature: 0.8,
|
|
259
|
-
instructions: ""
|
|
260
|
-
}
|
|
261
|
-
}
|
|
266
|
+
data: { subscription: eventSubs }
|
|
262
267
|
}));
|
|
263
268
|
};
|
|
264
269
|
ws.onmessage = (event) => {
|