@simfinity/constellation-client 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -10
- package/dist/index.cjs +18 -19
- package/dist/index.d.cts +77 -13
- package/dist/index.d.ts +77 -13
- package/dist/index.js +18 -19
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -85,32 +85,51 @@ export interface WebClientConfig {
|
|
|
85
85
|
}
|
|
86
86
|
```
|
|
87
87
|
|
|
88
|
+
Model behaviour configuration: will alter how the model reacts.
|
|
89
|
+
Omitted properties will remain unchanged in the model.
|
|
90
|
+
It is theoretically possible to change these settings both at session-starting time and mid-session,
|
|
91
|
+
however, some LLMs may not support mid-session updates, so it is advised to define them at session start.
|
|
92
|
+
```TypeScript
|
|
93
|
+
export interface SessionConfig {
|
|
94
|
+
temperature?: number;
|
|
95
|
+
instructions?: string;
|
|
96
|
+
maxResponseToken?: number;
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
|
|
88
100
|
**Event hooks**
|
|
89
101
|
|
|
90
102
|
Callback functions to catch all the propagated server events. Except for the
|
|
91
103
|
onStreamClosed event, assigning hooks is optional:
|
|
92
104
|
non-observed events will be silently ignored & lost.
|
|
105
|
+
For more details on when these events fire and how to integrate them, please refer
|
|
106
|
+
to the in-code comments.
|
|
93
107
|
```TypeScript
|
|
94
108
|
export interface EventHandlers {
|
|
95
109
|
onStreamClosed: (reason: string) => void;
|
|
96
110
|
onAudioResponseStart?: () => void;
|
|
97
111
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
98
112
|
onAudioResponseEnd?: () => void;
|
|
99
|
-
onTranscriptInput?: (
|
|
100
|
-
|
|
113
|
+
onTranscriptInput?: (text: string) => void;
|
|
114
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
115
|
+
onTranscriptResponse?: (text: string) => void;
|
|
116
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
101
117
|
onTechnicalError?: (error: string) => void;
|
|
102
118
|
}
|
|
103
119
|
```
|
|
104
120
|
|
|
105
121
|
### Audio
|
|
106
122
|
|
|
107
|
-
* The server
|
|
108
|
-
* The server implements VAD - voice activation detection.
|
|
109
|
-
* Therefore, input audio data chunks can be streamed immediately without buffering
|
|
110
|
-
* Client should however implement voice detection as well to
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
123
|
+
* The server expects exclusively base64 encoded PCM16, 16k hertz audio data & sends responses in the same format in return.
|
|
124
|
+
* The server implements VAD - voice activity detection. By default, it treats 1s of silence as a response trigger.
|
|
125
|
+
* Therefore, client input audio data chunks can be streamed immediately without buffering.
|
|
126
|
+
* The client should, however, implement voice detection as well to avoid continuously streaming silent audio data
|
|
127
|
+
* and thus reduce network consumption. Suggested high level approach:
|
|
128
|
+
- 500ms ring buffer continuously filled with audio input
|
|
129
|
+
- Noise detection with minimum threshold
|
|
130
|
+
- Confirm voice is detected with consistent sound for ~250ms
|
|
131
|
+
- Start streaming audio, beginning from 250ms in the past in the ring buffer
|
|
132
|
+
|
|
114
133
|
|
|
115
134
|
### Text & Transcript
|
|
116
135
|
|
|
@@ -119,6 +138,9 @@ export interface EventHandlers {
|
|
|
119
138
|
* a mirrored transcript text through onTranscriptInput
|
|
120
139
|
* an audio response through onAudioResponseChunk
|
|
121
140
|
* a text transcript of the audio response through onTranscriptResponse
|
|
141
|
+
* onTranscriptInputPart and onTranscriptResponsePart are fired for each new piece of partial text available
|
|
122
142
|
* In a text-only session, a text input will trigger:
|
|
123
143
|
* a mirrored transcript text through onTranscriptInput
|
|
124
|
-
* a text response through the onTranscriptResponse callback
|
|
144
|
+
* a text response through the onTranscriptResponse callback
|
|
145
|
+
* onTranscriptInputPart is expected to fire only once as the input is immediately received and echoed
|
|
146
|
+
* onTranscriptResponsePart is fired as soon as a new piece of partial text from the response is available
|
package/dist/index.cjs
CHANGED
|
@@ -92,7 +92,7 @@ var WebClient = class {
|
|
|
92
92
|
const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
|
|
93
93
|
method: "POST",
|
|
94
94
|
headers: {
|
|
95
|
-
"
|
|
95
|
+
"Sim-Api-Key": `${this.config.key}`,
|
|
96
96
|
"Content-Type": "application/json",
|
|
97
97
|
"Accept": "application/json"
|
|
98
98
|
},
|
|
@@ -148,30 +148,36 @@ var WebClient = class {
|
|
|
148
148
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
149
149
|
};
|
|
150
150
|
ws.onmessage = async (event) => {
|
|
151
|
-
var _a, _b, _c, _d, _e, _f, _g;
|
|
151
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
152
152
|
try {
|
|
153
|
-
const
|
|
154
|
-
switch (
|
|
153
|
+
const message = JSON.parse(event.data);
|
|
154
|
+
switch (message.type) {
|
|
155
155
|
case "session.configured":
|
|
156
|
-
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
156
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
|
|
157
157
|
break;
|
|
158
158
|
case "audio.response.start":
|
|
159
159
|
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
160
160
|
break;
|
|
161
161
|
case "audio.response.append":
|
|
162
|
-
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers,
|
|
162
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
|
|
163
163
|
break;
|
|
164
164
|
case "audio.response.done":
|
|
165
165
|
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
166
166
|
break;
|
|
167
|
+
case "transcript.input.part":
|
|
168
|
+
(_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
|
|
169
|
+
break;
|
|
167
170
|
case "transcript.input":
|
|
168
|
-
(
|
|
171
|
+
(_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
|
|
172
|
+
break;
|
|
173
|
+
case "transcript.response.part":
|
|
174
|
+
(_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
|
|
169
175
|
break;
|
|
170
176
|
case "transcript.response":
|
|
171
|
-
(
|
|
177
|
+
(_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
|
|
172
178
|
break;
|
|
173
179
|
case "technical.error":
|
|
174
|
-
(
|
|
180
|
+
(_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
|
|
175
181
|
break;
|
|
176
182
|
default:
|
|
177
183
|
break;
|
|
@@ -276,15 +282,7 @@ var WebClient = class {
|
|
|
276
282
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
277
283
|
ws.send(JSON.stringify({
|
|
278
284
|
type: "connection.initiate",
|
|
279
|
-
data: {
|
|
280
|
-
subscription: eventSubs,
|
|
281
|
-
settings: {
|
|
282
|
-
audio: true,
|
|
283
|
-
voice: "alloy",
|
|
284
|
-
temperature: 0.8,
|
|
285
|
-
instructions: ""
|
|
286
|
-
}
|
|
287
|
-
}
|
|
285
|
+
data: { subscription: eventSubs }
|
|
288
286
|
}));
|
|
289
287
|
};
|
|
290
288
|
ws.onmessage = (event) => {
|
|
@@ -303,8 +301,9 @@ var WebClient = class {
|
|
|
303
301
|
});
|
|
304
302
|
}
|
|
305
303
|
send(type, data = null) {
|
|
304
|
+
var _a;
|
|
306
305
|
if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
|
|
307
|
-
throw new Error(
|
|
306
|
+
throw new Error(`Stream is not opened. State=${(_a = this.ws) == null ? void 0 : _a.readyState}`);
|
|
308
307
|
}
|
|
309
308
|
this.ws.send(JSON.stringify({ type, ...data && { data } }));
|
|
310
309
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Available server-side LLM types
|
|
3
3
|
*/
|
|
4
|
-
type LlmType = "openai";
|
|
4
|
+
type LlmType = "openai" | "gemini";
|
|
5
5
|
/**
|
|
6
6
|
* Configuration required to initiate a connection with the stream server:
|
|
7
7
|
*
|
|
@@ -27,7 +27,6 @@ interface WebClientConfig {
|
|
|
27
27
|
streamingEndpoint: string;
|
|
28
28
|
key: string;
|
|
29
29
|
llm: LlmType;
|
|
30
|
-
model: string;
|
|
31
30
|
}
|
|
32
31
|
/**
|
|
33
32
|
* System settings influencing the model behavior:
|
|
@@ -40,17 +39,24 @@ interface SessionConfig {
|
|
|
40
39
|
instructions?: string;
|
|
41
40
|
maxResponseToken?: number;
|
|
42
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Snapshot of the current settings run by the model.
|
|
44
|
+
*/
|
|
45
|
+
interface SessionSettings {
|
|
46
|
+
tools: any[];
|
|
47
|
+
audio: boolean;
|
|
48
|
+
voice: string;
|
|
49
|
+
vad: {
|
|
50
|
+
threshold: number;
|
|
51
|
+
silenceMs: number;
|
|
52
|
+
};
|
|
53
|
+
temperature: number;
|
|
54
|
+
instructions: string;
|
|
55
|
+
maxResponseToken: number;
|
|
56
|
+
}
|
|
43
57
|
/**
|
|
44
58
|
* Callback functions to catch all the propagated server events.
|
|
45
59
|
*
|
|
46
|
-
* @onStreamClosed the streaming session (web socket) shut down
|
|
47
|
-
* @onSessionConfigured received in response to a session settings update from the client
|
|
48
|
-
* @onAudioResponseStart the LLM service is about to respond with streaming audio data
|
|
49
|
-
* @onAudioResponseChunk a new chunk of response audio data was received
|
|
50
|
-
* @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
|
|
51
|
-
* @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
|
|
52
|
-
* @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
|
|
53
|
-
* @onTechnicalError any technical issue encountered during the stream
|
|
54
60
|
*
|
|
55
61
|
* @remarks
|
|
56
62
|
* Un-assigned callbacks will not cause exceptions by this client when events are received from the server
|
|
@@ -61,13 +67,71 @@ interface SessionConfig {
|
|
|
61
67
|
* - In a text exchange, they hold the actual text messages of the conversation
|
|
62
68
|
*/
|
|
63
69
|
interface EventHandlers {
|
|
70
|
+
/**
|
|
71
|
+
* @param reason provided by the server to explain stream closure.
|
|
72
|
+
*/
|
|
64
73
|
onStreamClosed: (reason: string) => void;
|
|
65
|
-
|
|
74
|
+
/**
|
|
75
|
+
* Acknowledgment by the server of a settings update,
|
|
76
|
+
* following a "session.configure" request through configureSession()
|
|
77
|
+
*
|
|
78
|
+
* @param settings the updated settings currently in effect in the model.
|
|
79
|
+
*/
|
|
80
|
+
onSessionConfigured?: (settings: SessionSettings) => void;
|
|
81
|
+
/**
|
|
82
|
+
* Fired by the server when the Model is starting to stream an audio response.
|
|
83
|
+
*/
|
|
66
84
|
onAudioResponseStart?: () => void;
|
|
85
|
+
/**
|
|
86
|
+
* New chunk of audio data from the ongoing Model audio response.
|
|
87
|
+
*
|
|
88
|
+
* @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
|
|
89
|
+
*/
|
|
67
90
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
91
|
+
/**
|
|
92
|
+
* Fired by the server when the Model is finished streaming an audio response.
|
|
93
|
+
*/
|
|
68
94
|
onAudioResponseEnd?: () => void;
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Fired after a client text or audio input.
|
|
97
|
+
* This event occurs when the full text input is finalised.
|
|
98
|
+
*
|
|
99
|
+
* @param text either a copy of the text input, or the transcript of the audio input.
|
|
100
|
+
*/
|
|
101
|
+
onTranscriptInput?: (text: string) => void;
|
|
102
|
+
/**
|
|
103
|
+
* Fired after a client text or audio input.
|
|
104
|
+
* This event occurs as soon as a new section of text, part of the complete input, is available.
|
|
105
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
|
|
106
|
+
*
|
|
107
|
+
* @param text the next piece of text available, part of a whole input message.
|
|
108
|
+
* @param final is true for the last event containing the final piece of text to complete the input.
|
|
109
|
+
*/
|
|
110
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
111
|
+
/**
|
|
112
|
+
* Fired when a model response is available, either text or audio.
|
|
113
|
+
* This event can happen in parallel with the onAudioResponseChunk events streaming
|
|
114
|
+
* the corresponding audio response and before onAudioResponseEnd is received.
|
|
115
|
+
*
|
|
116
|
+
* @param text either the model's text response in a text conversation, or the transcript of its audio
|
|
117
|
+
* response in a voice conversation.
|
|
118
|
+
*/
|
|
119
|
+
onTranscriptResponse?: (text: string) => void;
|
|
120
|
+
/**
|
|
121
|
+
* Fired when a model response is available, either text or audio.
|
|
122
|
+
* This event occurs as soon as a new section of text, part of the complete response, is available.
|
|
123
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
|
|
124
|
+
*
|
|
125
|
+
* @param text the next piece of text available, part of a whole response message: either the model's text response
|
|
126
|
+
* in a text conversation, or the transcript of its audio response in a voice conversation.
|
|
127
|
+
* @param final is true for the last event containing the final piece of text to complete the response.
|
|
128
|
+
*/
|
|
129
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
130
|
+
/**
|
|
131
|
+
* Fired when the server encountered an error of any kind, either functional or technical.
|
|
132
|
+
*
|
|
133
|
+
* @param error description of the error that occurred.
|
|
134
|
+
*/
|
|
71
135
|
onTechnicalError?: (error: string) => void;
|
|
72
136
|
}
|
|
73
137
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Available server-side LLM types
|
|
3
3
|
*/
|
|
4
|
-
type LlmType = "openai";
|
|
4
|
+
type LlmType = "openai" | "gemini";
|
|
5
5
|
/**
|
|
6
6
|
* Configuration required to initiate a connection with the stream server:
|
|
7
7
|
*
|
|
@@ -27,7 +27,6 @@ interface WebClientConfig {
|
|
|
27
27
|
streamingEndpoint: string;
|
|
28
28
|
key: string;
|
|
29
29
|
llm: LlmType;
|
|
30
|
-
model: string;
|
|
31
30
|
}
|
|
32
31
|
/**
|
|
33
32
|
* System settings influencing the model behavior:
|
|
@@ -40,17 +39,24 @@ interface SessionConfig {
|
|
|
40
39
|
instructions?: string;
|
|
41
40
|
maxResponseToken?: number;
|
|
42
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Snapshot of the current settings run by the model.
|
|
44
|
+
*/
|
|
45
|
+
interface SessionSettings {
|
|
46
|
+
tools: any[];
|
|
47
|
+
audio: boolean;
|
|
48
|
+
voice: string;
|
|
49
|
+
vad: {
|
|
50
|
+
threshold: number;
|
|
51
|
+
silenceMs: number;
|
|
52
|
+
};
|
|
53
|
+
temperature: number;
|
|
54
|
+
instructions: string;
|
|
55
|
+
maxResponseToken: number;
|
|
56
|
+
}
|
|
43
57
|
/**
|
|
44
58
|
* Callback functions to catch all the propagated server events.
|
|
45
59
|
*
|
|
46
|
-
* @onStreamClosed the streaming session (web socket) shut down
|
|
47
|
-
* @onSessionConfigured received in response to a session settings update from the client
|
|
48
|
-
* @onAudioResponseStart the LLM service is about to respond with streaming audio data
|
|
49
|
-
* @onAudioResponseChunk a new chunk of response audio data was received
|
|
50
|
-
* @onAudioResponseEnd the model has finished responding. Audio response has been entirely streamed
|
|
51
|
-
* @onTranscriptInput either a copy of a text input, or the transcript of an audio input sent by the client
|
|
52
|
-
* @onTranscriptResponse either a text response (to a text input) or the transcript of an audio response
|
|
53
|
-
* @onTechnicalError any technical issue encountered during the stream
|
|
54
60
|
*
|
|
55
61
|
* @remarks
|
|
56
62
|
* Un-assigned callbacks will not cause exceptions by this client when events are received from the server
|
|
@@ -61,13 +67,71 @@ interface SessionConfig {
|
|
|
61
67
|
* - In a text exchange, they hold the actual text messages of the conversation
|
|
62
68
|
*/
|
|
63
69
|
interface EventHandlers {
|
|
70
|
+
/**
|
|
71
|
+
* @param reason provided by the server to explain stream closure.
|
|
72
|
+
*/
|
|
64
73
|
onStreamClosed: (reason: string) => void;
|
|
65
|
-
|
|
74
|
+
/**
|
|
75
|
+
* Acknowledgment by the server of a settings update,
|
|
76
|
+
* following a "session.configure" request through configureSession()
|
|
77
|
+
*
|
|
78
|
+
* @param settings the updated settings currently in effect in the model.
|
|
79
|
+
*/
|
|
80
|
+
onSessionConfigured?: (settings: SessionSettings) => void;
|
|
81
|
+
/**
|
|
82
|
+
* Fired by the server when the Model is starting to stream an audio response.
|
|
83
|
+
*/
|
|
66
84
|
onAudioResponseStart?: () => void;
|
|
85
|
+
/**
|
|
86
|
+
* New chunk of audio data from the ongoing Model audio response.
|
|
87
|
+
*
|
|
88
|
+
* @param audioChunk audio data in base 64 PCM 16, 24k Hertz.
|
|
89
|
+
*/
|
|
67
90
|
onAudioResponseChunk?: (audioChunk: string) => void;
|
|
91
|
+
/**
|
|
92
|
+
* Fired by the server when the Model is finished streaming an audio response.
|
|
93
|
+
*/
|
|
68
94
|
onAudioResponseEnd?: () => void;
|
|
69
|
-
|
|
70
|
-
|
|
95
|
+
/**
|
|
96
|
+
* Fired after a client text or audio input.
|
|
97
|
+
* This event occurs when the full text input is finalised.
|
|
98
|
+
*
|
|
99
|
+
* @param text either a copy of the text input, or the transcript of the audio input.
|
|
100
|
+
*/
|
|
101
|
+
onTranscriptInput?: (text: string) => void;
|
|
102
|
+
/**
|
|
103
|
+
* Fired after a client text or audio input.
|
|
104
|
+
* This event occurs as soon as a new section of text, part of the complete input, is available.
|
|
105
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptInput.
|
|
106
|
+
*
|
|
107
|
+
* @param text the next piece of text available, part of a whole input message.
|
|
108
|
+
* @param final is true for the last event containing the final piece of text to complete the input.
|
|
109
|
+
*/
|
|
110
|
+
onTranscriptInputPart?: (text: string, final: boolean) => void;
|
|
111
|
+
/**
|
|
112
|
+
* Fired when a model response is available, either text or audio.
|
|
113
|
+
* This event can happen in parallel with the onAudioResponseChunk events streaming
|
|
114
|
+
* the corresponding audio response and before onAudioResponseEnd is received.
|
|
115
|
+
*
|
|
116
|
+
* @param text either the model's text response in a text conversation, or the transcript of its audio
|
|
117
|
+
* response in a voice conversation.
|
|
118
|
+
*/
|
|
119
|
+
onTranscriptResponse?: (text: string) => void;
|
|
120
|
+
/**
|
|
121
|
+
* Fired when a model response is available, either text or audio.
|
|
122
|
+
* This event occurs as soon as a new section of text, part of the complete response, is available.
|
|
123
|
+
* The concatenated text from these events adds up to the same text finally received through onTranscriptResponse.
|
|
124
|
+
*
|
|
125
|
+
* @param text the next piece of text available, part of a whole response message: either the model's text response
|
|
126
|
+
* in a text conversation, or the transcript of its audio response in a voice conversation.
|
|
127
|
+
* @param final is true for the last event containing the final piece of text to complete the response.
|
|
128
|
+
*/
|
|
129
|
+
onTranscriptResponsePart?: (text: string, final: boolean) => void;
|
|
130
|
+
/**
|
|
131
|
+
* Fired when the server encountered an error of any kind, either functional or technical.
|
|
132
|
+
*
|
|
133
|
+
* @param error description of the error that occurred.
|
|
134
|
+
*/
|
|
71
135
|
onTechnicalError?: (error: string) => void;
|
|
72
136
|
}
|
|
73
137
|
/**
|
package/dist/index.js
CHANGED
|
@@ -66,7 +66,7 @@ var WebClient = class {
|
|
|
66
66
|
const response = await fetch(`${this.config.sessionEndpoint}/end_session`, {
|
|
67
67
|
method: "POST",
|
|
68
68
|
headers: {
|
|
69
|
-
"
|
|
69
|
+
"Sim-Api-Key": `${this.config.key}`,
|
|
70
70
|
"Content-Type": "application/json",
|
|
71
71
|
"Accept": "application/json"
|
|
72
72
|
},
|
|
@@ -122,30 +122,36 @@ var WebClient = class {
|
|
|
122
122
|
handlers.onStreamClosed(`WebSocket closed by peer: ${event.reason}`);
|
|
123
123
|
};
|
|
124
124
|
ws.onmessage = async (event) => {
|
|
125
|
-
var _a, _b, _c, _d, _e, _f, _g;
|
|
125
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
126
126
|
try {
|
|
127
|
-
const
|
|
128
|
-
switch (
|
|
127
|
+
const message = JSON.parse(event.data);
|
|
128
|
+
switch (message.type) {
|
|
129
129
|
case "session.configured":
|
|
130
|
-
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, data);
|
|
130
|
+
(_a = handlers.onSessionConfigured) == null ? void 0 : _a.call(handlers, message.data);
|
|
131
131
|
break;
|
|
132
132
|
case "audio.response.start":
|
|
133
133
|
(_b = handlers.onAudioResponseStart) == null ? void 0 : _b.call(handlers);
|
|
134
134
|
break;
|
|
135
135
|
case "audio.response.append":
|
|
136
|
-
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers,
|
|
136
|
+
(_c = handlers.onAudioResponseChunk) == null ? void 0 : _c.call(handlers, message.data.audioData);
|
|
137
137
|
break;
|
|
138
138
|
case "audio.response.done":
|
|
139
139
|
(_d = handlers.onAudioResponseEnd) == null ? void 0 : _d.call(handlers);
|
|
140
140
|
break;
|
|
141
|
+
case "transcript.input.part":
|
|
142
|
+
(_e = handlers.onTranscriptInputPart) == null ? void 0 : _e.call(handlers, message.data.text, message.data.final);
|
|
143
|
+
break;
|
|
141
144
|
case "transcript.input":
|
|
142
|
-
(
|
|
145
|
+
(_f = handlers.onTranscriptInput) == null ? void 0 : _f.call(handlers, message.data.text);
|
|
146
|
+
break;
|
|
147
|
+
case "transcript.response.part":
|
|
148
|
+
(_g = handlers.onTranscriptResponsePart) == null ? void 0 : _g.call(handlers, message.data.text, message.data.final);
|
|
143
149
|
break;
|
|
144
150
|
case "transcript.response":
|
|
145
|
-
(
|
|
151
|
+
(_h = handlers.onTranscriptResponse) == null ? void 0 : _h.call(handlers, message.data.text);
|
|
146
152
|
break;
|
|
147
153
|
case "technical.error":
|
|
148
|
-
(
|
|
154
|
+
(_i = handlers.onTechnicalError) == null ? void 0 : _i.call(handlers, message.data.error);
|
|
149
155
|
break;
|
|
150
156
|
default:
|
|
151
157
|
break;
|
|
@@ -250,15 +256,7 @@ var WebClient = class {
|
|
|
250
256
|
const eventSubs = audio ? [0 /* Text */, 1 /* Audio */] : [0 /* Text */];
|
|
251
257
|
ws.send(JSON.stringify({
|
|
252
258
|
type: "connection.initiate",
|
|
253
|
-
data: {
|
|
254
|
-
subscription: eventSubs,
|
|
255
|
-
settings: {
|
|
256
|
-
audio: true,
|
|
257
|
-
voice: "alloy",
|
|
258
|
-
temperature: 0.8,
|
|
259
|
-
instructions: ""
|
|
260
|
-
}
|
|
261
|
-
}
|
|
259
|
+
data: { subscription: eventSubs }
|
|
262
260
|
}));
|
|
263
261
|
};
|
|
264
262
|
ws.onmessage = (event) => {
|
|
@@ -277,8 +275,9 @@ var WebClient = class {
|
|
|
277
275
|
});
|
|
278
276
|
}
|
|
279
277
|
send(type, data = null) {
|
|
278
|
+
var _a;
|
|
280
279
|
if (!this.ws || this.ws.readyState != WebSocket.OPEN) {
|
|
281
|
-
throw new Error(
|
|
280
|
+
throw new Error(`Stream is not opened. State=${(_a = this.ws) == null ? void 0 : _a.readyState}`);
|
|
282
281
|
}
|
|
283
282
|
this.ws.send(JSON.stringify({ type, ...data && { data } }));
|
|
284
283
|
}
|