@yak-io/javascript 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +24 -2
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +84 -4
- package/dist/embed.d.ts +50 -9
- package/dist/embed.d.ts.map +1 -1
- package/dist/embed.js +243 -70
- package/dist/index.d.ts +10 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -6
- package/dist/server/createYakHandler.d.ts.map +1 -1
- package/dist/server/index.d.ts +6 -6
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +1 -1
- package/dist/server/sources.d.ts +1 -1
- package/dist/tool-name.d.ts +10 -0
- package/dist/tool-name.d.ts.map +1 -0
- package/dist/tool-name.js +24 -0
- package/dist/types/config.d.ts +1 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/messaging.d.ts +55 -2
- package/dist/types/messaging.d.ts.map +1 -1
- package/dist/voice-machine.d.ts +69 -0
- package/dist/voice-machine.d.ts.map +1 -0
- package/dist/voice-machine.js +163 -0
- package/dist/voice-session.d.ts +102 -0
- package/dist/voice-session.d.ts.map +1 -0
- package/dist/voice-session.js +530 -0
- package/package.json +4 -2
|
@@ -0,0 +1,530 @@
|
|
|
1
|
+
import { logger } from "./logger.js";
|
|
2
|
+
import { extractPageContext } from "./page-context.js";
|
|
3
|
+
import { generateToolId } from "./tool-name.js";
|
|
4
|
+
import { handleRealtimeMessage, INITIAL_VOICE_MACHINE, voiceReducer, } from "./voice-machine.js";
|
|
5
|
+
// Model identifier sent as the `model` query parameter when posting the SDP offer (see exchangeSdp).
const DEFAULT_REALTIME_MODEL = "gpt-realtime";
|
|
6
|
+
// Endpoint the browser POSTs its WebRTC SDP offer to; the response body is used as the SDP answer.
const REALTIME_CALLS_URL = "https://api.openai.com/v1/realtime/calls";
|
|
7
|
+
/**
 * Pick the yak API origin from the page's hostname.
 *
 * - `yak.supply` and its subdomains map to `https://chat.yak.supply`.
 * - `localhost` / `127.0.0.1` map to `http://localhost:3001`, but only when
 *   `window.__YAK_INTERNAL_DEV__` has been defined.
 * - Everything else, including environments with no `window` or a `window`
 *   that has no `location`, maps to `https://chat.yak.io`.
 *
 * @returns {string} The origin (scheme + host) with no trailing slash.
 */
function getApiOrigin() {
    const PRODUCTION_ORIGIN = "https://chat.yak.io";
    if (typeof window === "undefined") {
        return PRODUCTION_ORIGIN;
    }
    // Some non-browser hosts expose a `window` global without `location`;
    // treat that as "unknown host" rather than throwing a TypeError.
    const hostname = window.location?.hostname ?? "";
    if (hostname === "yak.supply" || hostname.endsWith(".yak.supply")) {
        return "https://chat.yak.supply";
    }
    const isLoopback = hostname === "localhost" || hostname === "127.0.0.1";
    if (isLoopback && typeof window.__YAK_INTERNAL_DEV__ !== "undefined") {
        return "http://localhost:3001";
    }
    return PRODUCTION_ORIGIN;
}
|
|
20
|
+
/**
 * Shared "nothing allocated" resource set. Sessions point `resources` at this
 * object initially and again on teardown, so it is frozen: an accidental
 * in-place write would otherwise leak into every session that references it.
 */
const EMPTY_RESOURCES = Object.freeze({
    pc: null,
    dataChannel: null,
    micStream: null,
    audioElement: null,
    voiceSessionId: null,
});
|
|
27
|
+
/**
 * Create a new usage tally with every counter at zero.
 *
 * A fresh object is returned on each call so callers can mutate their tally
 * without affecting anyone else's.
 *
 * @returns {object} Zeroed token counters plus `responseCount`.
 */
function emptyUsage() {
    const counterNames = [
        "inputTokens",
        "cachedInputTokens",
        "outputTokens",
        "audioInputTokens",
        "audioOutputTokens",
        "textInputTokens",
        "textOutputTokens",
        "responseCount",
    ];
    return Object.fromEntries(counterNames.map((counter) => [counter, 0]));
}
|
|
39
|
+
/**
 * Browser-side controller for one realtime voice conversation.
 *
 * `start()` resolves the host's chat config, mints an ephemeral token from the
 * yak API, opens the microphone, and negotiates a WebRTC peer connection whose
 * `oai-events` data channel carries realtime messages. Incoming messages are
 * handed to `handleRealtimeMessage`, which calls back into this class to
 * dispatch state events, send payloads, route tool calls and record usage.
 * `stop()` / `destroy()` release every resource and report a `stop` event.
 */
export class YakVoiceSession {
    /** Host-supplied configuration (app id, handlers, optional `getConfig`). */
    config;
    /** Current state-machine snapshot; replaced by `dispatch` on every transition. */
    machine = INITIAL_VOICE_MACHINE;
    /** Live WebRTC/media handles for the active session (or the empty sentinel). */
    resources = EMPTY_RESOURCES;
    /** Call ids already handed to a tool handler; reset on teardown. */
    dispatchedCallIds = new Set();
    /** State-change subscribers registered through `onStateChange`. */
    listeners = new Set();
    /** The installed `pagehide` listener, or null when none is attached. */
    pageHideHandler = null;
    /** Per-session token totals, accumulated from each `response.done` event. */
    usage = emptyUsage();
    /**
     * Reverse map: hashed tool id (what OpenAI calls back with) → original host
     * tool name (what `onToolCall` expects). Populated on every `start()` from
     * the resolved chat config.
     */
    toolNameById = new Map();
    /**
     * Bumped by every `start()` attempt and every `teardown()`. An in-flight
     * `start()` compares its captured value after each await to detect that it
     * was stopped, destroyed, failed or replaced while it was suspended.
     */
    startGeneration = 0;
    /**
     * @param {object} config Session configuration. Fields read by this class:
     *   `appId`, `apiOrigin`, `chatConfig`, `getConfig`, `onRedirect`,
     *   `onToolCall`, `onGraphQLSchemaCall`, `onRESTSchemaCall`.
     */
    constructor(config) {
        this.config = config;
        this.attachPageHide();
    }
    /**
     * Resolve the API origin lazily on each call. The internal-dev flag
     * (`window.__YAK_INTERNAL_DEV__`) is often set in a `useEffect` that fires
     * after this session is constructed, so resolving at construction would
     * bake in the production URL.
     */
    get apiOrigin() {
        return this.config.apiOrigin ?? getApiOrigin();
    }
    /** Update mutable config fields (handlers, getConfig). */
    updateConfig(patch) {
        this.config = { ...this.config, ...patch };
    }
    /** @returns The current state-machine snapshot. */
    getState() {
        return this.machine;
    }
    /**
     * The current API origin (`chat.yak.io`, `chat.yak.supply`, or
     * `http://localhost:3001` when `__YAK_INTERNAL_DEV__` is set). Useful for
     * building URLs to static assets like the brand logo.
     */
    getApiOrigin() {
        return this.apiOrigin;
    }
    /**
     * Subscribe to state transitions.
     *
     * @param {Function} listener Called with the new machine snapshot.
     * @returns {Function} Unsubscribe callback.
     */
    onStateChange(listener) {
        this.listeners.add(listener);
        return () => {
            this.listeners.delete(listener);
        };
    }
    /**
     * Begin a voice session. Should be invoked from a user gesture (button
     * click) so `getUserMedia` and audio playback both have transient activation.
     *
     * No-op unless the machine is `idle`. If the session is torn down (via
     * `stop()`, `destroy()` or a failure) while this method is suspended on an
     * await, the attempt is abandoned quietly: no further network calls, no
     * peer connection, no `start` event, and any microphone stream it had just
     * acquired is released.
     */
    async start() {
        if (this.machine.state !== "idle")
            return;
        logger.debug("Voice: start() called");
        // Claim a generation for this attempt; teardown() bumps the counter,
        // which is how we learn the attempt is no longer wanted.
        this.startGeneration += 1;
        const generation = this.startGeneration;
        const isSuperseded = () => this.startGeneration !== generation;
        this.usage = emptyUsage();
        this.dispatch({ type: "start" });
        let chatConfig = this.config.chatConfig;
        if (this.config.getConfig) {
            try {
                chatConfig = await this.config.getConfig();
                logger.debug("Voice: getConfig() resolved", {
                    toolCount: chatConfig?.tools?.tools.length ?? 0,
                    routeCount: chatConfig?.routes?.routes.length ?? 0,
                    hasSchemas: !!chatConfig?.schemaSources?.length,
                });
            }
            catch (err) {
                // Best effort: fall back to the static chatConfig (if any).
                logger.warn("Voice: getConfig() failed", err);
            }
            if (isSuperseded())
                return;
        }
        else if (chatConfig) {
            logger.debug("Voice: using static chatConfig", {
                toolCount: chatConfig.tools?.tools.length ?? 0,
                routeCount: chatConfig.routes?.routes.length ?? 0,
                hasSchemas: !!chatConfig.schemaSources?.length,
            });
        }
        else {
            logger.debug("Voice: no chatConfig or getConfig — only built-in tools will be available");
        }
        // Decorate host tools with hash ids and build the reverse lookup so we
        // can map id-named tool calls back to the original host name when the
        // model invokes them. Mirrors the chat-ui iframe's decoration step.
        const decoratedManifest = this.buildDecoratedManifest(chatConfig);
        logger.debug("Voice: decorated tools", {
            ids: decoratedManifest.tools.map((t) => `${t.id}=${t.name}`),
        });
        const pageContext = this.safeExtractPageContext();
        logger.debug("Voice: page context extracted", {
            url: pageContext?.url,
            title: pageContext?.title,
            textLength: pageContext?.text?.length ?? 0,
        });
        let mint;
        try {
            logger.debug("Voice: requesting ephemeral token from mint endpoint");
            mint = await this.mintToken(chatConfig, decoratedManifest, pageContext);
            logger.debug("Voice: mint succeeded", {
                voiceSessionId: mint.voiceSessionId,
                expiresAt: mint.expiresAt,
            });
        }
        catch (err) {
            // A failure that arrives after the session was stopped is not an
            // error the user should see.
            if (isSuperseded())
                return;
            await this.failWith(err instanceof Error ? err.message : "Failed to start voice session");
            return;
        }
        if (isSuperseded())
            return;
        let micStream;
        try {
            logger.debug("Voice: requesting microphone access");
            micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
            logger.debug("Voice: microphone access granted");
        }
        catch (err) {
            if (isSuperseded())
                return;
            const name = err instanceof Error ? err.name : "";
            const message = name === "NotAllowedError" || name === "PermissionDeniedError"
                ? "Microphone permission was denied. Enable microphone access in your browser settings to use voice mode."
                : "Could not access microphone.";
            await this.failWith(message);
            return;
        }
        if (isSuperseded()) {
            // The stream is not yet tracked in `this.resources`, so teardown()
            // could not have released it; stop it here to turn the mic off.
            for (const track of micStream.getTracks()) {
                track.stop();
            }
            return;
        }
        const pc = new RTCPeerConnection();
        const audioElement = document.createElement("audio");
        audioElement.autoplay = true;
        audioElement.style.display = "none";
        document.body.appendChild(audioElement);
        pc.ontrack = (event) => {
            logger.debug("Voice: pc.ontrack received remote audio stream");
            if (event.streams[0]) {
                audioElement.srcObject = event.streams[0];
            }
        };
        pc.oniceconnectionstatechange = () => {
            const s = pc.iceConnectionState;
            logger.debug("Voice: ICE connection state →", s);
            if (s === "failed" || s === "disconnected") {
                void this.failWith(`WebRTC connection ${s}`);
            }
        };
        pc.onconnectionstatechange = () => {
            logger.debug("Voice: peer connection state →", pc.connectionState);
            if (pc.connectionState === "failed") {
                void this.failWith("WebRTC connection failed");
            }
        };
        for (const track of micStream.getAudioTracks()) {
            pc.addTrack(track, micStream);
        }
        const dataChannel = pc.createDataChannel("oai-events");
        dataChannel.onmessage = (event) => {
            // Only string frames are processed; anything else is ignored.
            const raw = typeof event.data === "string" ? event.data : "";
            if (!raw)
                return;
            logger.debug("Voice: ← data channel message", raw.slice(0, 200));
            void handleRealtimeMessage(raw, this.buildMessageContext());
        };
        dataChannel.onopen = () => {
            logger.debug("Voice: data channel opened");
            this.dispatch({ type: "connected" });
        };
        dataChannel.onclose = () => {
            logger.debug("Voice: data channel closed");
        };
        // From here on teardown() owns these handles.
        this.resources = {
            pc,
            dataChannel,
            micStream,
            audioElement,
            voiceSessionId: mint.voiceSessionId,
        };
        try {
            logger.debug("Voice: creating WebRTC offer");
            const offer = await pc.createOffer();
            await pc.setLocalDescription(offer);
            logger.debug("Voice: exchanging SDP with OpenAI Realtime");
            const answerSdp = await this.exchangeSdp(offer, mint.clientSecret);
            // teardown() has already closed `pc`; applying the answer would
            // only throw.
            if (isSuperseded())
                return;
            await pc.setRemoteDescription({ type: "answer", sdp: answerSdp });
            logger.debug("Voice: WebRTC negotiation complete");
        }
        catch (err) {
            if (isSuperseded())
                return;
            await this.failWith(err instanceof Error ? err.message : "Failed to negotiate voice connection");
            return;
        }
        if (isSuperseded())
            return;
        void this.postSessionEvent("start", mint.voiceSessionId, pageContext);
    }
    /** Stop the session and tear down all resources. */
    async stop() {
        logger.debug("Voice: stop() called");
        this.dispatch({ type: "stop" });
        await this.teardown();
    }
    /** Tear down everything and remove listeners. Call once before discarding the instance. */
    destroy() {
        // Fire-and-forget: teardown() catches and logs its own failures.
        void this.teardown();
        if (this.pageHideHandler) {
            window.removeEventListener("pagehide", this.pageHideHandler);
            this.pageHideHandler = null;
        }
        this.listeners.clear();
    }
    // ── Internals ───────────────────────────────────────────────────────────
    /**
     * Build the callback bundle passed to `handleRealtimeMessage`, binding each
     * callback to this session.
     */
    buildMessageContext() {
        return {
            send: (event) => this.dispatch(event),
            sendData: (payload) => this.sendOverDataChannel(payload),
            dispatchToolCall: (name, args) => this.routeToolCall(name, args),
            isDispatched: (id) => this.dispatchedCallIds.has(id),
            markDispatched: (id) => {
                this.dispatchedCallIds.add(id);
            },
            recordUsage: (usage) => this.accumulateUsage(usage),
        };
    }
    /**
     * Add one response's token usage to the session totals.
     *
     * `responseCount` always increases by one. Each token counter is added
     * only when the reported value is a finite number, so a missing field,
     * a non-number, `NaN` or `Infinity` cannot corrupt the running totals.
     *
     * @param {object} usage Per-response counters (any subset of the token fields).
     */
    accumulateUsage(usage) {
        this.usage.responseCount += 1;
        const tokenCounters = [
            "inputTokens",
            "cachedInputTokens",
            "outputTokens",
            "audioInputTokens",
            "audioOutputTokens",
            "textInputTokens",
            "textOutputTokens",
        ];
        for (const counter of tokenCounters) {
            const amount = usage?.[counter];
            if (Number.isFinite(amount)) {
                this.usage[counter] += amount;
            }
        }
    }
    /**
     * JSON-serialize `payload` and send it on the data channel. Payloads are
     * dropped (with a warning) when the channel is missing or not open, and
     * send/serialization errors are logged rather than thrown.
     */
    sendOverDataChannel(payload) {
        const channel = this.resources.dataChannel;
        if (!channel || channel.readyState !== "open") {
            logger.warn("Voice data channel not ready; dropping payload");
            return;
        }
        try {
            const serialized = JSON.stringify(payload);
            logger.debug("Voice: → data channel send", serialized.slice(0, 200));
            channel.send(serialized);
        }
        catch (err) {
            logger.warn("Failed to send on voice data channel", err);
        }
    }
    /**
     * Execute a tool call requested by the model and return its result.
     *
     * Resolution order: `mcp__*` → server relay; `redirect` → `onRedirect` or
     * browser navigation; `graphql_*` / `rest_*` → schema handlers when
     * configured; otherwise `onToolCall`.
     *
     * @param {string} idOrName Decorated tool id or a raw tool name.
     * @param {object} args Arguments supplied by the model.
     * @throws {Error} When `redirect` has no string `path`, when the default
     *   navigation target is not http(s), or when no handler is configured.
     */
    async routeToolCall(idOrName, args) {
        // The model calls us back using the decorated id (e.g. yt_abc12345).
        // Resolve it to the original host tool name; fall back to the raw value
        // (it might be `redirect` or some non-decorated name).
        const name = this.toolNameById.get(idOrName) ?? idOrName;
        logger.debug("Voice: tool call dispatched", { id: idOrName, name, args });
        // MCP tools execute server-side (the org token never reaches the browser).
        // The model calls back with the `mcp__…` name minted by the server; relay
        // it to the exec endpoint and feed the result back over the data channel.
        if (name.startsWith("mcp__")) {
            return await this.execMcpTool(name, args);
        }
        if (name === "redirect") {
            const path = args?.path;
            if (typeof path !== "string") {
                throw new Error("redirect tool requires a string `path` argument");
            }
            if (this.config.onRedirect) {
                this.config.onRedirect(path);
            }
            else if (typeof window !== "undefined") {
                // `path` is model output; never let it run script in the host
                // page through a `javascript:` (or similar) URL.
                if (!this.isSafeRedirectTarget(path)) {
                    throw new Error("redirect tool refused a non-http(s) `path` argument");
                }
                window.location.assign(path);
            }
            return { success: true, redirected: true, path };
        }
        if (name.startsWith("graphql_") && this.config.onGraphQLSchemaCall) {
            const schemaName = name.slice("graphql_".length);
            return await this.config.onGraphQLSchemaCall(schemaName, args);
        }
        if (name.startsWith("rest_") && this.config.onRESTSchemaCall) {
            const schemaName = name.slice("rest_".length);
            return await this.config.onRESTSchemaCall(schemaName, args);
        }
        if (this.config.onToolCall) {
            return await this.config.onToolCall(name, args);
        }
        throw new Error(`No handler configured for tool: ${name}`);
    }
    /**
     * Decide whether the default `redirect` fallback may navigate to `path`.
     *
     * The target is resolved against the current page URL and accepted only
     * when its scheme is `http:` or `https:`. Input that cannot be parsed at
     * all is let through so `location.assign` handles it as it did before;
     * scheme-bearing URLs such as `javascript:` always parse and are caught.
     *
     * @param {string} path Redirect target supplied by the model.
     * @returns {boolean} False when the target has a non-http(s) scheme.
     */
    isSafeRedirectTarget(path) {
        let protocol;
        try {
            protocol = new URL(path, window.location.href).protocol;
        }
        catch {
            return true;
        }
        return protocol === "http:" || protocol === "https:";
    }
    /**
     * Relay an MCP tool call to the server, which holds the org's credentials
     * and executes against the remote MCP server. The browser only ever passes
     * through the tool name, args, and the opaque result.
     *
     * Never throws: HTTP and network failures are returned as `{ error }`.
     */
    async execMcpTool(toolName, args) {
        try {
            const res = await fetch(`${this.apiOrigin}/api/voice/mcp-exec`, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify({
                    appId: this.config.appId,
                    toolName,
                    args: args ?? {},
                    pageContext: this.safeExtractPageContext(),
                }),
            });
            if (!res.ok) {
                // The error body may not be JSON; fall back to a status message.
                const body = (await res.json().catch(() => ({})));
                return { error: body.error ?? `MCP tool failed (${res.status})` };
            }
            const body = (await res.json());
            return body.result ?? {};
        }
        catch (err) {
            logger.warn("Voice: MCP tool relay failed", err);
            return { error: "The integration could not complete this request." };
        }
    }
    /**
     * Request an ephemeral realtime token from the yak API.
     *
     * @returns The parsed JSON response; `start()` reads `voiceSessionId`,
     *   `expiresAt` and `clientSecret` from it.
     * @throws {Error} On a non-2xx response, using the server's `error` field
     *   when present.
     */
    async mintToken(chatConfig, decoratedManifest, pageContext) {
        const res = await fetch(`${this.apiOrigin}/api/voice/realtime-token`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                appId: this.config.appId,
                pageContext,
                toolManifest: decoratedManifest,
                routeManifest: chatConfig?.routes,
                schemaSources: chatConfig?.schemaSources,
            }),
        });
        if (!res.ok) {
            const body = (await res.json().catch(() => ({})));
            throw new Error(body.error ?? `Mint failed (${res.status})`);
        }
        return (await res.json());
    }
    /**
     * Decorate the host's tool manifest + schema sources with hashed ids and
     * populate `this.toolNameById` for reverse lookup. Mirrors the decoration
     * the chat-ui iframe applies before sending tools to `/api/chat`.
     *
     * @returns {{ tools: object[] }} Host tools followed by schema tools.
     */
    buildDecoratedManifest(chatConfig) {
        this.toolNameById.clear();
        const decoratedHostTools = (chatConfig?.tools?.tools ?? []).map((t) => {
            const id = generateToolId(t.name);
            this.toolNameById.set(id, t.name);
            return { ...t, id };
        });
        const decoratedSchemaTools = (chatConfig?.schemaSources ?? []).map((source) => {
            // Any source type other than "graphql" is treated as REST.
            const name = source.type === "graphql" ? `graphql_${source.name}` : `rest_${source.name}`;
            const id = generateToolId(name);
            this.toolNameById.set(id, name);
            return {
                id,
                name,
                description: source.type === "graphql"
                    ? `Execute GraphQL operations against ${source.name}`
                    : `Make REST API calls to ${source.name}`,
            };
        });
        return { tools: [...decoratedHostTools, ...decoratedSchemaTools] };
    }
    /**
     * POST the local SDP offer to the realtime calls endpoint and return the
     * SDP answer text.
     *
     * @throws {Error} On a non-2xx response, including status and body text.
     */
    async exchangeSdp(offer, clientSecret) {
        const sdpResponse = await fetch(`${REALTIME_CALLS_URL}?model=${DEFAULT_REALTIME_MODEL}`, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${clientSecret}`,
                "Content-Type": "application/sdp",
            },
            body: offer.sdp,
        });
        if (!sdpResponse.ok) {
            const body = await sdpResponse.text().catch(() => "");
            throw new Error(`SDP exchange failed (${sdpResponse.status}): ${body}`);
        }
        return await sdpResponse.text();
    }
    /**
     * Build the `stop` event payload, including a snapshot copy of the
     * accumulated usage totals.
     */
    buildStopEventBody(voiceSessionId, pageContext) {
        return {
            appId: this.config.appId,
            voiceSessionId,
            event: "stop",
            clientTimestamp: Date.now(),
            pageContext,
            usage: { ...this.usage },
        };
    }
    /**
     * Report a session lifecycle event to the yak API. Failures are logged and
     * never thrown. `stop` events are sent with `keepalive` set.
     */
    async postSessionEvent(event, voiceSessionId, pageContext) {
        try {
            const body = event === "stop"
                ? this.buildStopEventBody(voiceSessionId, pageContext)
                : {
                    appId: this.config.appId,
                    voiceSessionId,
                    event,
                    clientTimestamp: Date.now(),
                    pageContext,
                };
            await fetch(`${this.apiOrigin}/api/voice/session-event`, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify(body),
                keepalive: event === "stop",
            });
        }
        catch (err) {
            logger.warn(`Failed to post voice.session.${event}`, err);
        }
    }
    /**
     * Release every resource of the current session and, when a session id was
     * recorded, post the `stop` event. Safe to call repeatedly: the resource
     * set is swapped for the empty sentinel first, so a second call finds
     * nothing to release.
     */
    async teardown() {
        // Invalidate any start() that is still suspended on an await.
        this.startGeneration += 1;
        const r = this.resources;
        this.resources = EMPTY_RESOURCES;
        this.dispatchedCallIds = new Set();
        try {
            r.dataChannel?.close();
        }
        catch (err) {
            logger.warn("Error closing data channel", err);
        }
        try {
            for (const track of r.micStream?.getTracks() ?? []) {
                track.stop();
            }
        }
        catch (err) {
            logger.warn("Error stopping mic tracks", err);
        }
        try {
            for (const sender of r.pc?.getSenders() ?? []) {
                sender.track?.stop();
            }
            r.pc?.close();
        }
        catch (err) {
            logger.warn("Error closing peer connection", err);
        }
        if (r.audioElement) {
            try {
                r.audioElement.srcObject = null;
                r.audioElement.remove();
            }
            catch (err) {
                logger.warn("Error removing audio element", err);
            }
        }
        if (r.voiceSessionId) {
            await this.postSessionEvent("stop", r.voiceSessionId, this.safeExtractPageContext());
        }
    }
    /** Log `message`, dispatch an `error` event carrying it, then tear down. */
    async failWith(message) {
        logger.warn("Voice session error:", message);
        this.dispatch({ type: "error", message });
        await this.teardown();
    }
    /**
     * Run `event` through the reducer and notify listeners when the machine
     * object changes. A listener that throws is logged and does not prevent
     * the remaining listeners from running.
     */
    dispatch(event) {
        const next = voiceReducer(this.machine, event);
        // The reducer returning the same object means "no transition".
        if (next === this.machine)
            return;
        this.machine = next;
        for (const listener of this.listeners) {
            try {
                listener(next);
            }
            catch (err) {
                logger.warn("Voice state listener threw", err);
            }
        }
    }
    /** Extract page context, returning `undefined` instead of throwing. */
    safeExtractPageContext() {
        try {
            return extractPageContext();
        }
        catch {
            return undefined;
        }
    }
    /**
     * Install a `pagehide` listener that reports a `stop` event via
     * `navigator.sendBeacon` while a session id is recorded. Does nothing when
     * there is no `window`.
     */
    attachPageHide() {
        if (typeof window === "undefined")
            return;
        this.pageHideHandler = () => {
            const r = this.resources;
            if (!r.voiceSessionId)
                return;
            const body = JSON.stringify(this.buildStopEventBody(r.voiceSessionId, undefined));
            if (navigator.sendBeacon) {
                navigator.sendBeacon(`${this.apiOrigin}/api/voice/session-event`, new Blob([body], { type: "application/json" }));
            }
        };
        window.addEventListener("pagehide", this.pageHideHandler);
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yak-io/javascript",
|
|
3
|
-
"version": "0.7.0",
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Core JavaScript SDK for embedding yak chatbot",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "SEE LICENSE IN LICENSE",
|
|
@@ -56,8 +56,10 @@
|
|
|
56
56
|
}
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
|
-
"@types/node": "^24.12.
|
|
59
|
+
"@types/node": "^24.12.4",
|
|
60
|
+
"jsdom": "^28.1.0",
|
|
60
61
|
"typescript": "^5.3.0",
|
|
62
|
+
"vitest": "^4.1.6",
|
|
61
63
|
"@repo/typescript-config": "0.0.0"
|
|
62
64
|
},
|
|
63
65
|
"scripts": {
|