@yak-io/javascript 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -0
- package/dist/client.d.ts +24 -2
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +84 -4
- package/dist/embed.d.ts +50 -9
- package/dist/embed.d.ts.map +1 -1
- package/dist/embed.js +243 -70
- package/dist/index.d.ts +10 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -6
- package/dist/server/createYakHandler.d.ts.map +1 -1
- package/dist/server/index.d.ts +6 -6
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +1 -1
- package/dist/server/sources.d.ts +1 -1
- package/dist/tool-name.d.ts +10 -0
- package/dist/tool-name.d.ts.map +1 -0
- package/dist/tool-name.js +24 -0
- package/dist/types/config.d.ts +1 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/messaging.d.ts +55 -2
- package/dist/types/messaging.d.ts.map +1 -1
- package/dist/voice-machine.d.ts +69 -0
- package/dist/voice-machine.d.ts.map +1 -0
- package/dist/voice-machine.js +163 -0
- package/dist/voice-session.d.ts +102 -0
- package/dist/voice-session.d.ts.map +1 -0
- package/dist/voice-session.js +530 -0
- package/package.json +4 -2
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Prefix placed in front of the hex hash in every generated tool id. */
const TOOL_NAME_PREFIX = "yt_";
|
|
2
|
+
/**
 * djb2-style string hash reduced to an unsigned 32-bit integer.
 *
 * Starts from the seed 5381 and, for every UTF-16 code unit of the input,
 * multiplies the running value by 33 and adds the code unit, wrapping to
 * 32 bits after each step. The same string always yields the same number.
 *
 * @param str - Text to hash.
 * @returns An integer in the range 0..2^32-1 (5381 for the empty string).
 */
function hash32(str) {
    let acc = 5381;
    let index = 0;
    while (index < str.length) {
        // acc * 33 stays far below 2^53, so the product is exact; `>>> 0`
        // then wraps it to uint32 exactly as `(acc << 5) + acc` would.
        acc = (acc * 33 + str.charCodeAt(index)) >>> 0;
        index += 1;
    }
    return acc;
}
|
|
13
|
+
/**
 * Derives the decorated tool id for a host tool name.
 *
 * The id is the tool-name prefix followed by the name's 32-bit hash written
 * as lowercase hexadecimal, left-padded with zeros to eight characters
 * (`yt_<8-char-hex-hash>`).
 *
 * Because the hash is deterministic, chat and voice always derive the same id
 * for the same tool name — this is what lets the mint route, the iframe, and
 * the SDK all agree on which decorated id maps back to which host tool name.
 *
 * @param originalName - Tool name as registered by the host application.
 * @returns The decorated id, e.g. `yt_0002b606` for the name `"a"`.
 */
export function generateToolId(originalName) {
    const hexDigest = hash32(originalName).toString(16);
    const paddedDigest = hexDigest.padStart(8, "0");
    return TOOL_NAME_PREFIX + paddedDigest;
}
|
package/dist/types/config.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { SchemaSource, ToolManifest } from "./tools.js";
|
|
2
1
|
import type { RouteManifest } from "./routes.js";
|
|
2
|
+
import type { SchemaSource, ToolManifest } from "./tools.js";
|
|
3
3
|
/**
|
|
4
4
|
* Combined configuration for the chatbot including routes, tools, and schema sources
|
|
5
5
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/types/config.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/types/config.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE7D;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG;IACvB,qBAAqB;IACrB,MAAM,EAAE,aAAa,CAAC;IACtB,mDAAmD;IACnD,KAAK,CAAC,EAAE,YAAY,CAAC;IACrB,wFAAwF;IACxF,aAAa,CAAC,EAAE,YAAY,EAAE,CAAC;CAChC,CAAC;AAEF;;;GAGG;AACH,MAAM,MAAM,kBAAkB,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,GAAG,UAAU,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { SchemaSource, ToolManifest, GraphQLRequest, RESTRequest } from "./tools.js";
|
|
2
|
-
import type { RouteManifest } from "./routes.js";
|
|
3
1
|
import type { EmbedProtocolVersion } from "../version.js";
|
|
2
|
+
import type { RouteManifest } from "./routes.js";
|
|
3
|
+
import type { GraphQLRequest, RESTRequest, SchemaSource, ToolManifest } from "./tools.js";
|
|
4
4
|
/**
|
|
5
5
|
* Color values for a single mode (light or dark).
|
|
6
6
|
* Applied as CSS variables within the chat UI.
|
|
@@ -61,6 +61,21 @@ export type ChatOptions = {
|
|
|
61
61
|
/** Disable the restart session button in the header */
|
|
62
62
|
disableRestartButton?: boolean;
|
|
63
63
|
};
|
|
64
|
+
/**
 * Identity of the end user, used for server-side conversation persistence.
 *
 * The integrator's backend computes an HMAC-SHA256 of `id` keyed with the
 * application's apiSecret and hands `{ id, hash }` to the widget. When the
 * hash verifies, conversations are stored server-side and the widget shows a
 * history pane so end-users can resume past chats. When no identity is
 * supplied, the widget operates in anonymous mode (no persistence).
 */
export type UserIdentity = {
    /** Stable user id from the integrator's system; protected by the HMAC. */
    id: string;
    /** Hex-encoded HMAC-SHA256(apiSecret, id), computed on the integrator's backend. */
    hash: string;
};
|
|
64
79
|
/**
|
|
65
80
|
* Messages sent FROM the host application TO the iframe
|
|
66
81
|
*/
|
|
@@ -79,6 +94,24 @@ export type IframeMessageFromHost = {
|
|
|
79
94
|
options?: ChatOptions;
|
|
80
95
|
/** Logging enabled flag from host (for cross-origin sync) */
|
|
81
96
|
loggingEnabled?: boolean;
|
|
97
|
+
/**
|
|
98
|
+
* Signed end-user identity. Optional metadata used to link
|
|
99
|
+
* conversations back to the integrator's user records.
|
|
100
|
+
*/
|
|
101
|
+
user?: UserIdentity;
|
|
102
|
+
/**
|
|
103
|
+
* Signed session token previously minted by the server and stored in
|
|
104
|
+
* the host SDK's localStorage. When absent, the iframe will mint a
|
|
105
|
+
* fresh one and `yak:session` it back so we can persist it.
|
|
106
|
+
*/
|
|
107
|
+
sessionToken?: string;
|
|
108
|
+
/**
|
|
109
|
+
* Pointer to the active conversation in the form
|
|
110
|
+
* `<conversationId>.<savedAt>`. Persisted in the host SDK's
|
|
111
|
+
* localStorage and shipped back into the iframe so the embed can
|
|
112
|
+
* restore that specific thread on mount.
|
|
113
|
+
*/
|
|
114
|
+
conversationPointer?: string;
|
|
82
115
|
};
|
|
83
116
|
} | {
|
|
84
117
|
type: "yak:tool_result";
|
|
@@ -145,6 +178,26 @@ export type IframeMessageToHost = {
|
|
|
145
178
|
payload: {
|
|
146
179
|
path: string;
|
|
147
180
|
};
|
|
181
|
+
} | {
|
|
182
|
+
/**
|
|
183
|
+
* Iframe → host: a freshly-minted or refreshed session token. The host
|
|
184
|
+
* SDK persists this in `localStorage` so future widget loads can pass
|
|
185
|
+
* it straight back via the next `yak:config`.
|
|
186
|
+
*/
|
|
187
|
+
type: "yak:session";
|
|
188
|
+
payload: {
|
|
189
|
+
sessionToken: string;
|
|
190
|
+
issuedAt: number;
|
|
191
|
+
};
|
|
192
|
+
} | {
|
|
193
|
+
/**
|
|
194
|
+
* Iframe → host: persist (or clear) the active-conversation pointer.
|
|
195
|
+
* `null` means "drop the stored key" (restart, stale, or invalid).
|
|
196
|
+
*/
|
|
197
|
+
type: "yak:conversation";
|
|
198
|
+
payload: {
|
|
199
|
+
pointer: string | null;
|
|
200
|
+
};
|
|
148
201
|
} | {
|
|
149
202
|
type: "yak:close";
|
|
150
203
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"messaging.d.ts","sourceRoot":"","sources":["../../src/types/messaging.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"messaging.d.ts","sourceRoot":"","sources":["../../src/types/messaging.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1F;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG;IACxB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,mDAAmD;IACnD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,2CAA2C;IAC3C,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,mCAAmC;IACnC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,qCAAqC;IACrC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,oCAAoC;IACpC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,oDAAoD;IACpD,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B,CAAC;AAEF;;;;GAIG;AACH,MAAM,MAAM,cAAc,GACtB,UAAU,GACV,YAAY,GACZ,WAAW,GACX,aAAa,GACb,cAAc,GACd,aAAa,GACb,eAAe,GACf,cAAc,CAAC;AAEnB;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG;IAClB,gIAAgI;IAChI,QAAQ,CAAC,EAAE,cAAc,CAAC;IAC1B,yEAAyE;IACzE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,QAAQ,CAAC;IACxC,uFAAuF;IACvF,WAAW,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAC;IACnC,iIAAiI;IACjI,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,qGAAqG;IACrG,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,kGAAkG;IAClG,IAAI,CAAC,EAAE,WAAW,CAAC;CACpB,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF;;;GAGG;AACH,MAAM,MAAM,WAAW,GAAG;IACxB,uDAAuD;IACvD,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,MAAM,YAAY,GAAG;IACzB,mEAAmE;IACnE,EAAE,EAAE,MAAM,CAAC;IACX,4EAA4E;IAC5E,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,qBAAqB,GAC7B;IACE,IAAI,EAAE,YAAY,CAAC;IACnB,OAAO,EAAE;QACP,kDAAkD;QAClD,OAAO,CAAC,EAAE,oBAAoB,CAAC;QAC/B,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,CAAC,EAAE,KAAK,CAAC;QACd,YAAY,CAAC,EAAE,YAAY,CAAC;QAC5B,aAAa,CAAC,EAAE,aAAa,CAAC;QAC9B,kDAAkD;QAClD,aAAa,CAAC,EAAE,YAAY,EAAE,CAAC;QAC/B,iCAAiC;QACjC,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,6DAA6D;QAC7D,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB;;;WAGG;QACH,IAAI,CAAC,EAAE,YAAY,CAAC;QACpB;;;;WAIG;QACH,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB;;;;;WAKG;QACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;KA
C9B,CAAC;CACH,GACD;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,IAAI,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAA;CAAE,GAC/E;IAAE,IAAI,EAAE,iBAAiB,CAAC;IAAC,OAAO,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,KAAK,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GAC9E;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,WAAW,CAAA;CAAE,GAClD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,OAAO,EAAE;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,GACrB;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,OAAO,EAAE;QAAE,UAAU,EAAE,OAAO,CAAA;KAAE,CAAA;CAAE,CAAC;AAE/D;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAC3B;IACE,IAAI,EAAE,WAAW,CAAC;IAClB,mDAAmD;IACnD,OAAO,CAAC,EAAE;QAAE,OAAO,EAAE,oBAAoB,CAAA;KAAE,CAAC;CAC7C,GACD;IACE,IAAI,EAAE,eAAe,CAAC;IACtB,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,OAAO,CAAC;KACf,CAAC;CACH,GACD;IACE,IAAI,EAAE,yBAAyB,CAAC;IAChC,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,cAAc,CAAC;KACzB,CAAC;CACH,GACD;IACE,IAAI,EAAE,sBAAsB,CAAC;IAC7B,OAAO,EAAE;QACP,EAAE,EAAE,MAAM,CAAC;QACX,UAAU,EAAE,MAAM,CAAC;QACnB,OAAO,EAAE,WAAW,CAAC;KACtB,CAAC;CACH,GACD;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,GACnD;IACE;;;;OAIG;IACH,IAAI,EAAE,aAAa,CAAC;IACpB,OAAO,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;CACrD,GACD;IACE;;;OAGG;IACH,IAAI,EAAE,kBAAkB,CAAC;IACzB,OAAO,EAAE;QAAE,OAAO,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,CAAC;CACrC,GACD;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC;AAE1B;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,qBAAqB,GAAG,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure state machine for a single voice session.
|
|
3
|
+
*
|
|
4
|
+
* The reducer has no DOM or WebRTC dependencies — it can be unit-tested by
|
|
5
|
+
* driving events through `voiceReducer` and checking the resulting state.
|
|
6
|
+
*
|
|
7
|
+
* The companion `handleRealtimeMessage` parses an OpenAI Realtime data-channel
|
|
8
|
+
* message and dispatches reducer events plus side effects (tool dispatch,
|
|
9
|
+
* sending follow-up events back over the data channel). Side effects are
|
|
10
|
+
* delegated to the injected `RealtimeMessageContext` so the function is
|
|
11
|
+
* testable with a plain in-memory mock.
|
|
12
|
+
*/
|
|
13
|
+
/** The states a single voice session can be in. */
export type VoiceState = "idle" | "connecting" | "listening" | "thinking" | "speaking" | "error";
|
|
14
|
+
/**
 * Events accepted by `voiceReducer`, discriminated by `type`.
 * Only the `error` event carries a payload (`message`).
 */
export type VoiceEvent =
    | { type: "start" }
    | { type: "connected" }
    | { type: "speech_started" }
    | { type: "speech_stopped" }
    | { type: "audio_delta" }
    | { type: "audio_stopped" }
    | { type: "stop" }
    | { type: "error"; message: string };
|
|
32
|
+
/** Snapshot of the voice state machine. */
export interface VoiceMachine {
    /** State the session is currently in. */
    state: VoiceState;
    /** Optional error text accompanying the state. */
    errorMessage?: string;
}
|
|
36
|
+
/** Starting machine value for a voice session. */
export declare const INITIAL_VOICE_MACHINE: VoiceMachine;
|
|
37
|
+
/** Pure reducer: returns the machine that results from applying `event` to `machine`. */
export declare function voiceReducer(machine: VoiceMachine, event: VoiceEvent): VoiceMachine;
|
|
38
|
+
/**
 * Token usage reported with a single `response.done` event of OpenAI's
 * Realtime API.
 *
 * The SDK accumulates these across the session and ships the totals to the
 * `session-event` stop endpoint so billing has the dimensions it needs.
 * Every field is optional.
 */
export interface RealtimeResponseUsage {
    /** All input tokens of the response, text and audio combined. */
    inputTokens?: number;
    /** Input tokens served from cache; a subset of `inputTokens`. */
    cachedInputTokens?: number;
    /** All output tokens of the response, text and audio combined. */
    outputTokens?: number;
    /** Audio part of `inputTokens`. */
    audioInputTokens?: number;
    /** Audio part of `outputTokens`. */
    audioOutputTokens?: number;
    /** Text part of `inputTokens`. */
    textInputTokens?: number;
    /** Text part of `outputTokens`. */
    textOutputTokens?: number;
}
|
|
59
|
+
/**
 * Side-effect hooks injected into `handleRealtimeMessage`, so the handler
 * can be exercised with a plain in-memory mock.
 */
export interface RealtimeMessageContext {
    /** Feed a state-machine event to the session. */
    send: (event: VoiceEvent) => void;
    /** Send a follow-up payload back over the data channel. */
    sendData: (payload: unknown) => void;
    /** Run the tool called `name` with `args` and resolve with its result. */
    dispatchToolCall: (name: string, args: unknown) => Promise<unknown>;
    /** Whether the function call with this call id has already been dispatched. */
    isDispatched: (callId: string) => boolean;
    /** Record the call id as dispatched. */
    markDispatched: (callId: string) => void;
    /** Optional: forward a per-response usage payload to the session for accumulation. */
    recordUsage?: (usage: RealtimeResponseUsage) => void;
}
|
|
68
|
+
/** Parses one raw Realtime data-channel message and reacts to it through `ctx`. */
export declare function handleRealtimeMessage(raw: string, ctx: RealtimeMessageContext): Promise<void>;
|
|
69
|
+
//# sourceMappingURL=voice-machine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voice-machine.d.ts","sourceRoot":"","sources":["../src/voice-machine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,YAAY,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,GAAG,OAAO,CAAC;AAEjG,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,OAAO,CAAA;CAAE,GACjB;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,GACrB;IAAE,IAAI,EAAE,gBAAgB,CAAA;CAAE,GAC1B;IAAE,IAAI,EAAE,gBAAgB,CAAA;CAAE,GAC1B;IAAE,IAAI,EAAE,aAAa,CAAA;CAAE,GACvB;IAAE,IAAI,EAAE,eAAe,CAAA;CAAE,GACzB;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAChB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvC,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,UAAU,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,eAAO,MAAM,qBAAqB,EAAE,YAAgC,CAAC;AAErE,wBAAgB,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,GAAG,YAAY,CA4BnF;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACpC,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,qEAAqE;IACrE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,sDAAsD;IACtD,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,mDAAmD;IACnD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,qDAAqD;IACrD,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,CAAC;IAClC,QAAQ,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;IACrC,gBAAgB,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IACpE,YAAY,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;IAC1C,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;IACzC,4EAA4E;IAC5E,WAAW,CAAC,EAAE,CAAC,KAAK,EAAE,qBAAqB,KAAK,IAAI,CAAC;CACtD;AAmID,wBAAsB,qBAAqB,CACzC,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,sBAAsB,GAC1B,OAAO,CAAC,IAAI,CAAC,CAmCf"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure state machine for a single voice session.
|
|
3
|
+
*
|
|
4
|
+
* The reducer has no DOM or WebRTC dependencies — it can be unit-tested by
|
|
5
|
+
* driving events through `voiceReducer` and checking the resulting state.
|
|
6
|
+
*
|
|
7
|
+
* The companion `handleRealtimeMessage` parses an OpenAI Realtime data-channel
|
|
8
|
+
* message and dispatches reducer events plus side effects (tool dispatch,
|
|
9
|
+
* sending follow-up events back over the data channel). Side effects are
|
|
10
|
+
* delegated to the injected `RealtimeMessageContext` so the function is
|
|
11
|
+
* testable with a plain in-memory mock.
|
|
12
|
+
*/
|
|
13
|
+
export const INITIAL_VOICE_MACHINE = { state: "idle" };
|
|
14
|
+
/**
 * Pure transition function of the voice state machine.
 *
 * Transitions (any event not listed for the current state returns the very
 * same `machine` object, unchanged):
 * - `start`: idle → connecting
 * - `connected`: connecting → listening
 * - `speech_started`: any state except idle/error → listening
 * - `speech_stopped`: listening → thinking
 * - `audio_delta`: thinking or speaking → speaking
 * - `audio_stopped`: speaking → listening
 * - `stop`: any state → idle
 * - `error`: any state → error, carrying `event.message` as `errorMessage`
 *
 * A performed transition always yields a fresh object, so a previously stored
 * `errorMessage` is not carried over.
 *
 * @param machine - Current machine snapshot.
 * @param event - Event to apply.
 * @returns The next machine snapshot.
 */
export function voiceReducer(machine, event) {
    // Evaluated lazily: `stop` and `error` never look at the current state.
    const isIn = (...states) => states.includes(machine.state);
    const enter = (state) => ({ state });
    const kind = event.type;
    if (kind === "start") {
        return isIn("idle") ? enter("connecting") : machine;
    }
    if (kind === "connected") {
        return isIn("connecting") ? enter("listening") : machine;
    }
    if (kind === "speech_started") {
        return isIn("idle", "error") ? machine : enter("listening");
    }
    if (kind === "speech_stopped") {
        return isIn("listening") ? enter("thinking") : machine;
    }
    if (kind === "audio_delta") {
        return isIn("thinking", "speaking") ? enter("speaking") : machine;
    }
    if (kind === "audio_stopped") {
        return isIn("speaking") ? enter("listening") : machine;
    }
    if (kind === "stop") {
        return enter("idle");
    }
    if (kind === "error") {
        return { state: "error", errorMessage: event.message };
    }
    // Unknown event types leave the machine untouched.
    return machine;
}
|
|
44
|
+
/**
 * Tells whether a `response.output` entry is a function (tool) call.
 *
 * Entries that are `null` or `undefined` are reported as non-calls instead of
 * raising a TypeError, so one malformed entry cannot abort the processing of
 * the remaining entries.
 *
 * @param item - One entry of a response's `output` list.
 * @returns `true` only when the entry's `type` is `"function_call"`.
 */
function isFunctionCall(item) {
    return item?.type === "function_call";
}
|
|
47
|
+
/**
 * Decodes the JSON-encoded `arguments` string of a function call.
 *
 * Never throws. An empty or missing string, malformed JSON, and JSON that
 * decodes to something other than an object (`null`, a number, a string, a
 * boolean) all fall back to an empty argument object, so the tool handler is
 * always handed an object. Arrays are objects and pass through unchanged.
 *
 * @param raw - JSON text of the call's arguments; may be empty or undefined.
 * @returns The decoded argument object, or `{}` when none could be decoded.
 */
function parseToolArgs(raw) {
    if (!raw) {
        return {};
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return {};
    }
    // JSON.parse also accepts bare literals such as `null` or `42`; those are
    // not usable as tool arguments.
    return parsed !== null && typeof parsed === "object" ? parsed : {};
}
|
|
57
|
+
/**
 * Executes one function call requested by the model and reports the outcome
 * back over the data channel.
 *
 * Calls without a `call_id` or `name`, and calls whose id is already marked
 * as dispatched, are ignored. Otherwise the id is marked before the tool
 * runs, the tool's result (or `null` when it is nullish) is JSON-encoded, and
 * a failure is encoded as `{ error }` instead of being rethrown. Two payloads
 * are then sent: the `function_call_output` item and a `response.create`
 * request.
 *
 * @param call - Function-call entry taken from a response's output.
 * @param ctx - Injected side-effect hooks.
 */
async function dispatchFunctionCall(call, ctx) {
    const { call_id: callId, name: toolName } = call;
    if (!callId || !toolName || ctx.isDispatched(callId)) {
        return;
    }
    // Mark first so the same call id is not dispatched again while the tool runs.
    ctx.markDispatched(callId);
    const toolArgs = parseToolArgs(call.arguments);
    let output;
    try {
        const toolResult = await ctx.dispatchToolCall(toolName, toolArgs);
        output = JSON.stringify(toolResult ?? null);
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : "Tool execution failed";
        output = JSON.stringify({ error: reason });
    }
    const outputItem = { type: "function_call_output", call_id: callId, output };
    ctx.sendData({ type: "conversation.item.create", item: outputItem });
    ctx.sendData({ type: "response.create" });
}
|
|
82
|
+
/**
 * Converts the snake_case `usage` object of a `response.done` event into the
 * SDK's camelCase usage record.
 *
 * Only fields whose value is a number are copied; anything else is skipped.
 *
 * @param raw - The response's `usage` value; may be missing.
 * @returns The usage record, or `null` when `raw` is falsy or contains no
 * numeric field at all.
 */
function extractUsage(raw) {
    if (!raw) {
        return null;
    }
    const totals = {};
    // Copies source[sourceKey] to totals[targetKey] when it is a number.
    const copyCount = (source, sourceKey, targetKey) => {
        const count = source[sourceKey];
        if (typeof count === "number") {
            totals[targetKey] = count;
        }
    };
    copyCount(raw, "input_tokens", "inputTokens");
    copyCount(raw, "output_tokens", "outputTokens");
    const inputDetails = raw.input_token_details;
    if (inputDetails) {
        copyCount(inputDetails, "cached_tokens", "cachedInputTokens");
        copyCount(inputDetails, "audio_tokens", "audioInputTokens");
        copyCount(inputDetails, "text_tokens", "textInputTokens");
    }
    const outputDetails = raw.output_token_details;
    if (outputDetails) {
        copyCount(outputDetails, "audio_tokens", "audioOutputTokens");
        copyCount(outputDetails, "text_tokens", "textOutputTokens");
    }
    return Object.keys(totals).length === 0 ? null : totals;
}
|
|
113
|
+
/**
 * Handles the `response` payload of a `response.done` event.
 *
 * First forwards the extracted token usage to `ctx.recordUsage` when both
 * exist, then dispatches every function call found in `response.output`,
 * one after another in list order.
 *
 * @param response - The event's `response` object; may be missing.
 * @param ctx - Injected side-effect hooks.
 */
async function handleResponseDone(response, ctx) {
    const usage = extractUsage(response?.usage);
    if (usage && ctx.recordUsage) {
        try {
            ctx.recordUsage(usage);
        }
        catch {
            // recordUsage is best-effort; never let it break the session loop.
        }
    }
    const outputItems = response?.output ?? [];
    const functionCalls = outputItems.filter(isFunctionCall);
    for (const functionCall of functionCalls) {
        // Sequential on purpose: each call is awaited before the next starts.
        await dispatchFunctionCall(functionCall, ctx);
    }
}
|
|
128
|
+
/**
 * Parses one raw data-channel message and reacts to it through `ctx`.
 *
 * Mapping of message `type` to action:
 * - `input_audio_buffer.speech_started` → sends a `speech_started` event
 * - `input_audio_buffer.speech_stopped` → sends a `speech_stopped` event
 * - `response.output_audio_transcript.delta` / `response.audio_transcript.delta`
 *   → sends an `audio_delta` event
 * - `output_audio_buffer.stopped` / `response.output_audio_buffer.stopped`
 *   → sends an `audio_stopped` event
 * - `response.done` → records usage and dispatches the response's tool calls
 * - `error` → sends an `error` event with the server's message, or
 *   "Voice session error" when none is present
 *
 * Anything else is ignored: unknown types, text that is not valid JSON, and
 * valid JSON that is not an object (for example the literal `null`, which
 * would otherwise make the `type` lookup throw and reject the promise).
 *
 * @param raw - Message text as received from the data channel.
 * @param ctx - Injected side-effect hooks.
 * @returns A promise that settles once the message has been handled.
 */
export async function handleRealtimeMessage(raw, ctx) {
    let message;
    try {
        message = JSON.parse(raw);
    }
    catch {
        return;
    }
    // JSON.parse accepts bare literals (`null`, `42`, `"text"`); they are not events.
    if (message === null || typeof message !== "object") {
        return;
    }
    switch (message.type) {
        case "input_audio_buffer.speech_started":
            ctx.send({ type: "speech_started" });
            return;
        case "input_audio_buffer.speech_stopped":
            ctx.send({ type: "speech_stopped" });
            return;
        // Both spellings of the transcript-delta event are accepted.
        case "response.output_audio_transcript.delta":
        case "response.audio_transcript.delta":
            ctx.send({ type: "audio_delta" });
            return;
        // Both spellings of the playback-stopped event are accepted.
        case "output_audio_buffer.stopped":
        case "response.output_audio_buffer.stopped":
            ctx.send({ type: "audio_stopped" });
            return;
        case "response.done":
            await handleResponseDone(message.response, ctx);
            return;
        case "error":
            ctx.send({
                type: "error",
                message: message.error?.message ?? "Voice session error",
            });
            return;
        default:
            return;
    }
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { ChatConfig, ChatConfigProvider } from "./types/config.js";
|
|
2
|
+
import type { GraphQLSchemaHandler, RESTSchemaHandler, ToolCallHandler } from "./types/tools.js";
|
|
3
|
+
import { type VoiceMachine } from "./voice-machine.js";
|
|
4
|
+
declare global {
    interface Window {
        /**
         * Internal development flag. When set, the API origin defaults to
         * http://localhost:3001.
         */
        __YAK_INTERNAL_DEV__?: boolean;
    }
}
|
|
9
|
+
/** Callback that receives a `VoiceMachine` snapshot; registered through `YakVoiceSession.onStateChange`. */
export type VoiceStateListener = (machine: VoiceMachine) => void;
|
|
10
|
+
/** Options accepted by the `YakVoiceSession` constructor. */
export interface YakVoiceSessionConfig {
    appId: string;
    /** Handler for tool calls; has the same shape as `YakClientConfig.onToolCall`. */
    onToolCall?: ToolCallHandler;
    onGraphQLSchemaCall?: GraphQLSchemaHandler;
    onRESTSchemaCall?: RESTSchemaHandler;
    onRedirect?: (path: string) => void;
    /**
     * Static chat config (routes + tools). It is sent to the mint endpoint so
     * the LLM knows which tools are available. Provide this OR `getConfig`.
     */
    chatConfig?: ChatConfig;
    /**
     * Async provider for the chat config, called on every session start —
     * useful when tools/routes depend on the current page or user. Wins over
     * `chatConfig` when both are provided.
     */
    getConfig?: ChatConfigProvider;
    /**
     * Overrides the API origin. The default is chat.yak.io (prod),
     * chat.yak.supply (dev stage), or http://localhost:3001 when
     * `__YAK_INTERNAL_DEV__` is set.
     */
    apiOrigin?: string;
}
|
|
34
|
+
/**
 * A voice session handle.
 *
 * Public surface: `updateConfig`, `getState`, `getApiOrigin`,
 * `onStateChange`, `start`, `stop` and `destroy`. Everything else is private.
 */
export declare class YakVoiceSession {
    private config;
    private machine;
    private resources;
    private dispatchedCallIds;
    private listeners;
    private pageHideHandler;
    /** Token totals for the session, accumulated from each `response.done` event. */
    private usage;
    /**
     * Reverse lookup from hashed tool id (what OpenAI calls back with) to the
     * original host tool name (what `onToolCall` expects). Populated on every
     * `start()` from the resolved chat config.
     */
    private toolNameById;
    constructor(config: YakVoiceSessionConfig);
    /**
     * The API origin, resolved lazily on each access. The internal-dev flag
     * (`window.__YAK_INTERNAL_DEV__`) is often set in a `useEffect` that
     * fires after this session is constructed, so resolving at construction
     * would bake in the production URL.
     */
    private get apiOrigin();
    /** Updates the mutable config fields (handlers, getConfig). */
    updateConfig(patch: Partial<YakVoiceSessionConfig>): void;
    getState(): VoiceMachine;
    /**
     * Returns the current API origin (`chat.yak.io`, `chat.yak.supply`, or
     * `http://localhost:3001` when `__YAK_INTERNAL_DEV__` is set). Useful for
     * building URLs to static assets like the brand logo.
     */
    getApiOrigin(): string;
    /**
     * Registers a state listener.
     *
     * @returns A function — presumably the unsubscribe callback; confirm
     * against the implementation.
     */
    onStateChange(listener: VoiceStateListener): () => void;
    /**
     * Begins a voice session. Should be invoked from a user gesture (button
     * click) so `getUserMedia` and audio playback both have transient
     * activation.
     */
    start(): Promise<void>;
    /** Stops the session and tears down all resources. */
    stop(): Promise<void>;
    /** Tears down everything and removes listeners. Call once before discarding the instance. */
    destroy(): void;
    private buildMessageContext;
    private accumulateUsage;
    private sendOverDataChannel;
    private routeToolCall;
    /**
     * Relays an MCP tool call to the server, which holds the org's
     * credentials and executes against the remote MCP server. The browser
     * only ever passes through the tool name, args, and the opaque result.
     */
    private execMcpTool;
    private mintToken;
    /**
     * Decorates the host's tool manifest + schema sources with hashed ids and
     * populates `this.toolNameById` for reverse lookup. Mirrors the
     * decoration the chat-ui iframe applies before sending tools to
     * `/api/chat`.
     */
    private buildDecoratedManifest;
    private exchangeSdp;
    private buildStopEventBody;
    private postSessionEvent;
    private teardown;
    private failWith;
    private dispatch;
    private safeExtractPageContext;
    private attachPageHide;
}
|
|
102
|
+
//# sourceMappingURL=voice-session.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voice-session.d.ts","sourceRoot":"","sources":["../src/voice-session.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAExE,OAAO,KAAK,EAEV,oBAAoB,EAEpB,iBAAiB,EACjB,eAAe,EAEhB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAKL,KAAK,YAAY,EAElB,MAAM,oBAAoB,CAAC;AAK5B,OAAO,CAAC,MAAM,CAAC;IACb,UAAU,MAAM;QACd,oBAAoB,CAAC,EAAE,OAAO,CAAC;KAChC;CACF;AAkBD,MAAM,MAAM,kBAAkB,GAAG,CAAC,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AAEjE,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,qEAAqE;IACrE,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B,mBAAmB,CAAC,EAAE,oBAAoB,CAAC;IAC3C,gBAAgB,CAAC,EAAE,iBAAiB,CAAC;IACrC,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACpC;;;OAGG;IACH,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,kBAAkB,CAAC;IAC/B;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AA2DD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAwB;IACtC,OAAO,CAAC,OAAO,CAAuC;IACtD,OAAO,CAAC,SAAS,CAAqC;IACtD,OAAO,CAAC,iBAAiB,CAAqB;IAC9C,OAAO,CAAC,SAAS,CAAiC;IAClD,OAAO,CAAC,eAAe,CAA6B;IACpD,6EAA6E;IAC7E,OAAO,CAAC,KAAK,CAAkC;IAC/C;;;;OAIG;IACH,OAAO,CAAC,YAAY,CAA6B;gBAErC,MAAM,EAAE,qBAAqB;IAKzC;;;;;OAKG;IACH,OAAO,KAAK,SAAS,GAEpB;IAED,0DAA0D;IACnD,YAAY,CAAC,KAAK,EAAE,OAAO,CAAC,qBAAqB,CAAC,GAAG,IAAI;IAIzD,QAAQ,IAAI,YAAY;IAI/B;;;;OAIG;IACI,YAAY,IAAI,MAAM;IAItB,aAAa,CAAC,QAAQ,EAAE,kBAAkB,GAAG,MAAM,IAAI;IAO9D;;;OAGG;IACU,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IA+InC,oDAAoD;IACvC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMlC,2FAA2F;IACpF,OAAO,IAAI,IAAI;IAWtB,OAAO,CAAC,mBAAmB;IAa3B,OAAO,CAAC,eAAe;IAqBvB,OAAO,CAAC,mBAAmB;YAeb,aAAa;IAsC3B;;;;OAIG;YACW,WAAW;YAwBX,SAAS;IAuBvB;;;;OAIG;IACH,OAAO,CAAC,sBAAsB;YA4BhB,WAAW;IAmBzB,OAAO,CAAC,kBAAkB;YAWZ,gBAAgB;YA2BhB,QAAQ;YAuCR,QAAQ;IAMtB,OAAO,CAAC,QAAQ;IAahB,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,cAAc;CAevB"}
|