@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +1639 -582
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +571 -30
- package/dist/index.d.ts +571 -30
- package/dist/index.js +1637 -583
- package/dist/index.js.map +1 -1
- package/package.json +37 -14
package/dist/index.cjs
CHANGED
|
@@ -1093,7 +1093,7 @@ var DOMScanner = class {
|
|
|
1093
1093
|
if (el.closest("[data-guidekit-ignore]")) return;
|
|
1094
1094
|
const style = window.getComputedStyle(el);
|
|
1095
1095
|
const position = style.position;
|
|
1096
|
-
const zIndex = parseInt(style.zIndex, 10);
|
|
1096
|
+
const zIndex = parseInt(style.zIndex, 10) || 0;
|
|
1097
1097
|
if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
|
|
1098
1098
|
const visible = isElementVisible(el);
|
|
1099
1099
|
if (!visible) return;
|
|
@@ -1750,7 +1750,9 @@ var ErrorCodes = {
|
|
|
1750
1750
|
// Content
|
|
1751
1751
|
CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
|
|
1752
1752
|
// Privacy
|
|
1753
|
-
PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
|
|
1753
|
+
PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
|
|
1754
|
+
// General
|
|
1755
|
+
UNKNOWN: "UNKNOWN"
|
|
1754
1756
|
};
|
|
1755
1757
|
var GuideKitError = class extends Error {
|
|
1756
1758
|
code;
|
|
@@ -1843,13 +1845,27 @@ function isGuideKitError(error) {
|
|
|
1843
1845
|
var DEFAULT_OPENAI_MODEL = "gpt-4o";
|
|
1844
1846
|
var DEFAULT_TIMEOUT_MS = 15e3;
|
|
1845
1847
|
var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
|
|
1848
|
+
function emptyUsage() {
|
|
1849
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
1850
|
+
}
|
|
1846
1851
|
var OpenAIAdapter = class {
|
|
1847
1852
|
apiKey;
|
|
1848
1853
|
model;
|
|
1854
|
+
/** Tracks whether the last extractChunks call emitted a done chunk. */
|
|
1855
|
+
lastExtractEmittedDone = false;
|
|
1856
|
+
/**
|
|
1857
|
+
* Token usage extracted from the most recent `parseResponse` call.
|
|
1858
|
+
* Updated as each SSE chunk is parsed.
|
|
1859
|
+
*/
|
|
1860
|
+
_lastUsage = emptyUsage();
|
|
1849
1861
|
constructor(config) {
|
|
1850
1862
|
this.apiKey = config.apiKey;
|
|
1851
1863
|
this.model = config.model ?? DEFAULT_OPENAI_MODEL;
|
|
1852
1864
|
}
|
|
1865
|
+
/** Token usage from the most recent parseResponse call. */
|
|
1866
|
+
get lastUsage() {
|
|
1867
|
+
return this._lastUsage;
|
|
1868
|
+
}
|
|
1853
1869
|
// -----------------------------------------------------------------------
|
|
1854
1870
|
// LLMProviderAdapter implementation
|
|
1855
1871
|
// -----------------------------------------------------------------------
|
|
@@ -1864,7 +1880,11 @@ var OpenAIAdapter = class {
|
|
|
1864
1880
|
function: {
|
|
1865
1881
|
name: tool.name,
|
|
1866
1882
|
description: tool.description,
|
|
1867
|
-
parameters:
|
|
1883
|
+
parameters: {
|
|
1884
|
+
type: "object",
|
|
1885
|
+
properties: { ...tool.parameters },
|
|
1886
|
+
required: tool.required ?? []
|
|
1887
|
+
}
|
|
1868
1888
|
}
|
|
1869
1889
|
}));
|
|
1870
1890
|
}
|
|
@@ -1886,11 +1906,17 @@ var OpenAIAdapter = class {
|
|
|
1886
1906
|
* prefixed by `data: `. The final line is `data: [DONE]`.
|
|
1887
1907
|
* Text content arrives in `choices[0].delta.content` and tool calls
|
|
1888
1908
|
* arrive in `choices[0].delta.tool_calls`.
|
|
1909
|
+
*
|
|
1910
|
+
* This method also:
|
|
1911
|
+
* - Detects content filtering and throws `ContentFilterError`.
|
|
1912
|
+
* - Tracks token usage (accessible via `lastUsage` after iteration).
|
|
1889
1913
|
*/
|
|
1890
1914
|
async *parseResponse(stream) {
|
|
1891
1915
|
const reader = stream.getReader();
|
|
1892
1916
|
const decoder = new TextDecoder();
|
|
1893
1917
|
let buffer = "";
|
|
1918
|
+
let doneEmitted = false;
|
|
1919
|
+
this._lastUsage = emptyUsage();
|
|
1894
1920
|
const pendingToolCalls = /* @__PURE__ */ new Map();
|
|
1895
1921
|
try {
|
|
1896
1922
|
while (true) {
|
|
@@ -1906,7 +1932,10 @@ var OpenAIAdapter = class {
|
|
|
1906
1932
|
if (jsonStr === "" || jsonStr === "[DONE]") {
|
|
1907
1933
|
if (jsonStr === "[DONE]") {
|
|
1908
1934
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
1909
|
-
|
|
1935
|
+
if (!doneEmitted) {
|
|
1936
|
+
doneEmitted = true;
|
|
1937
|
+
yield { text: "", done: true };
|
|
1938
|
+
}
|
|
1910
1939
|
}
|
|
1911
1940
|
continue;
|
|
1912
1941
|
}
|
|
@@ -1916,19 +1945,53 @@ var OpenAIAdapter = class {
|
|
|
1916
1945
|
} catch {
|
|
1917
1946
|
continue;
|
|
1918
1947
|
}
|
|
1919
|
-
|
|
1948
|
+
if (this.isContentFiltered(parsed)) {
|
|
1949
|
+
throw new ContentFilterError({
|
|
1950
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
1951
|
+
message: "Response was blocked by provider content safety filter.",
|
|
1952
|
+
provider: "openai",
|
|
1953
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
1954
|
+
});
|
|
1955
|
+
}
|
|
1956
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
1957
|
+
if (chunkUsage) {
|
|
1958
|
+
this._lastUsage = chunkUsage;
|
|
1959
|
+
}
|
|
1960
|
+
yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
|
|
1961
|
+
if (!doneEmitted && this.lastExtractEmittedDone) {
|
|
1962
|
+
doneEmitted = true;
|
|
1963
|
+
}
|
|
1920
1964
|
}
|
|
1921
1965
|
}
|
|
1922
1966
|
if (buffer.trim().startsWith("data:")) {
|
|
1923
1967
|
const jsonStr = buffer.trim().slice(5).trim();
|
|
1924
1968
|
if (jsonStr === "[DONE]") {
|
|
1925
1969
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
1926
|
-
|
|
1970
|
+
if (!doneEmitted) {
|
|
1971
|
+
doneEmitted = true;
|
|
1972
|
+
yield { text: "", done: true };
|
|
1973
|
+
}
|
|
1927
1974
|
} else if (jsonStr !== "") {
|
|
1928
1975
|
try {
|
|
1929
1976
|
const parsed = JSON.parse(jsonStr);
|
|
1930
|
-
|
|
1931
|
-
|
|
1977
|
+
if (this.isContentFiltered(parsed)) {
|
|
1978
|
+
throw new ContentFilterError({
|
|
1979
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
1980
|
+
message: "Response was blocked by provider content safety filter.",
|
|
1981
|
+
provider: "openai",
|
|
1982
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
1983
|
+
});
|
|
1984
|
+
}
|
|
1985
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
1986
|
+
if (chunkUsage) {
|
|
1987
|
+
this._lastUsage = chunkUsage;
|
|
1988
|
+
}
|
|
1989
|
+
yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
|
|
1990
|
+
if (!doneEmitted && this.lastExtractEmittedDone) {
|
|
1991
|
+
doneEmitted = true;
|
|
1992
|
+
}
|
|
1993
|
+
} catch (error) {
|
|
1994
|
+
if (error instanceof ContentFilterError) throw error;
|
|
1932
1995
|
}
|
|
1933
1996
|
}
|
|
1934
1997
|
}
|
|
@@ -1957,10 +2020,14 @@ var OpenAIAdapter = class {
|
|
|
1957
2020
|
* the raw Response object.
|
|
1958
2021
|
*/
|
|
1959
2022
|
async streamRequest(params) {
|
|
2023
|
+
const contentsArray = params.contents;
|
|
1960
2024
|
const messages = [
|
|
1961
2025
|
{ role: "system", content: params.systemPrompt },
|
|
1962
|
-
...
|
|
2026
|
+
...contentsArray
|
|
1963
2027
|
];
|
|
2028
|
+
if (params.userMessage) {
|
|
2029
|
+
messages.push({ role: "user", content: params.userMessage });
|
|
2030
|
+
}
|
|
1964
2031
|
const body = {
|
|
1965
2032
|
model: this.model,
|
|
1966
2033
|
messages,
|
|
@@ -2043,7 +2110,8 @@ var OpenAIAdapter = class {
|
|
|
2043
2110
|
* yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
|
|
2044
2111
|
* or when flushed.
|
|
2045
2112
|
*/
|
|
2046
|
-
*extractChunks(parsed, pendingToolCalls) {
|
|
2113
|
+
*extractChunks(parsed, pendingToolCalls, doneEmitted) {
|
|
2114
|
+
this.lastExtractEmittedDone = false;
|
|
2047
2115
|
const choices = parsed.choices;
|
|
2048
2116
|
if (!choices || choices.length === 0) return;
|
|
2049
2117
|
for (const choice of choices) {
|
|
@@ -2077,7 +2145,8 @@ var OpenAIAdapter = class {
|
|
|
2077
2145
|
if (finishReason === "tool_calls") {
|
|
2078
2146
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
2079
2147
|
}
|
|
2080
|
-
if (finishReason === "stop") {
|
|
2148
|
+
if (finishReason === "stop" && !doneEmitted && !this.lastExtractEmittedDone) {
|
|
2149
|
+
this.lastExtractEmittedDone = true;
|
|
2081
2150
|
yield { text: "", done: true };
|
|
2082
2151
|
}
|
|
2083
2152
|
}
|
|
@@ -2093,7 +2162,8 @@ var OpenAIAdapter = class {
|
|
|
2093
2162
|
let args = {};
|
|
2094
2163
|
try {
|
|
2095
2164
|
args = JSON.parse(tc.argumentsJson);
|
|
2096
|
-
} catch {
|
|
2165
|
+
} catch (_e) {
|
|
2166
|
+
console.warn("[GuideKit:LLM] Failed to parse tool call arguments:", tc.argumentsJson);
|
|
2097
2167
|
}
|
|
2098
2168
|
yield {
|
|
2099
2169
|
id: tc.id,
|
|
@@ -2196,16 +2266,26 @@ var DEFAULT_SAFETY_SETTINGS = [
|
|
|
2196
2266
|
{ category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_ONLY_HIGH" },
|
|
2197
2267
|
{ category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_ONLY_HIGH" }
|
|
2198
2268
|
];
|
|
2199
|
-
function
|
|
2269
|
+
function emptyUsage2() {
|
|
2200
2270
|
return { prompt: 0, completion: 0, total: 0 };
|
|
2201
2271
|
}
|
|
2202
2272
|
var GeminiAdapter = class {
|
|
2203
2273
|
apiKey;
|
|
2204
2274
|
model;
|
|
2275
|
+
/**
|
|
2276
|
+
* Token usage extracted from the most recent `parseResponse` call.
|
|
2277
|
+
* Updated as each SSE chunk is parsed; the final value reflects the
|
|
2278
|
+
* cumulative usage metadata sent by Gemini (typically in the last chunk).
|
|
2279
|
+
*/
|
|
2280
|
+
_lastUsage = emptyUsage2();
|
|
2205
2281
|
constructor(config) {
|
|
2206
2282
|
this.apiKey = config.apiKey;
|
|
2207
2283
|
this.model = config.model ?? DEFAULT_GEMINI_MODEL;
|
|
2208
2284
|
}
|
|
2285
|
+
/** Token usage from the most recent parseResponse call. */
|
|
2286
|
+
get lastUsage() {
|
|
2287
|
+
return this._lastUsage;
|
|
2288
|
+
}
|
|
2209
2289
|
// -----------------------------------------------------------------------
|
|
2210
2290
|
// LLMProviderAdapter implementation
|
|
2211
2291
|
// -----------------------------------------------------------------------
|
|
@@ -2220,7 +2300,11 @@ var GeminiAdapter = class {
|
|
|
2220
2300
|
functionDeclarations: tools.map((tool) => ({
|
|
2221
2301
|
name: tool.name,
|
|
2222
2302
|
description: tool.description,
|
|
2223
|
-
parameters:
|
|
2303
|
+
parameters: {
|
|
2304
|
+
type: "object",
|
|
2305
|
+
properties: { ...tool.parameters },
|
|
2306
|
+
required: tool.required ?? []
|
|
2307
|
+
}
|
|
2224
2308
|
}))
|
|
2225
2309
|
}
|
|
2226
2310
|
];
|
|
@@ -2242,11 +2326,16 @@ var GeminiAdapter = class {
|
|
|
2242
2326
|
* The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
|
|
2243
2327
|
* as a JSON object prefixed by `data: `. We parse line-by-line, extract
|
|
2244
2328
|
* text parts and function call parts, and yield the appropriate types.
|
|
2329
|
+
*
|
|
2330
|
+
* This method also:
|
|
2331
|
+
* - Detects content filtering and throws `ContentFilterError`.
|
|
2332
|
+
* - Tracks token usage (accessible via `lastUsage` after iteration).
|
|
2245
2333
|
*/
|
|
2246
2334
|
async *parseResponse(stream) {
|
|
2247
2335
|
const reader = stream.getReader();
|
|
2248
2336
|
const decoder = new TextDecoder();
|
|
2249
2337
|
let buffer = "";
|
|
2338
|
+
this._lastUsage = emptyUsage2();
|
|
2250
2339
|
try {
|
|
2251
2340
|
while (true) {
|
|
2252
2341
|
const { done, value } = await reader.read();
|
|
@@ -2265,6 +2354,18 @@ var GeminiAdapter = class {
|
|
|
2265
2354
|
} catch {
|
|
2266
2355
|
continue;
|
|
2267
2356
|
}
|
|
2357
|
+
if (this.isContentFiltered(parsed)) {
|
|
2358
|
+
throw new ContentFilterError({
|
|
2359
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2360
|
+
message: "Response was blocked by provider content safety filter.",
|
|
2361
|
+
provider: "gemini",
|
|
2362
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
2363
|
+
});
|
|
2364
|
+
}
|
|
2365
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
2366
|
+
if (chunkUsage) {
|
|
2367
|
+
this._lastUsage = chunkUsage;
|
|
2368
|
+
}
|
|
2268
2369
|
yield* this.extractChunks(parsed);
|
|
2269
2370
|
}
|
|
2270
2371
|
}
|
|
@@ -2273,8 +2374,21 @@ var GeminiAdapter = class {
|
|
|
2273
2374
|
if (jsonStr !== "" && jsonStr !== "[DONE]") {
|
|
2274
2375
|
try {
|
|
2275
2376
|
const parsed = JSON.parse(jsonStr);
|
|
2377
|
+
if (this.isContentFiltered(parsed)) {
|
|
2378
|
+
throw new ContentFilterError({
|
|
2379
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2380
|
+
message: "Response was blocked by provider content safety filter.",
|
|
2381
|
+
provider: "gemini",
|
|
2382
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
2383
|
+
});
|
|
2384
|
+
}
|
|
2385
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
2386
|
+
if (chunkUsage) {
|
|
2387
|
+
this._lastUsage = chunkUsage;
|
|
2388
|
+
}
|
|
2276
2389
|
yield* this.extractChunks(parsed);
|
|
2277
|
-
} catch {
|
|
2390
|
+
} catch (error) {
|
|
2391
|
+
if (error instanceof ContentFilterError) throw error;
|
|
2278
2392
|
}
|
|
2279
2393
|
}
|
|
2280
2394
|
}
|
|
@@ -2305,15 +2419,21 @@ var GeminiAdapter = class {
|
|
|
2305
2419
|
/**
|
|
2306
2420
|
* Build and execute a streaming request to the Gemini API.
|
|
2307
2421
|
* Returns the raw `ReadableStream` for the response body together with
|
|
2308
|
-
*
|
|
2422
|
+
* the raw Response object.
|
|
2423
|
+
*
|
|
2424
|
+
* Note: The Gemini API key is passed as a URL query parameter (`key=`).
|
|
2425
|
+
* This is inherent to the Gemini REST SSE endpoint design; the key is
|
|
2426
|
+
* transmitted over HTTPS so it remains encrypted in transit. (H3)
|
|
2309
2427
|
*/
|
|
2310
2428
|
async streamRequest(params) {
|
|
2429
|
+
const contentsArray = params.contents;
|
|
2430
|
+
const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
|
|
2311
2431
|
const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
|
|
2312
2432
|
const body = {
|
|
2313
2433
|
systemInstruction: {
|
|
2314
2434
|
parts: [{ text: params.systemPrompt }]
|
|
2315
2435
|
},
|
|
2316
|
-
contents:
|
|
2436
|
+
contents: fullContents,
|
|
2317
2437
|
safetySettings: DEFAULT_SAFETY_SETTINGS,
|
|
2318
2438
|
generationConfig: {
|
|
2319
2439
|
temperature: 0.7,
|
|
@@ -2381,7 +2501,7 @@ var GeminiAdapter = class {
|
|
|
2381
2501
|
return { stream: response.body, response };
|
|
2382
2502
|
}
|
|
2383
2503
|
// -----------------------------------------------------------------------
|
|
2384
|
-
//
|
|
2504
|
+
// Public helpers (LLMProviderAdapter interface)
|
|
2385
2505
|
// -----------------------------------------------------------------------
|
|
2386
2506
|
/**
|
|
2387
2507
|
* Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
|
|
@@ -2554,7 +2674,8 @@ var LLMOrchestrator = class {
|
|
|
2554
2674
|
updateConfig(config) {
|
|
2555
2675
|
this._config = config;
|
|
2556
2676
|
this._adapter = this.createAdapter(config);
|
|
2557
|
-
|
|
2677
|
+
const label = "provider" in config ? config.provider : "custom adapter";
|
|
2678
|
+
this.log(`Config updated: ${label}`);
|
|
2558
2679
|
}
|
|
2559
2680
|
/** Get the current provider adapter. */
|
|
2560
2681
|
get adapter() {
|
|
@@ -2565,139 +2686,42 @@ var LLMOrchestrator = class {
|
|
|
2565
2686
|
// -----------------------------------------------------------------------
|
|
2566
2687
|
/**
|
|
2567
2688
|
* Execute a streaming LLM request and collect the results.
|
|
2689
|
+
*
|
|
2690
|
+
* This method is fully adapter-agnostic: it delegates streaming,
|
|
2691
|
+
* response parsing, content-filter detection, and usage extraction
|
|
2692
|
+
* entirely to the active `LLMProviderAdapter`. No provider-specific
|
|
2693
|
+
* SSE parsing lives in the orchestrator.
|
|
2568
2694
|
*/
|
|
2569
2695
|
async executeStream(params, _isRetry) {
|
|
2570
|
-
const
|
|
2571
|
-
const historyContents =
|
|
2572
|
-
const
|
|
2573
|
-
|
|
2574
|
-
{ role: "user", parts: [{ text: params.userMessage }] }
|
|
2575
|
-
];
|
|
2576
|
-
const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
|
|
2577
|
-
const { stream } = await geminiAdapter.streamRequest({
|
|
2696
|
+
const adapter = this._adapter;
|
|
2697
|
+
const historyContents = adapter.formatConversation(params.history);
|
|
2698
|
+
const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
|
|
2699
|
+
const { stream } = await adapter.streamRequest({
|
|
2578
2700
|
systemPrompt: params.systemPrompt,
|
|
2579
|
-
contents,
|
|
2701
|
+
contents: historyContents,
|
|
2702
|
+
userMessage: params.userMessage,
|
|
2580
2703
|
tools,
|
|
2581
2704
|
signal: params.signal
|
|
2582
2705
|
});
|
|
2583
2706
|
let fullText = "";
|
|
2584
2707
|
const toolCalls = [];
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
|
|
2593
|
-
|
|
2594
|
-
buffer += decoder.decode(value, { stream: true });
|
|
2595
|
-
const lines = buffer.split("\n");
|
|
2596
|
-
buffer = lines.pop() ?? "";
|
|
2597
|
-
for (const line of lines) {
|
|
2598
|
-
const trimmed = line.trim();
|
|
2599
|
-
if (!trimmed.startsWith("data:")) continue;
|
|
2600
|
-
const jsonStr = trimmed.slice(5).trim();
|
|
2601
|
-
if (jsonStr === "" || jsonStr === "[DONE]") continue;
|
|
2602
|
-
let parsed;
|
|
2603
|
-
try {
|
|
2604
|
-
parsed = JSON.parse(jsonStr);
|
|
2605
|
-
} catch {
|
|
2606
|
-
continue;
|
|
2607
|
-
}
|
|
2608
|
-
if (geminiAdapter.isContentFiltered(parsed)) {
|
|
2609
|
-
wasContentFiltered = true;
|
|
2610
|
-
break;
|
|
2611
|
-
}
|
|
2612
|
-
const chunkUsage = geminiAdapter.extractUsage(parsed);
|
|
2613
|
-
if (chunkUsage) {
|
|
2614
|
-
usage = chunkUsage;
|
|
2615
|
-
}
|
|
2616
|
-
const candidates = parsed.candidates;
|
|
2617
|
-
if (!candidates || candidates.length === 0) continue;
|
|
2618
|
-
for (const candidate of candidates) {
|
|
2619
|
-
const content = candidate.content;
|
|
2620
|
-
if (!content?.parts) continue;
|
|
2621
|
-
const finishReason = candidate.finishReason;
|
|
2622
|
-
const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
|
|
2623
|
-
for (const part of content.parts) {
|
|
2624
|
-
if (typeof part.text === "string") {
|
|
2625
|
-
fullText += part.text;
|
|
2626
|
-
const chunk = { text: part.text, done: isDone };
|
|
2627
|
-
this.callbacks.onChunk?.(chunk);
|
|
2628
|
-
}
|
|
2629
|
-
if (part.functionCall) {
|
|
2630
|
-
const fc = part.functionCall;
|
|
2631
|
-
const toolCall = {
|
|
2632
|
-
id: fc.name,
|
|
2633
|
-
name: fc.name,
|
|
2634
|
-
arguments: fc.args ?? {}
|
|
2635
|
-
};
|
|
2636
|
-
toolCalls.push(toolCall);
|
|
2637
|
-
this.callbacks.onToolCall?.(toolCall);
|
|
2638
|
-
}
|
|
2639
|
-
}
|
|
2640
|
-
}
|
|
2641
|
-
}
|
|
2642
|
-
if (wasContentFiltered) break;
|
|
2643
|
-
}
|
|
2644
|
-
if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
|
|
2645
|
-
const jsonStr = buffer.trim().slice(5).trim();
|
|
2646
|
-
if (jsonStr !== "" && jsonStr !== "[DONE]") {
|
|
2647
|
-
try {
|
|
2648
|
-
const parsed = JSON.parse(jsonStr);
|
|
2649
|
-
if (geminiAdapter.isContentFiltered(parsed)) {
|
|
2650
|
-
wasContentFiltered = true;
|
|
2651
|
-
} else {
|
|
2652
|
-
const chunkUsage = geminiAdapter.extractUsage(parsed);
|
|
2653
|
-
if (chunkUsage) usage = chunkUsage;
|
|
2654
|
-
const candidates = parsed.candidates;
|
|
2655
|
-
if (candidates) {
|
|
2656
|
-
for (const candidate of candidates) {
|
|
2657
|
-
const content = candidate.content;
|
|
2658
|
-
if (!content?.parts) continue;
|
|
2659
|
-
const finishReason = candidate.finishReason;
|
|
2660
|
-
const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
|
|
2661
|
-
for (const part of content.parts) {
|
|
2662
|
-
if (typeof part.text === "string") {
|
|
2663
|
-
fullText += part.text;
|
|
2664
|
-
const chunk = {
|
|
2665
|
-
text: part.text,
|
|
2666
|
-
done: isDone
|
|
2667
|
-
};
|
|
2668
|
-
this.callbacks.onChunk?.(chunk);
|
|
2669
|
-
}
|
|
2670
|
-
if (part.functionCall) {
|
|
2671
|
-
const fc = part.functionCall;
|
|
2672
|
-
const toolCall = {
|
|
2673
|
-
id: fc.name,
|
|
2674
|
-
name: fc.name,
|
|
2675
|
-
arguments: fc.args ?? {}
|
|
2676
|
-
};
|
|
2677
|
-
toolCalls.push(toolCall);
|
|
2678
|
-
this.callbacks.onToolCall?.(toolCall);
|
|
2679
|
-
}
|
|
2680
|
-
}
|
|
2681
|
-
}
|
|
2682
|
-
}
|
|
2683
|
-
}
|
|
2684
|
-
} catch {
|
|
2685
|
-
}
|
|
2708
|
+
for await (const item of adapter.parseResponse(stream)) {
|
|
2709
|
+
if ("name" in item && "arguments" in item) {
|
|
2710
|
+
const toolCall = item;
|
|
2711
|
+
toolCalls.push(toolCall);
|
|
2712
|
+
this.callbacks.onToolCall?.(toolCall);
|
|
2713
|
+
} else {
|
|
2714
|
+
const chunk = item;
|
|
2715
|
+
if (chunk.text) {
|
|
2716
|
+
fullText += chunk.text;
|
|
2686
2717
|
}
|
|
2718
|
+
this.callbacks.onChunk?.(chunk);
|
|
2687
2719
|
}
|
|
2688
|
-
} finally {
|
|
2689
|
-
reader.releaseLock();
|
|
2690
|
-
}
|
|
2691
|
-
if (wasContentFiltered) {
|
|
2692
|
-
throw new ContentFilterError({
|
|
2693
|
-
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2694
|
-
message: "Response was blocked by Gemini content safety filter.",
|
|
2695
|
-
provider: "gemini",
|
|
2696
|
-
suggestion: "Rephrase your question or adjust safety settings."
|
|
2697
|
-
});
|
|
2698
2720
|
}
|
|
2699
|
-
|
|
2700
|
-
|
|
2721
|
+
this.callbacks.onChunk?.({ text: "", done: true });
|
|
2722
|
+
let usage = emptyUsage2();
|
|
2723
|
+
if ("lastUsage" in adapter) {
|
|
2724
|
+
usage = adapter.lastUsage;
|
|
2701
2725
|
}
|
|
2702
2726
|
if (usage.total > 0) {
|
|
2703
2727
|
this.callbacks.onTokenUsage?.(usage);
|
|
@@ -2709,25 +2733,30 @@ var LLMOrchestrator = class {
|
|
|
2709
2733
|
}
|
|
2710
2734
|
/**
|
|
2711
2735
|
* Create the appropriate adapter for the given config.
|
|
2712
|
-
*
|
|
2713
|
-
*
|
|
2736
|
+
*
|
|
2737
|
+
* Built-in providers:
|
|
2738
|
+
* - `'gemini'` — uses the bundled `GeminiAdapter`.
|
|
2739
|
+
*
|
|
2740
|
+
* Custom adapters:
|
|
2741
|
+
* - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
|
|
2742
|
+
* Example: `llm: { adapter: new OpenAIAdapter({ ... }) }`
|
|
2714
2743
|
*/
|
|
2715
2744
|
createAdapter(config) {
|
|
2745
|
+
if ("adapter" in config) {
|
|
2746
|
+
return config.adapter;
|
|
2747
|
+
}
|
|
2716
2748
|
switch (config.provider) {
|
|
2717
2749
|
case "gemini":
|
|
2718
2750
|
return new GeminiAdapter(config);
|
|
2719
|
-
case "openai":
|
|
2720
|
-
return new OpenAIAdapter(config);
|
|
2721
2751
|
default:
|
|
2722
2752
|
throw new Error(
|
|
2723
|
-
`LLM provider "${config.provider}" is not yet supported.
|
|
2753
|
+
`LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
|
|
2724
2754
|
);
|
|
2725
2755
|
}
|
|
2726
2756
|
}
|
|
2727
2757
|
/** Convenience accessor for the current provider name. */
|
|
2728
2758
|
get providerName() {
|
|
2729
|
-
if (this._config
|
|
2730
|
-
if (this._config.provider === "openai") return "openai";
|
|
2759
|
+
if ("provider" in this._config) return this._config.provider;
|
|
2731
2760
|
return void 0;
|
|
2732
2761
|
}
|
|
2733
2762
|
/** Log a debug message if debug mode is enabled. */
|
|
@@ -2880,7 +2909,7 @@ var ToolExecutor = class {
|
|
|
2880
2909
|
break;
|
|
2881
2910
|
}
|
|
2882
2911
|
}
|
|
2883
|
-
if (rounds >= this.maxRounds
|
|
2912
|
+
if (rounds >= this.maxRounds) {
|
|
2884
2913
|
this.log(
|
|
2885
2914
|
`Max rounds (${this.maxRounds}) reached. Returning current text.`
|
|
2886
2915
|
);
|
|
@@ -2983,6 +3012,19 @@ var ToolExecutor = class {
|
|
|
2983
3012
|
return s.value;
|
|
2984
3013
|
}
|
|
2985
3014
|
const tc = toolCalls[i];
|
|
3015
|
+
if (!tc) {
|
|
3016
|
+
const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
|
|
3017
|
+
return {
|
|
3018
|
+
toolCallId: `unknown-${i}`,
|
|
3019
|
+
record: {
|
|
3020
|
+
name: "unknown",
|
|
3021
|
+
args: {},
|
|
3022
|
+
result: void 0,
|
|
3023
|
+
durationMs: 0,
|
|
3024
|
+
error: errorMsg2
|
|
3025
|
+
}
|
|
3026
|
+
};
|
|
3027
|
+
}
|
|
2986
3028
|
const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
|
|
2987
3029
|
return {
|
|
2988
3030
|
toolCallId: tc.id,
|
|
@@ -4175,64 +4217,62 @@ var DeepgramSTT = class {
|
|
|
4175
4217
|
}
|
|
4176
4218
|
};
|
|
4177
4219
|
|
|
4178
|
-
// src/voice/elevenlabs-
|
|
4179
|
-
var LOG_PREFIX7 = "[GuideKit:
|
|
4180
|
-
var
|
|
4181
|
-
var
|
|
4182
|
-
var
|
|
4183
|
-
var
|
|
4184
|
-
function
|
|
4185
|
-
const
|
|
4186
|
-
|
|
4187
|
-
|
|
4188
|
-
|
|
4189
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
4220
|
+
// src/voice/elevenlabs-stt.ts
|
|
4221
|
+
var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
|
|
4222
|
+
var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
|
|
4223
|
+
var DEFAULT_LANGUAGE2 = "en";
|
|
4224
|
+
var INACTIVITY_TIMEOUT_S = 30;
|
|
4225
|
+
var SAMPLE_RATE = 16e3;
|
|
4226
|
+
function float32ToInt162(float32) {
|
|
4227
|
+
const int16 = new Int16Array(float32.length);
|
|
4228
|
+
for (let i = 0; i < float32.length; i++) {
|
|
4229
|
+
const s = Math.max(-1, Math.min(1, float32[i]));
|
|
4230
|
+
int16[i] = s < 0 ? s * 32768 : s * 32767;
|
|
4190
4231
|
}
|
|
4191
|
-
return
|
|
4232
|
+
return int16;
|
|
4192
4233
|
}
|
|
4193
|
-
|
|
4194
|
-
|
|
4234
|
+
function int16ToBase64(int16) {
|
|
4235
|
+
const bytes = new Uint8Array(int16.buffer);
|
|
4236
|
+
const CHUNK_SIZE = 8192;
|
|
4237
|
+
let binary = "";
|
|
4238
|
+
for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
|
|
4239
|
+
const chunk = bytes.subarray(i, i + CHUNK_SIZE);
|
|
4240
|
+
binary += String.fromCharCode(...chunk);
|
|
4241
|
+
}
|
|
4242
|
+
return btoa(binary);
|
|
4243
|
+
}
|
|
4244
|
+
var ElevenLabsSTT = class {
|
|
4245
|
+
// ---- Configuration -------------------------------------------------------
|
|
4195
4246
|
apiKey;
|
|
4196
|
-
|
|
4197
|
-
modelId;
|
|
4247
|
+
language;
|
|
4198
4248
|
debugEnabled;
|
|
4199
|
-
// ---- Internal state
|
|
4249
|
+
// ---- Internal state ------------------------------------------------------
|
|
4200
4250
|
wsManager = null;
|
|
4201
4251
|
_connected = false;
|
|
4202
4252
|
_suspended = false;
|
|
4203
|
-
/**
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
* contain voice settings and the API key before any text chunks.
|
|
4207
|
-
*/
|
|
4208
|
-
bosSent = false;
|
|
4209
|
-
/** Registered audio-event callbacks. */
|
|
4210
|
-
audioCallbacks = /* @__PURE__ */ new Set();
|
|
4211
|
-
// -----------------------------------------------------------------------
|
|
4253
|
+
/** Registered transcript callbacks. */
|
|
4254
|
+
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
4255
|
+
// -------------------------------------------------------------------------
|
|
4212
4256
|
// Constructor
|
|
4213
|
-
//
|
|
4257
|
+
// -------------------------------------------------------------------------
|
|
4214
4258
|
constructor(options) {
|
|
4215
4259
|
this.apiKey = options.apiKey;
|
|
4216
|
-
this.
|
|
4217
|
-
this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
|
|
4260
|
+
this.language = options.language ?? DEFAULT_LANGUAGE2;
|
|
4218
4261
|
this.debugEnabled = options.debug ?? false;
|
|
4219
|
-
this.log("
|
|
4220
|
-
voiceId: this.voiceId,
|
|
4221
|
-
modelId: this.modelId
|
|
4222
|
-
});
|
|
4262
|
+
this.log("ElevenLabsSTT created", { language: this.language });
|
|
4223
4263
|
}
|
|
4224
|
-
//
|
|
4264
|
+
// -------------------------------------------------------------------------
|
|
4225
4265
|
// Public API
|
|
4226
|
-
//
|
|
4266
|
+
// -------------------------------------------------------------------------
|
|
4227
4267
|
/** Whether the WebSocket is currently connected and ready. */
|
|
4228
4268
|
get isConnected() {
|
|
4229
4269
|
return this._connected;
|
|
4230
4270
|
}
|
|
4231
4271
|
/**
|
|
4232
|
-
* Open a WebSocket connection to
|
|
4272
|
+
* Open a WebSocket connection to ElevenLabs' real-time STT endpoint.
|
|
4233
4273
|
*
|
|
4234
|
-
* Resolves once the connection is established and the
|
|
4235
|
-
*
|
|
4274
|
+
* Resolves once the connection is established and the socket is ready to
|
|
4275
|
+
* receive audio frames. Rejects if the connection cannot be established.
|
|
4236
4276
|
*/
|
|
4237
4277
|
async connect() {
|
|
4238
4278
|
if (this._connected) {
|
|
@@ -4244,17 +4284,16 @@ var ElevenLabsTTS = class {
|
|
|
4244
4284
|
return;
|
|
4245
4285
|
}
|
|
4246
4286
|
const url = this.buildUrl();
|
|
4247
|
-
this.log("Connecting to", url);
|
|
4287
|
+
this.log("Connecting to", url.replace(this.apiKey, "***"));
|
|
4248
4288
|
this.wsManager = new WebSocketManager({
|
|
4249
4289
|
url,
|
|
4250
4290
|
protocols: [],
|
|
4251
4291
|
debug: this.debugEnabled,
|
|
4252
|
-
label: "ElevenLabs-
|
|
4292
|
+
label: "ElevenLabs-STT"
|
|
4253
4293
|
});
|
|
4254
4294
|
this.wsManager.onOpen(() => {
|
|
4255
4295
|
this._connected = true;
|
|
4256
|
-
this.
|
|
4257
|
-
this.log("Connected and BOS sent");
|
|
4296
|
+
this.log("Connected");
|
|
4258
4297
|
});
|
|
4259
4298
|
this.wsManager.onMessage((event) => {
|
|
4260
4299
|
this.handleMessage(event);
|
|
@@ -4269,67 +4308,54 @@ var ElevenLabsTTS = class {
|
|
|
4269
4308
|
return this.wsManager.connect();
|
|
4270
4309
|
}
|
|
4271
4310
|
/**
|
|
4272
|
-
* Send
|
|
4273
|
-
*
|
|
4274
|
-
* May be called multiple times to stream text incrementally. Each call
|
|
4275
|
-
* sends a text chunk with `try_trigger_generation: true` so ElevenLabs
|
|
4276
|
-
* can begin synthesising as soon as it has enough context.
|
|
4277
|
-
*
|
|
4278
|
-
* Call {@link flush} when the complete utterance has been sent.
|
|
4279
|
-
*/
|
|
4280
|
-
speak(text) {
|
|
4281
|
-
if (!this._connected || !this.wsManager || this._suspended) {
|
|
4282
|
-
this.log("Cannot speak \u2014 not connected or suspended");
|
|
4283
|
-
return;
|
|
4284
|
-
}
|
|
4285
|
-
if (!text) {
|
|
4286
|
-
return;
|
|
4287
|
-
}
|
|
4288
|
-
const message = JSON.stringify({
|
|
4289
|
-
text,
|
|
4290
|
-
try_trigger_generation: true
|
|
4291
|
-
});
|
|
4292
|
-
this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
|
|
4293
|
-
this.wsManager.send(message);
|
|
4294
|
-
}
|
|
4295
|
-
/**
|
|
4296
|
-
* Signal the end of text input for the current utterance.
|
|
4311
|
+
* Send audio data to ElevenLabs for transcription.
|
|
4297
4312
|
*
|
|
4298
|
-
*
|
|
4299
|
-
*
|
|
4313
|
+
* Accepts either `Float32Array` (Web Audio API output) or `Int16Array`
|
|
4314
|
+
* (already encoded as linear16). Float32 data is automatically converted
|
|
4315
|
+
* to Int16 before encoding. Audio is sent as a base64-encoded JSON message.
|
|
4300
4316
|
*/
|
|
4301
|
-
|
|
4317
|
+
sendAudio(audioData) {
|
|
4302
4318
|
if (!this._connected || !this.wsManager || this._suspended) {
|
|
4303
|
-
this.log("Cannot flush \u2014 not connected or suspended");
|
|
4304
4319
|
return;
|
|
4305
4320
|
}
|
|
4306
|
-
const
|
|
4307
|
-
|
|
4308
|
-
this.wsManager.send(
|
|
4321
|
+
const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
|
|
4322
|
+
const base64 = int16ToBase64(int16);
|
|
4323
|
+
this.wsManager.send(
|
|
4324
|
+
JSON.stringify({
|
|
4325
|
+
type: "input_audio_chunk",
|
|
4326
|
+
audio: base64,
|
|
4327
|
+
sample_rate: SAMPLE_RATE
|
|
4328
|
+
})
|
|
4329
|
+
);
|
|
4309
4330
|
}
|
|
4310
4331
|
/**
|
|
4311
|
-
* Register a callback to receive
|
|
4332
|
+
* Register a callback to receive transcript events.
|
|
4312
4333
|
*
|
|
4313
4334
|
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
4314
4335
|
*/
|
|
4315
|
-
|
|
4316
|
-
this.
|
|
4336
|
+
onTranscript(callback) {
|
|
4337
|
+
this.transcriptCallbacks.add(callback);
|
|
4317
4338
|
let removed = false;
|
|
4318
4339
|
return () => {
|
|
4319
4340
|
if (removed) return;
|
|
4320
4341
|
removed = true;
|
|
4321
|
-
this.
|
|
4342
|
+
this.transcriptCallbacks.delete(callback);
|
|
4322
4343
|
};
|
|
4323
4344
|
}
|
|
4324
|
-
/**
|
|
4345
|
+
/**
|
|
4346
|
+
* Gracefully close the connection.
|
|
4347
|
+
*
|
|
4348
|
+
* Sends a `commit_audio` message so ElevenLabs can finalise any pending
|
|
4349
|
+
* transcription before the socket is torn down.
|
|
4350
|
+
*/
|
|
4325
4351
|
close() {
|
|
4326
4352
|
if (!this._connected || !this.wsManager) {
|
|
4327
4353
|
this.log("Not connected \u2014 nothing to close");
|
|
4328
4354
|
return;
|
|
4329
4355
|
}
|
|
4330
|
-
this.log("
|
|
4356
|
+
this.log("Sending commit_audio and closing");
|
|
4331
4357
|
try {
|
|
4332
|
-
this.wsManager.send(JSON.stringify({
|
|
4358
|
+
this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
|
|
4333
4359
|
} catch {
|
|
4334
4360
|
}
|
|
4335
4361
|
this.wsManager.close();
|
|
@@ -4343,14 +4369,13 @@ var ElevenLabsTTS = class {
|
|
|
4343
4369
|
this.wsManager = null;
|
|
4344
4370
|
}
|
|
4345
4371
|
this.cleanup();
|
|
4346
|
-
this.
|
|
4372
|
+
this.transcriptCallbacks.clear();
|
|
4347
4373
|
}
|
|
4348
4374
|
/**
|
|
4349
4375
|
* Suspend the adapter (e.g. when the device goes offline).
|
|
4350
4376
|
*
|
|
4351
|
-
* Marks the adapter as suspended so that
|
|
4352
|
-
*
|
|
4353
|
-
* will close it after an inactivity timeout if the network went away.
|
|
4377
|
+
* Marks the adapter as suspended so that incoming `sendAudio` calls are
|
|
4378
|
+
* silently dropped. The WebSocket itself is left open.
|
|
4354
4379
|
*/
|
|
4355
4380
|
suspend() {
|
|
4356
4381
|
if (this._suspended) return;
|
|
@@ -4358,54 +4383,22 @@ var ElevenLabsTTS = class {
|
|
|
4358
4383
|
this.log("Suspended");
|
|
4359
4384
|
}
|
|
4360
4385
|
/**
|
|
4361
|
-
* Resume after a prior `suspend()`.
|
|
4362
|
-
* still alive, the adapter returns to normal operation. If the connection
|
|
4363
|
-
* was lost while suspended, callers should `close()` / `destroy()` and
|
|
4364
|
-
* create a new instance.
|
|
4386
|
+
* Resume after a prior `suspend()`.
|
|
4365
4387
|
*/
|
|
4366
4388
|
resume() {
|
|
4367
4389
|
if (!this._suspended) return;
|
|
4368
4390
|
this._suspended = false;
|
|
4369
4391
|
this.log("Resumed");
|
|
4370
4392
|
}
|
|
4371
|
-
//
|
|
4372
|
-
// BOS handshake
|
|
4373
|
-
// -----------------------------------------------------------------------
|
|
4374
|
-
/**
|
|
4375
|
-
* Send the BOS (beginning-of-stream) message.
|
|
4376
|
-
*
|
|
4377
|
-
* This must be the very first message on a new WebSocket session. It
|
|
4378
|
-
* carries the API key and voice settings.
|
|
4379
|
-
*/
|
|
4380
|
-
sendBOS() {
|
|
4381
|
-
if (!this.wsManager || this.bosSent) {
|
|
4382
|
-
return;
|
|
4383
|
-
}
|
|
4384
|
-
const bos = JSON.stringify({
|
|
4385
|
-
text: " ",
|
|
4386
|
-
voice_settings: {
|
|
4387
|
-
stability: DEFAULT_STABILITY,
|
|
4388
|
-
similarity_boost: DEFAULT_SIMILARITY_BOOST
|
|
4389
|
-
},
|
|
4390
|
-
xi_api_key: this.apiKey
|
|
4391
|
-
});
|
|
4392
|
-
this.wsManager.send(bos);
|
|
4393
|
-
this.bosSent = true;
|
|
4394
|
-
this.log("BOS handshake sent");
|
|
4395
|
-
}
|
|
4396
|
-
// -----------------------------------------------------------------------
|
|
4393
|
+
// -------------------------------------------------------------------------
|
|
4397
4394
|
// Message handling
|
|
4398
|
-
//
|
|
4395
|
+
// -------------------------------------------------------------------------
|
|
4399
4396
|
/**
|
|
4400
|
-
* Parse incoming ElevenLabs JSON messages and emit
|
|
4401
|
-
*
|
|
4402
|
-
* ElevenLabs sends messages with the following shape:
|
|
4403
|
-
* ```json
|
|
4404
|
-
* { "audio": "base64encoded...", "isFinal": false }
|
|
4405
|
-
* ```
|
|
4397
|
+
* Parse incoming ElevenLabs JSON messages and emit transcript events.
|
|
4406
4398
|
*
|
|
4407
|
-
*
|
|
4408
|
-
*
|
|
4399
|
+
* ElevenLabs sends two transcript message types:
|
|
4400
|
+
* - `partial_transcript`: interim result, `isFinal = false`
|
|
4401
|
+
* - `committed_transcript`: final result, `isFinal = true`
|
|
4409
4402
|
*/
|
|
4410
4403
|
handleMessage(event) {
|
|
4411
4404
|
if (typeof event.data !== "string") {
|
|
@@ -4418,47 +4411,1026 @@ var ElevenLabsTTS = class {
|
|
|
4418
4411
|
this.log("Failed to parse message", event.data);
|
|
4419
4412
|
return;
|
|
4420
4413
|
}
|
|
4421
|
-
|
|
4422
|
-
|
|
4423
|
-
|
|
4414
|
+
const type = parsed["type"];
|
|
4415
|
+
if (type === "committed_transcript" || type === "partial_transcript") {
|
|
4416
|
+
this.handleTranscriptMessage(parsed, type === "committed_transcript");
|
|
4417
|
+
} else {
|
|
4418
|
+
this.log("Received message", type, parsed);
|
|
4419
|
+
}
|
|
4420
|
+
}
|
|
4421
|
+
/**
|
|
4422
|
+
* Extract transcript data from a transcript message and notify subscribers.
|
|
4423
|
+
*/
|
|
4424
|
+
handleTranscriptMessage(parsed, isFinal) {
|
|
4425
|
+
const result = parsed["result"];
|
|
4426
|
+
const text = result?.text ?? "";
|
|
4427
|
+
const confidence = result?.confidence ?? 0;
|
|
4428
|
+
if (text.trim() === "") {
|
|
4429
|
+
return;
|
|
4430
|
+
}
|
|
4431
|
+
const transcriptEvent = {
|
|
4432
|
+
text,
|
|
4433
|
+
isFinal,
|
|
4434
|
+
confidence,
|
|
4435
|
+
timestamp: Date.now()
|
|
4436
|
+
};
|
|
4437
|
+
this.log(
|
|
4438
|
+
isFinal ? "Final transcript:" : "Interim transcript:",
|
|
4439
|
+
text,
|
|
4440
|
+
`(${(confidence * 100).toFixed(1)}%)`
|
|
4441
|
+
);
|
|
4442
|
+
this.emitTranscript(transcriptEvent);
|
|
4443
|
+
}
|
|
4444
|
+
// -------------------------------------------------------------------------
|
|
4445
|
+
// Subscriber notification
|
|
4446
|
+
// -------------------------------------------------------------------------
|
|
4447
|
+
/**
|
|
4448
|
+
* Emit a transcript event to all registered callbacks.
|
|
4449
|
+
*
|
|
4450
|
+
* Errors thrown by individual callbacks are caught and logged so one
|
|
4451
|
+
* misbehaving subscriber does not prevent others from receiving the event.
|
|
4452
|
+
*/
|
|
4453
|
+
emitTranscript(event) {
|
|
4454
|
+
for (const cb of this.transcriptCallbacks) {
|
|
4455
|
+
try {
|
|
4456
|
+
cb(event);
|
|
4457
|
+
} catch (err) {
|
|
4458
|
+
console.error(LOG_PREFIX7, "Transcript callback threw:", err);
|
|
4459
|
+
}
|
|
4460
|
+
}
|
|
4461
|
+
}
|
|
4462
|
+
// -------------------------------------------------------------------------
|
|
4463
|
+
// URL building
|
|
4464
|
+
// -------------------------------------------------------------------------
|
|
4465
|
+
/** Build the ElevenLabs streaming STT endpoint URL with auth query params. */
|
|
4466
|
+
buildUrl() {
|
|
4467
|
+
const params = new URLSearchParams({
|
|
4468
|
+
xi_api_key: this.apiKey,
|
|
4469
|
+
language: this.language,
|
|
4470
|
+
inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
|
|
4471
|
+
});
|
|
4472
|
+
return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
|
|
4473
|
+
}
|
|
4474
|
+
// -------------------------------------------------------------------------
|
|
4475
|
+
// Cleanup
|
|
4476
|
+
// -------------------------------------------------------------------------
|
|
4477
|
+
/**
   * Reset internal state after disconnection.
   *
   * Only the connected flag is cleared; transcript callbacks stay
   * registered (destroy() clears them separately), so a later reconnect
   * keeps existing subscribers.
   */
  cleanup() {
    this._connected = false;
  }
|
|
4481
|
+
// -------------------------------------------------------------------------
|
|
4482
|
+
// Logging
|
|
4483
|
+
// -------------------------------------------------------------------------
|
|
4484
|
+
/** Conditional debug logging — no-op unless the adapter was created with `debug: true`. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX7, ...args);
    }
  }
|
|
4490
|
+
};
|
|
4491
|
+
|
|
4492
|
+
// src/voice/elevenlabs-tts.ts
var LOG_PREFIX8 = "[GuideKit:TTS]";
// Fallbacks applied by the ElevenLabsTTS constructor when the matching
// option is omitted.
var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
// Voice settings sent once per WebSocket session in the BOS handshake
// (see ElevenLabsTTS.sendBOS).
var DEFAULT_STABILITY = 0.5;
var DEFAULT_SIMILARITY_BOOST = 0.75;
|
|
4498
|
+
/**
 * Decode a base64 string into a fresh ArrayBuffer.
 *
 * `atob` yields a binary string whose char codes are all in 0..255, so each
 * character maps directly to one byte.
 */
function base64ToArrayBuffer(base64) {
  const decoded = atob(base64);
  const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return bytes.buffer;
}
|
|
4507
|
+
var ElevenLabsTTS = class {
  // ---- Configuration ------------------------------------------------------
  /** ElevenLabs API key; sent to the server in the BOS handshake. */
  apiKey;
  /** Voice ID used in the streaming endpoint URL. */
  voiceId;
  /** Model ID passed as a query parameter when connecting. */
  modelId;
  /** Whether verbose debug logging is enabled. */
  debugEnabled;
  // ---- Internal state -----------------------------------------------------
  wsManager = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether the BOS (beginning-of-stream) handshake has been sent for the
   * current WebSocket session. ElevenLabs requires the first message to
   * contain voice settings and the API key before any text chunks.
   */
  bosSent = false;
  /** Registered audio-event callbacks. */
  audioCallbacks = /* @__PURE__ */ new Set();
  // -----------------------------------------------------------------------
  // Constructor
  // -----------------------------------------------------------------------
  constructor(options) {
    this.apiKey = options.apiKey;
    this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
    this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
    this.debugEnabled = options.debug ?? false;
    this.log("ElevenLabsTTS created", {
      voiceId: this.voiceId,
      modelId: this.modelId
    });
  }
  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------
  /** Whether the WebSocket is currently connected and ready. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
   *
   * Resolves once the connection is established and the BOS handshake has
   * been sent. Rejects if the connection cannot be established. In SSR
   * environments (no WebSocket global) it returns without connecting.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof WebSocket === "undefined") {
      this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
      return;
    }
    const url = this.buildUrl();
    this.log("Connecting to", url);
    this.wsManager = new WebSocketManager({
      url,
      protocols: [],
      debug: this.debugEnabled,
      label: "ElevenLabs-TTS"
    });
    this.wsManager.onOpen(() => {
      this._connected = true;
      // BOS must be the very first message of the session.
      this.sendBOS();
      this.log("Connected and BOS sent");
    });
    this.wsManager.onMessage((event) => {
      this.handleMessage(event);
    });
    this.wsManager.onClose((code, reason) => {
      this.log("Connection closed", { code, reason });
      this.cleanup();
    });
    this.wsManager.onError((event) => {
      this.log("WebSocket error", event);
    });
    return this.wsManager.connect();
  }
  /**
   * Send text to be synthesised into speech.
   *
   * May be called multiple times to stream text incrementally. Each call
   * sends a text chunk with `try_trigger_generation: true` so ElevenLabs
   * can begin synthesising as soon as it has enough context.
   *
   * Call {@link flush} when the complete utterance has been sent.
   */
  speak(text) {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot speak \u2014 not connected or suspended");
      return;
    }
    if (!text) {
      return;
    }
    const message = JSON.stringify({
      text,
      try_trigger_generation: true
    });
    this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
    this.wsManager.send(message);
  }
  /**
   * Signal the end of text input for the current utterance.
   *
   * Sends the EOS (end-of-stream) marker to ElevenLabs. The server will
   * flush any remaining audio and send a final chunk with `isFinal: true`.
   */
  flush() {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot flush \u2014 not connected or suspended");
      return;
    }
    const message = JSON.stringify({ text: "" });
    this.log("Sending EOS (flush)");
    this.wsManager.send(message);
  }
  /**
   * Register a callback to receive audio output events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onAudio(callback) {
    this.audioCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.audioCallbacks.delete(callback);
    };
  }
  /** Gracefully close the connection by sending EOS then closing. */
  close() {
    if (!this._connected || !this.wsManager) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing connection");
    try {
      // Best-effort EOS; the socket is torn down regardless.
      this.wsManager.send(JSON.stringify({ text: "" }));
    } catch {
    }
    this.wsManager.close();
    this.cleanup();
  }
  /** Force-destroy the connection without a graceful handshake. */
  destroy() {
    this.log("Destroying");
    if (this.wsManager) {
      this.wsManager.destroy();
      this.wsManager = null;
    }
    this.cleanup();
    this.audioCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Marks the adapter as suspended so that calls to `speak()` and `flush()`
   * are silently dropped. The WebSocket itself is left open; ElevenLabs
   * will close it after an inactivity timeout if the network went away.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. If the underlying connection is
   * still alive, the adapter returns to normal operation. If the connection
   * was lost while suspended, callers should `close()` / `destroy()` and
   * create a new instance.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this.log("Resumed");
  }
  // -----------------------------------------------------------------------
  // BOS handshake
  // -----------------------------------------------------------------------
  /**
   * Send the BOS (beginning-of-stream) message.
   *
   * This must be the very first message on a new WebSocket session. It
   * carries the API key and voice settings.
   */
  sendBOS() {
    if (!this.wsManager || this.bosSent) {
      return;
    }
    const bos = JSON.stringify({
      text: " ",
      voice_settings: {
        stability: DEFAULT_STABILITY,
        similarity_boost: DEFAULT_SIMILARITY_BOOST
      },
      xi_api_key: this.apiKey
    });
    this.wsManager.send(bos);
    this.bosSent = true;
    this.log("BOS handshake sent");
  }
  // -----------------------------------------------------------------------
  // Message handling
  // -----------------------------------------------------------------------
  /**
   * Parse incoming ElevenLabs JSON messages and emit audio events.
   *
   * ElevenLabs sends messages with the following shape:
   * ```json
   * { "audio": "base64encoded...", "isFinal": false }
   * ```
   *
   * When `isFinal` is `true`, the server has finished synthesising the
   * current utterance (i.e. after EOS was sent).
   */
  handleMessage(event) {
    if (typeof event.data !== "string") {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      this.log("Failed to parse message", event.data);
      return;
    }
    if (parsed["error"] !== void 0) {
      this.log("ElevenLabs error:", parsed["error"]);
      return;
    }
    if (parsed["audio"] === void 0 || parsed["audio"] === null) {
      // FIX: the end-of-utterance marker can arrive as
      // `{ "audio": null, "isFinal": true }`. Previously this branch logged
      // it as a non-audio message and returned, so subscribers never saw
      // the final event in the null-audio form (the empty-string form below
      // was already handled). Emit the final marker here as well.
      if (parsed["isFinal"] === true) {
        this.emitAudio({
          audio: new ArrayBuffer(0),
          isFinal: true,
          timestamp: Date.now()
        });
        return;
      }
      this.log("Non-audio message received", parsed);
      return;
    }
    const audioBase64 = parsed["audio"];
    const isFinal = parsed["isFinal"] === true;
    if (!audioBase64 || audioBase64.length === 0) {
      // Empty audio payload: only meaningful as a final marker.
      if (isFinal) {
        this.emitAudio({
          audio: new ArrayBuffer(0),
          isFinal: true,
          timestamp: Date.now()
        });
      }
      return;
    }
    let audioBuffer;
    try {
      audioBuffer = base64ToArrayBuffer(audioBase64);
    } catch (err) {
      this.log("Failed to decode base64 audio", err);
      return;
    }
    const audioEvent = {
      audio: audioBuffer,
      isFinal,
      timestamp: Date.now()
    };
    this.log(
      isFinal ? "Final audio chunk:" : "Audio chunk:",
      `${audioBuffer.byteLength} bytes`
    );
    this.emitAudio(audioEvent);
  }
  // -----------------------------------------------------------------------
  // Subscriber notification
  // -----------------------------------------------------------------------
  /**
   * Emit an audio event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitAudio(event) {
    for (const cb of this.audioCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX8, "Audio callback threw:", err);
      }
    }
  }
  // -----------------------------------------------------------------------
  // URL building
  // -----------------------------------------------------------------------
  /** Build the ElevenLabs streaming TTS endpoint URL. */
  buildUrl() {
    const params = new URLSearchParams({
      model_id: this.modelId
    });
    return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
  }
  // -----------------------------------------------------------------------
  // Cleanup
  // -----------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
    this.bosSent = false;
  }
  // -----------------------------------------------------------------------
  // Logging
  // -----------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX8, ...args);
    }
  }
};
|
|
4819
|
+
|
|
4820
|
+
// src/voice/web-speech-stt.ts
var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
// Fallback recognition language applied by the WebSpeechSTT constructor
// when options.language is not provided.
var DEFAULT_LANGUAGE3 = "en-US";
|
|
4823
|
+
var WebSpeechSTT = class {
|
|
4824
|
+
// ---- Configuration -------------------------------------------------------
|
|
4825
|
+
language;
|
|
4826
|
+
continuous;
|
|
4827
|
+
interimResultsEnabled;
|
|
4828
|
+
debugEnabled;
|
|
4829
|
+
// ---- Internal state ------------------------------------------------------
|
|
4830
|
+
recognition = null;
|
|
4831
|
+
_connected = false;
|
|
4832
|
+
_suspended = false;
|
|
4833
|
+
/**
|
|
4834
|
+
* Whether we intentionally stopped recognition. Used to distinguish
|
|
4835
|
+
* between intentional stop and unexpected end (for auto-restart in
|
|
4836
|
+
* continuous mode).
|
|
4837
|
+
*/
|
|
4838
|
+
_intentionalStop = false;
|
|
4839
|
+
/** Registered transcript callbacks. */
|
|
4840
|
+
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
4841
|
+
// -------------------------------------------------------------------------
|
|
4842
|
+
// Constructor
|
|
4843
|
+
// -------------------------------------------------------------------------
|
|
4844
|
+
constructor(options = {}) {
|
|
4845
|
+
this.language = options.language ?? DEFAULT_LANGUAGE3;
|
|
4846
|
+
this.continuous = options.continuous ?? true;
|
|
4847
|
+
this.interimResultsEnabled = options.interimResults ?? true;
|
|
4848
|
+
this.debugEnabled = options.debug ?? false;
|
|
4849
|
+
this.log("WebSpeechSTT created", {
|
|
4850
|
+
language: this.language,
|
|
4851
|
+
continuous: this.continuous,
|
|
4852
|
+
interimResults: this.interimResultsEnabled
|
|
4853
|
+
});
|
|
4854
|
+
}
|
|
4855
|
+
// -------------------------------------------------------------------------
|
|
4856
|
+
// Static methods
|
|
4857
|
+
// -------------------------------------------------------------------------
|
|
4858
|
+
/**
   * Check whether the Web Speech API SpeechRecognition is supported in the
   * current environment. Safe to call in SSR (returns false).
   *
   * Probes both the standard `SpeechRecognition` constructor and the
   * `webkit`-prefixed variant for browsers that only expose the latter.
   */
  static isSupported() {
    if (typeof window === "undefined") return false;
    return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
  }
|
|
4866
|
+
// -------------------------------------------------------------------------
|
|
4867
|
+
// Public API
|
|
4868
|
+
// -------------------------------------------------------------------------
|
|
4869
|
+
/** Whether recognition is currently active and connected. */
|
|
4870
|
+
get isConnected() {
|
|
4871
|
+
return this._connected;
|
|
4872
|
+
}
|
|
4873
|
+
/**
   * Start speech recognition.
   *
   * Creates the SpeechRecognition instance and begins listening. Resolves
   * once the recognition session has started. Rejects if the API is not
   * supported or the browser denies permission. Already-connected and SSR
   * (no `window`) cases return immediately without starting anything.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof window === "undefined") {
      this.log("SSR environment detected \u2014 cannot connect");
      return;
    }
    // NOTE(review): resolveSpeechRecognition() is defined outside this
    // excerpt; presumably it returns the standard or webkit-prefixed
    // constructor (cf. isSupported) — confirm.
    const SpeechRecognitionClass = this.resolveSpeechRecognition();
    if (!SpeechRecognitionClass) {
      throw new Error(
        "Web Speech API (SpeechRecognition) is not supported in this browser."
      );
    }
    this.recognition = new SpeechRecognitionClass();
    this.recognition.lang = this.language;
    this.recognition.continuous = this.continuous;
    this.recognition.interimResults = this.interimResultsEnabled;
    // Only the best alternative per result is consumed (handleResult reads
    // result[0]).
    this.recognition.maxAlternatives = 1;
    this.recognition.onstart = () => {
      this._connected = true;
      this._intentionalStop = false;
      this.log("Recognition started");
    };
    this.recognition.onresult = (event) => {
      this.handleResult(event);
    };
    this.recognition.onerror = (event) => {
      this.handleError(event);
    };
    this.recognition.onend = () => {
      this.log("Recognition ended");
      const wasConnected = this._connected;
      this._connected = false;
      // Continuous sessions can end spontaneously; restart unless the stop
      // was requested (close/destroy/suspend) or we never actually started.
      if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
        this.log("Auto-restarting continuous recognition");
        try {
          this.recognition?.start();
        } catch {
          this.log("Failed to auto-restart recognition");
        }
      }
    };
    // Adapt the event-based start sequence into a Promise: resolve on the
    // first "start" event, reject on the first "error" event or on a
    // synchronous throw from start(). One-shot listeners are removed in
    // cleanup() so they cannot fire twice.
    return new Promise((resolve, reject) => {
      const onStart = () => {
        cleanup();
        resolve();
      };
      const onError = (event) => {
        cleanup();
        reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
      };
      const cleanup = () => {
        if (this.recognition) {
          this.recognition.removeEventListener("start", onStart);
          this.recognition.removeEventListener("error", onError);
        }
      };
      this.recognition.addEventListener("start", onStart, { once: true });
      this.recognition.addEventListener("error", onError, { once: true });
      try {
        this.recognition.start();
      } catch (err) {
        cleanup();
        reject(err);
      }
    });
  }
|
|
4949
|
+
/**
   * Send audio data. Deliberately a no-op for the Web Speech API since it
   * captures audio directly from the microphone via the browser's internal
   * pipeline.
   *
   * Provided for interface compatibility with WebSocket-based STT adapters
   * (DeepgramSTT, ElevenLabsSTT).
   */
  sendAudio(_audioData) {
  }
|
|
4958
|
+
/**
|
|
4959
|
+
* Register a callback to receive transcript events.
|
|
4960
|
+
*
|
|
4961
|
+
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
4962
|
+
*/
|
|
4963
|
+
onTranscript(callback) {
|
|
4964
|
+
this.transcriptCallbacks.add(callback);
|
|
4965
|
+
let removed = false;
|
|
4966
|
+
return () => {
|
|
4967
|
+
if (removed) return;
|
|
4968
|
+
removed = true;
|
|
4969
|
+
this.transcriptCallbacks.delete(callback);
|
|
4970
|
+
};
|
|
4971
|
+
}
|
|
4972
|
+
/**
|
|
4973
|
+
* Gracefully stop recognition.
|
|
4974
|
+
*
|
|
4975
|
+
* Calls `stop()` on the SpeechRecognition instance which allows it to
|
|
4976
|
+
* deliver any pending final results before ending.
|
|
4977
|
+
*/
|
|
4978
|
+
close() {
|
|
4979
|
+
if (!this.recognition) {
|
|
4980
|
+
this.log("Not connected \u2014 nothing to close");
|
|
4981
|
+
return;
|
|
4982
|
+
}
|
|
4983
|
+
this.log("Closing recognition");
|
|
4984
|
+
this._intentionalStop = true;
|
|
4985
|
+
try {
|
|
4986
|
+
this.recognition.stop();
|
|
4987
|
+
} catch {
|
|
4988
|
+
}
|
|
4989
|
+
this.cleanup();
|
|
4990
|
+
}
|
|
4991
|
+
/** Force-destroy the recognition without waiting for pending results. */
|
|
4992
|
+
destroy() {
|
|
4993
|
+
this.log("Destroying");
|
|
4994
|
+
this._intentionalStop = true;
|
|
4995
|
+
if (this.recognition) {
|
|
4996
|
+
try {
|
|
4997
|
+
this.recognition.abort();
|
|
4998
|
+
} catch {
|
|
4999
|
+
}
|
|
5000
|
+
this.recognition.onresult = null;
|
|
5001
|
+
this.recognition.onerror = null;
|
|
5002
|
+
this.recognition.onend = null;
|
|
5003
|
+
this.recognition.onstart = null;
|
|
5004
|
+
this.recognition = null;
|
|
5005
|
+
}
|
|
5006
|
+
this.cleanup();
|
|
5007
|
+
this.transcriptCallbacks.clear();
|
|
5008
|
+
}
|
|
5009
|
+
/**
|
|
5010
|
+
* Suspend the adapter (e.g. when the device goes offline).
|
|
5011
|
+
*
|
|
5012
|
+
* Stops recognition and marks the adapter as suspended so that auto-restart
|
|
5013
|
+
* does not trigger.
|
|
5014
|
+
*/
|
|
5015
|
+
suspend() {
|
|
5016
|
+
if (this._suspended) return;
|
|
5017
|
+
this._suspended = true;
|
|
5018
|
+
this._intentionalStop = true;
|
|
5019
|
+
if (this.recognition && this._connected) {
|
|
5020
|
+
try {
|
|
5021
|
+
this.recognition.stop();
|
|
5022
|
+
} catch {
|
|
5023
|
+
}
|
|
5024
|
+
}
|
|
5025
|
+
this.log("Suspended");
|
|
5026
|
+
}
|
|
5027
|
+
/**
|
|
5028
|
+
* Resume after a prior `suspend()`. Restarts recognition if it was
|
|
5029
|
+
* running before suspension.
|
|
5030
|
+
*/
|
|
5031
|
+
resume() {
|
|
5032
|
+
if (!this._suspended) return;
|
|
5033
|
+
this._suspended = false;
|
|
5034
|
+
this._intentionalStop = false;
|
|
5035
|
+
this.log("Resumed");
|
|
5036
|
+
if (this.recognition && !this._connected) {
|
|
5037
|
+
try {
|
|
5038
|
+
this.recognition.start();
|
|
5039
|
+
} catch {
|
|
5040
|
+
this.log("Failed to restart recognition after resume");
|
|
5041
|
+
}
|
|
5042
|
+
}
|
|
5043
|
+
}
|
|
5044
|
+
// -------------------------------------------------------------------------
|
|
5045
|
+
// Result handling
|
|
5046
|
+
// -------------------------------------------------------------------------
|
|
5047
|
+
/**
|
|
5048
|
+
* Handle SpeechRecognition result events.
|
|
5049
|
+
*
|
|
5050
|
+
* The `results` property is a SpeechRecognitionResultList containing all
|
|
5051
|
+
* results accumulated during this recognition session. We only process
|
|
5052
|
+
* results from `resultIndex` onward to avoid re-emitting old results.
|
|
5053
|
+
*/
|
|
5054
|
+
handleResult(event) {
|
|
5055
|
+
for (let i = event.resultIndex; i < event.results.length; i++) {
|
|
5056
|
+
const result = event.results[i];
|
|
5057
|
+
if (!result) continue;
|
|
5058
|
+
const alternative = result[0];
|
|
5059
|
+
if (!alternative) continue;
|
|
5060
|
+
const transcript = alternative.transcript;
|
|
5061
|
+
if (!transcript || transcript.trim() === "") continue;
|
|
5062
|
+
const isFinal = result.isFinal;
|
|
5063
|
+
const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
|
|
5064
|
+
const transcriptEvent = {
|
|
5065
|
+
text: transcript,
|
|
5066
|
+
isFinal,
|
|
5067
|
+
confidence,
|
|
5068
|
+
timestamp: Date.now()
|
|
5069
|
+
};
|
|
5070
|
+
this.log(
|
|
5071
|
+
isFinal ? "Final transcript:" : "Interim transcript:",
|
|
5072
|
+
transcript,
|
|
5073
|
+
`(${(confidence * 100).toFixed(1)}%)`
|
|
5074
|
+
);
|
|
5075
|
+
this.emitTranscript(transcriptEvent);
|
|
5076
|
+
}
|
|
5077
|
+
}
|
|
5078
|
+
// -------------------------------------------------------------------------
|
|
5079
|
+
// Error handling
|
|
5080
|
+
// -------------------------------------------------------------------------
|
|
5081
|
+
/**
|
|
5082
|
+
* Handle SpeechRecognition errors.
|
|
5083
|
+
*
|
|
5084
|
+
* Some errors are recoverable (e.g. `no-speech`) and some are fatal
|
|
5085
|
+
* (e.g. `not-allowed`). For recoverable errors in continuous mode,
|
|
5086
|
+
* recognition will auto-restart via the `onend` handler.
|
|
5087
|
+
*/
|
|
5088
|
+
handleError(event) {
|
|
5089
|
+
const errorType = event.error;
|
|
5090
|
+
this.log("Recognition error:", errorType, event.message);
|
|
5091
|
+
if (errorType === "no-speech" || errorType === "aborted") {
|
|
5092
|
+
this.log("Non-fatal error \u2014 will recover");
|
|
5093
|
+
return;
|
|
5094
|
+
}
|
|
5095
|
+
if (errorType === "network") {
|
|
5096
|
+
this.log("Network error \u2014 recognition may auto-restart");
|
|
5097
|
+
return;
|
|
5098
|
+
}
|
|
5099
|
+
if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
|
|
5100
|
+
this._intentionalStop = true;
|
|
5101
|
+
this.log("Fatal recognition error \u2014 stopping");
|
|
5102
|
+
}
|
|
5103
|
+
}
|
|
5104
|
+
// -------------------------------------------------------------------------
|
|
5105
|
+
// Subscriber notification
|
|
5106
|
+
// -------------------------------------------------------------------------
|
|
5107
|
+
/**
|
|
5108
|
+
* Emit a transcript event to all registered callbacks.
|
|
5109
|
+
*
|
|
5110
|
+
* Errors thrown by individual callbacks are caught and logged so one
|
|
5111
|
+
* misbehaving subscriber does not prevent others from receiving the event.
|
|
5112
|
+
*/
|
|
5113
|
+
emitTranscript(event) {
|
|
5114
|
+
for (const cb of this.transcriptCallbacks) {
|
|
5115
|
+
try {
|
|
5116
|
+
cb(event);
|
|
5117
|
+
} catch (err) {
|
|
5118
|
+
console.error(LOG_PREFIX9, "Transcript callback threw:", err);
|
|
5119
|
+
}
|
|
5120
|
+
}
|
|
5121
|
+
}
|
|
5122
|
+
// -------------------------------------------------------------------------
|
|
5123
|
+
// SpeechRecognition resolution
|
|
5124
|
+
// -------------------------------------------------------------------------
|
|
5125
|
+
/**
|
|
5126
|
+
* Resolve the SpeechRecognition constructor, with the webkit-prefixed
|
|
5127
|
+
* fallback. Returns null if not available.
|
|
5128
|
+
*/
|
|
5129
|
+
resolveSpeechRecognition() {
|
|
5130
|
+
if (typeof window === "undefined") return null;
|
|
5131
|
+
const win = window;
|
|
5132
|
+
if (typeof win["SpeechRecognition"] !== "undefined") {
|
|
5133
|
+
return win["SpeechRecognition"];
|
|
5134
|
+
}
|
|
5135
|
+
if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
|
|
5136
|
+
return globalThis.webkitSpeechRecognition;
|
|
5137
|
+
}
|
|
5138
|
+
return null;
|
|
5139
|
+
}
|
|
5140
|
+
// -------------------------------------------------------------------------
|
|
5141
|
+
// Cleanup
|
|
5142
|
+
// -------------------------------------------------------------------------
|
|
5143
|
+
/** Reset internal state after disconnection. */
|
|
5144
|
+
cleanup() {
|
|
5145
|
+
this._connected = false;
|
|
5146
|
+
}
|
|
5147
|
+
// -------------------------------------------------------------------------
|
|
5148
|
+
// Logging
|
|
5149
|
+
// -------------------------------------------------------------------------
|
|
5150
|
+
/** Conditional debug logging. */
|
|
5151
|
+
log(...args) {
|
|
5152
|
+
if (this.debugEnabled) {
|
|
5153
|
+
console.debug(LOG_PREFIX9, ...args);
|
|
5154
|
+
}
|
|
5155
|
+
}
|
|
5156
|
+
};
|
|
5157
|
+
|
|
5158
|
+
// src/voice/web-speech-tts.ts
|
|
5159
|
+
var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
|
|
5160
|
+
var DEFAULT_RATE = 1;
|
|
5161
|
+
var DEFAULT_PITCH = 1;
|
|
5162
|
+
var DEFAULT_LANGUAGE4 = "en-US";
|
|
5163
|
+
var WebSpeechTTS = class {
|
|
5164
|
+
// ---- Configuration -------------------------------------------------------
|
|
5165
|
+
voiceName;
|
|
5166
|
+
rate;
|
|
5167
|
+
pitch;
|
|
5168
|
+
language;
|
|
5169
|
+
debugEnabled;
|
|
5170
|
+
// ---- Internal state ------------------------------------------------------
|
|
5171
|
+
_connected = false;
|
|
5172
|
+
_suspended = false;
|
|
5173
|
+
/** Cached voice object resolved from voiceName. */
|
|
5174
|
+
_resolvedVoice = null;
|
|
5175
|
+
/** Whether voices have been loaded (they load async in some browsers). */
|
|
5176
|
+
_voicesLoaded = false;
|
|
5177
|
+
/** Registered audio-event callbacks. */
|
|
5178
|
+
audioCallbacks = /* @__PURE__ */ new Set();
|
|
5179
|
+
// -------------------------------------------------------------------------
|
|
5180
|
+
// Constructor
|
|
5181
|
+
// -------------------------------------------------------------------------
|
|
5182
|
+
constructor(options = {}) {
|
|
5183
|
+
this.voiceName = options.voice ?? null;
|
|
5184
|
+
this.rate = options.rate ?? DEFAULT_RATE;
|
|
5185
|
+
this.pitch = options.pitch ?? DEFAULT_PITCH;
|
|
5186
|
+
this.language = options.language ?? DEFAULT_LANGUAGE4;
|
|
5187
|
+
this.debugEnabled = options.debug ?? false;
|
|
5188
|
+
this.log("WebSpeechTTS created", {
|
|
5189
|
+
voice: this.voiceName,
|
|
5190
|
+
rate: this.rate,
|
|
5191
|
+
pitch: this.pitch,
|
|
5192
|
+
language: this.language
|
|
5193
|
+
});
|
|
5194
|
+
}
|
|
5195
|
+
// -------------------------------------------------------------------------
|
|
5196
|
+
// Static methods
|
|
5197
|
+
// -------------------------------------------------------------------------
|
|
5198
|
+
/**
|
|
5199
|
+
* Check whether the Web Speech API SpeechSynthesis is supported in the
|
|
5200
|
+
* current environment. Safe to call in SSR (returns false).
|
|
5201
|
+
*/
|
|
5202
|
+
static isSupported() {
|
|
5203
|
+
if (typeof window === "undefined") return false;
|
|
5204
|
+
return typeof window.speechSynthesis !== "undefined";
|
|
5205
|
+
}
|
|
5206
|
+
// -------------------------------------------------------------------------
|
|
5207
|
+
// Public API
|
|
5208
|
+
// -------------------------------------------------------------------------
|
|
5209
|
+
/** Whether the adapter is connected (ready for speech). */
|
|
5210
|
+
get isConnected() {
|
|
5211
|
+
return this._connected;
|
|
5212
|
+
}
|
|
5213
|
+
/**
|
|
5214
|
+
* Initialize the adapter.
|
|
5215
|
+
*
|
|
5216
|
+
* Loads available voices and resolves the requested voice name. Voice
|
|
5217
|
+
* loading is async in some browsers (notably Chrome) so we wait for
|
|
5218
|
+
* the `voiceschanged` event if needed.
|
|
5219
|
+
*/
|
|
5220
|
+
async connect() {
|
|
5221
|
+
if (this._connected) {
|
|
5222
|
+
this.log("Already connected \u2014 skipping");
|
|
5223
|
+
return;
|
|
5224
|
+
}
|
|
5225
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5226
|
+
this.log("SpeechSynthesis not available \u2014 cannot connect");
|
|
5227
|
+
return;
|
|
5228
|
+
}
|
|
5229
|
+
await this.loadVoices();
|
|
5230
|
+
if (this.voiceName) {
|
|
5231
|
+
this._resolvedVoice = this.findVoice(this.voiceName);
|
|
5232
|
+
if (this._resolvedVoice) {
|
|
5233
|
+
this.log("Resolved voice:", this._resolvedVoice.name);
|
|
5234
|
+
} else {
|
|
5235
|
+
this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
|
|
5236
|
+
}
|
|
5237
|
+
}
|
|
5238
|
+
this._connected = true;
|
|
5239
|
+
this.log("Connected");
|
|
5240
|
+
}
|
|
5241
|
+
/**
|
|
5242
|
+
* Speak the given text using the browser's speech synthesis engine.
|
|
5243
|
+
*
|
|
5244
|
+
* Returns a Promise that resolves when the utterance completes or is
|
|
5245
|
+
* cancelled. Rejects if an error occurs during synthesis.
|
|
5246
|
+
*
|
|
5247
|
+
* Also emits audio events to registered callbacks for VoicePipeline
|
|
5248
|
+
* compatibility.
|
|
5249
|
+
*/
|
|
5250
|
+
speak(text) {
|
|
5251
|
+
if (!this._connected || this._suspended) {
|
|
5252
|
+
this.log("Cannot speak \u2014 not connected or suspended");
|
|
5253
|
+
return;
|
|
5254
|
+
}
|
|
5255
|
+
if (!text || !text.trim()) {
|
|
5256
|
+
return;
|
|
5257
|
+
}
|
|
5258
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5259
|
+
return;
|
|
5260
|
+
}
|
|
5261
|
+
const synth = window.speechSynthesis;
|
|
5262
|
+
const utterance = new SpeechSynthesisUtterance(text);
|
|
5263
|
+
utterance.lang = this.language;
|
|
5264
|
+
utterance.rate = this.rate;
|
|
5265
|
+
utterance.pitch = this.pitch;
|
|
5266
|
+
if (this._resolvedVoice) {
|
|
5267
|
+
utterance.voice = this._resolvedVoice;
|
|
5268
|
+
}
|
|
5269
|
+
utterance.onstart = () => {
|
|
5270
|
+
this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
|
|
5271
|
+
this.emitAudio({
|
|
5272
|
+
audio: new ArrayBuffer(0),
|
|
5273
|
+
isFinal: false,
|
|
5274
|
+
timestamp: Date.now()
|
|
5275
|
+
});
|
|
5276
|
+
};
|
|
5277
|
+
utterance.onend = () => {
|
|
5278
|
+
this.log("Utterance ended");
|
|
5279
|
+
this.emitAudio({
|
|
5280
|
+
audio: new ArrayBuffer(0),
|
|
5281
|
+
isFinal: true,
|
|
5282
|
+
timestamp: Date.now()
|
|
5283
|
+
});
|
|
5284
|
+
};
|
|
5285
|
+
utterance.onerror = (event) => {
|
|
5286
|
+
if (event.error === "canceled") {
|
|
5287
|
+
this.log("Utterance cancelled");
|
|
5288
|
+
this.emitAudio({
|
|
5289
|
+
audio: new ArrayBuffer(0),
|
|
5290
|
+
isFinal: true,
|
|
5291
|
+
timestamp: Date.now()
|
|
5292
|
+
});
|
|
5293
|
+
return;
|
|
5294
|
+
}
|
|
5295
|
+
this.log("Utterance error:", event.error);
|
|
5296
|
+
this.emitAudio({
|
|
5297
|
+
audio: new ArrayBuffer(0),
|
|
5298
|
+
isFinal: true,
|
|
5299
|
+
timestamp: Date.now()
|
|
5300
|
+
});
|
|
5301
|
+
};
|
|
5302
|
+
this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
|
|
5303
|
+
synth.speak(utterance);
|
|
5304
|
+
}
|
|
5305
|
+
/**
|
|
5306
|
+
* Flush / finalize the current utterance.
|
|
5307
|
+
*
|
|
5308
|
+
* No-op for Web Speech API since each speak() call is a complete
|
|
5309
|
+
* utterance. Provided for interface compatibility with ElevenLabsTTS.
|
|
5310
|
+
*/
|
|
5311
|
+
flush() {
|
|
5312
|
+
}
|
|
5313
|
+
/**
|
|
5314
|
+
* Register a callback to receive audio output events.
|
|
5315
|
+
*
|
|
5316
|
+
* For Web Speech API, these events have empty audio buffers and are
|
|
5317
|
+
* used to signal utterance start/end for VoicePipeline state management.
|
|
5318
|
+
*
|
|
5319
|
+
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
5320
|
+
*/
|
|
5321
|
+
onAudio(callback) {
|
|
5322
|
+
this.audioCallbacks.add(callback);
|
|
5323
|
+
let removed = false;
|
|
5324
|
+
return () => {
|
|
5325
|
+
if (removed) return;
|
|
5326
|
+
removed = true;
|
|
5327
|
+
this.audioCallbacks.delete(callback);
|
|
5328
|
+
};
|
|
5329
|
+
}
|
|
5330
|
+
/** Stop current speech synthesis and cancel any queued utterances. */
|
|
5331
|
+
stop() {
|
|
5332
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5333
|
+
return;
|
|
5334
|
+
}
|
|
5335
|
+
this.log("Stopping speech synthesis");
|
|
5336
|
+
window.speechSynthesis.cancel();
|
|
5337
|
+
}
|
|
5338
|
+
/** Gracefully close the adapter. */
|
|
5339
|
+
close() {
|
|
5340
|
+
this.log("Closing");
|
|
5341
|
+
this.stop();
|
|
5342
|
+
this.cleanup();
|
|
5343
|
+
}
|
|
5344
|
+
/** Force-destroy the adapter. */
|
|
5345
|
+
destroy() {
|
|
5346
|
+
this.log("Destroying");
|
|
5347
|
+
this.stop();
|
|
5348
|
+
this.cleanup();
|
|
5349
|
+
this.audioCallbacks.clear();
|
|
5350
|
+
}
|
|
5351
|
+
/**
|
|
5352
|
+
* Suspend the adapter (e.g. when the device goes offline).
|
|
5353
|
+
*
|
|
5354
|
+
* Pauses any active speech synthesis and marks the adapter as suspended.
|
|
5355
|
+
*/
|
|
5356
|
+
suspend() {
|
|
5357
|
+
if (this._suspended) return;
|
|
5358
|
+
this._suspended = true;
|
|
5359
|
+
if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
|
|
5360
|
+
window.speechSynthesis.pause();
|
|
5361
|
+
}
|
|
5362
|
+
this.log("Suspended");
|
|
5363
|
+
}
|
|
5364
|
+
/**
|
|
5365
|
+
* Resume after a prior `suspend()`.
|
|
5366
|
+
*/
|
|
5367
|
+
resume() {
|
|
5368
|
+
if (!this._suspended) return;
|
|
5369
|
+
this._suspended = false;
|
|
5370
|
+
if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
|
|
5371
|
+
window.speechSynthesis.resume();
|
|
4424
5372
|
}
|
|
4425
|
-
|
|
4426
|
-
|
|
5373
|
+
this.log("Resumed");
|
|
5374
|
+
}
|
|
5375
|
+
// -------------------------------------------------------------------------
|
|
5376
|
+
// Voice loading
|
|
5377
|
+
// -------------------------------------------------------------------------
|
|
5378
|
+
/**
|
|
5379
|
+
* Load available voices from the browser.
|
|
5380
|
+
*
|
|
5381
|
+
* In Chrome and some other browsers, voices load asynchronously after
|
|
5382
|
+
* the page loads. We wait for the `voiceschanged` event with a timeout.
|
|
5383
|
+
*/
|
|
5384
|
+
async loadVoices() {
|
|
5385
|
+
if (this._voicesLoaded) return;
|
|
5386
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
|
|
5387
|
+
const synth = window.speechSynthesis;
|
|
5388
|
+
let voices = synth.getVoices();
|
|
5389
|
+
if (voices.length > 0) {
|
|
5390
|
+
this._voicesLoaded = true;
|
|
5391
|
+
this.log("Voices loaded:", voices.length, "available");
|
|
4427
5392
|
return;
|
|
4428
5393
|
}
|
|
4429
|
-
|
|
4430
|
-
|
|
4431
|
-
|
|
4432
|
-
|
|
4433
|
-
|
|
4434
|
-
|
|
4435
|
-
|
|
4436
|
-
|
|
4437
|
-
|
|
4438
|
-
|
|
4439
|
-
|
|
5394
|
+
await new Promise((resolve) => {
|
|
5395
|
+
const onVoicesChanged = () => {
|
|
5396
|
+
synth.removeEventListener("voiceschanged", onVoicesChanged);
|
|
5397
|
+
clearTimeout(timeout);
|
|
5398
|
+
voices = synth.getVoices();
|
|
5399
|
+
this._voicesLoaded = true;
|
|
5400
|
+
this.log("Voices loaded (async):", voices.length, "available");
|
|
5401
|
+
resolve();
|
|
5402
|
+
};
|
|
5403
|
+
const timeout = setTimeout(() => {
|
|
5404
|
+
synth.removeEventListener("voiceschanged", onVoicesChanged);
|
|
5405
|
+
this._voicesLoaded = true;
|
|
5406
|
+
this.log("Voices loading timed out \u2014 proceeding with defaults");
|
|
5407
|
+
resolve();
|
|
5408
|
+
}, 2e3);
|
|
5409
|
+
synth.addEventListener("voiceschanged", onVoicesChanged);
|
|
5410
|
+
});
|
|
5411
|
+
}
|
|
5412
|
+
/**
|
|
5413
|
+
* Find a voice by name (case-insensitive partial match).
|
|
5414
|
+
*/
|
|
5415
|
+
findVoice(name) {
|
|
5416
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5417
|
+
return null;
|
|
4440
5418
|
}
|
|
4441
|
-
|
|
4442
|
-
|
|
4443
|
-
|
|
4444
|
-
|
|
4445
|
-
|
|
4446
|
-
|
|
5419
|
+
const voices = window.speechSynthesis.getVoices();
|
|
5420
|
+
const lowerName = name.toLowerCase();
|
|
5421
|
+
const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
|
|
5422
|
+
if (exact) return exact;
|
|
5423
|
+
const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
|
|
5424
|
+
if (partial) return partial;
|
|
5425
|
+
if (lowerName.includes("-") || lowerName.length <= 5) {
|
|
5426
|
+
const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
|
|
5427
|
+
if (langMatch) return langMatch;
|
|
4447
5428
|
}
|
|
4448
|
-
|
|
4449
|
-
audio: audioBuffer,
|
|
4450
|
-
isFinal,
|
|
4451
|
-
timestamp: Date.now()
|
|
4452
|
-
};
|
|
4453
|
-
this.log(
|
|
4454
|
-
isFinal ? "Final audio chunk:" : "Audio chunk:",
|
|
4455
|
-
`${audioBuffer.byteLength} bytes`
|
|
4456
|
-
);
|
|
4457
|
-
this.emitAudio(audioEvent);
|
|
5429
|
+
return null;
|
|
4458
5430
|
}
|
|
4459
|
-
//
|
|
5431
|
+
// -------------------------------------------------------------------------
|
|
4460
5432
|
// Subscriber notification
|
|
4461
|
-
//
|
|
5433
|
+
// -------------------------------------------------------------------------
|
|
4462
5434
|
/**
|
|
4463
5435
|
* Emit an audio event to all registered callbacks.
|
|
4464
5436
|
*
|
|
@@ -4470,41 +5442,30 @@ var ElevenLabsTTS = class {
|
|
|
4470
5442
|
try {
|
|
4471
5443
|
cb(event);
|
|
4472
5444
|
} catch (err) {
|
|
4473
|
-
console.error(
|
|
5445
|
+
console.error(LOG_PREFIX10, "Audio callback threw:", err);
|
|
4474
5446
|
}
|
|
4475
5447
|
}
|
|
4476
5448
|
}
|
|
4477
|
-
//
|
|
4478
|
-
// URL building
|
|
4479
|
-
// -----------------------------------------------------------------------
|
|
4480
|
-
/** Build the ElevenLabs streaming TTS endpoint URL. */
|
|
4481
|
-
buildUrl() {
|
|
4482
|
-
const params = new URLSearchParams({
|
|
4483
|
-
model_id: this.modelId
|
|
4484
|
-
});
|
|
4485
|
-
return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
|
|
4486
|
-
}
|
|
4487
|
-
// -----------------------------------------------------------------------
|
|
5449
|
+
// -------------------------------------------------------------------------
|
|
4488
5450
|
// Cleanup
|
|
4489
|
-
//
|
|
4490
|
-
/** Reset internal state
|
|
5451
|
+
// -------------------------------------------------------------------------
|
|
5452
|
+
/** Reset internal state. */
|
|
4491
5453
|
cleanup() {
|
|
4492
5454
|
this._connected = false;
|
|
4493
|
-
this.bosSent = false;
|
|
4494
5455
|
}
|
|
4495
|
-
//
|
|
5456
|
+
// -------------------------------------------------------------------------
|
|
4496
5457
|
// Logging
|
|
4497
|
-
//
|
|
5458
|
+
// -------------------------------------------------------------------------
|
|
4498
5459
|
/** Conditional debug logging. */
|
|
4499
5460
|
log(...args) {
|
|
4500
5461
|
if (this.debugEnabled) {
|
|
4501
|
-
console.debug(
|
|
5462
|
+
console.debug(LOG_PREFIX10, ...args);
|
|
4502
5463
|
}
|
|
4503
5464
|
}
|
|
4504
5465
|
};
|
|
4505
5466
|
|
|
4506
5467
|
// src/voice/index.ts
|
|
4507
|
-
var
|
|
5468
|
+
var LOG_PREFIX11 = "[GuideKit:Voice]";
|
|
4508
5469
|
var JITTER_BUFFER_MS = 150;
|
|
4509
5470
|
var ECHO_WINDOW_MS = 3e3;
|
|
4510
5471
|
var ECHO_OVERLAP_THRESHOLD = 0.6;
|
|
@@ -4612,17 +5573,42 @@ var VoicePipeline = class {
|
|
|
4612
5573
|
cause: err instanceof Error ? err : void 0
|
|
4613
5574
|
});
|
|
4614
5575
|
}
|
|
4615
|
-
this.
|
|
4616
|
-
|
|
4617
|
-
|
|
4618
|
-
|
|
4619
|
-
|
|
4620
|
-
|
|
4621
|
-
|
|
4622
|
-
|
|
4623
|
-
|
|
4624
|
-
|
|
4625
|
-
|
|
5576
|
+
if (this._sttConfig.provider === "deepgram") {
|
|
5577
|
+
this._stt = new DeepgramSTT({
|
|
5578
|
+
apiKey: this._sttConfig.apiKey,
|
|
5579
|
+
model: this._sttConfig.model,
|
|
5580
|
+
debug: this._debug
|
|
5581
|
+
});
|
|
5582
|
+
} else if (this._sttConfig.provider === "elevenlabs") {
|
|
5583
|
+
this._stt = new ElevenLabsSTT({
|
|
5584
|
+
apiKey: this._sttConfig.apiKey,
|
|
5585
|
+
language: this._sttConfig.language,
|
|
5586
|
+
debug: this._debug
|
|
5587
|
+
});
|
|
5588
|
+
} else {
|
|
5589
|
+
this._stt = new WebSpeechSTT({
|
|
5590
|
+
language: this._sttConfig.language,
|
|
5591
|
+
continuous: this._sttConfig.continuous,
|
|
5592
|
+
interimResults: this._sttConfig.interimResults,
|
|
5593
|
+
debug: this._debug
|
|
5594
|
+
});
|
|
5595
|
+
}
|
|
5596
|
+
if (this._ttsConfig.provider === "elevenlabs") {
|
|
5597
|
+
this._tts = new ElevenLabsTTS({
|
|
5598
|
+
apiKey: this._ttsConfig.apiKey,
|
|
5599
|
+
voiceId: this._ttsConfig.voiceId,
|
|
5600
|
+
modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
|
|
5601
|
+
debug: this._debug
|
|
5602
|
+
});
|
|
5603
|
+
} else {
|
|
5604
|
+
this._tts = new WebSpeechTTS({
|
|
5605
|
+
voice: this._ttsConfig.voice,
|
|
5606
|
+
rate: this._ttsConfig.rate,
|
|
5607
|
+
pitch: this._ttsConfig.pitch,
|
|
5608
|
+
language: this._ttsConfig.language,
|
|
5609
|
+
debug: this._debug
|
|
5610
|
+
});
|
|
5611
|
+
}
|
|
4626
5612
|
this._log("Initialization complete");
|
|
4627
5613
|
}
|
|
4628
5614
|
// ────────────────────────────────────────────────────────────────────
|
|
@@ -4762,10 +5748,11 @@ var VoicePipeline = class {
|
|
|
4762
5748
|
// ────────────────────────────────────────────────────────────────────
|
|
4763
5749
|
// speak()
|
|
4764
5750
|
// ────────────────────────────────────────────────────────────────────
|
|
4765
|
-
/** Speak text via ElevenLabs
|
|
5751
|
+
/** Speak text via TTS (ElevenLabs or Web Speech API). */
|
|
4766
5752
|
async speak(text) {
|
|
4767
5753
|
if (this._destroyed || !text.trim()) return;
|
|
4768
|
-
|
|
5754
|
+
const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
|
|
5755
|
+
if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
|
|
4769
5756
|
this._log("TTS or AudioContext not available \u2014 cannot speak");
|
|
4770
5757
|
this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
|
|
4771
5758
|
this._setState("idle");
|
|
@@ -4809,11 +5796,24 @@ var VoicePipeline = class {
|
|
|
4809
5796
|
}
|
|
4810
5797
|
resolve();
|
|
4811
5798
|
};
|
|
4812
|
-
|
|
4813
|
-
this.
|
|
4814
|
-
|
|
4815
|
-
|
|
4816
|
-
|
|
5799
|
+
if (isWebSpeechTTS) {
|
|
5800
|
+
this._unsubTTSAudio = this._tts.onAudio(
|
|
5801
|
+
(event) => {
|
|
5802
|
+
if (event.isFinal) {
|
|
5803
|
+
done();
|
|
5804
|
+
}
|
|
5805
|
+
}
|
|
5806
|
+
);
|
|
5807
|
+
this._tts.speak(text);
|
|
5808
|
+
} else {
|
|
5809
|
+
this._unsubTTSAudio = this._tts.onAudio(
|
|
5810
|
+
(event) => {
|
|
5811
|
+
this._handleTTSAudio(event, done);
|
|
5812
|
+
}
|
|
5813
|
+
);
|
|
5814
|
+
this._tts.speak(text);
|
|
5815
|
+
this._tts.flush();
|
|
5816
|
+
}
|
|
4817
5817
|
});
|
|
4818
5818
|
}
|
|
4819
5819
|
// ────────────────────────────────────────────────────────────────────
|
|
@@ -4842,7 +5842,9 @@ var VoicePipeline = class {
|
|
|
4842
5842
|
this._pendingLLMAbort.abort();
|
|
4843
5843
|
this._pendingLLMAbort = null;
|
|
4844
5844
|
}
|
|
4845
|
-
if (this._tts
|
|
5845
|
+
if (this._tts instanceof WebSpeechTTS) {
|
|
5846
|
+
this._tts.stop();
|
|
5847
|
+
} else if (this._tts?.isConnected) {
|
|
4846
5848
|
this._tts.close();
|
|
4847
5849
|
}
|
|
4848
5850
|
}
|
|
@@ -4931,7 +5933,7 @@ var VoicePipeline = class {
|
|
|
4931
5933
|
try {
|
|
4932
5934
|
cb(next, prev);
|
|
4933
5935
|
} catch (err) {
|
|
4934
|
-
console.error(
|
|
5936
|
+
console.error(LOG_PREFIX11, "State change callback threw:", err);
|
|
4935
5937
|
}
|
|
4936
5938
|
}
|
|
4937
5939
|
}
|
|
@@ -5062,7 +6064,7 @@ var VoicePipeline = class {
|
|
|
5062
6064
|
try {
|
|
5063
6065
|
cb(text, isFinal);
|
|
5064
6066
|
} catch (err) {
|
|
5065
|
-
console.error(
|
|
6067
|
+
console.error(LOG_PREFIX11, "Transcript callback threw:", err);
|
|
5066
6068
|
}
|
|
5067
6069
|
}
|
|
5068
6070
|
if (isFinal && this._state === "listening") {
|
|
@@ -5165,8 +6167,14 @@ var VoicePipeline = class {
|
|
|
5165
6167
|
* sequential playback via AudioBufferSourceNode.
|
|
5166
6168
|
*/
|
|
5167
6169
|
_decodeAndSchedule(audioData, onDone) {
|
|
6170
|
+
let onDoneCalled = false;
|
|
6171
|
+
const safeOnDone = onDone ? () => {
|
|
6172
|
+
if (onDoneCalled) return;
|
|
6173
|
+
onDoneCalled = true;
|
|
6174
|
+
onDone();
|
|
6175
|
+
} : void 0;
|
|
5168
6176
|
if (!this._audioContext || this._state !== "speaking") {
|
|
5169
|
-
|
|
6177
|
+
safeOnDone?.();
|
|
5170
6178
|
return;
|
|
5171
6179
|
}
|
|
5172
6180
|
const ctx = this._audioContext;
|
|
@@ -5175,7 +6183,7 @@ var VoicePipeline = class {
|
|
|
5175
6183
|
copy,
|
|
5176
6184
|
(decodedBuffer) => {
|
|
5177
6185
|
if (this._state !== "speaking" || !this._audioContext) {
|
|
5178
|
-
|
|
6186
|
+
safeOnDone?.();
|
|
5179
6187
|
return;
|
|
5180
6188
|
}
|
|
5181
6189
|
const source = ctx.createBufferSource();
|
|
@@ -5188,8 +6196,8 @@ var VoicePipeline = class {
|
|
|
5188
6196
|
if (this._lastScheduledSource === source) {
|
|
5189
6197
|
this._lastScheduledSource = null;
|
|
5190
6198
|
}
|
|
5191
|
-
if (
|
|
5192
|
-
|
|
6199
|
+
if (safeOnDone) {
|
|
6200
|
+
safeOnDone();
|
|
5193
6201
|
}
|
|
5194
6202
|
};
|
|
5195
6203
|
const now = ctx.currentTime;
|
|
@@ -5205,7 +6213,7 @@ var VoicePipeline = class {
|
|
|
5205
6213
|
},
|
|
5206
6214
|
(err) => {
|
|
5207
6215
|
this._log("Failed to decode audio chunk:", err);
|
|
5208
|
-
|
|
6216
|
+
safeOnDone?.();
|
|
5209
6217
|
}
|
|
5210
6218
|
);
|
|
5211
6219
|
}
|
|
@@ -5264,13 +6272,13 @@ var VoicePipeline = class {
|
|
|
5264
6272
|
// ════════════════════════════════════════════════════════════════════
|
|
5265
6273
|
_log(...args) {
|
|
5266
6274
|
if (this._debug) {
|
|
5267
|
-
console.debug(
|
|
6275
|
+
console.debug(LOG_PREFIX11, ...args);
|
|
5268
6276
|
}
|
|
5269
6277
|
}
|
|
5270
6278
|
};
|
|
5271
6279
|
|
|
5272
6280
|
// src/visual/index.ts
|
|
5273
|
-
var
|
|
6281
|
+
var LOG_PREFIX12 = "[GuideKit:Visual]";
|
|
5274
6282
|
var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
|
|
5275
6283
|
var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
|
|
5276
6284
|
var DEFAULT_ANIMATION_DURATION = 300;
|
|
@@ -6187,16 +7195,16 @@ var VisualGuidance = class {
|
|
|
6187
7195
|
if (!this.debug) return;
|
|
6188
7196
|
if (typeof console !== "undefined") {
|
|
6189
7197
|
if (data) {
|
|
6190
|
-
console.log(`${
|
|
7198
|
+
console.log(`${LOG_PREFIX12} ${message}`, data);
|
|
6191
7199
|
} else {
|
|
6192
|
-
console.log(`${
|
|
7200
|
+
console.log(`${LOG_PREFIX12} ${message}`);
|
|
6193
7201
|
}
|
|
6194
7202
|
}
|
|
6195
7203
|
}
|
|
6196
7204
|
};
|
|
6197
7205
|
|
|
6198
7206
|
// src/awareness/index.ts
|
|
6199
|
-
var
|
|
7207
|
+
var LOG_PREFIX13 = "[GuideKit:Awareness]";
|
|
6200
7208
|
var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
|
|
6201
7209
|
var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
|
|
6202
7210
|
var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
|
|
@@ -6558,13 +7566,13 @@ var AwarenessSystem = class {
|
|
|
6558
7566
|
/** Conditional debug logging. */
|
|
6559
7567
|
log(...args) {
|
|
6560
7568
|
if (this.debugEnabled) {
|
|
6561
|
-
console.debug(
|
|
7569
|
+
console.debug(LOG_PREFIX13, ...args);
|
|
6562
7570
|
}
|
|
6563
7571
|
}
|
|
6564
7572
|
};
|
|
6565
7573
|
|
|
6566
7574
|
// src/awareness/proactive.ts
|
|
6567
|
-
var
|
|
7575
|
+
var LOG_PREFIX14 = "[GuideKit:Proactive]";
|
|
6568
7576
|
var STORAGE_KEY = "guidekit:visited";
|
|
6569
7577
|
var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
|
|
6570
7578
|
var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
|
|
@@ -6602,7 +7610,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6602
7610
|
set quietMode(value) {
|
|
6603
7611
|
this._quietMode = value;
|
|
6604
7612
|
if (this.debug) {
|
|
6605
|
-
console.debug(
|
|
7613
|
+
console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
|
|
6606
7614
|
}
|
|
6607
7615
|
}
|
|
6608
7616
|
// ---- Lifecycle -----------------------------------------------------------
|
|
@@ -6632,7 +7640,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6632
7640
|
})
|
|
6633
7641
|
);
|
|
6634
7642
|
if (this.debug) {
|
|
6635
|
-
console.debug(
|
|
7643
|
+
console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
|
|
6636
7644
|
}
|
|
6637
7645
|
}
|
|
6638
7646
|
/** Unsubscribe all bus listeners and clear internal state. */
|
|
@@ -6647,7 +7655,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6647
7655
|
this.formTimers.clear();
|
|
6648
7656
|
this.started = false;
|
|
6649
7657
|
if (this.debug) {
|
|
6650
|
-
console.debug(
|
|
7658
|
+
console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
|
|
6651
7659
|
}
|
|
6652
7660
|
}
|
|
6653
7661
|
/** Alias for {@link stop}. */
|
|
@@ -6682,7 +7690,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6682
7690
|
}, FORM_ABANDON_MS);
|
|
6683
7691
|
this.formTimers.set(formSelector, timer);
|
|
6684
7692
|
if (this.debug) {
|
|
6685
|
-
console.debug(
|
|
7693
|
+
console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
|
|
6686
7694
|
}
|
|
6687
7695
|
}
|
|
6688
7696
|
/** Reset all cooldowns and internal tracking state (useful for testing). */
|
|
@@ -6696,7 +7704,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6696
7704
|
}
|
|
6697
7705
|
this.formTimers.clear();
|
|
6698
7706
|
if (this.debug) {
|
|
6699
|
-
console.debug(
|
|
7707
|
+
console.debug(LOG_PREFIX14, "All cooldowns and state reset");
|
|
6700
7708
|
}
|
|
6701
7709
|
}
|
|
6702
7710
|
// ---- Internal handlers ---------------------------------------------------
|
|
@@ -6713,22 +7721,23 @@ var ProactiveTriggerEngine = class {
|
|
|
6713
7721
|
message: "First-time visitor detected. Show a visual greeting (no audio)."
|
|
6714
7722
|
}, "greeting");
|
|
6715
7723
|
if (this.debug) {
|
|
6716
|
-
console.debug(
|
|
7724
|
+
console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
|
|
6717
7725
|
}
|
|
6718
7726
|
return;
|
|
6719
7727
|
}
|
|
6720
7728
|
const visitedAt = parseInt(visited, 10);
|
|
6721
|
-
if (
|
|
6722
|
-
|
|
6723
|
-
|
|
6724
|
-
|
|
6725
|
-
|
|
6726
|
-
|
|
6727
|
-
|
|
7729
|
+
if (Number.isNaN(visitedAt)) {
|
|
7730
|
+
return;
|
|
7731
|
+
}
|
|
7732
|
+
const elapsed = Date.now() - visitedAt;
|
|
7733
|
+
if (elapsed <= SEVEN_DAYS_MS && this.debug) {
|
|
7734
|
+
console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
|
|
7735
|
+
} else if (this.debug) {
|
|
7736
|
+
console.debug(LOG_PREFIX14, "Return visitor after 7 days");
|
|
6728
7737
|
}
|
|
6729
7738
|
} catch {
|
|
6730
7739
|
if (this.debug) {
|
|
6731
|
-
console.warn(
|
|
7740
|
+
console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
|
|
6732
7741
|
}
|
|
6733
7742
|
}
|
|
6734
7743
|
}
|
|
@@ -6746,7 +7755,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6746
7755
|
const count = this.dwellCounts.get(sectionId) ?? 0;
|
|
6747
7756
|
if (count >= DWELL_COOLDOWNS.length + 1) {
|
|
6748
7757
|
if (this.debug) {
|
|
6749
|
-
console.debug(
|
|
7758
|
+
console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
|
|
6750
7759
|
}
|
|
6751
7760
|
return;
|
|
6752
7761
|
}
|
|
@@ -6756,7 +7765,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6756
7765
|
const lastFired = this.cooldowns.get(key) ?? 0;
|
|
6757
7766
|
if (Date.now() - lastFired < cooldownMs) {
|
|
6758
7767
|
if (this.debug) {
|
|
6759
|
-
console.debug(
|
|
7768
|
+
console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
|
|
6760
7769
|
}
|
|
6761
7770
|
return;
|
|
6762
7771
|
}
|
|
@@ -6772,7 +7781,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6772
7781
|
const sectionKey = selector;
|
|
6773
7782
|
if (this.frustrationFired.has(sectionKey)) {
|
|
6774
7783
|
if (this.debug) {
|
|
6775
|
-
console.debug(
|
|
7784
|
+
console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
|
|
6776
7785
|
}
|
|
6777
7786
|
return;
|
|
6778
7787
|
}
|
|
@@ -6788,7 +7797,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6788
7797
|
const key = "navigation-commentary";
|
|
6789
7798
|
if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
|
|
6790
7799
|
if (this.debug) {
|
|
6791
|
-
console.debug(
|
|
7800
|
+
console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
|
|
6792
7801
|
}
|
|
6793
7802
|
return;
|
|
6794
7803
|
}
|
|
@@ -6811,7 +7820,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6811
7820
|
fireTrigger(partial, cooldownKey) {
|
|
6812
7821
|
if (this._quietMode) {
|
|
6813
7822
|
if (this.debug) {
|
|
6814
|
-
console.debug(
|
|
7823
|
+
console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
|
|
6815
7824
|
}
|
|
6816
7825
|
return;
|
|
6817
7826
|
}
|
|
@@ -6821,13 +7830,13 @@ var ProactiveTriggerEngine = class {
|
|
|
6821
7830
|
};
|
|
6822
7831
|
this.cooldowns.set(cooldownKey, trigger.timestamp);
|
|
6823
7832
|
if (this.debug) {
|
|
6824
|
-
console.debug(
|
|
7833
|
+
console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
|
|
6825
7834
|
}
|
|
6826
7835
|
if (this.onTrigger) {
|
|
6827
7836
|
try {
|
|
6828
7837
|
this.onTrigger(trigger);
|
|
6829
7838
|
} catch (err) {
|
|
6830
|
-
console.error(
|
|
7839
|
+
console.error(LOG_PREFIX14, "onTrigger callback error:", err);
|
|
6831
7840
|
}
|
|
6832
7841
|
}
|
|
6833
7842
|
}
|
|
@@ -6840,7 +7849,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6840
7849
|
};
|
|
6841
7850
|
|
|
6842
7851
|
// src/llm/rate-limiter.ts
|
|
6843
|
-
var
|
|
7852
|
+
var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
|
|
6844
7853
|
var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
|
|
6845
7854
|
var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
|
|
6846
7855
|
var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
|
|
@@ -6935,7 +7944,19 @@ var RateLimiter = class {
|
|
|
6935
7944
|
get sttMinutesUsed() {
|
|
6936
7945
|
let totalMs = this.sttMs;
|
|
6937
7946
|
if (this.sttStartedAt !== null) {
|
|
6938
|
-
|
|
7947
|
+
const activeMs = Date.now() - this.sttStartedAt;
|
|
7948
|
+
const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
|
|
7949
|
+
const maxActiveMs = maxSessionMs * 2;
|
|
7950
|
+
if (activeMs > maxActiveMs) {
|
|
7951
|
+
console.warn(
|
|
7952
|
+
`${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
|
|
7953
|
+
);
|
|
7954
|
+
this.sttMs += maxActiveMs;
|
|
7955
|
+
this.sttStartedAt = null;
|
|
7956
|
+
totalMs = this.sttMs;
|
|
7957
|
+
} else {
|
|
7958
|
+
totalMs += activeMs;
|
|
7959
|
+
}
|
|
6939
7960
|
}
|
|
6940
7961
|
return totalMs / 6e4;
|
|
6941
7962
|
}
|
|
@@ -7007,7 +8028,7 @@ var RateLimiter = class {
|
|
|
7007
8028
|
}
|
|
7008
8029
|
log(...args) {
|
|
7009
8030
|
if (this.debug) {
|
|
7010
|
-
console.debug(
|
|
8031
|
+
console.debug(LOG_PREFIX15, ...args);
|
|
7011
8032
|
}
|
|
7012
8033
|
}
|
|
7013
8034
|
};
|
|
@@ -7240,7 +8261,7 @@ var BUILTIN_LOCALES = {
|
|
|
7240
8261
|
pt
|
|
7241
8262
|
};
|
|
7242
8263
|
var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
|
|
7243
|
-
var
|
|
8264
|
+
var LOG_PREFIX16 = "[GuideKit:I18n]";
|
|
7244
8265
|
function isSupportedLocale(code) {
|
|
7245
8266
|
return SUPPORTED_LOCALE_CODES.has(code);
|
|
7246
8267
|
}
|
|
@@ -7278,7 +8299,7 @@ var I18n = class {
|
|
|
7278
8299
|
this.strings = strings;
|
|
7279
8300
|
this.resolvedLocale = resolvedLocale;
|
|
7280
8301
|
if (this.debug) {
|
|
7281
|
-
console.debug(`${
|
|
8302
|
+
console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
|
|
7282
8303
|
}
|
|
7283
8304
|
}
|
|
7284
8305
|
// -------------------------------------------------------------------------
|
|
@@ -7289,9 +8310,9 @@ var I18n = class {
|
|
|
7289
8310
|
const value = this.strings[key];
|
|
7290
8311
|
if (value === void 0) {
|
|
7291
8312
|
if (this.debug) {
|
|
7292
|
-
console.warn(`${
|
|
8313
|
+
console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
|
|
7293
8314
|
}
|
|
7294
|
-
return en[key] ?? key;
|
|
8315
|
+
return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
|
|
7295
8316
|
}
|
|
7296
8317
|
return value;
|
|
7297
8318
|
}
|
|
@@ -7305,7 +8326,7 @@ var I18n = class {
|
|
|
7305
8326
|
this.strings = strings;
|
|
7306
8327
|
this.resolvedLocale = resolvedLocale;
|
|
7307
8328
|
if (this.debug) {
|
|
7308
|
-
console.debug(`${
|
|
8329
|
+
console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
|
|
7309
8330
|
}
|
|
7310
8331
|
}
|
|
7311
8332
|
/** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
|
|
@@ -7325,7 +8346,7 @@ var I18n = class {
|
|
|
7325
8346
|
if (locale === "auto") {
|
|
7326
8347
|
const detected = detectLocaleFromDocument();
|
|
7327
8348
|
if (this.debug) {
|
|
7328
|
-
console.debug(`${
|
|
8349
|
+
console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
|
|
7329
8350
|
}
|
|
7330
8351
|
return {
|
|
7331
8352
|
strings: BUILTIN_LOCALES[detected],
|
|
@@ -7340,7 +8361,7 @@ var I18n = class {
|
|
|
7340
8361
|
}
|
|
7341
8362
|
if (this.debug) {
|
|
7342
8363
|
console.warn(
|
|
7343
|
-
`${
|
|
8364
|
+
`${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
|
|
7344
8365
|
);
|
|
7345
8366
|
}
|
|
7346
8367
|
return {
|
|
@@ -7351,7 +8372,7 @@ var I18n = class {
|
|
|
7351
8372
|
};
|
|
7352
8373
|
|
|
7353
8374
|
// src/auth/token-manager.ts
|
|
7354
|
-
var
|
|
8375
|
+
var LOG_PREFIX17 = "[GuideKit:Auth]";
|
|
7355
8376
|
var REFRESH_THRESHOLD = 0.8;
|
|
7356
8377
|
var MAX_RETRY_ATTEMPTS = 3;
|
|
7357
8378
|
var RETRY_BASE_MS = 1e3;
|
|
@@ -7630,7 +8651,7 @@ var TokenManager = class {
|
|
|
7630
8651
|
}
|
|
7631
8652
|
log(message) {
|
|
7632
8653
|
if (this.debug) {
|
|
7633
|
-
console.debug(`${
|
|
8654
|
+
console.debug(`${LOG_PREFIX17} ${message}`);
|
|
7634
8655
|
}
|
|
7635
8656
|
}
|
|
7636
8657
|
};
|
|
@@ -7762,6 +8783,11 @@ var GuideKitCore = class {
|
|
|
7762
8783
|
debug: this._debug
|
|
7763
8784
|
});
|
|
7764
8785
|
await this.tokenManager.start();
|
|
8786
|
+
if (!this._options.llm) {
|
|
8787
|
+
console.warn(
|
|
8788
|
+
"[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
|
|
8789
|
+
);
|
|
8790
|
+
}
|
|
7765
8791
|
this.resourceManager.register({
|
|
7766
8792
|
name: "token-manager",
|
|
7767
8793
|
cleanup: () => this.tokenManager?.destroy()
|
|
@@ -7884,21 +8910,50 @@ var GuideKitCore = class {
|
|
|
7884
8910
|
}
|
|
7885
8911
|
});
|
|
7886
8912
|
this.registerBuiltinTools();
|
|
7887
|
-
|
|
7888
|
-
const sttConfig = this._options.stt;
|
|
7889
|
-
const ttsConfig = this._options.tts;
|
|
7890
|
-
|
|
8913
|
+
{
|
|
8914
|
+
const sttConfig = this._options.stt ?? { provider: "web-speech" };
|
|
8915
|
+
const ttsConfig = this._options.tts ?? { provider: "web-speech" };
|
|
8916
|
+
let voiceSttConfig;
|
|
8917
|
+
let voiceTtsConfig;
|
|
8918
|
+
if (sttConfig.provider === "deepgram") {
|
|
8919
|
+
voiceSttConfig = {
|
|
8920
|
+
provider: "deepgram",
|
|
8921
|
+
apiKey: sttConfig.apiKey,
|
|
8922
|
+
model: sttConfig.model
|
|
8923
|
+
};
|
|
8924
|
+
} else if (sttConfig.provider === "elevenlabs") {
|
|
8925
|
+
voiceSttConfig = {
|
|
8926
|
+
provider: "elevenlabs",
|
|
8927
|
+
apiKey: sttConfig.apiKey,
|
|
8928
|
+
language: sttConfig.language
|
|
8929
|
+
};
|
|
8930
|
+
} else {
|
|
8931
|
+
voiceSttConfig = {
|
|
8932
|
+
provider: "web-speech",
|
|
8933
|
+
language: sttConfig.language,
|
|
8934
|
+
continuous: sttConfig.continuous,
|
|
8935
|
+
interimResults: sttConfig.interimResults
|
|
8936
|
+
};
|
|
8937
|
+
}
|
|
8938
|
+
if (ttsConfig.provider === "elevenlabs") {
|
|
8939
|
+
voiceTtsConfig = {
|
|
8940
|
+
provider: "elevenlabs",
|
|
8941
|
+
apiKey: ttsConfig.apiKey,
|
|
8942
|
+
voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
|
|
8943
|
+
};
|
|
8944
|
+
} else {
|
|
8945
|
+
voiceTtsConfig = {
|
|
8946
|
+
provider: "web-speech",
|
|
8947
|
+
voice: ttsConfig.voice,
|
|
8948
|
+
rate: ttsConfig.rate,
|
|
8949
|
+
pitch: ttsConfig.pitch,
|
|
8950
|
+
language: ttsConfig.language
|
|
8951
|
+
};
|
|
8952
|
+
}
|
|
8953
|
+
try {
|
|
7891
8954
|
this.voicePipeline = new VoicePipeline({
|
|
7892
|
-
sttConfig:
|
|
7893
|
-
|
|
7894
|
-
apiKey: sttConfig.apiKey,
|
|
7895
|
-
model: "model" in sttConfig ? sttConfig.model : void 0
|
|
7896
|
-
},
|
|
7897
|
-
ttsConfig: {
|
|
7898
|
-
provider: "elevenlabs",
|
|
7899
|
-
apiKey: ttsConfig.apiKey,
|
|
7900
|
-
voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
|
|
7901
|
-
},
|
|
8955
|
+
sttConfig: voiceSttConfig,
|
|
8956
|
+
ttsConfig: voiceTtsConfig,
|
|
7902
8957
|
debug: this._debug
|
|
7903
8958
|
});
|
|
7904
8959
|
this.voicePipeline.onStateChange((state, previous) => {
|
|
@@ -7931,6 +8986,11 @@ var GuideKitCore = class {
|
|
|
7931
8986
|
name: "voice-pipeline",
|
|
7932
8987
|
cleanup: () => this.voicePipeline?.destroy()
|
|
7933
8988
|
});
|
|
8989
|
+
} catch (_err) {
|
|
8990
|
+
this.voicePipeline = null;
|
|
8991
|
+
if (this._debug) {
|
|
8992
|
+
console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
|
|
8993
|
+
}
|
|
7934
8994
|
}
|
|
7935
8995
|
}
|
|
7936
8996
|
const session = this.contextManager.restoreSession();
|
|
@@ -8055,7 +9115,7 @@ var GuideKitCore = class {
|
|
|
8055
9115
|
return responseText;
|
|
8056
9116
|
} catch (error) {
|
|
8057
9117
|
const err = error instanceof GuideKitError ? error : new GuideKitError({
|
|
8058
|
-
code:
|
|
9118
|
+
code: ErrorCodes.UNKNOWN,
|
|
8059
9119
|
message: error instanceof Error ? error.message : "Unknown error",
|
|
8060
9120
|
recoverable: false,
|
|
8061
9121
|
suggestion: "Check the console for details."
|
|
@@ -8311,172 +9371,11 @@ var GuideKitCore = class {
|
|
|
8311
9371
|
};
|
|
8312
9372
|
}
|
|
8313
9373
|
/**
|
|
8314
|
-
*
|
|
8315
|
-
*
|
|
9374
|
+
* Unified built-in tool specifications — single source of truth for both
|
|
9375
|
+
* tool definitions (sent to LLM) and handler registration.
|
|
8316
9376
|
*/
|
|
8317
|
-
|
|
8318
|
-
|
|
8319
|
-
this.toolExecutor.registerTool({
|
|
8320
|
-
name: "highlight",
|
|
8321
|
-
execute: async (args) => {
|
|
8322
|
-
const sectionId = args.sectionId;
|
|
8323
|
-
const selector = args.selector;
|
|
8324
|
-
const tooltip = args.tooltip;
|
|
8325
|
-
const position = args.position;
|
|
8326
|
-
const result = this.highlight({ sectionId, selector, tooltip, position });
|
|
8327
|
-
return { success: result };
|
|
8328
|
-
}
|
|
8329
|
-
});
|
|
8330
|
-
this.toolExecutor.registerTool({
|
|
8331
|
-
name: "dismissHighlight",
|
|
8332
|
-
execute: async () => {
|
|
8333
|
-
this.dismissHighlight();
|
|
8334
|
-
return { success: true };
|
|
8335
|
-
}
|
|
8336
|
-
});
|
|
8337
|
-
this.toolExecutor.registerTool({
|
|
8338
|
-
name: "scrollToSection",
|
|
8339
|
-
execute: async (args) => {
|
|
8340
|
-
const sectionId = args.sectionId;
|
|
8341
|
-
const offset = args.offset;
|
|
8342
|
-
this.scrollToSection(sectionId, offset);
|
|
8343
|
-
return { success: true };
|
|
8344
|
-
}
|
|
8345
|
-
});
|
|
8346
|
-
this.toolExecutor.registerTool({
|
|
8347
|
-
name: "navigate",
|
|
8348
|
-
execute: async (args) => {
|
|
8349
|
-
const href = args.href;
|
|
8350
|
-
const result = await this.navigate(href);
|
|
8351
|
-
return { success: result, navigatedTo: result ? href : null };
|
|
8352
|
-
}
|
|
8353
|
-
});
|
|
8354
|
-
this.toolExecutor.registerTool({
|
|
8355
|
-
name: "startTour",
|
|
8356
|
-
execute: async (args) => {
|
|
8357
|
-
const sectionIds = args.sectionIds;
|
|
8358
|
-
const mode = args.mode ?? "manual";
|
|
8359
|
-
this.startTour(sectionIds, mode);
|
|
8360
|
-
return { success: true, steps: sectionIds.length };
|
|
8361
|
-
}
|
|
8362
|
-
});
|
|
8363
|
-
this.toolExecutor.registerTool({
|
|
8364
|
-
name: "readPageContent",
|
|
8365
|
-
execute: async (args) => {
|
|
8366
|
-
const sectionId = args.sectionId;
|
|
8367
|
-
const query = args.query;
|
|
8368
|
-
const model = this._currentPageModel;
|
|
8369
|
-
if (!model) return { error: "No page model available" };
|
|
8370
|
-
if (sectionId) {
|
|
8371
|
-
const section = model.sections.find((s) => s.id === sectionId);
|
|
8372
|
-
if (section) {
|
|
8373
|
-
const contentMapResult = await this.contextManager.getContent(sectionId);
|
|
8374
|
-
return {
|
|
8375
|
-
sectionId: section.id,
|
|
8376
|
-
label: section.label,
|
|
8377
|
-
summary: section.summary,
|
|
8378
|
-
contentMap: contentMapResult
|
|
8379
|
-
};
|
|
8380
|
-
}
|
|
8381
|
-
return { error: `Section "${sectionId}" not found` };
|
|
8382
|
-
}
|
|
8383
|
-
if (query) {
|
|
8384
|
-
const queryLower = query.toLowerCase();
|
|
8385
|
-
const matches = model.sections.filter(
|
|
8386
|
-
(s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
|
|
8387
|
-
);
|
|
8388
|
-
return {
|
|
8389
|
-
query,
|
|
8390
|
-
results: matches.slice(0, 5).map((s) => ({
|
|
8391
|
-
sectionId: s.id,
|
|
8392
|
-
label: s.label,
|
|
8393
|
-
snippet: s.summary?.slice(0, 200)
|
|
8394
|
-
}))
|
|
8395
|
-
};
|
|
8396
|
-
}
|
|
8397
|
-
return { error: "Provide either sectionId or query" };
|
|
8398
|
-
}
|
|
8399
|
-
});
|
|
8400
|
-
this.toolExecutor.registerTool({
|
|
8401
|
-
name: "getVisibleSections",
|
|
8402
|
-
execute: async () => {
|
|
8403
|
-
const model = this._currentPageModel;
|
|
8404
|
-
if (!model) return { sections: [] };
|
|
8405
|
-
return {
|
|
8406
|
-
sections: model.sections.slice(0, 10).map((s) => ({
|
|
8407
|
-
id: s.id,
|
|
8408
|
-
label: s.label,
|
|
8409
|
-
selector: s.selector,
|
|
8410
|
-
score: s.score
|
|
8411
|
-
}))
|
|
8412
|
-
};
|
|
8413
|
-
}
|
|
8414
|
-
});
|
|
8415
|
-
this.toolExecutor.registerTool({
|
|
8416
|
-
name: "clickElement",
|
|
8417
|
-
execute: async (args) => {
|
|
8418
|
-
if (typeof document === "undefined") return { success: false, error: "Not in browser" };
|
|
8419
|
-
const selector = args.selector;
|
|
8420
|
-
const el = document.querySelector(selector);
|
|
8421
|
-
if (!el) return { success: false, error: `Element not found: ${selector}` };
|
|
8422
|
-
if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
|
|
8423
|
-
const clickableRules = this._options.options?.clickableSelectors;
|
|
8424
|
-
const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
|
|
8425
|
-
try {
|
|
8426
|
-
return el.matches(pattern);
|
|
8427
|
-
} catch {
|
|
8428
|
-
return selector === pattern;
|
|
8429
|
-
}
|
|
8430
|
-
}) ?? false;
|
|
8431
|
-
if (!isInDevAllowList) {
|
|
8432
|
-
const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
|
|
8433
|
-
try {
|
|
8434
|
-
return el.matches(pattern);
|
|
8435
|
-
} catch {
|
|
8436
|
-
return false;
|
|
8437
|
-
}
|
|
8438
|
-
});
|
|
8439
|
-
if (defaultDenied) {
|
|
8440
|
-
return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
|
|
8441
|
-
}
|
|
8442
|
-
}
|
|
8443
|
-
if (clickableRules?.deny?.length) {
|
|
8444
|
-
const denied = clickableRules.deny.some((pattern) => {
|
|
8445
|
-
try {
|
|
8446
|
-
return el.matches(pattern);
|
|
8447
|
-
} catch {
|
|
8448
|
-
return selector === pattern;
|
|
8449
|
-
}
|
|
8450
|
-
});
|
|
8451
|
-
if (denied) {
|
|
8452
|
-
return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
|
|
8453
|
-
}
|
|
8454
|
-
}
|
|
8455
|
-
if (clickableRules?.allow?.length && !isInDevAllowList) {
|
|
8456
|
-
return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
|
|
8457
|
-
}
|
|
8458
|
-
el.click();
|
|
8459
|
-
return { success: true };
|
|
8460
|
-
}
|
|
8461
|
-
});
|
|
8462
|
-
this.toolExecutor.registerTool({
|
|
8463
|
-
name: "executeCustomAction",
|
|
8464
|
-
execute: async (args) => {
|
|
8465
|
-
const actionId = args.actionId;
|
|
8466
|
-
const params = args.params ?? {};
|
|
8467
|
-
const action = this.customActions.get(actionId);
|
|
8468
|
-
if (!action) return { error: `Unknown action: ${actionId}` };
|
|
8469
|
-
try {
|
|
8470
|
-
const result = await action.handler(params);
|
|
8471
|
-
return { success: true, result };
|
|
8472
|
-
} catch (err) {
|
|
8473
|
-
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
|
8474
|
-
}
|
|
8475
|
-
}
|
|
8476
|
-
});
|
|
8477
|
-
}
|
|
8478
|
-
getToolDefinitions() {
|
|
8479
|
-
const builtinTools = [
|
|
9377
|
+
getBuiltinToolSpecs() {
|
|
9378
|
+
return [
|
|
8480
9379
|
{
|
|
8481
9380
|
name: "highlight",
|
|
8482
9381
|
description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
|
|
@@ -8486,13 +9385,27 @@ var GuideKitCore = class {
|
|
|
8486
9385
|
tooltip: { type: "string", description: "Text to show in tooltip" },
|
|
8487
9386
|
position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
|
|
8488
9387
|
},
|
|
8489
|
-
|
|
9388
|
+
required: [],
|
|
9389
|
+
schemaVersion: 1,
|
|
9390
|
+
execute: async (args) => {
|
|
9391
|
+
const sectionId = args.sectionId;
|
|
9392
|
+
const selector = args.selector;
|
|
9393
|
+
const tooltip = args.tooltip;
|
|
9394
|
+
const position = args.position;
|
|
9395
|
+
const result = this.highlight({ sectionId, selector, tooltip, position });
|
|
9396
|
+
return { success: result };
|
|
9397
|
+
}
|
|
8490
9398
|
},
|
|
8491
9399
|
{
|
|
8492
9400
|
name: "dismissHighlight",
|
|
8493
9401
|
description: "Remove the current spotlight overlay.",
|
|
8494
9402
|
parameters: {},
|
|
8495
|
-
|
|
9403
|
+
required: [],
|
|
9404
|
+
schemaVersion: 1,
|
|
9405
|
+
execute: async () => {
|
|
9406
|
+
this.dismissHighlight();
|
|
9407
|
+
return { success: true };
|
|
9408
|
+
}
|
|
8496
9409
|
},
|
|
8497
9410
|
{
|
|
8498
9411
|
name: "scrollToSection",
|
|
@@ -8501,7 +9414,14 @@ var GuideKitCore = class {
|
|
|
8501
9414
|
sectionId: { type: "string", description: "ID of the section to scroll to" },
|
|
8502
9415
|
offset: { type: "number", description: "Pixel offset for sticky headers" }
|
|
8503
9416
|
},
|
|
8504
|
-
|
|
9417
|
+
required: ["sectionId"],
|
|
9418
|
+
schemaVersion: 1,
|
|
9419
|
+
execute: async (args) => {
|
|
9420
|
+
const sectionId = args.sectionId;
|
|
9421
|
+
const offset = args.offset;
|
|
9422
|
+
this.scrollToSection(sectionId, offset);
|
|
9423
|
+
return { success: true };
|
|
9424
|
+
}
|
|
8505
9425
|
},
|
|
8506
9426
|
{
|
|
8507
9427
|
name: "navigate",
|
|
@@ -8509,7 +9429,13 @@ var GuideKitCore = class {
|
|
|
8509
9429
|
parameters: {
|
|
8510
9430
|
href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
|
|
8511
9431
|
},
|
|
8512
|
-
|
|
9432
|
+
required: ["href"],
|
|
9433
|
+
schemaVersion: 1,
|
|
9434
|
+
execute: async (args) => {
|
|
9435
|
+
const href = args.href;
|
|
9436
|
+
const result = await this.navigate(href);
|
|
9437
|
+
return { success: result, navigatedTo: result ? href : null };
|
|
9438
|
+
}
|
|
8513
9439
|
},
|
|
8514
9440
|
{
|
|
8515
9441
|
name: "startTour",
|
|
@@ -8518,7 +9444,14 @@ var GuideKitCore = class {
|
|
|
8518
9444
|
sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
|
|
8519
9445
|
mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
|
|
8520
9446
|
},
|
|
8521
|
-
|
|
9447
|
+
required: ["sectionIds"],
|
|
9448
|
+
schemaVersion: 1,
|
|
9449
|
+
execute: async (args) => {
|
|
9450
|
+
const sectionIds = args.sectionIds;
|
|
9451
|
+
const mode = args.mode ?? "manual";
|
|
9452
|
+
this.startTour(sectionIds, mode);
|
|
9453
|
+
return { success: true, steps: sectionIds.length };
|
|
9454
|
+
}
|
|
8522
9455
|
},
|
|
8523
9456
|
{
|
|
8524
9457
|
name: "readPageContent",
|
|
@@ -8527,13 +9460,61 @@ var GuideKitCore = class {
|
|
|
8527
9460
|
sectionId: { type: "string", description: "Section ID to read" },
|
|
8528
9461
|
query: { type: "string", description: "Keyword to search for across sections" }
|
|
8529
9462
|
},
|
|
8530
|
-
|
|
9463
|
+
required: [],
|
|
9464
|
+
schemaVersion: 1,
|
|
9465
|
+
execute: async (args) => {
|
|
9466
|
+
const sectionId = args.sectionId;
|
|
9467
|
+
const query = args.query;
|
|
9468
|
+
const model = this._currentPageModel;
|
|
9469
|
+
if (!model) return { error: "No page model available" };
|
|
9470
|
+
if (sectionId) {
|
|
9471
|
+
const section = model.sections.find((s) => s.id === sectionId);
|
|
9472
|
+
if (section) {
|
|
9473
|
+
const contentMapResult = await this.contextManager.getContent(sectionId);
|
|
9474
|
+
return {
|
|
9475
|
+
sectionId: section.id,
|
|
9476
|
+
label: section.label,
|
|
9477
|
+
summary: section.summary,
|
|
9478
|
+
contentMap: contentMapResult
|
|
9479
|
+
};
|
|
9480
|
+
}
|
|
9481
|
+
return { error: `Section "${sectionId}" not found` };
|
|
9482
|
+
}
|
|
9483
|
+
if (query) {
|
|
9484
|
+
const queryLower = query.toLowerCase();
|
|
9485
|
+
const matches = model.sections.filter(
|
|
9486
|
+
(s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
|
|
9487
|
+
);
|
|
9488
|
+
return {
|
|
9489
|
+
query,
|
|
9490
|
+
results: matches.slice(0, 5).map((s) => ({
|
|
9491
|
+
sectionId: s.id,
|
|
9492
|
+
label: s.label,
|
|
9493
|
+
snippet: s.summary?.slice(0, 200)
|
|
9494
|
+
}))
|
|
9495
|
+
};
|
|
9496
|
+
}
|
|
9497
|
+
return { error: "Provide either sectionId or query" };
|
|
9498
|
+
}
|
|
8531
9499
|
},
|
|
8532
9500
|
{
|
|
8533
9501
|
name: "getVisibleSections",
|
|
8534
9502
|
description: "Get the list of sections currently visible in the user viewport.",
|
|
8535
9503
|
parameters: {},
|
|
8536
|
-
|
|
9504
|
+
required: [],
|
|
9505
|
+
schemaVersion: 1,
|
|
9506
|
+
execute: async () => {
|
|
9507
|
+
const model = this._currentPageModel;
|
|
9508
|
+
if (!model) return { sections: [] };
|
|
9509
|
+
return {
|
|
9510
|
+
sections: model.sections.slice(0, 10).map((s) => ({
|
|
9511
|
+
id: s.id,
|
|
9512
|
+
label: s.label,
|
|
9513
|
+
selector: s.selector,
|
|
9514
|
+
score: s.score
|
|
9515
|
+
}))
|
|
9516
|
+
};
|
|
9517
|
+
}
|
|
8537
9518
|
},
|
|
8538
9519
|
{
|
|
8539
9520
|
name: "clickElement",
|
|
@@ -8541,7 +9522,52 @@ var GuideKitCore = class {
|
|
|
8541
9522
|
parameters: {
|
|
8542
9523
|
selector: { type: "string", description: "CSS selector of the element to click" }
|
|
8543
9524
|
},
|
|
8544
|
-
|
|
9525
|
+
required: ["selector"],
|
|
9526
|
+
schemaVersion: 1,
|
|
9527
|
+
execute: async (args) => {
|
|
9528
|
+
if (typeof document === "undefined") return { success: false, error: "Not in browser" };
|
|
9529
|
+
const selector = args.selector;
|
|
9530
|
+
const el = document.querySelector(selector);
|
|
9531
|
+
if (!el) return { success: false, error: `Element not found: ${selector}` };
|
|
9532
|
+
if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
|
|
9533
|
+
const clickableRules = this._options.options?.clickableSelectors;
|
|
9534
|
+
const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
|
|
9535
|
+
try {
|
|
9536
|
+
return el.matches(pattern);
|
|
9537
|
+
} catch {
|
|
9538
|
+
return selector === pattern;
|
|
9539
|
+
}
|
|
9540
|
+
}) ?? false;
|
|
9541
|
+
if (!isInDevAllowList) {
|
|
9542
|
+
const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
|
|
9543
|
+
try {
|
|
9544
|
+
return el.matches(pattern);
|
|
9545
|
+
} catch {
|
|
9546
|
+
return false;
|
|
9547
|
+
}
|
|
9548
|
+
});
|
|
9549
|
+
if (defaultDenied) {
|
|
9550
|
+
return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
|
|
9551
|
+
}
|
|
9552
|
+
}
|
|
9553
|
+
if (clickableRules?.deny?.length) {
|
|
9554
|
+
const denied = clickableRules.deny.some((pattern) => {
|
|
9555
|
+
try {
|
|
9556
|
+
return el.matches(pattern);
|
|
9557
|
+
} catch {
|
|
9558
|
+
return selector === pattern;
|
|
9559
|
+
}
|
|
9560
|
+
});
|
|
9561
|
+
if (denied) {
|
|
9562
|
+
return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
|
|
9563
|
+
}
|
|
9564
|
+
}
|
|
9565
|
+
if (clickableRules?.allow?.length && !isInDevAllowList) {
|
|
9566
|
+
return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
|
|
9567
|
+
}
|
|
9568
|
+
el.click();
|
|
9569
|
+
return { success: true };
|
|
9570
|
+
}
|
|
8545
9571
|
},
|
|
8546
9572
|
{
|
|
8547
9573
|
name: "executeCustomAction",
|
|
@@ -8550,9 +9576,37 @@ var GuideKitCore = class {
|
|
|
8550
9576
|
actionId: { type: "string", description: "ID of the custom action" },
|
|
8551
9577
|
params: { type: "object", description: "Parameters for the action" }
|
|
8552
9578
|
},
|
|
8553
|
-
|
|
9579
|
+
required: ["actionId"],
|
|
9580
|
+
schemaVersion: 1,
|
|
9581
|
+
execute: async (args) => {
|
|
9582
|
+
const actionId = args.actionId;
|
|
9583
|
+
const params = args.params ?? {};
|
|
9584
|
+
const action = this.customActions.get(actionId);
|
|
9585
|
+
if (!action) return { error: `Unknown action: ${actionId}` };
|
|
9586
|
+
try {
|
|
9587
|
+
const result = await action.handler(params);
|
|
9588
|
+
return { success: true, result };
|
|
9589
|
+
} catch (err) {
|
|
9590
|
+
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
|
9591
|
+
}
|
|
9592
|
+
}
|
|
8554
9593
|
}
|
|
8555
9594
|
];
|
|
9595
|
+
}
|
|
9596
|
+
/**
|
|
9597
|
+
* Register all built-in tool handlers with the ToolExecutor.
|
|
9598
|
+
* Called once during init() after VisualGuidance and all subsystems are ready.
|
|
9599
|
+
*/
|
|
9600
|
+
registerBuiltinTools() {
|
|
9601
|
+
if (!this.toolExecutor) return;
|
|
9602
|
+
for (const spec of this.getBuiltinToolSpecs()) {
|
|
9603
|
+
this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
|
|
9604
|
+
}
|
|
9605
|
+
}
|
|
9606
|
+
getToolDefinitions() {
|
|
9607
|
+
const builtinTools = this.getBuiltinToolSpecs().map(
|
|
9608
|
+
({ execute: _execute, ...def }) => def
|
|
9609
|
+
);
|
|
8556
9610
|
for (const [actionId, action] of this.customActions) {
|
|
8557
9611
|
builtinTools.push({
|
|
8558
9612
|
name: `action_${actionId}`,
|
|
@@ -8595,6 +9649,9 @@ exports.TimeoutError = TimeoutError;
|
|
|
8595
9649
|
exports.TokenManager = TokenManager;
|
|
8596
9650
|
exports.ToolExecutor = ToolExecutor;
|
|
8597
9651
|
exports.VisualGuidance = VisualGuidance;
|
|
9652
|
+
exports.VoicePipeline = VoicePipeline;
|
|
9653
|
+
exports.WebSpeechSTT = WebSpeechSTT;
|
|
9654
|
+
exports.WebSpeechTTS = WebSpeechTTS;
|
|
8598
9655
|
exports.createEventBus = createEventBus;
|
|
8599
9656
|
exports.isGuideKitError = isGuideKitError;
|
|
8600
9657
|
//# sourceMappingURL=index.cjs.map
|