@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +1639 -582
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +571 -30
- package/dist/index.d.ts +571 -30
- package/dist/index.js +1637 -583
- package/dist/index.js.map +1 -1
- package/package.json +37 -14
package/dist/index.js
CHANGED
|
@@ -1091,7 +1091,7 @@ var DOMScanner = class {
|
|
|
1091
1091
|
if (el.closest("[data-guidekit-ignore]")) return;
|
|
1092
1092
|
const style = window.getComputedStyle(el);
|
|
1093
1093
|
const position = style.position;
|
|
1094
|
-
const zIndex = parseInt(style.zIndex, 10);
|
|
1094
|
+
const zIndex = parseInt(style.zIndex, 10) || 0;
|
|
1095
1095
|
if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
|
|
1096
1096
|
const visible = isElementVisible(el);
|
|
1097
1097
|
if (!visible) return;
|
|
@@ -1748,7 +1748,9 @@ var ErrorCodes = {
|
|
|
1748
1748
|
// Content
|
|
1749
1749
|
CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
|
|
1750
1750
|
// Privacy
|
|
1751
|
-
PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
|
|
1751
|
+
PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
|
|
1752
|
+
// General
|
|
1753
|
+
UNKNOWN: "UNKNOWN"
|
|
1752
1754
|
};
|
|
1753
1755
|
var GuideKitError = class extends Error {
|
|
1754
1756
|
code;
|
|
@@ -1841,13 +1843,27 @@ function isGuideKitError(error) {
|
|
|
1841
1843
|
var DEFAULT_OPENAI_MODEL = "gpt-4o";
|
|
1842
1844
|
var DEFAULT_TIMEOUT_MS = 15e3;
|
|
1843
1845
|
var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
|
|
1846
|
+
function emptyUsage() {
|
|
1847
|
+
return { prompt: 0, completion: 0, total: 0 };
|
|
1848
|
+
}
|
|
1844
1849
|
var OpenAIAdapter = class {
|
|
1845
1850
|
apiKey;
|
|
1846
1851
|
model;
|
|
1852
|
+
/** Tracks whether the last extractChunks call emitted a done chunk. */
|
|
1853
|
+
lastExtractEmittedDone = false;
|
|
1854
|
+
/**
|
|
1855
|
+
* Token usage extracted from the most recent `parseResponse` call.
|
|
1856
|
+
* Updated as each SSE chunk is parsed.
|
|
1857
|
+
*/
|
|
1858
|
+
_lastUsage = emptyUsage();
|
|
1847
1859
|
constructor(config) {
|
|
1848
1860
|
this.apiKey = config.apiKey;
|
|
1849
1861
|
this.model = config.model ?? DEFAULT_OPENAI_MODEL;
|
|
1850
1862
|
}
|
|
1863
|
+
/** Token usage from the most recent parseResponse call. */
|
|
1864
|
+
get lastUsage() {
|
|
1865
|
+
return this._lastUsage;
|
|
1866
|
+
}
|
|
1851
1867
|
// -----------------------------------------------------------------------
|
|
1852
1868
|
// LLMProviderAdapter implementation
|
|
1853
1869
|
// -----------------------------------------------------------------------
|
|
@@ -1862,7 +1878,11 @@ var OpenAIAdapter = class {
|
|
|
1862
1878
|
function: {
|
|
1863
1879
|
name: tool.name,
|
|
1864
1880
|
description: tool.description,
|
|
1865
|
-
parameters:
|
|
1881
|
+
parameters: {
|
|
1882
|
+
type: "object",
|
|
1883
|
+
properties: { ...tool.parameters },
|
|
1884
|
+
required: tool.required ?? []
|
|
1885
|
+
}
|
|
1866
1886
|
}
|
|
1867
1887
|
}));
|
|
1868
1888
|
}
|
|
@@ -1884,11 +1904,17 @@ var OpenAIAdapter = class {
|
|
|
1884
1904
|
* prefixed by `data: `. The final line is `data: [DONE]`.
|
|
1885
1905
|
* Text content arrives in `choices[0].delta.content` and tool calls
|
|
1886
1906
|
* arrive in `choices[0].delta.tool_calls`.
|
|
1907
|
+
*
|
|
1908
|
+
* This method also:
|
|
1909
|
+
* - Detects content filtering and throws `ContentFilterError`.
|
|
1910
|
+
* - Tracks token usage (accessible via `lastUsage` after iteration).
|
|
1887
1911
|
*/
|
|
1888
1912
|
async *parseResponse(stream) {
|
|
1889
1913
|
const reader = stream.getReader();
|
|
1890
1914
|
const decoder = new TextDecoder();
|
|
1891
1915
|
let buffer = "";
|
|
1916
|
+
let doneEmitted = false;
|
|
1917
|
+
this._lastUsage = emptyUsage();
|
|
1892
1918
|
const pendingToolCalls = /* @__PURE__ */ new Map();
|
|
1893
1919
|
try {
|
|
1894
1920
|
while (true) {
|
|
@@ -1904,7 +1930,10 @@ var OpenAIAdapter = class {
|
|
|
1904
1930
|
if (jsonStr === "" || jsonStr === "[DONE]") {
|
|
1905
1931
|
if (jsonStr === "[DONE]") {
|
|
1906
1932
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
1907
|
-
|
|
1933
|
+
if (!doneEmitted) {
|
|
1934
|
+
doneEmitted = true;
|
|
1935
|
+
yield { text: "", done: true };
|
|
1936
|
+
}
|
|
1908
1937
|
}
|
|
1909
1938
|
continue;
|
|
1910
1939
|
}
|
|
@@ -1914,19 +1943,53 @@ var OpenAIAdapter = class {
|
|
|
1914
1943
|
} catch {
|
|
1915
1944
|
continue;
|
|
1916
1945
|
}
|
|
1917
|
-
|
|
1946
|
+
if (this.isContentFiltered(parsed)) {
|
|
1947
|
+
throw new ContentFilterError({
|
|
1948
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
1949
|
+
message: "Response was blocked by provider content safety filter.",
|
|
1950
|
+
provider: "openai",
|
|
1951
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
1952
|
+
});
|
|
1953
|
+
}
|
|
1954
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
1955
|
+
if (chunkUsage) {
|
|
1956
|
+
this._lastUsage = chunkUsage;
|
|
1957
|
+
}
|
|
1958
|
+
yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
|
|
1959
|
+
if (!doneEmitted && this.lastExtractEmittedDone) {
|
|
1960
|
+
doneEmitted = true;
|
|
1961
|
+
}
|
|
1918
1962
|
}
|
|
1919
1963
|
}
|
|
1920
1964
|
if (buffer.trim().startsWith("data:")) {
|
|
1921
1965
|
const jsonStr = buffer.trim().slice(5).trim();
|
|
1922
1966
|
if (jsonStr === "[DONE]") {
|
|
1923
1967
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
1924
|
-
|
|
1968
|
+
if (!doneEmitted) {
|
|
1969
|
+
doneEmitted = true;
|
|
1970
|
+
yield { text: "", done: true };
|
|
1971
|
+
}
|
|
1925
1972
|
} else if (jsonStr !== "") {
|
|
1926
1973
|
try {
|
|
1927
1974
|
const parsed = JSON.parse(jsonStr);
|
|
1928
|
-
|
|
1929
|
-
|
|
1975
|
+
if (this.isContentFiltered(parsed)) {
|
|
1976
|
+
throw new ContentFilterError({
|
|
1977
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
1978
|
+
message: "Response was blocked by provider content safety filter.",
|
|
1979
|
+
provider: "openai",
|
|
1980
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
1981
|
+
});
|
|
1982
|
+
}
|
|
1983
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
1984
|
+
if (chunkUsage) {
|
|
1985
|
+
this._lastUsage = chunkUsage;
|
|
1986
|
+
}
|
|
1987
|
+
yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
|
|
1988
|
+
if (!doneEmitted && this.lastExtractEmittedDone) {
|
|
1989
|
+
doneEmitted = true;
|
|
1990
|
+
}
|
|
1991
|
+
} catch (error) {
|
|
1992
|
+
if (error instanceof ContentFilterError) throw error;
|
|
1930
1993
|
}
|
|
1931
1994
|
}
|
|
1932
1995
|
}
|
|
@@ -1955,10 +2018,14 @@ var OpenAIAdapter = class {
|
|
|
1955
2018
|
* the raw Response object.
|
|
1956
2019
|
*/
|
|
1957
2020
|
async streamRequest(params) {
|
|
2021
|
+
const contentsArray = params.contents;
|
|
1958
2022
|
const messages = [
|
|
1959
2023
|
{ role: "system", content: params.systemPrompt },
|
|
1960
|
-
...
|
|
2024
|
+
...contentsArray
|
|
1961
2025
|
];
|
|
2026
|
+
if (params.userMessage) {
|
|
2027
|
+
messages.push({ role: "user", content: params.userMessage });
|
|
2028
|
+
}
|
|
1962
2029
|
const body = {
|
|
1963
2030
|
model: this.model,
|
|
1964
2031
|
messages,
|
|
@@ -2041,7 +2108,8 @@ var OpenAIAdapter = class {
|
|
|
2041
2108
|
* yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
|
|
2042
2109
|
* or when flushed.
|
|
2043
2110
|
*/
|
|
2044
|
-
*extractChunks(parsed, pendingToolCalls) {
|
|
2111
|
+
*extractChunks(parsed, pendingToolCalls, doneEmitted) {
|
|
2112
|
+
this.lastExtractEmittedDone = false;
|
|
2045
2113
|
const choices = parsed.choices;
|
|
2046
2114
|
if (!choices || choices.length === 0) return;
|
|
2047
2115
|
for (const choice of choices) {
|
|
@@ -2075,7 +2143,8 @@ var OpenAIAdapter = class {
|
|
|
2075
2143
|
if (finishReason === "tool_calls") {
|
|
2076
2144
|
yield* this.flushPendingToolCalls(pendingToolCalls);
|
|
2077
2145
|
}
|
|
2078
|
-
if (finishReason === "stop") {
|
|
2146
|
+
if (finishReason === "stop" && !doneEmitted && !this.lastExtractEmittedDone) {
|
|
2147
|
+
this.lastExtractEmittedDone = true;
|
|
2079
2148
|
yield { text: "", done: true };
|
|
2080
2149
|
}
|
|
2081
2150
|
}
|
|
@@ -2091,7 +2160,8 @@ var OpenAIAdapter = class {
|
|
|
2091
2160
|
let args = {};
|
|
2092
2161
|
try {
|
|
2093
2162
|
args = JSON.parse(tc.argumentsJson);
|
|
2094
|
-
} catch {
|
|
2163
|
+
} catch (_e) {
|
|
2164
|
+
console.warn("[GuideKit:LLM] Failed to parse tool call arguments:", tc.argumentsJson);
|
|
2095
2165
|
}
|
|
2096
2166
|
yield {
|
|
2097
2167
|
id: tc.id,
|
|
@@ -2194,16 +2264,26 @@ var DEFAULT_SAFETY_SETTINGS = [
|
|
|
2194
2264
|
{ category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_ONLY_HIGH" },
|
|
2195
2265
|
{ category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_ONLY_HIGH" }
|
|
2196
2266
|
];
|
|
2197
|
-
function
|
|
2267
|
+
function emptyUsage2() {
|
|
2198
2268
|
return { prompt: 0, completion: 0, total: 0 };
|
|
2199
2269
|
}
|
|
2200
2270
|
var GeminiAdapter = class {
|
|
2201
2271
|
apiKey;
|
|
2202
2272
|
model;
|
|
2273
|
+
/**
|
|
2274
|
+
* Token usage extracted from the most recent `parseResponse` call.
|
|
2275
|
+
* Updated as each SSE chunk is parsed; the final value reflects the
|
|
2276
|
+
* cumulative usage metadata sent by Gemini (typically in the last chunk).
|
|
2277
|
+
*/
|
|
2278
|
+
_lastUsage = emptyUsage2();
|
|
2203
2279
|
constructor(config) {
|
|
2204
2280
|
this.apiKey = config.apiKey;
|
|
2205
2281
|
this.model = config.model ?? DEFAULT_GEMINI_MODEL;
|
|
2206
2282
|
}
|
|
2283
|
+
/** Token usage from the most recent parseResponse call. */
|
|
2284
|
+
get lastUsage() {
|
|
2285
|
+
return this._lastUsage;
|
|
2286
|
+
}
|
|
2207
2287
|
// -----------------------------------------------------------------------
|
|
2208
2288
|
// LLMProviderAdapter implementation
|
|
2209
2289
|
// -----------------------------------------------------------------------
|
|
@@ -2218,7 +2298,11 @@ var GeminiAdapter = class {
|
|
|
2218
2298
|
functionDeclarations: tools.map((tool) => ({
|
|
2219
2299
|
name: tool.name,
|
|
2220
2300
|
description: tool.description,
|
|
2221
|
-
parameters:
|
|
2301
|
+
parameters: {
|
|
2302
|
+
type: "object",
|
|
2303
|
+
properties: { ...tool.parameters },
|
|
2304
|
+
required: tool.required ?? []
|
|
2305
|
+
}
|
|
2222
2306
|
}))
|
|
2223
2307
|
}
|
|
2224
2308
|
];
|
|
@@ -2240,11 +2324,16 @@ var GeminiAdapter = class {
|
|
|
2240
2324
|
* The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
|
|
2241
2325
|
* as a JSON object prefixed by `data: `. We parse line-by-line, extract
|
|
2242
2326
|
* text parts and function call parts, and yield the appropriate types.
|
|
2327
|
+
*
|
|
2328
|
+
* This method also:
|
|
2329
|
+
* - Detects content filtering and throws `ContentFilterError`.
|
|
2330
|
+
* - Tracks token usage (accessible via `lastUsage` after iteration).
|
|
2243
2331
|
*/
|
|
2244
2332
|
async *parseResponse(stream) {
|
|
2245
2333
|
const reader = stream.getReader();
|
|
2246
2334
|
const decoder = new TextDecoder();
|
|
2247
2335
|
let buffer = "";
|
|
2336
|
+
this._lastUsage = emptyUsage2();
|
|
2248
2337
|
try {
|
|
2249
2338
|
while (true) {
|
|
2250
2339
|
const { done, value } = await reader.read();
|
|
@@ -2263,6 +2352,18 @@ var GeminiAdapter = class {
|
|
|
2263
2352
|
} catch {
|
|
2264
2353
|
continue;
|
|
2265
2354
|
}
|
|
2355
|
+
if (this.isContentFiltered(parsed)) {
|
|
2356
|
+
throw new ContentFilterError({
|
|
2357
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2358
|
+
message: "Response was blocked by provider content safety filter.",
|
|
2359
|
+
provider: "gemini",
|
|
2360
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
2361
|
+
});
|
|
2362
|
+
}
|
|
2363
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
2364
|
+
if (chunkUsage) {
|
|
2365
|
+
this._lastUsage = chunkUsage;
|
|
2366
|
+
}
|
|
2266
2367
|
yield* this.extractChunks(parsed);
|
|
2267
2368
|
}
|
|
2268
2369
|
}
|
|
@@ -2271,8 +2372,21 @@ var GeminiAdapter = class {
|
|
|
2271
2372
|
if (jsonStr !== "" && jsonStr !== "[DONE]") {
|
|
2272
2373
|
try {
|
|
2273
2374
|
const parsed = JSON.parse(jsonStr);
|
|
2375
|
+
if (this.isContentFiltered(parsed)) {
|
|
2376
|
+
throw new ContentFilterError({
|
|
2377
|
+
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2378
|
+
message: "Response was blocked by provider content safety filter.",
|
|
2379
|
+
provider: "gemini",
|
|
2380
|
+
suggestion: "Rephrase your question or adjust safety settings."
|
|
2381
|
+
});
|
|
2382
|
+
}
|
|
2383
|
+
const chunkUsage = this.extractUsage(parsed);
|
|
2384
|
+
if (chunkUsage) {
|
|
2385
|
+
this._lastUsage = chunkUsage;
|
|
2386
|
+
}
|
|
2274
2387
|
yield* this.extractChunks(parsed);
|
|
2275
|
-
} catch {
|
|
2388
|
+
} catch (error) {
|
|
2389
|
+
if (error instanceof ContentFilterError) throw error;
|
|
2276
2390
|
}
|
|
2277
2391
|
}
|
|
2278
2392
|
}
|
|
@@ -2303,15 +2417,21 @@ var GeminiAdapter = class {
|
|
|
2303
2417
|
/**
|
|
2304
2418
|
* Build and execute a streaming request to the Gemini API.
|
|
2305
2419
|
* Returns the raw `ReadableStream` for the response body together with
|
|
2306
|
-
*
|
|
2420
|
+
* the raw Response object.
|
|
2421
|
+
*
|
|
2422
|
+
* Note: The Gemini API key is passed as a URL query parameter (`key=`).
|
|
2423
|
+
* This is inherent to the Gemini REST SSE endpoint design; the key is
|
|
2424
|
+
* transmitted over HTTPS so it remains encrypted in transit. (H3)
|
|
2307
2425
|
*/
|
|
2308
2426
|
async streamRequest(params) {
|
|
2427
|
+
const contentsArray = params.contents;
|
|
2428
|
+
const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
|
|
2309
2429
|
const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
|
|
2310
2430
|
const body = {
|
|
2311
2431
|
systemInstruction: {
|
|
2312
2432
|
parts: [{ text: params.systemPrompt }]
|
|
2313
2433
|
},
|
|
2314
|
-
contents:
|
|
2434
|
+
contents: fullContents,
|
|
2315
2435
|
safetySettings: DEFAULT_SAFETY_SETTINGS,
|
|
2316
2436
|
generationConfig: {
|
|
2317
2437
|
temperature: 0.7,
|
|
@@ -2379,7 +2499,7 @@ var GeminiAdapter = class {
|
|
|
2379
2499
|
return { stream: response.body, response };
|
|
2380
2500
|
}
|
|
2381
2501
|
// -----------------------------------------------------------------------
|
|
2382
|
-
//
|
|
2502
|
+
// Public helpers (LLMProviderAdapter interface)
|
|
2383
2503
|
// -----------------------------------------------------------------------
|
|
2384
2504
|
/**
|
|
2385
2505
|
* Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
|
|
@@ -2552,7 +2672,8 @@ var LLMOrchestrator = class {
|
|
|
2552
2672
|
updateConfig(config) {
|
|
2553
2673
|
this._config = config;
|
|
2554
2674
|
this._adapter = this.createAdapter(config);
|
|
2555
|
-
|
|
2675
|
+
const label = "provider" in config ? config.provider : "custom adapter";
|
|
2676
|
+
this.log(`Config updated: ${label}`);
|
|
2556
2677
|
}
|
|
2557
2678
|
/** Get the current provider adapter. */
|
|
2558
2679
|
get adapter() {
|
|
@@ -2563,139 +2684,42 @@ var LLMOrchestrator = class {
|
|
|
2563
2684
|
// -----------------------------------------------------------------------
|
|
2564
2685
|
/**
|
|
2565
2686
|
* Execute a streaming LLM request and collect the results.
|
|
2687
|
+
*
|
|
2688
|
+
* This method is fully adapter-agnostic: it delegates streaming,
|
|
2689
|
+
* response parsing, content-filter detection, and usage extraction
|
|
2690
|
+
* entirely to the active `LLMProviderAdapter`. No provider-specific
|
|
2691
|
+
* SSE parsing lives in the orchestrator.
|
|
2566
2692
|
*/
|
|
2567
2693
|
async executeStream(params, _isRetry) {
|
|
2568
|
-
const
|
|
2569
|
-
const historyContents =
|
|
2570
|
-
const
|
|
2571
|
-
|
|
2572
|
-
{ role: "user", parts: [{ text: params.userMessage }] }
|
|
2573
|
-
];
|
|
2574
|
-
const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
|
|
2575
|
-
const { stream } = await geminiAdapter.streamRequest({
|
|
2694
|
+
const adapter = this._adapter;
|
|
2695
|
+
const historyContents = adapter.formatConversation(params.history);
|
|
2696
|
+
const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
|
|
2697
|
+
const { stream } = await adapter.streamRequest({
|
|
2576
2698
|
systemPrompt: params.systemPrompt,
|
|
2577
|
-
contents,
|
|
2699
|
+
contents: historyContents,
|
|
2700
|
+
userMessage: params.userMessage,
|
|
2578
2701
|
tools,
|
|
2579
2702
|
signal: params.signal
|
|
2580
2703
|
});
|
|
2581
2704
|
let fullText = "";
|
|
2582
2705
|
const toolCalls = [];
|
|
2583
|
-
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
buffer += decoder.decode(value, { stream: true });
|
|
2593
|
-
const lines = buffer.split("\n");
|
|
2594
|
-
buffer = lines.pop() ?? "";
|
|
2595
|
-
for (const line of lines) {
|
|
2596
|
-
const trimmed = line.trim();
|
|
2597
|
-
if (!trimmed.startsWith("data:")) continue;
|
|
2598
|
-
const jsonStr = trimmed.slice(5).trim();
|
|
2599
|
-
if (jsonStr === "" || jsonStr === "[DONE]") continue;
|
|
2600
|
-
let parsed;
|
|
2601
|
-
try {
|
|
2602
|
-
parsed = JSON.parse(jsonStr);
|
|
2603
|
-
} catch {
|
|
2604
|
-
continue;
|
|
2605
|
-
}
|
|
2606
|
-
if (geminiAdapter.isContentFiltered(parsed)) {
|
|
2607
|
-
wasContentFiltered = true;
|
|
2608
|
-
break;
|
|
2609
|
-
}
|
|
2610
|
-
const chunkUsage = geminiAdapter.extractUsage(parsed);
|
|
2611
|
-
if (chunkUsage) {
|
|
2612
|
-
usage = chunkUsage;
|
|
2613
|
-
}
|
|
2614
|
-
const candidates = parsed.candidates;
|
|
2615
|
-
if (!candidates || candidates.length === 0) continue;
|
|
2616
|
-
for (const candidate of candidates) {
|
|
2617
|
-
const content = candidate.content;
|
|
2618
|
-
if (!content?.parts) continue;
|
|
2619
|
-
const finishReason = candidate.finishReason;
|
|
2620
|
-
const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
|
|
2621
|
-
for (const part of content.parts) {
|
|
2622
|
-
if (typeof part.text === "string") {
|
|
2623
|
-
fullText += part.text;
|
|
2624
|
-
const chunk = { text: part.text, done: isDone };
|
|
2625
|
-
this.callbacks.onChunk?.(chunk);
|
|
2626
|
-
}
|
|
2627
|
-
if (part.functionCall) {
|
|
2628
|
-
const fc = part.functionCall;
|
|
2629
|
-
const toolCall = {
|
|
2630
|
-
id: fc.name,
|
|
2631
|
-
name: fc.name,
|
|
2632
|
-
arguments: fc.args ?? {}
|
|
2633
|
-
};
|
|
2634
|
-
toolCalls.push(toolCall);
|
|
2635
|
-
this.callbacks.onToolCall?.(toolCall);
|
|
2636
|
-
}
|
|
2637
|
-
}
|
|
2638
|
-
}
|
|
2639
|
-
}
|
|
2640
|
-
if (wasContentFiltered) break;
|
|
2641
|
-
}
|
|
2642
|
-
if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
|
|
2643
|
-
const jsonStr = buffer.trim().slice(5).trim();
|
|
2644
|
-
if (jsonStr !== "" && jsonStr !== "[DONE]") {
|
|
2645
|
-
try {
|
|
2646
|
-
const parsed = JSON.parse(jsonStr);
|
|
2647
|
-
if (geminiAdapter.isContentFiltered(parsed)) {
|
|
2648
|
-
wasContentFiltered = true;
|
|
2649
|
-
} else {
|
|
2650
|
-
const chunkUsage = geminiAdapter.extractUsage(parsed);
|
|
2651
|
-
if (chunkUsage) usage = chunkUsage;
|
|
2652
|
-
const candidates = parsed.candidates;
|
|
2653
|
-
if (candidates) {
|
|
2654
|
-
for (const candidate of candidates) {
|
|
2655
|
-
const content = candidate.content;
|
|
2656
|
-
if (!content?.parts) continue;
|
|
2657
|
-
const finishReason = candidate.finishReason;
|
|
2658
|
-
const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
|
|
2659
|
-
for (const part of content.parts) {
|
|
2660
|
-
if (typeof part.text === "string") {
|
|
2661
|
-
fullText += part.text;
|
|
2662
|
-
const chunk = {
|
|
2663
|
-
text: part.text,
|
|
2664
|
-
done: isDone
|
|
2665
|
-
};
|
|
2666
|
-
this.callbacks.onChunk?.(chunk);
|
|
2667
|
-
}
|
|
2668
|
-
if (part.functionCall) {
|
|
2669
|
-
const fc = part.functionCall;
|
|
2670
|
-
const toolCall = {
|
|
2671
|
-
id: fc.name,
|
|
2672
|
-
name: fc.name,
|
|
2673
|
-
arguments: fc.args ?? {}
|
|
2674
|
-
};
|
|
2675
|
-
toolCalls.push(toolCall);
|
|
2676
|
-
this.callbacks.onToolCall?.(toolCall);
|
|
2677
|
-
}
|
|
2678
|
-
}
|
|
2679
|
-
}
|
|
2680
|
-
}
|
|
2681
|
-
}
|
|
2682
|
-
} catch {
|
|
2683
|
-
}
|
|
2706
|
+
for await (const item of adapter.parseResponse(stream)) {
|
|
2707
|
+
if ("name" in item && "arguments" in item) {
|
|
2708
|
+
const toolCall = item;
|
|
2709
|
+
toolCalls.push(toolCall);
|
|
2710
|
+
this.callbacks.onToolCall?.(toolCall);
|
|
2711
|
+
} else {
|
|
2712
|
+
const chunk = item;
|
|
2713
|
+
if (chunk.text) {
|
|
2714
|
+
fullText += chunk.text;
|
|
2684
2715
|
}
|
|
2716
|
+
this.callbacks.onChunk?.(chunk);
|
|
2685
2717
|
}
|
|
2686
|
-
} finally {
|
|
2687
|
-
reader.releaseLock();
|
|
2688
|
-
}
|
|
2689
|
-
if (wasContentFiltered) {
|
|
2690
|
-
throw new ContentFilterError({
|
|
2691
|
-
code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
|
|
2692
|
-
message: "Response was blocked by Gemini content safety filter.",
|
|
2693
|
-
provider: "gemini",
|
|
2694
|
-
suggestion: "Rephrase your question or adjust safety settings."
|
|
2695
|
-
});
|
|
2696
2718
|
}
|
|
2697
|
-
|
|
2698
|
-
|
|
2719
|
+
this.callbacks.onChunk?.({ text: "", done: true });
|
|
2720
|
+
let usage = emptyUsage2();
|
|
2721
|
+
if ("lastUsage" in adapter) {
|
|
2722
|
+
usage = adapter.lastUsage;
|
|
2699
2723
|
}
|
|
2700
2724
|
if (usage.total > 0) {
|
|
2701
2725
|
this.callbacks.onTokenUsage?.(usage);
|
|
@@ -2707,25 +2731,30 @@ var LLMOrchestrator = class {
|
|
|
2707
2731
|
}
|
|
2708
2732
|
/**
|
|
2709
2733
|
* Create the appropriate adapter for the given config.
|
|
2710
|
-
*
|
|
2711
|
-
*
|
|
2734
|
+
*
|
|
2735
|
+
* Built-in providers:
|
|
2736
|
+
* - `'gemini'` — uses the bundled `GeminiAdapter`.
|
|
2737
|
+
*
|
|
2738
|
+
* Custom adapters:
|
|
2739
|
+
* - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
|
|
2740
|
+
* Example: `llm: { adapter: new OpenAIAdapter({ ... }) }`
|
|
2712
2741
|
*/
|
|
2713
2742
|
createAdapter(config) {
|
|
2743
|
+
if ("adapter" in config) {
|
|
2744
|
+
return config.adapter;
|
|
2745
|
+
}
|
|
2714
2746
|
switch (config.provider) {
|
|
2715
2747
|
case "gemini":
|
|
2716
2748
|
return new GeminiAdapter(config);
|
|
2717
|
-
case "openai":
|
|
2718
|
-
return new OpenAIAdapter(config);
|
|
2719
2749
|
default:
|
|
2720
2750
|
throw new Error(
|
|
2721
|
-
`LLM provider "${config.provider}" is not yet supported.
|
|
2751
|
+
`LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
|
|
2722
2752
|
);
|
|
2723
2753
|
}
|
|
2724
2754
|
}
|
|
2725
2755
|
/** Convenience accessor for the current provider name. */
|
|
2726
2756
|
get providerName() {
|
|
2727
|
-
if (this._config
|
|
2728
|
-
if (this._config.provider === "openai") return "openai";
|
|
2757
|
+
if ("provider" in this._config) return this._config.provider;
|
|
2729
2758
|
return void 0;
|
|
2730
2759
|
}
|
|
2731
2760
|
/** Log a debug message if debug mode is enabled. */
|
|
@@ -2878,7 +2907,7 @@ var ToolExecutor = class {
|
|
|
2878
2907
|
break;
|
|
2879
2908
|
}
|
|
2880
2909
|
}
|
|
2881
|
-
if (rounds >= this.maxRounds
|
|
2910
|
+
if (rounds >= this.maxRounds) {
|
|
2882
2911
|
this.log(
|
|
2883
2912
|
`Max rounds (${this.maxRounds}) reached. Returning current text.`
|
|
2884
2913
|
);
|
|
@@ -2981,6 +3010,19 @@ var ToolExecutor = class {
|
|
|
2981
3010
|
return s.value;
|
|
2982
3011
|
}
|
|
2983
3012
|
const tc = toolCalls[i];
|
|
3013
|
+
if (!tc) {
|
|
3014
|
+
const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
|
|
3015
|
+
return {
|
|
3016
|
+
toolCallId: `unknown-${i}`,
|
|
3017
|
+
record: {
|
|
3018
|
+
name: "unknown",
|
|
3019
|
+
args: {},
|
|
3020
|
+
result: void 0,
|
|
3021
|
+
durationMs: 0,
|
|
3022
|
+
error: errorMsg2
|
|
3023
|
+
}
|
|
3024
|
+
};
|
|
3025
|
+
}
|
|
2984
3026
|
const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
|
|
2985
3027
|
return {
|
|
2986
3028
|
toolCallId: tc.id,
|
|
@@ -4173,64 +4215,62 @@ var DeepgramSTT = class {
|
|
|
4173
4215
|
}
|
|
4174
4216
|
};
|
|
4175
4217
|
|
|
4176
|
-
// src/voice/elevenlabs-
|
|
4177
|
-
var LOG_PREFIX7 = "[GuideKit:
|
|
4178
|
-
var
|
|
4179
|
-
var
|
|
4180
|
-
var
|
|
4181
|
-
var
|
|
4182
|
-
function
|
|
4183
|
-
const
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
|
|
4187
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
4218
|
+
// src/voice/elevenlabs-stt.ts
|
|
4219
|
+
var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
|
|
4220
|
+
var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
|
|
4221
|
+
var DEFAULT_LANGUAGE2 = "en";
|
|
4222
|
+
var INACTIVITY_TIMEOUT_S = 30;
|
|
4223
|
+
var SAMPLE_RATE = 16e3;
|
|
4224
|
+
function float32ToInt162(float32) {
|
|
4225
|
+
const int16 = new Int16Array(float32.length);
|
|
4226
|
+
for (let i = 0; i < float32.length; i++) {
|
|
4227
|
+
const s = Math.max(-1, Math.min(1, float32[i]));
|
|
4228
|
+
int16[i] = s < 0 ? s * 32768 : s * 32767;
|
|
4188
4229
|
}
|
|
4189
|
-
return
|
|
4230
|
+
return int16;
|
|
4190
4231
|
}
|
|
4191
|
-
|
|
4192
|
-
|
|
4232
|
+
function int16ToBase64(int16) {
|
|
4233
|
+
const bytes = new Uint8Array(int16.buffer);
|
|
4234
|
+
const CHUNK_SIZE = 8192;
|
|
4235
|
+
let binary = "";
|
|
4236
|
+
for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
|
|
4237
|
+
const chunk = bytes.subarray(i, i + CHUNK_SIZE);
|
|
4238
|
+
binary += String.fromCharCode(...chunk);
|
|
4239
|
+
}
|
|
4240
|
+
return btoa(binary);
|
|
4241
|
+
}
|
|
4242
|
+
var ElevenLabsSTT = class {
|
|
4243
|
+
// ---- Configuration -------------------------------------------------------
|
|
4193
4244
|
apiKey;
|
|
4194
|
-
|
|
4195
|
-
modelId;
|
|
4245
|
+
language;
|
|
4196
4246
|
debugEnabled;
|
|
4197
|
-
// ---- Internal state
|
|
4247
|
+
// ---- Internal state ------------------------------------------------------
|
|
4198
4248
|
wsManager = null;
|
|
4199
4249
|
_connected = false;
|
|
4200
4250
|
_suspended = false;
|
|
4201
|
-
/**
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
* contain voice settings and the API key before any text chunks.
|
|
4205
|
-
*/
|
|
4206
|
-
bosSent = false;
|
|
4207
|
-
/** Registered audio-event callbacks. */
|
|
4208
|
-
audioCallbacks = /* @__PURE__ */ new Set();
|
|
4209
|
-
// -----------------------------------------------------------------------
|
|
4251
|
+
/** Registered transcript callbacks. */
|
|
4252
|
+
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
4253
|
+
// -------------------------------------------------------------------------
|
|
4210
4254
|
// Constructor
|
|
4211
|
-
//
|
|
4255
|
+
// -------------------------------------------------------------------------
|
|
4212
4256
|
constructor(options) {
|
|
4213
4257
|
this.apiKey = options.apiKey;
|
|
4214
|
-
this.
|
|
4215
|
-
this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
|
|
4258
|
+
this.language = options.language ?? DEFAULT_LANGUAGE2;
|
|
4216
4259
|
this.debugEnabled = options.debug ?? false;
|
|
4217
|
-
this.log("
|
|
4218
|
-
voiceId: this.voiceId,
|
|
4219
|
-
modelId: this.modelId
|
|
4220
|
-
});
|
|
4260
|
+
this.log("ElevenLabsSTT created", { language: this.language });
|
|
4221
4261
|
}
|
|
4222
|
-
//
|
|
4262
|
+
// -------------------------------------------------------------------------
|
|
4223
4263
|
// Public API
|
|
4224
|
-
//
|
|
4264
|
+
// -------------------------------------------------------------------------
|
|
4225
4265
|
/** Whether the WebSocket is currently connected and ready. */
|
|
4226
4266
|
get isConnected() {
|
|
4227
4267
|
return this._connected;
|
|
4228
4268
|
}
|
|
4229
4269
|
/**
|
|
4230
|
-
* Open a WebSocket connection to
|
|
4270
|
+
* Open a WebSocket connection to ElevenLabs' real-time STT endpoint.
|
|
4231
4271
|
*
|
|
4232
|
-
* Resolves once the connection is established and the
|
|
4233
|
-
*
|
|
4272
|
+
* Resolves once the connection is established and the socket is ready to
|
|
4273
|
+
* receive audio frames. Rejects if the connection cannot be established.
|
|
4234
4274
|
*/
|
|
4235
4275
|
async connect() {
|
|
4236
4276
|
if (this._connected) {
|
|
@@ -4242,17 +4282,16 @@ var ElevenLabsTTS = class {
|
|
|
4242
4282
|
return;
|
|
4243
4283
|
}
|
|
4244
4284
|
const url = this.buildUrl();
|
|
4245
|
-
this.log("Connecting to", url);
|
|
4285
|
+
this.log("Connecting to", url.replace(this.apiKey, "***"));
|
|
4246
4286
|
this.wsManager = new WebSocketManager({
|
|
4247
4287
|
url,
|
|
4248
4288
|
protocols: [],
|
|
4249
4289
|
debug: this.debugEnabled,
|
|
4250
|
-
label: "ElevenLabs-
|
|
4290
|
+
label: "ElevenLabs-STT"
|
|
4251
4291
|
});
|
|
4252
4292
|
this.wsManager.onOpen(() => {
|
|
4253
4293
|
this._connected = true;
|
|
4254
|
-
this.
|
|
4255
|
-
this.log("Connected and BOS sent");
|
|
4294
|
+
this.log("Connected");
|
|
4256
4295
|
});
|
|
4257
4296
|
this.wsManager.onMessage((event) => {
|
|
4258
4297
|
this.handleMessage(event);
|
|
@@ -4267,67 +4306,54 @@ var ElevenLabsTTS = class {
|
|
|
4267
4306
|
return this.wsManager.connect();
|
|
4268
4307
|
}
|
|
4269
4308
|
/**
|
|
4270
|
-
* Send
|
|
4271
|
-
*
|
|
4272
|
-
* May be called multiple times to stream text incrementally. Each call
|
|
4273
|
-
* sends a text chunk with `try_trigger_generation: true` so ElevenLabs
|
|
4274
|
-
* can begin synthesising as soon as it has enough context.
|
|
4275
|
-
*
|
|
4276
|
-
* Call {@link flush} when the complete utterance has been sent.
|
|
4277
|
-
*/
|
|
4278
|
-
speak(text) {
|
|
4279
|
-
if (!this._connected || !this.wsManager || this._suspended) {
|
|
4280
|
-
this.log("Cannot speak \u2014 not connected or suspended");
|
|
4281
|
-
return;
|
|
4282
|
-
}
|
|
4283
|
-
if (!text) {
|
|
4284
|
-
return;
|
|
4285
|
-
}
|
|
4286
|
-
const message = JSON.stringify({
|
|
4287
|
-
text,
|
|
4288
|
-
try_trigger_generation: true
|
|
4289
|
-
});
|
|
4290
|
-
this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
|
|
4291
|
-
this.wsManager.send(message);
|
|
4292
|
-
}
|
|
4293
|
-
/**
|
|
4294
|
-
* Signal the end of text input for the current utterance.
|
|
4309
|
+
* Send audio data to ElevenLabs for transcription.
|
|
4295
4310
|
*
|
|
4296
|
-
*
|
|
4297
|
-
*
|
|
4311
|
+
* Accepts either `Float32Array` (Web Audio API output) or `Int16Array`
|
|
4312
|
+
* (already encoded as linear16). Float32 data is automatically converted
|
|
4313
|
+
* to Int16 before encoding. Audio is sent as a base64-encoded JSON message.
|
|
4298
4314
|
*/
|
|
4299
|
-
|
|
4315
|
+
sendAudio(audioData) {
|
|
4300
4316
|
if (!this._connected || !this.wsManager || this._suspended) {
|
|
4301
|
-
this.log("Cannot flush \u2014 not connected or suspended");
|
|
4302
4317
|
return;
|
|
4303
4318
|
}
|
|
4304
|
-
const
|
|
4305
|
-
|
|
4306
|
-
this.wsManager.send(
|
|
4319
|
+
const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
|
|
4320
|
+
const base64 = int16ToBase64(int16);
|
|
4321
|
+
this.wsManager.send(
|
|
4322
|
+
JSON.stringify({
|
|
4323
|
+
type: "input_audio_chunk",
|
|
4324
|
+
audio: base64,
|
|
4325
|
+
sample_rate: SAMPLE_RATE
|
|
4326
|
+
})
|
|
4327
|
+
);
|
|
4307
4328
|
}
|
|
4308
4329
|
/**
|
|
4309
|
-
* Register a callback to receive
|
|
4330
|
+
* Register a callback to receive transcript events.
|
|
4310
4331
|
*
|
|
4311
4332
|
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
4312
4333
|
*/
|
|
4313
|
-
|
|
4314
|
-
this.
|
|
4334
|
+
onTranscript(callback) {
|
|
4335
|
+
this.transcriptCallbacks.add(callback);
|
|
4315
4336
|
let removed = false;
|
|
4316
4337
|
return () => {
|
|
4317
4338
|
if (removed) return;
|
|
4318
4339
|
removed = true;
|
|
4319
|
-
this.
|
|
4340
|
+
this.transcriptCallbacks.delete(callback);
|
|
4320
4341
|
};
|
|
4321
4342
|
}
|
|
4322
|
-
/**
|
|
4343
|
+
/**
|
|
4344
|
+
* Gracefully close the connection.
|
|
4345
|
+
*
|
|
4346
|
+
* Sends a `commit_audio` message so ElevenLabs can finalise any pending
|
|
4347
|
+
* transcription before the socket is torn down.
|
|
4348
|
+
*/
|
|
4323
4349
|
close() {
|
|
4324
4350
|
if (!this._connected || !this.wsManager) {
|
|
4325
4351
|
this.log("Not connected \u2014 nothing to close");
|
|
4326
4352
|
return;
|
|
4327
4353
|
}
|
|
4328
|
-
this.log("
|
|
4354
|
+
this.log("Sending commit_audio and closing");
|
|
4329
4355
|
try {
|
|
4330
|
-
this.wsManager.send(JSON.stringify({
|
|
4356
|
+
this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
|
|
4331
4357
|
} catch {
|
|
4332
4358
|
}
|
|
4333
4359
|
this.wsManager.close();
|
|
@@ -4341,14 +4367,13 @@ var ElevenLabsTTS = class {
|
|
|
4341
4367
|
this.wsManager = null;
|
|
4342
4368
|
}
|
|
4343
4369
|
this.cleanup();
|
|
4344
|
-
this.
|
|
4370
|
+
this.transcriptCallbacks.clear();
|
|
4345
4371
|
}
|
|
4346
4372
|
/**
|
|
4347
4373
|
* Suspend the adapter (e.g. when the device goes offline).
|
|
4348
4374
|
*
|
|
4349
|
-
* Marks the adapter as suspended so that
|
|
4350
|
-
*
|
|
4351
|
-
* will close it after an inactivity timeout if the network went away.
|
|
4375
|
+
* Marks the adapter as suspended so that incoming `sendAudio` calls are
|
|
4376
|
+
* silently dropped. The WebSocket itself is left open.
|
|
4352
4377
|
*/
|
|
4353
4378
|
suspend() {
|
|
4354
4379
|
if (this._suspended) return;
|
|
@@ -4356,54 +4381,22 @@ var ElevenLabsTTS = class {
|
|
|
4356
4381
|
this.log("Suspended");
|
|
4357
4382
|
}
|
|
4358
4383
|
/**
|
|
4359
|
-
* Resume after a prior `suspend()`.
|
|
4360
|
-
* still alive, the adapter returns to normal operation. If the connection
|
|
4361
|
-
* was lost while suspended, callers should `close()` / `destroy()` and
|
|
4362
|
-
* create a new instance.
|
|
4384
|
+
* Resume after a prior `suspend()`.
|
|
4363
4385
|
*/
|
|
4364
4386
|
resume() {
|
|
4365
4387
|
if (!this._suspended) return;
|
|
4366
4388
|
this._suspended = false;
|
|
4367
4389
|
this.log("Resumed");
|
|
4368
4390
|
}
|
|
4369
|
-
//
|
|
4370
|
-
// BOS handshake
|
|
4371
|
-
// -----------------------------------------------------------------------
|
|
4372
|
-
/**
|
|
4373
|
-
* Send the BOS (beginning-of-stream) message.
|
|
4374
|
-
*
|
|
4375
|
-
* This must be the very first message on a new WebSocket session. It
|
|
4376
|
-
* carries the API key and voice settings.
|
|
4377
|
-
*/
|
|
4378
|
-
sendBOS() {
|
|
4379
|
-
if (!this.wsManager || this.bosSent) {
|
|
4380
|
-
return;
|
|
4381
|
-
}
|
|
4382
|
-
const bos = JSON.stringify({
|
|
4383
|
-
text: " ",
|
|
4384
|
-
voice_settings: {
|
|
4385
|
-
stability: DEFAULT_STABILITY,
|
|
4386
|
-
similarity_boost: DEFAULT_SIMILARITY_BOOST
|
|
4387
|
-
},
|
|
4388
|
-
xi_api_key: this.apiKey
|
|
4389
|
-
});
|
|
4390
|
-
this.wsManager.send(bos);
|
|
4391
|
-
this.bosSent = true;
|
|
4392
|
-
this.log("BOS handshake sent");
|
|
4393
|
-
}
|
|
4394
|
-
// -----------------------------------------------------------------------
|
|
4391
|
+
// -------------------------------------------------------------------------
|
|
4395
4392
|
// Message handling
|
|
4396
|
-
//
|
|
4393
|
+
// -------------------------------------------------------------------------
|
|
4397
4394
|
/**
|
|
4398
|
-
* Parse incoming ElevenLabs JSON messages and emit
|
|
4399
|
-
*
|
|
4400
|
-
* ElevenLabs sends messages with the following shape:
|
|
4401
|
-
* ```json
|
|
4402
|
-
* { "audio": "base64encoded...", "isFinal": false }
|
|
4403
|
-
* ```
|
|
4395
|
+
* Parse incoming ElevenLabs JSON messages and emit transcript events.
|
|
4404
4396
|
*
|
|
4405
|
-
*
|
|
4406
|
-
*
|
|
4397
|
+
* ElevenLabs sends two transcript message types:
|
|
4398
|
+
* - `partial_transcript`: interim result, `isFinal = false`
|
|
4399
|
+
* - `committed_transcript`: final result, `isFinal = true`
|
|
4407
4400
|
*/
|
|
4408
4401
|
handleMessage(event) {
|
|
4409
4402
|
if (typeof event.data !== "string") {
|
|
@@ -4416,47 +4409,1026 @@ var ElevenLabsTTS = class {
|
|
|
4416
4409
|
this.log("Failed to parse message", event.data);
|
|
4417
4410
|
return;
|
|
4418
4411
|
}
|
|
4419
|
-
|
|
4420
|
-
|
|
4421
|
-
|
|
4412
|
+
const type = parsed["type"];
|
|
4413
|
+
if (type === "committed_transcript" || type === "partial_transcript") {
|
|
4414
|
+
this.handleTranscriptMessage(parsed, type === "committed_transcript");
|
|
4415
|
+
} else {
|
|
4416
|
+
this.log("Received message", type, parsed);
|
|
4417
|
+
}
|
|
4418
|
+
}
|
|
4419
|
+
/**
|
|
4420
|
+
* Extract transcript data from a transcript message and notify subscribers.
|
|
4421
|
+
*/
|
|
4422
|
+
handleTranscriptMessage(parsed, isFinal) {
|
|
4423
|
+
const result = parsed["result"];
|
|
4424
|
+
const text = result?.text ?? "";
|
|
4425
|
+
const confidence = result?.confidence ?? 0;
|
|
4426
|
+
if (text.trim() === "") {
|
|
4427
|
+
return;
|
|
4428
|
+
}
|
|
4429
|
+
const transcriptEvent = {
|
|
4430
|
+
text,
|
|
4431
|
+
isFinal,
|
|
4432
|
+
confidence,
|
|
4433
|
+
timestamp: Date.now()
|
|
4434
|
+
};
|
|
4435
|
+
this.log(
|
|
4436
|
+
isFinal ? "Final transcript:" : "Interim transcript:",
|
|
4437
|
+
text,
|
|
4438
|
+
`(${(confidence * 100).toFixed(1)}%)`
|
|
4439
|
+
);
|
|
4440
|
+
this.emitTranscript(transcriptEvent);
|
|
4441
|
+
}
|
|
4442
|
+
// -------------------------------------------------------------------------
|
|
4443
|
+
// Subscriber notification
|
|
4444
|
+
// -------------------------------------------------------------------------
|
|
4445
|
+
/**
|
|
4446
|
+
* Emit a transcript event to all registered callbacks.
|
|
4447
|
+
*
|
|
4448
|
+
* Errors thrown by individual callbacks are caught and logged so one
|
|
4449
|
+
* misbehaving subscriber does not prevent others from receiving the event.
|
|
4450
|
+
*/
|
|
4451
|
+
emitTranscript(event) {
|
|
4452
|
+
for (const cb of this.transcriptCallbacks) {
|
|
4453
|
+
try {
|
|
4454
|
+
cb(event);
|
|
4455
|
+
} catch (err) {
|
|
4456
|
+
console.error(LOG_PREFIX7, "Transcript callback threw:", err);
|
|
4457
|
+
}
|
|
4458
|
+
}
|
|
4459
|
+
}
|
|
4460
|
+
// -------------------------------------------------------------------------
|
|
4461
|
+
// URL building
|
|
4462
|
+
// -------------------------------------------------------------------------
|
|
4463
|
+
/** Build the ElevenLabs streaming STT endpoint URL with auth query params. */
|
|
4464
|
+
buildUrl() {
|
|
4465
|
+
const params = new URLSearchParams({
|
|
4466
|
+
xi_api_key: this.apiKey,
|
|
4467
|
+
language: this.language,
|
|
4468
|
+
inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
|
|
4469
|
+
});
|
|
4470
|
+
return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
|
|
4471
|
+
}
|
|
4472
|
+
// -------------------------------------------------------------------------
|
|
4473
|
+
// Cleanup
|
|
4474
|
+
// -------------------------------------------------------------------------
|
|
4475
|
+
/** Reset internal state after disconnection. */
|
|
4476
|
+
/** Reset internal state after disconnection. */
cleanup() {
  // Only the connected flag needs resetting for this adapter; callbacks
  // are preserved so a reconnect keeps existing subscribers.
  this._connected = false;
}
|
|
4479
|
+
// -------------------------------------------------------------------------
|
|
4480
|
+
// Logging
|
|
4481
|
+
// -------------------------------------------------------------------------
|
|
4482
|
+
/** Conditional debug logging. */
|
|
4483
|
+
log(...args) {
|
|
4484
|
+
if (this.debugEnabled) {
|
|
4485
|
+
console.debug(LOG_PREFIX7, ...args);
|
|
4486
|
+
}
|
|
4487
|
+
}
|
|
4488
|
+
};
|
|
4489
|
+
|
|
4490
|
+
// src/voice/elevenlabs-tts.ts
|
|
4491
|
+
// Prefix for all TTS adapter console output.
var LOG_PREFIX8 = "[GuideKit:TTS]";
// Default ElevenLabs voice ID used when the caller supplies none.
var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
// Default streaming model used when the caller supplies none.
var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
// Default voice_settings sent in the BOS handshake (see sendBOS).
var DEFAULT_STABILITY = 0.5;
var DEFAULT_SIMILARITY_BOOST = 0.75;
|
|
4496
|
+
/**
 * Decode a base64 string into an ArrayBuffer of raw bytes.
 *
 * `atob` yields a binary string whose char codes are all in 0-255, so a
 * straight charCode copy is lossless.
 */
function base64ToArrayBuffer(base64) {
  const decoded = atob(base64);
  const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return bytes.buffer;
}
|
|
4505
|
+
/**
 * Streaming text-to-speech adapter for the ElevenLabs WebSocket API.
 *
 * Lifecycle: connect() -> speak() (repeatable) -> flush() -> close()/destroy().
 * The first message on every socket session must be the BOS handshake
 * (carrying the API key and voice settings); `bosSent` guards that ordering.
 */
var ElevenLabsTTS = class {
  // ---- Configuration ------------------------------------------------------
  apiKey;
  voiceId;
  modelId;
  debugEnabled;
  // ---- Internal state -----------------------------------------------------
  wsManager = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether the BOS (beginning-of-stream) handshake has been sent for the
   * current WebSocket session. ElevenLabs requires the first message to
   * contain voice settings and the API key before any text chunks.
   */
  bosSent = false;
  /** Registered audio-event callbacks. */
  audioCallbacks = /* @__PURE__ */ new Set();
  // -----------------------------------------------------------------------
  // Constructor
  // -----------------------------------------------------------------------
  /**
   * @param options.apiKey  ElevenLabs API key (required).
   * @param options.voiceId Voice to synthesise with; falls back to DEFAULT_VOICE_ID.
   * @param options.modelId Model to use; falls back to DEFAULT_MODEL_ID.
   * @param options.debug   Enable console.debug logging.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
    this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
    this.debugEnabled = options.debug ?? false;
    this.log("ElevenLabsTTS created", {
      voiceId: this.voiceId,
      modelId: this.modelId
    });
  }
  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------
  /** Whether the WebSocket is currently connected and ready. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
   *
   * Resolves once the connection is established and the BOS handshake has
   * been sent. Rejects if the connection cannot be established.
   * Note: returns early (resolved) in SSR environments without WebSocket.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof WebSocket === "undefined") {
      this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
      return;
    }
    const url = this.buildUrl();
    this.log("Connecting to", url);
    this.wsManager = new WebSocketManager({
      url,
      protocols: [],
      debug: this.debugEnabled,
      label: "ElevenLabs-TTS"
    });
    // BOS must be sent immediately on open, before any speak() traffic.
    this.wsManager.onOpen(() => {
      this._connected = true;
      this.sendBOS();
      this.log("Connected and BOS sent");
    });
    this.wsManager.onMessage((event) => {
      this.handleMessage(event);
    });
    // cleanup() resets bosSent so a reconnect re-sends the handshake.
    this.wsManager.onClose((code, reason) => {
      this.log("Connection closed", { code, reason });
      this.cleanup();
    });
    this.wsManager.onError((event) => {
      this.log("WebSocket error", event);
    });
    return this.wsManager.connect();
  }
  /**
   * Send text to be synthesised into speech.
   *
   * May be called multiple times to stream text incrementally. Each call
   * sends a text chunk with `try_trigger_generation: true` so ElevenLabs
   * can begin synthesising as soon as it has enough context.
   *
   * Call {@link flush} when the complete utterance has been sent.
   */
  speak(text) {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot speak \u2014 not connected or suspended");
      return;
    }
    if (!text) {
      return;
    }
    const message = JSON.stringify({
      text,
      try_trigger_generation: true
    });
    // Log only a preview to keep debug output readable.
    this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
    this.wsManager.send(message);
  }
  /**
   * Signal the end of text input for the current utterance.
   *
   * Sends the EOS (end-of-stream) marker to ElevenLabs. The server will
   * flush any remaining audio and send a final chunk with `isFinal: true`.
   */
  flush() {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot flush \u2014 not connected or suspended");
      return;
    }
    // An empty text message is the ElevenLabs EOS marker.
    const message = JSON.stringify({ text: "" });
    this.log("Sending EOS (flush)");
    this.wsManager.send(message);
  }
  /**
   * Register a callback to receive audio output events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onAudio(callback) {
    this.audioCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.audioCallbacks.delete(callback);
    };
  }
  /** Gracefully close the connection by sending EOS then closing. */
  close() {
    if (!this._connected || !this.wsManager) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing connection");
    // Best-effort EOS: the socket may already be half-closed, so a send
    // failure here is deliberately ignored.
    try {
      this.wsManager.send(JSON.stringify({ text: "" }));
    } catch {
    }
    this.wsManager.close();
    this.cleanup();
  }
  /** Force-destroy the connection without a graceful handshake. */
  destroy() {
    this.log("Destroying");
    if (this.wsManager) {
      this.wsManager.destroy();
      this.wsManager = null;
    }
    this.cleanup();
    // Unlike close(), destroy() also drops all subscribers.
    this.audioCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Marks the adapter as suspended so that calls to `speak()` and `flush()`
   * are silently dropped. The WebSocket itself is left open; ElevenLabs
   * will close it after an inactivity timeout if the network went away.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. If the underlying connection is
   * still alive, the adapter returns to normal operation. If the connection
   * was lost while suspended, callers should `close()` / `destroy()` and
   * create a new instance.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this.log("Resumed");
  }
  // -----------------------------------------------------------------------
  // BOS handshake
  // -----------------------------------------------------------------------
  /**
   * Send the BOS (beginning-of-stream) message.
   *
   * This must be the very first message on a new WebSocket session. It
   * carries the API key and voice settings.
   */
  sendBOS() {
    // Idempotent per session: bosSent blocks duplicates until cleanup().
    if (!this.wsManager || this.bosSent) {
      return;
    }
    const bos = JSON.stringify({
      text: " ",
      voice_settings: {
        stability: DEFAULT_STABILITY,
        similarity_boost: DEFAULT_SIMILARITY_BOOST
      },
      xi_api_key: this.apiKey
    });
    this.wsManager.send(bos);
    this.bosSent = true;
    this.log("BOS handshake sent");
  }
  // -----------------------------------------------------------------------
  // Message handling
  // -----------------------------------------------------------------------
  /**
   * Parse incoming ElevenLabs JSON messages and emit audio events.
   *
   * ElevenLabs sends messages with the following shape:
   * ```json
   * { "audio": "base64encoded...", "isFinal": false }
   * ```
   *
   * When `isFinal` is `true`, the server has finished synthesising the
   * current utterance (i.e. after EOS was sent).
   */
  handleMessage(event) {
    // Binary frames are not expected from this endpoint; ignore them.
    if (typeof event.data !== "string") {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      this.log("Failed to parse message", event.data);
      return;
    }
    if (parsed["error"] !== void 0) {
      this.log("ElevenLabs error:", parsed["error"]);
      return;
    }
    if (parsed["audio"] === void 0 || parsed["audio"] === null) {
      this.log("Non-audio message received", parsed);
      return;
    }
    const audioBase64 = parsed["audio"];
    const isFinal = parsed["isFinal"] === true;
    if (!audioBase64 || audioBase64.length === 0) {
      // An empty final chunk still matters to subscribers: it marks the
      // end of the utterance, so emit a zero-length buffer for it.
      if (isFinal) {
        this.emitAudio({
          audio: new ArrayBuffer(0),
          isFinal: true,
          timestamp: Date.now()
        });
      }
      return;
    }
    let audioBuffer;
    try {
      audioBuffer = base64ToArrayBuffer(audioBase64);
    } catch (err) {
      this.log("Failed to decode base64 audio", err);
      return;
    }
    const audioEvent = {
      audio: audioBuffer,
      isFinal,
      timestamp: Date.now()
    };
    this.log(
      isFinal ? "Final audio chunk:" : "Audio chunk:",
      `${audioBuffer.byteLength} bytes`
    );
    this.emitAudio(audioEvent);
  }
  // -----------------------------------------------------------------------
  // Subscriber notification
  // -----------------------------------------------------------------------
  /**
   * Emit an audio event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitAudio(event) {
    for (const cb of this.audioCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX8, "Audio callback threw:", err);
      }
    }
  }
  // -----------------------------------------------------------------------
  // URL building
  // -----------------------------------------------------------------------
  /** Build the ElevenLabs streaming TTS endpoint URL. */
  buildUrl() {
    // Auth is NOT in the URL for TTS: the API key travels in the BOS message.
    const params = new URLSearchParams({
      model_id: this.modelId
    });
    return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
  }
  // -----------------------------------------------------------------------
  // Cleanup
  // -----------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
    // Reset so a future session re-sends the BOS handshake.
    this.bosSent = false;
  }
  // -----------------------------------------------------------------------
  // Logging
  // -----------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX8, ...args);
    }
  }
};
|
|
4817
|
+
|
|
4818
|
+
// src/voice/web-speech-stt.ts
|
|
4819
|
+
// Prefix for all Web Speech STT adapter console output.
var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
// Recognition language used when the caller supplies none.
var DEFAULT_LANGUAGE3 = "en-US";
|
|
4821
|
+
var WebSpeechSTT = class {
|
|
4822
|
+
// ---- Configuration -------------------------------------------------------
|
|
4823
|
+
language;
|
|
4824
|
+
continuous;
|
|
4825
|
+
interimResultsEnabled;
|
|
4826
|
+
debugEnabled;
|
|
4827
|
+
// ---- Internal state ------------------------------------------------------
|
|
4828
|
+
recognition = null;
|
|
4829
|
+
_connected = false;
|
|
4830
|
+
_suspended = false;
|
|
4831
|
+
/**
|
|
4832
|
+
* Whether we intentionally stopped recognition. Used to distinguish
|
|
4833
|
+
* between intentional stop and unexpected end (for auto-restart in
|
|
4834
|
+
* continuous mode).
|
|
4835
|
+
*/
|
|
4836
|
+
_intentionalStop = false;
|
|
4837
|
+
/** Registered transcript callbacks. */
|
|
4838
|
+
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
4839
|
+
// -------------------------------------------------------------------------
|
|
4840
|
+
// Constructor
|
|
4841
|
+
// -------------------------------------------------------------------------
|
|
4842
|
+
constructor(options = {}) {
|
|
4843
|
+
this.language = options.language ?? DEFAULT_LANGUAGE3;
|
|
4844
|
+
this.continuous = options.continuous ?? true;
|
|
4845
|
+
this.interimResultsEnabled = options.interimResults ?? true;
|
|
4846
|
+
this.debugEnabled = options.debug ?? false;
|
|
4847
|
+
this.log("WebSpeechSTT created", {
|
|
4848
|
+
language: this.language,
|
|
4849
|
+
continuous: this.continuous,
|
|
4850
|
+
interimResults: this.interimResultsEnabled
|
|
4851
|
+
});
|
|
4852
|
+
}
|
|
4853
|
+
// -------------------------------------------------------------------------
|
|
4854
|
+
// Static methods
|
|
4855
|
+
// -------------------------------------------------------------------------
|
|
4856
|
+
/**
|
|
4857
|
+
* Check whether the Web Speech API SpeechRecognition is supported in the
|
|
4858
|
+
* current environment. Safe to call in SSR (returns false).
|
|
4859
|
+
*/
|
|
4860
|
+
static isSupported() {
|
|
4861
|
+
if (typeof window === "undefined") return false;
|
|
4862
|
+
return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
|
|
4863
|
+
}
|
|
4864
|
+
// -------------------------------------------------------------------------
|
|
4865
|
+
// Public API
|
|
4866
|
+
// -------------------------------------------------------------------------
|
|
4867
|
+
/** Whether recognition is currently active and connected. */
|
|
4868
|
+
/** Whether recognition is currently active and connected. */
get isConnected() {
  return this._connected;
}
|
|
4871
|
+
/**
|
|
4872
|
+
* Start speech recognition.
|
|
4873
|
+
*
|
|
4874
|
+
* Creates the SpeechRecognition instance and begins listening. Resolves
|
|
4875
|
+
* once the recognition session has started. Rejects if the API is not
|
|
4876
|
+
* supported or the browser denies permission.
|
|
4877
|
+
*/
|
|
4878
|
+
async connect() {
|
|
4879
|
+
if (this._connected) {
|
|
4880
|
+
this.log("Already connected \u2014 skipping");
|
|
4881
|
+
return;
|
|
4882
|
+
}
|
|
4883
|
+
if (typeof window === "undefined") {
|
|
4884
|
+
this.log("SSR environment detected \u2014 cannot connect");
|
|
4885
|
+
return;
|
|
4886
|
+
}
|
|
4887
|
+
const SpeechRecognitionClass = this.resolveSpeechRecognition();
|
|
4888
|
+
if (!SpeechRecognitionClass) {
|
|
4889
|
+
throw new Error(
|
|
4890
|
+
"Web Speech API (SpeechRecognition) is not supported in this browser."
|
|
4891
|
+
);
|
|
4892
|
+
}
|
|
4893
|
+
this.recognition = new SpeechRecognitionClass();
|
|
4894
|
+
this.recognition.lang = this.language;
|
|
4895
|
+
this.recognition.continuous = this.continuous;
|
|
4896
|
+
this.recognition.interimResults = this.interimResultsEnabled;
|
|
4897
|
+
this.recognition.maxAlternatives = 1;
|
|
4898
|
+
this.recognition.onstart = () => {
|
|
4899
|
+
this._connected = true;
|
|
4900
|
+
this._intentionalStop = false;
|
|
4901
|
+
this.log("Recognition started");
|
|
4902
|
+
};
|
|
4903
|
+
this.recognition.onresult = (event) => {
|
|
4904
|
+
this.handleResult(event);
|
|
4905
|
+
};
|
|
4906
|
+
this.recognition.onerror = (event) => {
|
|
4907
|
+
this.handleError(event);
|
|
4908
|
+
};
|
|
4909
|
+
this.recognition.onend = () => {
|
|
4910
|
+
this.log("Recognition ended");
|
|
4911
|
+
const wasConnected = this._connected;
|
|
4912
|
+
this._connected = false;
|
|
4913
|
+
if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
|
|
4914
|
+
this.log("Auto-restarting continuous recognition");
|
|
4915
|
+
try {
|
|
4916
|
+
this.recognition?.start();
|
|
4917
|
+
} catch {
|
|
4918
|
+
this.log("Failed to auto-restart recognition");
|
|
4919
|
+
}
|
|
4920
|
+
}
|
|
4921
|
+
};
|
|
4922
|
+
return new Promise((resolve, reject) => {
|
|
4923
|
+
const onStart = () => {
|
|
4924
|
+
cleanup();
|
|
4925
|
+
resolve();
|
|
4926
|
+
};
|
|
4927
|
+
const onError = (event) => {
|
|
4928
|
+
cleanup();
|
|
4929
|
+
reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
|
|
4930
|
+
};
|
|
4931
|
+
const cleanup = () => {
|
|
4932
|
+
if (this.recognition) {
|
|
4933
|
+
this.recognition.removeEventListener("start", onStart);
|
|
4934
|
+
this.recognition.removeEventListener("error", onError);
|
|
4935
|
+
}
|
|
4936
|
+
};
|
|
4937
|
+
this.recognition.addEventListener("start", onStart, { once: true });
|
|
4938
|
+
this.recognition.addEventListener("error", onError, { once: true });
|
|
4939
|
+
try {
|
|
4940
|
+
this.recognition.start();
|
|
4941
|
+
} catch (err) {
|
|
4942
|
+
cleanup();
|
|
4943
|
+
reject(err);
|
|
4944
|
+
}
|
|
4945
|
+
});
|
|
4946
|
+
}
|
|
4947
|
+
/**
|
|
4948
|
+
* Send audio data. No-op for Web Speech API since it captures audio
|
|
4949
|
+
* directly from the microphone via the browser's internal pipeline.
|
|
4950
|
+
*
|
|
4951
|
+
* Provided for interface compatibility with WebSocket-based STT adapters
|
|
4952
|
+
* (DeepgramSTT, ElevenLabsSTT).
|
|
4953
|
+
*/
|
|
4954
|
+
sendAudio(_audioData) {
|
|
4955
|
+
}
|
|
4956
|
+
/**
|
|
4957
|
+
* Register a callback to receive transcript events.
|
|
4958
|
+
*
|
|
4959
|
+
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
4960
|
+
*/
|
|
4961
|
+
onTranscript(callback) {
|
|
4962
|
+
this.transcriptCallbacks.add(callback);
|
|
4963
|
+
let removed = false;
|
|
4964
|
+
return () => {
|
|
4965
|
+
if (removed) return;
|
|
4966
|
+
removed = true;
|
|
4967
|
+
this.transcriptCallbacks.delete(callback);
|
|
4968
|
+
};
|
|
4969
|
+
}
|
|
4970
|
+
/**
|
|
4971
|
+
* Gracefully stop recognition.
|
|
4972
|
+
*
|
|
4973
|
+
* Calls `stop()` on the SpeechRecognition instance which allows it to
|
|
4974
|
+
* deliver any pending final results before ending.
|
|
4975
|
+
*/
|
|
4976
|
+
close() {
|
|
4977
|
+
if (!this.recognition) {
|
|
4978
|
+
this.log("Not connected \u2014 nothing to close");
|
|
4979
|
+
return;
|
|
4980
|
+
}
|
|
4981
|
+
this.log("Closing recognition");
|
|
4982
|
+
this._intentionalStop = true;
|
|
4983
|
+
try {
|
|
4984
|
+
this.recognition.stop();
|
|
4985
|
+
} catch {
|
|
4986
|
+
}
|
|
4987
|
+
this.cleanup();
|
|
4988
|
+
}
|
|
4989
|
+
/** Force-destroy the recognition without waiting for pending results. */
|
|
4990
|
+
destroy() {
|
|
4991
|
+
this.log("Destroying");
|
|
4992
|
+
this._intentionalStop = true;
|
|
4993
|
+
if (this.recognition) {
|
|
4994
|
+
try {
|
|
4995
|
+
this.recognition.abort();
|
|
4996
|
+
} catch {
|
|
4997
|
+
}
|
|
4998
|
+
this.recognition.onresult = null;
|
|
4999
|
+
this.recognition.onerror = null;
|
|
5000
|
+
this.recognition.onend = null;
|
|
5001
|
+
this.recognition.onstart = null;
|
|
5002
|
+
this.recognition = null;
|
|
5003
|
+
}
|
|
5004
|
+
this.cleanup();
|
|
5005
|
+
this.transcriptCallbacks.clear();
|
|
5006
|
+
}
|
|
5007
|
+
/**
|
|
5008
|
+
* Suspend the adapter (e.g. when the device goes offline).
|
|
5009
|
+
*
|
|
5010
|
+
* Stops recognition and marks the adapter as suspended so that auto-restart
|
|
5011
|
+
* does not trigger.
|
|
5012
|
+
*/
|
|
5013
|
+
suspend() {
|
|
5014
|
+
if (this._suspended) return;
|
|
5015
|
+
this._suspended = true;
|
|
5016
|
+
this._intentionalStop = true;
|
|
5017
|
+
if (this.recognition && this._connected) {
|
|
5018
|
+
try {
|
|
5019
|
+
this.recognition.stop();
|
|
5020
|
+
} catch {
|
|
5021
|
+
}
|
|
5022
|
+
}
|
|
5023
|
+
this.log("Suspended");
|
|
5024
|
+
}
|
|
5025
|
+
/**
|
|
5026
|
+
* Resume after a prior `suspend()`. Restarts recognition if it was
|
|
5027
|
+
* running before suspension.
|
|
5028
|
+
*/
|
|
5029
|
+
resume() {
|
|
5030
|
+
if (!this._suspended) return;
|
|
5031
|
+
this._suspended = false;
|
|
5032
|
+
this._intentionalStop = false;
|
|
5033
|
+
this.log("Resumed");
|
|
5034
|
+
if (this.recognition && !this._connected) {
|
|
5035
|
+
try {
|
|
5036
|
+
this.recognition.start();
|
|
5037
|
+
} catch {
|
|
5038
|
+
this.log("Failed to restart recognition after resume");
|
|
5039
|
+
}
|
|
5040
|
+
}
|
|
5041
|
+
}
|
|
5042
|
+
// -------------------------------------------------------------------------
|
|
5043
|
+
// Result handling
|
|
5044
|
+
// -------------------------------------------------------------------------
|
|
5045
|
+
/**
|
|
5046
|
+
* Handle SpeechRecognition result events.
|
|
5047
|
+
*
|
|
5048
|
+
* The `results` property is a SpeechRecognitionResultList containing all
|
|
5049
|
+
* results accumulated during this recognition session. We only process
|
|
5050
|
+
* results from `resultIndex` onward to avoid re-emitting old results.
|
|
5051
|
+
*/
|
|
5052
|
+
handleResult(event) {
|
|
5053
|
+
for (let i = event.resultIndex; i < event.results.length; i++) {
|
|
5054
|
+
const result = event.results[i];
|
|
5055
|
+
if (!result) continue;
|
|
5056
|
+
const alternative = result[0];
|
|
5057
|
+
if (!alternative) continue;
|
|
5058
|
+
const transcript = alternative.transcript;
|
|
5059
|
+
if (!transcript || transcript.trim() === "") continue;
|
|
5060
|
+
const isFinal = result.isFinal;
|
|
5061
|
+
const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
|
|
5062
|
+
const transcriptEvent = {
|
|
5063
|
+
text: transcript,
|
|
5064
|
+
isFinal,
|
|
5065
|
+
confidence,
|
|
5066
|
+
timestamp: Date.now()
|
|
5067
|
+
};
|
|
5068
|
+
this.log(
|
|
5069
|
+
isFinal ? "Final transcript:" : "Interim transcript:",
|
|
5070
|
+
transcript,
|
|
5071
|
+
`(${(confidence * 100).toFixed(1)}%)`
|
|
5072
|
+
);
|
|
5073
|
+
this.emitTranscript(transcriptEvent);
|
|
5074
|
+
}
|
|
5075
|
+
}
|
|
5076
|
+
// -------------------------------------------------------------------------
|
|
5077
|
+
// Error handling
|
|
5078
|
+
// -------------------------------------------------------------------------
|
|
5079
|
+
/**
|
|
5080
|
+
* Handle SpeechRecognition errors.
|
|
5081
|
+
*
|
|
5082
|
+
* Some errors are recoverable (e.g. `no-speech`) and some are fatal
|
|
5083
|
+
* (e.g. `not-allowed`). For recoverable errors in continuous mode,
|
|
5084
|
+
* recognition will auto-restart via the `onend` handler.
|
|
5085
|
+
*/
|
|
5086
|
+
handleError(event) {
|
|
5087
|
+
const errorType = event.error;
|
|
5088
|
+
this.log("Recognition error:", errorType, event.message);
|
|
5089
|
+
if (errorType === "no-speech" || errorType === "aborted") {
|
|
5090
|
+
this.log("Non-fatal error \u2014 will recover");
|
|
5091
|
+
return;
|
|
5092
|
+
}
|
|
5093
|
+
if (errorType === "network") {
|
|
5094
|
+
this.log("Network error \u2014 recognition may auto-restart");
|
|
5095
|
+
return;
|
|
5096
|
+
}
|
|
5097
|
+
if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
|
|
5098
|
+
this._intentionalStop = true;
|
|
5099
|
+
this.log("Fatal recognition error \u2014 stopping");
|
|
5100
|
+
}
|
|
5101
|
+
}
|
|
5102
|
+
// -------------------------------------------------------------------------
|
|
5103
|
+
// Subscriber notification
|
|
5104
|
+
// -------------------------------------------------------------------------
|
|
5105
|
+
/**
|
|
5106
|
+
* Emit a transcript event to all registered callbacks.
|
|
5107
|
+
*
|
|
5108
|
+
* Errors thrown by individual callbacks are caught and logged so one
|
|
5109
|
+
* misbehaving subscriber does not prevent others from receiving the event.
|
|
5110
|
+
*/
|
|
5111
|
+
emitTranscript(event) {
|
|
5112
|
+
for (const cb of this.transcriptCallbacks) {
|
|
5113
|
+
try {
|
|
5114
|
+
cb(event);
|
|
5115
|
+
} catch (err) {
|
|
5116
|
+
console.error(LOG_PREFIX9, "Transcript callback threw:", err);
|
|
5117
|
+
}
|
|
5118
|
+
}
|
|
5119
|
+
}
|
|
5120
|
+
// -------------------------------------------------------------------------
|
|
5121
|
+
// SpeechRecognition resolution
|
|
5122
|
+
// -------------------------------------------------------------------------
|
|
5123
|
+
/**
|
|
5124
|
+
* Resolve the SpeechRecognition constructor, with the webkit-prefixed
|
|
5125
|
+
* fallback. Returns null if not available.
|
|
5126
|
+
*/
|
|
5127
|
+
resolveSpeechRecognition() {
|
|
5128
|
+
if (typeof window === "undefined") return null;
|
|
5129
|
+
const win = window;
|
|
5130
|
+
if (typeof win["SpeechRecognition"] !== "undefined") {
|
|
5131
|
+
return win["SpeechRecognition"];
|
|
5132
|
+
}
|
|
5133
|
+
if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
|
|
5134
|
+
return globalThis.webkitSpeechRecognition;
|
|
5135
|
+
}
|
|
5136
|
+
return null;
|
|
5137
|
+
}
|
|
5138
|
+
// -------------------------------------------------------------------------
// Cleanup
// -------------------------------------------------------------------------
/** Reset internal state after disconnection. */
cleanup() {
  // Only the connected flag is cleared; other adapter state is left as-is.
  this._connected = false;
}
|
|
5145
|
+
// -------------------------------------------------------------------------
|
|
5146
|
+
// Logging
|
|
5147
|
+
// -------------------------------------------------------------------------
|
|
5148
|
+
/** Conditional debug logging. */
|
|
5149
|
+
log(...args) {
|
|
5150
|
+
if (this.debugEnabled) {
|
|
5151
|
+
console.debug(LOG_PREFIX9, ...args);
|
|
5152
|
+
}
|
|
5153
|
+
}
|
|
5154
|
+
};
|
|
5155
|
+
|
|
5156
|
+
// src/voice/web-speech-tts.ts
// Tag prefixed to every console message emitted by WebSpeechTTS.
var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
// SpeechSynthesisUtterance defaults; 1 is the browser's normal rate/pitch.
var DEFAULT_RATE = 1;
var DEFAULT_PITCH = 1;
// BCP-47 language tag used when the caller supplies none.
var DEFAULT_LANGUAGE4 = "en-US";
|
|
5161
|
+
var WebSpeechTTS = class {
  // ---- Configuration -------------------------------------------------------
  // Requested voice name (string) or null for the browser default; resolved
  // to a SpeechSynthesisVoice during connect().
  voiceName;
  // Speaking-rate multiplier applied to every utterance.
  rate;
  // Pitch multiplier applied to every utterance.
  pitch;
  // BCP-47 language tag applied to every utterance.
  language;
  // When true, log() forwards to console.debug.
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  // Set by connect()/cleanup(); speak() refuses to run while false.
  _connected = false;
  // Set by suspend()/resume(); speak() refuses to run while true.
  _suspended = false;
  /** Cached voice object resolved from voiceName. */
  _resolvedVoice = null;
  /** Whether voices have been loaded (they load async in some browsers). */
  _voicesLoaded = false;
  /** Registered audio-event callbacks. */
  audioCallbacks = /* @__PURE__ */ new Set();
|
|
5177
|
+
// -------------------------------------------------------------------------
|
|
5178
|
+
// Constructor
|
|
5179
|
+
// -------------------------------------------------------------------------
|
|
5180
|
+
constructor(options = {}) {
|
|
5181
|
+
this.voiceName = options.voice ?? null;
|
|
5182
|
+
this.rate = options.rate ?? DEFAULT_RATE;
|
|
5183
|
+
this.pitch = options.pitch ?? DEFAULT_PITCH;
|
|
5184
|
+
this.language = options.language ?? DEFAULT_LANGUAGE4;
|
|
5185
|
+
this.debugEnabled = options.debug ?? false;
|
|
5186
|
+
this.log("WebSpeechTTS created", {
|
|
5187
|
+
voice: this.voiceName,
|
|
5188
|
+
rate: this.rate,
|
|
5189
|
+
pitch: this.pitch,
|
|
5190
|
+
language: this.language
|
|
5191
|
+
});
|
|
5192
|
+
}
|
|
5193
|
+
// -------------------------------------------------------------------------
|
|
5194
|
+
// Static methods
|
|
5195
|
+
// -------------------------------------------------------------------------
|
|
5196
|
+
/**
|
|
5197
|
+
* Check whether the Web Speech API SpeechSynthesis is supported in the
|
|
5198
|
+
* current environment. Safe to call in SSR (returns false).
|
|
5199
|
+
*/
|
|
5200
|
+
static isSupported() {
|
|
5201
|
+
if (typeof window === "undefined") return false;
|
|
5202
|
+
return typeof window.speechSynthesis !== "undefined";
|
|
5203
|
+
}
|
|
5204
|
+
// -------------------------------------------------------------------------
// Public API
// -------------------------------------------------------------------------
/** Whether the adapter is connected (ready for speech). */
get isConnected() {
  // True between a successful connect() and cleanup() (via close/destroy).
  return this._connected;
}
|
|
5211
|
+
/**
|
|
5212
|
+
* Initialize the adapter.
|
|
5213
|
+
*
|
|
5214
|
+
* Loads available voices and resolves the requested voice name. Voice
|
|
5215
|
+
* loading is async in some browsers (notably Chrome) so we wait for
|
|
5216
|
+
* the `voiceschanged` event if needed.
|
|
5217
|
+
*/
|
|
5218
|
+
async connect() {
|
|
5219
|
+
if (this._connected) {
|
|
5220
|
+
this.log("Already connected \u2014 skipping");
|
|
5221
|
+
return;
|
|
5222
|
+
}
|
|
5223
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5224
|
+
this.log("SpeechSynthesis not available \u2014 cannot connect");
|
|
5225
|
+
return;
|
|
5226
|
+
}
|
|
5227
|
+
await this.loadVoices();
|
|
5228
|
+
if (this.voiceName) {
|
|
5229
|
+
this._resolvedVoice = this.findVoice(this.voiceName);
|
|
5230
|
+
if (this._resolvedVoice) {
|
|
5231
|
+
this.log("Resolved voice:", this._resolvedVoice.name);
|
|
5232
|
+
} else {
|
|
5233
|
+
this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
|
|
5234
|
+
}
|
|
5235
|
+
}
|
|
5236
|
+
this._connected = true;
|
|
5237
|
+
this.log("Connected");
|
|
5238
|
+
}
|
|
5239
|
+
/**
 * Speak the given text using the browser's speech synthesis engine.
 *
 * Fire-and-forget: this method is synchronous and returns nothing — it
 * queues the utterance and exits. (The previous doc claimed a Promise was
 * returned; it is not.) Lifecycle is reported through the events delivered
 * to onAudio() subscribers instead: start emits a non-final event; end,
 * cancellation, and errors all emit a final event. The audio buffers in
 * those events are always empty because the browser plays the audio
 * itself; they exist for VoicePipeline compatibility.
 *
 * No-ops when not connected, suspended, given blank text, or when
 * speechSynthesis is unavailable.
 */
speak(text) {
  if (!this._connected || this._suspended) {
    this.log("Cannot speak \u2014 not connected or suspended");
    return;
  }
  if (!text || !text.trim()) {
    return;
  }
  if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
    return;
  }
  const synth = window.speechSynthesis;
  const utterance = new SpeechSynthesisUtterance(text);
  utterance.lang = this.language;
  utterance.rate = this.rate;
  utterance.pitch = this.pitch;
  if (this._resolvedVoice) {
    utterance.voice = this._resolvedVoice;
  }
  utterance.onstart = () => {
    this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
    // Non-final marker: playback has begun.
    this.emitAudio({
      audio: new ArrayBuffer(0),
      isFinal: false,
      timestamp: Date.now()
    });
  };
  utterance.onend = () => {
    this.log("Utterance ended");
    // Final marker: utterance completed normally.
    this.emitAudio({
      audio: new ArrayBuffer(0),
      isFinal: true,
      timestamp: Date.now()
    });
  };
  utterance.onerror = (event) => {
    if (event.error === "canceled") {
      this.log("Utterance cancelled");
      // Cancellation (e.g. stop()) is treated as completion for listeners.
      this.emitAudio({
        audio: new ArrayBuffer(0),
        isFinal: true,
        timestamp: Date.now()
      });
      return;
    }
    this.log("Utterance error:", event.error);
    // Errors also end with a final marker so waiters are not left hanging.
    this.emitAudio({
      audio: new ArrayBuffer(0),
      isFinal: true,
      timestamp: Date.now()
    });
  };
  this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
  synth.speak(utterance);
}
|
|
5303
|
+
/**
 * Flush / finalize the current utterance.
 *
 * Intentionally a no-op: with the Web Speech API each speak() call is a
 * complete utterance, so there is nothing buffered to flush. Kept for
 * interface compatibility with ElevenLabsTTS.
 */
flush() {
}
|
|
5311
|
+
/**
|
|
5312
|
+
* Register a callback to receive audio output events.
|
|
5313
|
+
*
|
|
5314
|
+
* For Web Speech API, these events have empty audio buffers and are
|
|
5315
|
+
* used to signal utterance start/end for VoicePipeline state management.
|
|
5316
|
+
*
|
|
5317
|
+
* @returns An unsubscribe function. Calling it more than once is safe.
|
|
5318
|
+
*/
|
|
5319
|
+
onAudio(callback) {
|
|
5320
|
+
this.audioCallbacks.add(callback);
|
|
5321
|
+
let removed = false;
|
|
5322
|
+
return () => {
|
|
5323
|
+
if (removed) return;
|
|
5324
|
+
removed = true;
|
|
5325
|
+
this.audioCallbacks.delete(callback);
|
|
5326
|
+
};
|
|
5327
|
+
}
|
|
5328
|
+
/** Stop current speech synthesis and cancel any queued utterances. */
|
|
5329
|
+
stop() {
|
|
5330
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5331
|
+
return;
|
|
5332
|
+
}
|
|
5333
|
+
this.log("Stopping speech synthesis");
|
|
5334
|
+
window.speechSynthesis.cancel();
|
|
5335
|
+
}
|
|
5336
|
+
/** Gracefully close the adapter: cancel any active speech, then reset state. */
close() {
  this.log("Closing");
  this.stop();
  this.cleanup();
}
|
|
5342
|
+
/** Force-destroy the adapter: stop speech, reset state, drop all audio subscribers. */
destroy() {
  this.log("Destroying");
  this.stop();
  this.cleanup();
  // Unlike close(), destroy() also discards every onAudio() subscriber.
  this.audioCallbacks.clear();
}
|
|
5349
|
+
/**
|
|
5350
|
+
* Suspend the adapter (e.g. when the device goes offline).
|
|
5351
|
+
*
|
|
5352
|
+
* Pauses any active speech synthesis and marks the adapter as suspended.
|
|
5353
|
+
*/
|
|
5354
|
+
suspend() {
|
|
5355
|
+
if (this._suspended) return;
|
|
5356
|
+
this._suspended = true;
|
|
5357
|
+
if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
|
|
5358
|
+
window.speechSynthesis.pause();
|
|
5359
|
+
}
|
|
5360
|
+
this.log("Suspended");
|
|
5361
|
+
}
|
|
5362
|
+
/**
|
|
5363
|
+
* Resume after a prior `suspend()`.
|
|
5364
|
+
*/
|
|
5365
|
+
resume() {
|
|
5366
|
+
if (!this._suspended) return;
|
|
5367
|
+
this._suspended = false;
|
|
5368
|
+
if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
|
|
5369
|
+
window.speechSynthesis.resume();
|
|
4422
5370
|
}
|
|
4423
|
-
|
|
4424
|
-
|
|
5371
|
+
this.log("Resumed");
|
|
5372
|
+
}
|
|
5373
|
+
// -------------------------------------------------------------------------
// Voice loading
// -------------------------------------------------------------------------
/**
 * Load available voices from the browser.
 *
 * In Chrome and some other browsers, voices load asynchronously after
 * the page loads. We wait for the `voiceschanged` event with a 2-second
 * timeout so connect() can never hang; after the timeout the adapter
 * proceeds with the browser-default voice.
 */
async loadVoices() {
  if (this._voicesLoaded) return;
  if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
  const synth = window.speechSynthesis;
  let voices = synth.getVoices();
  // Fast path: voices were already available synchronously.
  if (voices.length > 0) {
    this._voicesLoaded = true;
    this.log("Voices loaded:", voices.length, "available");
    return;
  }
  await new Promise((resolve) => {
    const onVoicesChanged = () => {
      // Detach the listener and cancel the timeout so resolve() runs once.
      synth.removeEventListener("voiceschanged", onVoicesChanged);
      clearTimeout(timeout);
      voices = synth.getVoices();
      this._voicesLoaded = true;
      this.log("Voices loaded (async):", voices.length, "available");
      resolve();
    };
    const timeout = setTimeout(() => {
      // Give up after 2s; mark loaded anyway so we don't retry every call.
      synth.removeEventListener("voiceschanged", onVoicesChanged);
      this._voicesLoaded = true;
      this.log("Voices loading timed out \u2014 proceeding with defaults");
      resolve();
    }, 2e3);
    synth.addEventListener("voiceschanged", onVoicesChanged);
  });
}
|
|
5410
|
+
/**
|
|
5411
|
+
* Find a voice by name (case-insensitive partial match).
|
|
5412
|
+
*/
|
|
5413
|
+
findVoice(name) {
|
|
5414
|
+
if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
|
|
5415
|
+
return null;
|
|
4438
5416
|
}
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4442
|
-
|
|
4443
|
-
|
|
4444
|
-
|
|
5417
|
+
const voices = window.speechSynthesis.getVoices();
|
|
5418
|
+
const lowerName = name.toLowerCase();
|
|
5419
|
+
const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
|
|
5420
|
+
if (exact) return exact;
|
|
5421
|
+
const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
|
|
5422
|
+
if (partial) return partial;
|
|
5423
|
+
if (lowerName.includes("-") || lowerName.length <= 5) {
|
|
5424
|
+
const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
|
|
5425
|
+
if (langMatch) return langMatch;
|
|
4445
5426
|
}
|
|
4446
|
-
|
|
4447
|
-
audio: audioBuffer,
|
|
4448
|
-
isFinal,
|
|
4449
|
-
timestamp: Date.now()
|
|
4450
|
-
};
|
|
4451
|
-
this.log(
|
|
4452
|
-
isFinal ? "Final audio chunk:" : "Audio chunk:",
|
|
4453
|
-
`${audioBuffer.byteLength} bytes`
|
|
4454
|
-
);
|
|
4455
|
-
this.emitAudio(audioEvent);
|
|
5427
|
+
return null;
|
|
4456
5428
|
}
|
|
4457
|
-
//
|
|
5429
|
+
// -------------------------------------------------------------------------
|
|
4458
5430
|
// Subscriber notification
|
|
4459
|
-
//
|
|
5431
|
+
// -------------------------------------------------------------------------
|
|
4460
5432
|
/**
|
|
4461
5433
|
* Emit an audio event to all registered callbacks.
|
|
4462
5434
|
*
|
|
@@ -4468,41 +5440,30 @@ var ElevenLabsTTS = class {
|
|
|
4468
5440
|
try {
|
|
4469
5441
|
cb(event);
|
|
4470
5442
|
} catch (err) {
|
|
4471
|
-
console.error(
|
|
5443
|
+
console.error(LOG_PREFIX10, "Audio callback threw:", err);
|
|
4472
5444
|
}
|
|
4473
5445
|
}
|
|
4474
5446
|
}
|
|
4475
|
-
//
|
|
4476
|
-
// URL building
|
|
4477
|
-
// -----------------------------------------------------------------------
|
|
4478
|
-
/** Build the ElevenLabs streaming TTS endpoint URL. */
|
|
4479
|
-
buildUrl() {
|
|
4480
|
-
const params = new URLSearchParams({
|
|
4481
|
-
model_id: this.modelId
|
|
4482
|
-
});
|
|
4483
|
-
return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
|
|
4484
|
-
}
|
|
4485
|
-
// -----------------------------------------------------------------------
|
|
5447
|
+
// -------------------------------------------------------------------------
// Cleanup
// -------------------------------------------------------------------------
/** Reset internal state. */
cleanup() {
  // Clears only the connected flag; suspension state, the resolved-voice
  // cache, and registered callbacks are left untouched.
  this._connected = false;
}
|
|
4493
|
-
//
|
|
5454
|
+
// -------------------------------------------------------------------------
|
|
4494
5455
|
// Logging
|
|
4495
|
-
//
|
|
5456
|
+
// -------------------------------------------------------------------------
|
|
4496
5457
|
/** Conditional debug logging. */
|
|
4497
5458
|
log(...args) {
|
|
4498
5459
|
if (this.debugEnabled) {
|
|
4499
|
-
console.debug(
|
|
5460
|
+
console.debug(LOG_PREFIX10, ...args);
|
|
4500
5461
|
}
|
|
4501
5462
|
}
|
|
4502
5463
|
};
|
|
4503
5464
|
|
|
4504
5465
|
// src/voice/index.ts
// Tag prefixed to every console message emitted by VoicePipeline.
var LOG_PREFIX11 = "[GuideKit:Voice]";
// Scheduling slack in ms for queued TTS audio — NOTE(review): presumably
// absorbs gaps between streamed chunks; confirm in _decodeAndSchedule.
var JITTER_BUFFER_MS = 150;
// Window (ms) after TTS output in which matching transcripts are treated
// as echo of our own speech — TODO confirm against the echo filter.
var ECHO_WINDOW_MS = 3e3;
// Minimum overlap ratio (0-1) for the echo classification above.
var ECHO_OVERLAP_THRESHOLD = 0.6;
|
|
@@ -4610,17 +5571,42 @@ var VoicePipeline = class {
|
|
|
4610
5571
|
cause: err instanceof Error ? err : void 0
|
|
4611
5572
|
});
|
|
4612
5573
|
}
|
|
4613
|
-
this.
|
|
4614
|
-
|
|
4615
|
-
|
|
4616
|
-
|
|
4617
|
-
|
|
4618
|
-
|
|
4619
|
-
|
|
4620
|
-
|
|
4621
|
-
|
|
4622
|
-
|
|
4623
|
-
|
|
5574
|
+
if (this._sttConfig.provider === "deepgram") {
|
|
5575
|
+
this._stt = new DeepgramSTT({
|
|
5576
|
+
apiKey: this._sttConfig.apiKey,
|
|
5577
|
+
model: this._sttConfig.model,
|
|
5578
|
+
debug: this._debug
|
|
5579
|
+
});
|
|
5580
|
+
} else if (this._sttConfig.provider === "elevenlabs") {
|
|
5581
|
+
this._stt = new ElevenLabsSTT({
|
|
5582
|
+
apiKey: this._sttConfig.apiKey,
|
|
5583
|
+
language: this._sttConfig.language,
|
|
5584
|
+
debug: this._debug
|
|
5585
|
+
});
|
|
5586
|
+
} else {
|
|
5587
|
+
this._stt = new WebSpeechSTT({
|
|
5588
|
+
language: this._sttConfig.language,
|
|
5589
|
+
continuous: this._sttConfig.continuous,
|
|
5590
|
+
interimResults: this._sttConfig.interimResults,
|
|
5591
|
+
debug: this._debug
|
|
5592
|
+
});
|
|
5593
|
+
}
|
|
5594
|
+
if (this._ttsConfig.provider === "elevenlabs") {
|
|
5595
|
+
this._tts = new ElevenLabsTTS({
|
|
5596
|
+
apiKey: this._ttsConfig.apiKey,
|
|
5597
|
+
voiceId: this._ttsConfig.voiceId,
|
|
5598
|
+
modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
|
|
5599
|
+
debug: this._debug
|
|
5600
|
+
});
|
|
5601
|
+
} else {
|
|
5602
|
+
this._tts = new WebSpeechTTS({
|
|
5603
|
+
voice: this._ttsConfig.voice,
|
|
5604
|
+
rate: this._ttsConfig.rate,
|
|
5605
|
+
pitch: this._ttsConfig.pitch,
|
|
5606
|
+
language: this._ttsConfig.language,
|
|
5607
|
+
debug: this._debug
|
|
5608
|
+
});
|
|
5609
|
+
}
|
|
4624
5610
|
this._log("Initialization complete");
|
|
4625
5611
|
}
|
|
4626
5612
|
// ────────────────────────────────────────────────────────────────────
|
|
@@ -4760,10 +5746,11 @@ var VoicePipeline = class {
|
|
|
4760
5746
|
// ────────────────────────────────────────────────────────────────────
|
|
4761
5747
|
// speak()
|
|
4762
5748
|
// ────────────────────────────────────────────────────────────────────
|
|
4763
|
-
/** Speak text via ElevenLabs
|
|
5749
|
+
/** Speak text via TTS (ElevenLabs or Web Speech API). */
|
|
4764
5750
|
async speak(text) {
|
|
4765
5751
|
if (this._destroyed || !text.trim()) return;
|
|
4766
|
-
|
|
5752
|
+
const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
|
|
5753
|
+
if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
|
|
4767
5754
|
this._log("TTS or AudioContext not available \u2014 cannot speak");
|
|
4768
5755
|
this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
|
|
4769
5756
|
this._setState("idle");
|
|
@@ -4807,11 +5794,24 @@ var VoicePipeline = class {
|
|
|
4807
5794
|
}
|
|
4808
5795
|
resolve();
|
|
4809
5796
|
};
|
|
4810
|
-
|
|
4811
|
-
this.
|
|
4812
|
-
|
|
4813
|
-
|
|
4814
|
-
|
|
5797
|
+
if (isWebSpeechTTS) {
|
|
5798
|
+
this._unsubTTSAudio = this._tts.onAudio(
|
|
5799
|
+
(event) => {
|
|
5800
|
+
if (event.isFinal) {
|
|
5801
|
+
done();
|
|
5802
|
+
}
|
|
5803
|
+
}
|
|
5804
|
+
);
|
|
5805
|
+
this._tts.speak(text);
|
|
5806
|
+
} else {
|
|
5807
|
+
this._unsubTTSAudio = this._tts.onAudio(
|
|
5808
|
+
(event) => {
|
|
5809
|
+
this._handleTTSAudio(event, done);
|
|
5810
|
+
}
|
|
5811
|
+
);
|
|
5812
|
+
this._tts.speak(text);
|
|
5813
|
+
this._tts.flush();
|
|
5814
|
+
}
|
|
4815
5815
|
});
|
|
4816
5816
|
}
|
|
4817
5817
|
// ────────────────────────────────────────────────────────────────────
|
|
@@ -4840,7 +5840,9 @@ var VoicePipeline = class {
|
|
|
4840
5840
|
this._pendingLLMAbort.abort();
|
|
4841
5841
|
this._pendingLLMAbort = null;
|
|
4842
5842
|
}
|
|
4843
|
-
if (this._tts
|
|
5843
|
+
if (this._tts instanceof WebSpeechTTS) {
|
|
5844
|
+
this._tts.stop();
|
|
5845
|
+
} else if (this._tts?.isConnected) {
|
|
4844
5846
|
this._tts.close();
|
|
4845
5847
|
}
|
|
4846
5848
|
}
|
|
@@ -4929,7 +5931,7 @@ var VoicePipeline = class {
|
|
|
4929
5931
|
try {
|
|
4930
5932
|
cb(next, prev);
|
|
4931
5933
|
} catch (err) {
|
|
4932
|
-
console.error(
|
|
5934
|
+
console.error(LOG_PREFIX11, "State change callback threw:", err);
|
|
4933
5935
|
}
|
|
4934
5936
|
}
|
|
4935
5937
|
}
|
|
@@ -5060,7 +6062,7 @@ var VoicePipeline = class {
|
|
|
5060
6062
|
try {
|
|
5061
6063
|
cb(text, isFinal);
|
|
5062
6064
|
} catch (err) {
|
|
5063
|
-
console.error(
|
|
6065
|
+
console.error(LOG_PREFIX11, "Transcript callback threw:", err);
|
|
5064
6066
|
}
|
|
5065
6067
|
}
|
|
5066
6068
|
if (isFinal && this._state === "listening") {
|
|
@@ -5163,8 +6165,14 @@ var VoicePipeline = class {
|
|
|
5163
6165
|
* sequential playback via AudioBufferSourceNode.
|
|
5164
6166
|
*/
|
|
5165
6167
|
_decodeAndSchedule(audioData, onDone) {
|
|
6168
|
+
let onDoneCalled = false;
|
|
6169
|
+
const safeOnDone = onDone ? () => {
|
|
6170
|
+
if (onDoneCalled) return;
|
|
6171
|
+
onDoneCalled = true;
|
|
6172
|
+
onDone();
|
|
6173
|
+
} : void 0;
|
|
5166
6174
|
if (!this._audioContext || this._state !== "speaking") {
|
|
5167
|
-
|
|
6175
|
+
safeOnDone?.();
|
|
5168
6176
|
return;
|
|
5169
6177
|
}
|
|
5170
6178
|
const ctx = this._audioContext;
|
|
@@ -5173,7 +6181,7 @@ var VoicePipeline = class {
|
|
|
5173
6181
|
copy,
|
|
5174
6182
|
(decodedBuffer) => {
|
|
5175
6183
|
if (this._state !== "speaking" || !this._audioContext) {
|
|
5176
|
-
|
|
6184
|
+
safeOnDone?.();
|
|
5177
6185
|
return;
|
|
5178
6186
|
}
|
|
5179
6187
|
const source = ctx.createBufferSource();
|
|
@@ -5186,8 +6194,8 @@ var VoicePipeline = class {
|
|
|
5186
6194
|
if (this._lastScheduledSource === source) {
|
|
5187
6195
|
this._lastScheduledSource = null;
|
|
5188
6196
|
}
|
|
5189
|
-
if (
|
|
5190
|
-
|
|
6197
|
+
if (safeOnDone) {
|
|
6198
|
+
safeOnDone();
|
|
5191
6199
|
}
|
|
5192
6200
|
};
|
|
5193
6201
|
const now = ctx.currentTime;
|
|
@@ -5203,7 +6211,7 @@ var VoicePipeline = class {
|
|
|
5203
6211
|
},
|
|
5204
6212
|
(err) => {
|
|
5205
6213
|
this._log("Failed to decode audio chunk:", err);
|
|
5206
|
-
|
|
6214
|
+
safeOnDone?.();
|
|
5207
6215
|
}
|
|
5208
6216
|
);
|
|
5209
6217
|
}
|
|
@@ -5262,13 +6270,13 @@ var VoicePipeline = class {
|
|
|
5262
6270
|
// ════════════════════════════════════════════════════════════════════
|
|
5263
6271
|
_log(...args) {
|
|
5264
6272
|
if (this._debug) {
|
|
5265
|
-
console.debug(
|
|
6273
|
+
console.debug(LOG_PREFIX11, ...args);
|
|
5266
6274
|
}
|
|
5267
6275
|
}
|
|
5268
6276
|
};
|
|
5269
6277
|
|
|
5270
6278
|
// src/visual/index.ts
|
|
5271
|
-
var
|
|
6279
|
+
var LOG_PREFIX12 = "[GuideKit:Visual]";
|
|
5272
6280
|
var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
|
|
5273
6281
|
var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
|
|
5274
6282
|
var DEFAULT_ANIMATION_DURATION = 300;
|
|
@@ -6185,16 +7193,16 @@ var VisualGuidance = class {
|
|
|
6185
7193
|
if (!this.debug) return;
|
|
6186
7194
|
if (typeof console !== "undefined") {
|
|
6187
7195
|
if (data) {
|
|
6188
|
-
console.log(`${
|
|
7196
|
+
console.log(`${LOG_PREFIX12} ${message}`, data);
|
|
6189
7197
|
} else {
|
|
6190
|
-
console.log(`${
|
|
7198
|
+
console.log(`${LOG_PREFIX12} ${message}`);
|
|
6191
7199
|
}
|
|
6192
7200
|
}
|
|
6193
7201
|
}
|
|
6194
7202
|
};
|
|
6195
7203
|
|
|
6196
7204
|
// src/awareness/index.ts
|
|
6197
|
-
var
|
|
7205
|
+
var LOG_PREFIX13 = "[GuideKit:Awareness]";
|
|
6198
7206
|
var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
|
|
6199
7207
|
var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
|
|
6200
7208
|
var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
|
|
@@ -6556,13 +7564,13 @@ var AwarenessSystem = class {
|
|
|
6556
7564
|
/** Conditional debug logging. */
|
|
6557
7565
|
log(...args) {
|
|
6558
7566
|
if (this.debugEnabled) {
|
|
6559
|
-
console.debug(
|
|
7567
|
+
console.debug(LOG_PREFIX13, ...args);
|
|
6560
7568
|
}
|
|
6561
7569
|
}
|
|
6562
7570
|
};
|
|
6563
7571
|
|
|
6564
7572
|
// src/awareness/proactive.ts
|
|
6565
|
-
var
|
|
7573
|
+
var LOG_PREFIX14 = "[GuideKit:Proactive]";
|
|
6566
7574
|
var STORAGE_KEY = "guidekit:visited";
|
|
6567
7575
|
var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
|
|
6568
7576
|
var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
|
|
@@ -6600,7 +7608,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6600
7608
|
set quietMode(value) {
|
|
6601
7609
|
this._quietMode = value;
|
|
6602
7610
|
if (this.debug) {
|
|
6603
|
-
console.debug(
|
|
7611
|
+
console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
|
|
6604
7612
|
}
|
|
6605
7613
|
}
|
|
6606
7614
|
// ---- Lifecycle -----------------------------------------------------------
|
|
@@ -6630,7 +7638,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6630
7638
|
})
|
|
6631
7639
|
);
|
|
6632
7640
|
if (this.debug) {
|
|
6633
|
-
console.debug(
|
|
7641
|
+
console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
|
|
6634
7642
|
}
|
|
6635
7643
|
}
|
|
6636
7644
|
/** Unsubscribe all bus listeners and clear internal state. */
|
|
@@ -6645,7 +7653,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6645
7653
|
this.formTimers.clear();
|
|
6646
7654
|
this.started = false;
|
|
6647
7655
|
if (this.debug) {
|
|
6648
|
-
console.debug(
|
|
7656
|
+
console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
|
|
6649
7657
|
}
|
|
6650
7658
|
}
|
|
6651
7659
|
/** Alias for {@link stop}. */
|
|
@@ -6680,7 +7688,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6680
7688
|
}, FORM_ABANDON_MS);
|
|
6681
7689
|
this.formTimers.set(formSelector, timer);
|
|
6682
7690
|
if (this.debug) {
|
|
6683
|
-
console.debug(
|
|
7691
|
+
console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
|
|
6684
7692
|
}
|
|
6685
7693
|
}
|
|
6686
7694
|
/** Reset all cooldowns and internal tracking state (useful for testing). */
|
|
@@ -6694,7 +7702,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6694
7702
|
}
|
|
6695
7703
|
this.formTimers.clear();
|
|
6696
7704
|
if (this.debug) {
|
|
6697
|
-
console.debug(
|
|
7705
|
+
console.debug(LOG_PREFIX14, "All cooldowns and state reset");
|
|
6698
7706
|
}
|
|
6699
7707
|
}
|
|
6700
7708
|
// ---- Internal handlers ---------------------------------------------------
|
|
@@ -6711,22 +7719,23 @@ var ProactiveTriggerEngine = class {
|
|
|
6711
7719
|
message: "First-time visitor detected. Show a visual greeting (no audio)."
|
|
6712
7720
|
}, "greeting");
|
|
6713
7721
|
if (this.debug) {
|
|
6714
|
-
console.debug(
|
|
7722
|
+
console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
|
|
6715
7723
|
}
|
|
6716
7724
|
return;
|
|
6717
7725
|
}
|
|
6718
7726
|
const visitedAt = parseInt(visited, 10);
|
|
6719
|
-
if (
|
|
6720
|
-
|
|
6721
|
-
|
|
6722
|
-
|
|
6723
|
-
|
|
6724
|
-
|
|
6725
|
-
|
|
7727
|
+
if (Number.isNaN(visitedAt)) {
|
|
7728
|
+
return;
|
|
7729
|
+
}
|
|
7730
|
+
const elapsed = Date.now() - visitedAt;
|
|
7731
|
+
if (elapsed <= SEVEN_DAYS_MS && this.debug) {
|
|
7732
|
+
console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
|
|
7733
|
+
} else if (this.debug) {
|
|
7734
|
+
console.debug(LOG_PREFIX14, "Return visitor after 7 days");
|
|
6726
7735
|
}
|
|
6727
7736
|
} catch {
|
|
6728
7737
|
if (this.debug) {
|
|
6729
|
-
console.warn(
|
|
7738
|
+
console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
|
|
6730
7739
|
}
|
|
6731
7740
|
}
|
|
6732
7741
|
}
|
|
@@ -6744,7 +7753,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6744
7753
|
const count = this.dwellCounts.get(sectionId) ?? 0;
|
|
6745
7754
|
if (count >= DWELL_COOLDOWNS.length + 1) {
|
|
6746
7755
|
if (this.debug) {
|
|
6747
|
-
console.debug(
|
|
7756
|
+
console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
|
|
6748
7757
|
}
|
|
6749
7758
|
return;
|
|
6750
7759
|
}
|
|
@@ -6754,7 +7763,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6754
7763
|
const lastFired = this.cooldowns.get(key) ?? 0;
|
|
6755
7764
|
if (Date.now() - lastFired < cooldownMs) {
|
|
6756
7765
|
if (this.debug) {
|
|
6757
|
-
console.debug(
|
|
7766
|
+
console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
|
|
6758
7767
|
}
|
|
6759
7768
|
return;
|
|
6760
7769
|
}
|
|
@@ -6770,7 +7779,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6770
7779
|
const sectionKey = selector;
|
|
6771
7780
|
if (this.frustrationFired.has(sectionKey)) {
|
|
6772
7781
|
if (this.debug) {
|
|
6773
|
-
console.debug(
|
|
7782
|
+
console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
|
|
6774
7783
|
}
|
|
6775
7784
|
return;
|
|
6776
7785
|
}
|
|
@@ -6786,7 +7795,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6786
7795
|
const key = "navigation-commentary";
|
|
6787
7796
|
if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
|
|
6788
7797
|
if (this.debug) {
|
|
6789
|
-
console.debug(
|
|
7798
|
+
console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
|
|
6790
7799
|
}
|
|
6791
7800
|
return;
|
|
6792
7801
|
}
|
|
@@ -6809,7 +7818,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6809
7818
|
fireTrigger(partial, cooldownKey) {
|
|
6810
7819
|
if (this._quietMode) {
|
|
6811
7820
|
if (this.debug) {
|
|
6812
|
-
console.debug(
|
|
7821
|
+
console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
|
|
6813
7822
|
}
|
|
6814
7823
|
return;
|
|
6815
7824
|
}
|
|
@@ -6819,13 +7828,13 @@ var ProactiveTriggerEngine = class {
|
|
|
6819
7828
|
};
|
|
6820
7829
|
this.cooldowns.set(cooldownKey, trigger.timestamp);
|
|
6821
7830
|
if (this.debug) {
|
|
6822
|
-
console.debug(
|
|
7831
|
+
console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
|
|
6823
7832
|
}
|
|
6824
7833
|
if (this.onTrigger) {
|
|
6825
7834
|
try {
|
|
6826
7835
|
this.onTrigger(trigger);
|
|
6827
7836
|
} catch (err) {
|
|
6828
|
-
console.error(
|
|
7837
|
+
console.error(LOG_PREFIX14, "onTrigger callback error:", err);
|
|
6829
7838
|
}
|
|
6830
7839
|
}
|
|
6831
7840
|
}
|
|
@@ -6838,7 +7847,7 @@ var ProactiveTriggerEngine = class {
|
|
|
6838
7847
|
};
|
|
6839
7848
|
|
|
6840
7849
|
// src/llm/rate-limiter.ts
|
|
6841
|
-
var
|
|
7850
|
+
var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
|
|
6842
7851
|
var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
|
|
6843
7852
|
var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
|
|
6844
7853
|
var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
|
|
@@ -6933,7 +7942,19 @@ var RateLimiter = class {
|
|
|
6933
7942
|
get sttMinutesUsed() {
|
|
6934
7943
|
let totalMs = this.sttMs;
|
|
6935
7944
|
if (this.sttStartedAt !== null) {
|
|
6936
|
-
|
|
7945
|
+
const activeMs = Date.now() - this.sttStartedAt;
|
|
7946
|
+
const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
|
|
7947
|
+
const maxActiveMs = maxSessionMs * 2;
|
|
7948
|
+
if (activeMs > maxActiveMs) {
|
|
7949
|
+
console.warn(
|
|
7950
|
+
`${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
|
|
7951
|
+
);
|
|
7952
|
+
this.sttMs += maxActiveMs;
|
|
7953
|
+
this.sttStartedAt = null;
|
|
7954
|
+
totalMs = this.sttMs;
|
|
7955
|
+
} else {
|
|
7956
|
+
totalMs += activeMs;
|
|
7957
|
+
}
|
|
6937
7958
|
}
|
|
6938
7959
|
return totalMs / 6e4;
|
|
6939
7960
|
}
|
|
@@ -7005,7 +8026,7 @@ var RateLimiter = class {
|
|
|
7005
8026
|
}
|
|
7006
8027
|
log(...args) {
|
|
7007
8028
|
if (this.debug) {
|
|
7008
|
-
console.debug(
|
|
8029
|
+
console.debug(LOG_PREFIX15, ...args);
|
|
7009
8030
|
}
|
|
7010
8031
|
}
|
|
7011
8032
|
};
|
|
@@ -7238,7 +8259,7 @@ var BUILTIN_LOCALES = {
|
|
|
7238
8259
|
pt
|
|
7239
8260
|
};
|
|
7240
8261
|
var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
|
|
7241
|
-
var
|
|
8262
|
+
var LOG_PREFIX16 = "[GuideKit:I18n]";
|
|
7242
8263
|
function isSupportedLocale(code) {
|
|
7243
8264
|
return SUPPORTED_LOCALE_CODES.has(code);
|
|
7244
8265
|
}
|
|
@@ -7276,7 +8297,7 @@ var I18n = class {
|
|
|
7276
8297
|
this.strings = strings;
|
|
7277
8298
|
this.resolvedLocale = resolvedLocale;
|
|
7278
8299
|
if (this.debug) {
|
|
7279
|
-
console.debug(`${
|
|
8300
|
+
console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
|
|
7280
8301
|
}
|
|
7281
8302
|
}
|
|
7282
8303
|
// -------------------------------------------------------------------------
|
|
@@ -7287,9 +8308,9 @@ var I18n = class {
|
|
|
7287
8308
|
const value = this.strings[key];
|
|
7288
8309
|
if (value === void 0) {
|
|
7289
8310
|
if (this.debug) {
|
|
7290
|
-
console.warn(`${
|
|
8311
|
+
console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
|
|
7291
8312
|
}
|
|
7292
|
-
return en[key] ?? key;
|
|
8313
|
+
return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
|
|
7293
8314
|
}
|
|
7294
8315
|
return value;
|
|
7295
8316
|
}
|
|
@@ -7303,7 +8324,7 @@ var I18n = class {
|
|
|
7303
8324
|
this.strings = strings;
|
|
7304
8325
|
this.resolvedLocale = resolvedLocale;
|
|
7305
8326
|
if (this.debug) {
|
|
7306
|
-
console.debug(`${
|
|
8327
|
+
console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
|
|
7307
8328
|
}
|
|
7308
8329
|
}
|
|
7309
8330
|
/** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
|
|
@@ -7323,7 +8344,7 @@ var I18n = class {
|
|
|
7323
8344
|
if (locale === "auto") {
|
|
7324
8345
|
const detected = detectLocaleFromDocument();
|
|
7325
8346
|
if (this.debug) {
|
|
7326
|
-
console.debug(`${
|
|
8347
|
+
console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
|
|
7327
8348
|
}
|
|
7328
8349
|
return {
|
|
7329
8350
|
strings: BUILTIN_LOCALES[detected],
|
|
@@ -7338,7 +8359,7 @@ var I18n = class {
|
|
|
7338
8359
|
}
|
|
7339
8360
|
if (this.debug) {
|
|
7340
8361
|
console.warn(
|
|
7341
|
-
`${
|
|
8362
|
+
`${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
|
|
7342
8363
|
);
|
|
7343
8364
|
}
|
|
7344
8365
|
return {
|
|
@@ -7349,7 +8370,7 @@ var I18n = class {
|
|
|
7349
8370
|
};
|
|
7350
8371
|
|
|
7351
8372
|
// src/auth/token-manager.ts
|
|
7352
|
-
var
|
|
8373
|
+
var LOG_PREFIX17 = "[GuideKit:Auth]";
|
|
7353
8374
|
var REFRESH_THRESHOLD = 0.8;
|
|
7354
8375
|
var MAX_RETRY_ATTEMPTS = 3;
|
|
7355
8376
|
var RETRY_BASE_MS = 1e3;
|
|
@@ -7628,7 +8649,7 @@ var TokenManager = class {
|
|
|
7628
8649
|
}
|
|
7629
8650
|
log(message) {
|
|
7630
8651
|
if (this.debug) {
|
|
7631
|
-
console.debug(`${
|
|
8652
|
+
console.debug(`${LOG_PREFIX17} ${message}`);
|
|
7632
8653
|
}
|
|
7633
8654
|
}
|
|
7634
8655
|
};
|
|
@@ -7760,6 +8781,11 @@ var GuideKitCore = class {
|
|
|
7760
8781
|
debug: this._debug
|
|
7761
8782
|
});
|
|
7762
8783
|
await this.tokenManager.start();
|
|
8784
|
+
if (!this._options.llm) {
|
|
8785
|
+
console.warn(
|
|
8786
|
+
"[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
|
|
8787
|
+
);
|
|
8788
|
+
}
|
|
7763
8789
|
this.resourceManager.register({
|
|
7764
8790
|
name: "token-manager",
|
|
7765
8791
|
cleanup: () => this.tokenManager?.destroy()
|
|
@@ -7882,21 +8908,50 @@ var GuideKitCore = class {
|
|
|
7882
8908
|
}
|
|
7883
8909
|
});
|
|
7884
8910
|
this.registerBuiltinTools();
|
|
7885
|
-
|
|
7886
|
-
const sttConfig = this._options.stt;
|
|
7887
|
-
const ttsConfig = this._options.tts;
|
|
7888
|
-
|
|
8911
|
+
{
|
|
8912
|
+
const sttConfig = this._options.stt ?? { provider: "web-speech" };
|
|
8913
|
+
const ttsConfig = this._options.tts ?? { provider: "web-speech" };
|
|
8914
|
+
let voiceSttConfig;
|
|
8915
|
+
let voiceTtsConfig;
|
|
8916
|
+
if (sttConfig.provider === "deepgram") {
|
|
8917
|
+
voiceSttConfig = {
|
|
8918
|
+
provider: "deepgram",
|
|
8919
|
+
apiKey: sttConfig.apiKey,
|
|
8920
|
+
model: sttConfig.model
|
|
8921
|
+
};
|
|
8922
|
+
} else if (sttConfig.provider === "elevenlabs") {
|
|
8923
|
+
voiceSttConfig = {
|
|
8924
|
+
provider: "elevenlabs",
|
|
8925
|
+
apiKey: sttConfig.apiKey,
|
|
8926
|
+
language: sttConfig.language
|
|
8927
|
+
};
|
|
8928
|
+
} else {
|
|
8929
|
+
voiceSttConfig = {
|
|
8930
|
+
provider: "web-speech",
|
|
8931
|
+
language: sttConfig.language,
|
|
8932
|
+
continuous: sttConfig.continuous,
|
|
8933
|
+
interimResults: sttConfig.interimResults
|
|
8934
|
+
};
|
|
8935
|
+
}
|
|
8936
|
+
if (ttsConfig.provider === "elevenlabs") {
|
|
8937
|
+
voiceTtsConfig = {
|
|
8938
|
+
provider: "elevenlabs",
|
|
8939
|
+
apiKey: ttsConfig.apiKey,
|
|
8940
|
+
voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
|
|
8941
|
+
};
|
|
8942
|
+
} else {
|
|
8943
|
+
voiceTtsConfig = {
|
|
8944
|
+
provider: "web-speech",
|
|
8945
|
+
voice: ttsConfig.voice,
|
|
8946
|
+
rate: ttsConfig.rate,
|
|
8947
|
+
pitch: ttsConfig.pitch,
|
|
8948
|
+
language: ttsConfig.language
|
|
8949
|
+
};
|
|
8950
|
+
}
|
|
8951
|
+
try {
|
|
7889
8952
|
this.voicePipeline = new VoicePipeline({
|
|
7890
|
-
sttConfig:
|
|
7891
|
-
|
|
7892
|
-
apiKey: sttConfig.apiKey,
|
|
7893
|
-
model: "model" in sttConfig ? sttConfig.model : void 0
|
|
7894
|
-
},
|
|
7895
|
-
ttsConfig: {
|
|
7896
|
-
provider: "elevenlabs",
|
|
7897
|
-
apiKey: ttsConfig.apiKey,
|
|
7898
|
-
voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
|
|
7899
|
-
},
|
|
8953
|
+
sttConfig: voiceSttConfig,
|
|
8954
|
+
ttsConfig: voiceTtsConfig,
|
|
7900
8955
|
debug: this._debug
|
|
7901
8956
|
});
|
|
7902
8957
|
this.voicePipeline.onStateChange((state, previous) => {
|
|
@@ -7929,6 +8984,11 @@ var GuideKitCore = class {
|
|
|
7929
8984
|
name: "voice-pipeline",
|
|
7930
8985
|
cleanup: () => this.voicePipeline?.destroy()
|
|
7931
8986
|
});
|
|
8987
|
+
} catch (_err) {
|
|
8988
|
+
this.voicePipeline = null;
|
|
8989
|
+
if (this._debug) {
|
|
8990
|
+
console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
|
|
8991
|
+
}
|
|
7932
8992
|
}
|
|
7933
8993
|
}
|
|
7934
8994
|
const session = this.contextManager.restoreSession();
|
|
@@ -8053,7 +9113,7 @@ var GuideKitCore = class {
|
|
|
8053
9113
|
return responseText;
|
|
8054
9114
|
} catch (error) {
|
|
8055
9115
|
const err = error instanceof GuideKitError ? error : new GuideKitError({
|
|
8056
|
-
code:
|
|
9116
|
+
code: ErrorCodes.UNKNOWN,
|
|
8057
9117
|
message: error instanceof Error ? error.message : "Unknown error",
|
|
8058
9118
|
recoverable: false,
|
|
8059
9119
|
suggestion: "Check the console for details."
|
|
@@ -8309,172 +9369,11 @@ var GuideKitCore = class {
|
|
|
8309
9369
|
};
|
|
8310
9370
|
}
|
|
8311
9371
|
/**
|
|
8312
|
-
*
|
|
8313
|
-
*
|
|
9372
|
+
* Unified built-in tool specifications — single source of truth for both
|
|
9373
|
+
* tool definitions (sent to LLM) and handler registration.
|
|
8314
9374
|
*/
|
|
8315
|
-
|
|
8316
|
-
|
|
8317
|
-
this.toolExecutor.registerTool({
|
|
8318
|
-
name: "highlight",
|
|
8319
|
-
execute: async (args) => {
|
|
8320
|
-
const sectionId = args.sectionId;
|
|
8321
|
-
const selector = args.selector;
|
|
8322
|
-
const tooltip = args.tooltip;
|
|
8323
|
-
const position = args.position;
|
|
8324
|
-
const result = this.highlight({ sectionId, selector, tooltip, position });
|
|
8325
|
-
return { success: result };
|
|
8326
|
-
}
|
|
8327
|
-
});
|
|
8328
|
-
this.toolExecutor.registerTool({
|
|
8329
|
-
name: "dismissHighlight",
|
|
8330
|
-
execute: async () => {
|
|
8331
|
-
this.dismissHighlight();
|
|
8332
|
-
return { success: true };
|
|
8333
|
-
}
|
|
8334
|
-
});
|
|
8335
|
-
this.toolExecutor.registerTool({
|
|
8336
|
-
name: "scrollToSection",
|
|
8337
|
-
execute: async (args) => {
|
|
8338
|
-
const sectionId = args.sectionId;
|
|
8339
|
-
const offset = args.offset;
|
|
8340
|
-
this.scrollToSection(sectionId, offset);
|
|
8341
|
-
return { success: true };
|
|
8342
|
-
}
|
|
8343
|
-
});
|
|
8344
|
-
this.toolExecutor.registerTool({
|
|
8345
|
-
name: "navigate",
|
|
8346
|
-
execute: async (args) => {
|
|
8347
|
-
const href = args.href;
|
|
8348
|
-
const result = await this.navigate(href);
|
|
8349
|
-
return { success: result, navigatedTo: result ? href : null };
|
|
8350
|
-
}
|
|
8351
|
-
});
|
|
8352
|
-
this.toolExecutor.registerTool({
|
|
8353
|
-
name: "startTour",
|
|
8354
|
-
execute: async (args) => {
|
|
8355
|
-
const sectionIds = args.sectionIds;
|
|
8356
|
-
const mode = args.mode ?? "manual";
|
|
8357
|
-
this.startTour(sectionIds, mode);
|
|
8358
|
-
return { success: true, steps: sectionIds.length };
|
|
8359
|
-
}
|
|
8360
|
-
});
|
|
8361
|
-
this.toolExecutor.registerTool({
|
|
8362
|
-
name: "readPageContent",
|
|
8363
|
-
execute: async (args) => {
|
|
8364
|
-
const sectionId = args.sectionId;
|
|
8365
|
-
const query = args.query;
|
|
8366
|
-
const model = this._currentPageModel;
|
|
8367
|
-
if (!model) return { error: "No page model available" };
|
|
8368
|
-
if (sectionId) {
|
|
8369
|
-
const section = model.sections.find((s) => s.id === sectionId);
|
|
8370
|
-
if (section) {
|
|
8371
|
-
const contentMapResult = await this.contextManager.getContent(sectionId);
|
|
8372
|
-
return {
|
|
8373
|
-
sectionId: section.id,
|
|
8374
|
-
label: section.label,
|
|
8375
|
-
summary: section.summary,
|
|
8376
|
-
contentMap: contentMapResult
|
|
8377
|
-
};
|
|
8378
|
-
}
|
|
8379
|
-
return { error: `Section "${sectionId}" not found` };
|
|
8380
|
-
}
|
|
8381
|
-
if (query) {
|
|
8382
|
-
const queryLower = query.toLowerCase();
|
|
8383
|
-
const matches = model.sections.filter(
|
|
8384
|
-
(s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
|
|
8385
|
-
);
|
|
8386
|
-
return {
|
|
8387
|
-
query,
|
|
8388
|
-
results: matches.slice(0, 5).map((s) => ({
|
|
8389
|
-
sectionId: s.id,
|
|
8390
|
-
label: s.label,
|
|
8391
|
-
snippet: s.summary?.slice(0, 200)
|
|
8392
|
-
}))
|
|
8393
|
-
};
|
|
8394
|
-
}
|
|
8395
|
-
return { error: "Provide either sectionId or query" };
|
|
8396
|
-
}
|
|
8397
|
-
});
|
|
8398
|
-
this.toolExecutor.registerTool({
|
|
8399
|
-
name: "getVisibleSections",
|
|
8400
|
-
execute: async () => {
|
|
8401
|
-
const model = this._currentPageModel;
|
|
8402
|
-
if (!model) return { sections: [] };
|
|
8403
|
-
return {
|
|
8404
|
-
sections: model.sections.slice(0, 10).map((s) => ({
|
|
8405
|
-
id: s.id,
|
|
8406
|
-
label: s.label,
|
|
8407
|
-
selector: s.selector,
|
|
8408
|
-
score: s.score
|
|
8409
|
-
}))
|
|
8410
|
-
};
|
|
8411
|
-
}
|
|
8412
|
-
});
|
|
8413
|
-
this.toolExecutor.registerTool({
|
|
8414
|
-
name: "clickElement",
|
|
8415
|
-
execute: async (args) => {
|
|
8416
|
-
if (typeof document === "undefined") return { success: false, error: "Not in browser" };
|
|
8417
|
-
const selector = args.selector;
|
|
8418
|
-
const el = document.querySelector(selector);
|
|
8419
|
-
if (!el) return { success: false, error: `Element not found: ${selector}` };
|
|
8420
|
-
if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
|
|
8421
|
-
const clickableRules = this._options.options?.clickableSelectors;
|
|
8422
|
-
const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
|
|
8423
|
-
try {
|
|
8424
|
-
return el.matches(pattern);
|
|
8425
|
-
} catch {
|
|
8426
|
-
return selector === pattern;
|
|
8427
|
-
}
|
|
8428
|
-
}) ?? false;
|
|
8429
|
-
if (!isInDevAllowList) {
|
|
8430
|
-
const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
|
|
8431
|
-
try {
|
|
8432
|
-
return el.matches(pattern);
|
|
8433
|
-
} catch {
|
|
8434
|
-
return false;
|
|
8435
|
-
}
|
|
8436
|
-
});
|
|
8437
|
-
if (defaultDenied) {
|
|
8438
|
-
return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
|
|
8439
|
-
}
|
|
8440
|
-
}
|
|
8441
|
-
if (clickableRules?.deny?.length) {
|
|
8442
|
-
const denied = clickableRules.deny.some((pattern) => {
|
|
8443
|
-
try {
|
|
8444
|
-
return el.matches(pattern);
|
|
8445
|
-
} catch {
|
|
8446
|
-
return selector === pattern;
|
|
8447
|
-
}
|
|
8448
|
-
});
|
|
8449
|
-
if (denied) {
|
|
8450
|
-
return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
|
|
8451
|
-
}
|
|
8452
|
-
}
|
|
8453
|
-
if (clickableRules?.allow?.length && !isInDevAllowList) {
|
|
8454
|
-
return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
|
|
8455
|
-
}
|
|
8456
|
-
el.click();
|
|
8457
|
-
return { success: true };
|
|
8458
|
-
}
|
|
8459
|
-
});
|
|
8460
|
-
this.toolExecutor.registerTool({
|
|
8461
|
-
name: "executeCustomAction",
|
|
8462
|
-
execute: async (args) => {
|
|
8463
|
-
const actionId = args.actionId;
|
|
8464
|
-
const params = args.params ?? {};
|
|
8465
|
-
const action = this.customActions.get(actionId);
|
|
8466
|
-
if (!action) return { error: `Unknown action: ${actionId}` };
|
|
8467
|
-
try {
|
|
8468
|
-
const result = await action.handler(params);
|
|
8469
|
-
return { success: true, result };
|
|
8470
|
-
} catch (err) {
|
|
8471
|
-
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
|
8472
|
-
}
|
|
8473
|
-
}
|
|
8474
|
-
});
|
|
8475
|
-
}
|
|
8476
|
-
getToolDefinitions() {
|
|
8477
|
-
const builtinTools = [
|
|
9375
|
+
getBuiltinToolSpecs() {
|
|
9376
|
+
return [
|
|
8478
9377
|
{
|
|
8479
9378
|
name: "highlight",
|
|
8480
9379
|
description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
|
|
@@ -8484,13 +9383,27 @@ var GuideKitCore = class {
|
|
|
8484
9383
|
tooltip: { type: "string", description: "Text to show in tooltip" },
|
|
8485
9384
|
position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
|
|
8486
9385
|
},
|
|
8487
|
-
|
|
9386
|
+
required: [],
|
|
9387
|
+
schemaVersion: 1,
|
|
9388
|
+
execute: async (args) => {
|
|
9389
|
+
const sectionId = args.sectionId;
|
|
9390
|
+
const selector = args.selector;
|
|
9391
|
+
const tooltip = args.tooltip;
|
|
9392
|
+
const position = args.position;
|
|
9393
|
+
const result = this.highlight({ sectionId, selector, tooltip, position });
|
|
9394
|
+
return { success: result };
|
|
9395
|
+
}
|
|
8488
9396
|
},
|
|
8489
9397
|
{
|
|
8490
9398
|
name: "dismissHighlight",
|
|
8491
9399
|
description: "Remove the current spotlight overlay.",
|
|
8492
9400
|
parameters: {},
|
|
8493
|
-
|
|
9401
|
+
required: [],
|
|
9402
|
+
schemaVersion: 1,
|
|
9403
|
+
execute: async () => {
|
|
9404
|
+
this.dismissHighlight();
|
|
9405
|
+
return { success: true };
|
|
9406
|
+
}
|
|
8494
9407
|
},
|
|
8495
9408
|
{
|
|
8496
9409
|
name: "scrollToSection",
|
|
@@ -8499,7 +9412,14 @@ var GuideKitCore = class {
|
|
|
8499
9412
|
sectionId: { type: "string", description: "ID of the section to scroll to" },
|
|
8500
9413
|
offset: { type: "number", description: "Pixel offset for sticky headers" }
|
|
8501
9414
|
},
|
|
8502
|
-
|
|
9415
|
+
required: ["sectionId"],
|
|
9416
|
+
schemaVersion: 1,
|
|
9417
|
+
execute: async (args) => {
|
|
9418
|
+
const sectionId = args.sectionId;
|
|
9419
|
+
const offset = args.offset;
|
|
9420
|
+
this.scrollToSection(sectionId, offset);
|
|
9421
|
+
return { success: true };
|
|
9422
|
+
}
|
|
8503
9423
|
},
|
|
8504
9424
|
{
|
|
8505
9425
|
name: "navigate",
|
|
@@ -8507,7 +9427,13 @@ var GuideKitCore = class {
|
|
|
8507
9427
|
parameters: {
|
|
8508
9428
|
href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
|
|
8509
9429
|
},
|
|
8510
|
-
|
|
9430
|
+
required: ["href"],
|
|
9431
|
+
schemaVersion: 1,
|
|
9432
|
+
execute: async (args) => {
|
|
9433
|
+
const href = args.href;
|
|
9434
|
+
const result = await this.navigate(href);
|
|
9435
|
+
return { success: result, navigatedTo: result ? href : null };
|
|
9436
|
+
}
|
|
8511
9437
|
},
|
|
8512
9438
|
{
|
|
8513
9439
|
name: "startTour",
|
|
@@ -8516,7 +9442,14 @@ var GuideKitCore = class {
|
|
|
8516
9442
|
sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
|
|
8517
9443
|
mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
|
|
8518
9444
|
},
|
|
8519
|
-
|
|
9445
|
+
required: ["sectionIds"],
|
|
9446
|
+
schemaVersion: 1,
|
|
9447
|
+
execute: async (args) => {
|
|
9448
|
+
const sectionIds = args.sectionIds;
|
|
9449
|
+
const mode = args.mode ?? "manual";
|
|
9450
|
+
this.startTour(sectionIds, mode);
|
|
9451
|
+
return { success: true, steps: sectionIds.length };
|
|
9452
|
+
}
|
|
8520
9453
|
},
|
|
8521
9454
|
{
|
|
8522
9455
|
name: "readPageContent",
|
|
@@ -8525,13 +9458,61 @@ var GuideKitCore = class {
|
|
|
8525
9458
|
sectionId: { type: "string", description: "Section ID to read" },
|
|
8526
9459
|
query: { type: "string", description: "Keyword to search for across sections" }
|
|
8527
9460
|
},
|
|
8528
|
-
|
|
9461
|
+
required: [],
|
|
9462
|
+
schemaVersion: 1,
|
|
9463
|
+
execute: async (args) => {
|
|
9464
|
+
const sectionId = args.sectionId;
|
|
9465
|
+
const query = args.query;
|
|
9466
|
+
const model = this._currentPageModel;
|
|
9467
|
+
if (!model) return { error: "No page model available" };
|
|
9468
|
+
if (sectionId) {
|
|
9469
|
+
const section = model.sections.find((s) => s.id === sectionId);
|
|
9470
|
+
if (section) {
|
|
9471
|
+
const contentMapResult = await this.contextManager.getContent(sectionId);
|
|
9472
|
+
return {
|
|
9473
|
+
sectionId: section.id,
|
|
9474
|
+
label: section.label,
|
|
9475
|
+
summary: section.summary,
|
|
9476
|
+
contentMap: contentMapResult
|
|
9477
|
+
};
|
|
9478
|
+
}
|
|
9479
|
+
return { error: `Section "${sectionId}" not found` };
|
|
9480
|
+
}
|
|
9481
|
+
if (query) {
|
|
9482
|
+
const queryLower = query.toLowerCase();
|
|
9483
|
+
const matches = model.sections.filter(
|
|
9484
|
+
(s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
|
|
9485
|
+
);
|
|
9486
|
+
return {
|
|
9487
|
+
query,
|
|
9488
|
+
results: matches.slice(0, 5).map((s) => ({
|
|
9489
|
+
sectionId: s.id,
|
|
9490
|
+
label: s.label,
|
|
9491
|
+
snippet: s.summary?.slice(0, 200)
|
|
9492
|
+
}))
|
|
9493
|
+
};
|
|
9494
|
+
}
|
|
9495
|
+
return { error: "Provide either sectionId or query" };
|
|
9496
|
+
}
|
|
8529
9497
|
},
|
|
8530
9498
|
{
|
|
8531
9499
|
name: "getVisibleSections",
|
|
8532
9500
|
description: "Get the list of sections currently visible in the user viewport.",
|
|
8533
9501
|
parameters: {},
|
|
8534
|
-
|
|
9502
|
+
required: [],
|
|
9503
|
+
schemaVersion: 1,
|
|
9504
|
+
execute: async () => {
|
|
9505
|
+
const model = this._currentPageModel;
|
|
9506
|
+
if (!model) return { sections: [] };
|
|
9507
|
+
return {
|
|
9508
|
+
sections: model.sections.slice(0, 10).map((s) => ({
|
|
9509
|
+
id: s.id,
|
|
9510
|
+
label: s.label,
|
|
9511
|
+
selector: s.selector,
|
|
9512
|
+
score: s.score
|
|
9513
|
+
}))
|
|
9514
|
+
};
|
|
9515
|
+
}
|
|
8535
9516
|
},
|
|
8536
9517
|
{
|
|
8537
9518
|
name: "clickElement",
|
|
@@ -8539,7 +9520,52 @@ var GuideKitCore = class {
|
|
|
8539
9520
|
parameters: {
|
|
8540
9521
|
selector: { type: "string", description: "CSS selector of the element to click" }
|
|
8541
9522
|
},
|
|
8542
|
-
|
|
9523
|
+
required: ["selector"],
|
|
9524
|
+
schemaVersion: 1,
|
|
9525
|
+
execute: async (args) => {
|
|
9526
|
+
if (typeof document === "undefined") return { success: false, error: "Not in browser" };
|
|
9527
|
+
const selector = args.selector;
|
|
9528
|
+
const el = document.querySelector(selector);
|
|
9529
|
+
if (!el) return { success: false, error: `Element not found: ${selector}` };
|
|
9530
|
+
if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
|
|
9531
|
+
const clickableRules = this._options.options?.clickableSelectors;
|
|
9532
|
+
const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
|
|
9533
|
+
try {
|
|
9534
|
+
return el.matches(pattern);
|
|
9535
|
+
} catch {
|
|
9536
|
+
return selector === pattern;
|
|
9537
|
+
}
|
|
9538
|
+
}) ?? false;
|
|
9539
|
+
if (!isInDevAllowList) {
|
|
9540
|
+
const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
|
|
9541
|
+
try {
|
|
9542
|
+
return el.matches(pattern);
|
|
9543
|
+
} catch {
|
|
9544
|
+
return false;
|
|
9545
|
+
}
|
|
9546
|
+
});
|
|
9547
|
+
if (defaultDenied) {
|
|
9548
|
+
return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
|
|
9549
|
+
}
|
|
9550
|
+
}
|
|
9551
|
+
if (clickableRules?.deny?.length) {
|
|
9552
|
+
const denied = clickableRules.deny.some((pattern) => {
|
|
9553
|
+
try {
|
|
9554
|
+
return el.matches(pattern);
|
|
9555
|
+
} catch {
|
|
9556
|
+
return selector === pattern;
|
|
9557
|
+
}
|
|
9558
|
+
});
|
|
9559
|
+
if (denied) {
|
|
9560
|
+
return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
|
|
9561
|
+
}
|
|
9562
|
+
}
|
|
9563
|
+
if (clickableRules?.allow?.length && !isInDevAllowList) {
|
|
9564
|
+
return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
|
|
9565
|
+
}
|
|
9566
|
+
el.click();
|
|
9567
|
+
return { success: true };
|
|
9568
|
+
}
|
|
8543
9569
|
},
|
|
8544
9570
|
{
|
|
8545
9571
|
name: "executeCustomAction",
|
|
@@ -8548,9 +9574,37 @@ var GuideKitCore = class {
|
|
|
8548
9574
|
actionId: { type: "string", description: "ID of the custom action" },
|
|
8549
9575
|
params: { type: "object", description: "Parameters for the action" }
|
|
8550
9576
|
},
|
|
8551
|
-
|
|
9577
|
+
required: ["actionId"],
|
|
9578
|
+
schemaVersion: 1,
|
|
9579
|
+
execute: async (args) => {
|
|
9580
|
+
const actionId = args.actionId;
|
|
9581
|
+
const params = args.params ?? {};
|
|
9582
|
+
const action = this.customActions.get(actionId);
|
|
9583
|
+
if (!action) return { error: `Unknown action: ${actionId}` };
|
|
9584
|
+
try {
|
|
9585
|
+
const result = await action.handler(params);
|
|
9586
|
+
return { success: true, result };
|
|
9587
|
+
} catch (err) {
|
|
9588
|
+
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
|
9589
|
+
}
|
|
9590
|
+
}
|
|
8552
9591
|
}
|
|
8553
9592
|
];
|
|
9593
|
+
}
|
|
9594
|
+
/**
|
|
9595
|
+
* Register all built-in tool handlers with the ToolExecutor.
|
|
9596
|
+
* Called once during init() after VisualGuidance and all subsystems are ready.
|
|
9597
|
+
*/
|
|
9598
|
+
registerBuiltinTools() {
|
|
9599
|
+
if (!this.toolExecutor) return;
|
|
9600
|
+
for (const spec of this.getBuiltinToolSpecs()) {
|
|
9601
|
+
this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
|
|
9602
|
+
}
|
|
9603
|
+
}
|
|
9604
|
+
getToolDefinitions() {
|
|
9605
|
+
const builtinTools = this.getBuiltinToolSpecs().map(
|
|
9606
|
+
({ execute: _execute, ...def }) => def
|
|
9607
|
+
);
|
|
8554
9608
|
for (const [actionId, action] of this.customActions) {
|
|
8555
9609
|
builtinTools.push({
|
|
8556
9610
|
name: `action_${actionId}`,
|
|
@@ -8563,6 +9617,6 @@ var GuideKitCore = class {
|
|
|
8563
9617
|
}
|
|
8564
9618
|
};
|
|
8565
9619
|
|
|
8566
|
-
export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, OpenAIAdapter, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, createEventBus, isGuideKitError };
|
|
9620
|
+
export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, OpenAIAdapter, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, VoicePipeline, WebSpeechSTT, WebSpeechTTS, createEventBus, isGuideKitError };
|
|
8567
9621
|
//# sourceMappingURL=index.js.map
|
|
8568
9622
|
//# sourceMappingURL=index.js.map
|